[SCTP]: Fix build warnings with IPV6 disabled.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
1da177e4 118
1da177e4
LT
119/*
120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
121 * in the operation structures but are done directly via the socketcall() multiplexor.
122 */
123
da7071d7 124static const struct file_operations socket_file_ops = {
1da177e4
LT
125 .owner = THIS_MODULE,
126 .llseek = no_llseek,
127 .aio_read = sock_aio_read,
128 .aio_write = sock_aio_write,
129 .poll = sock_poll,
130 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
131#ifdef CONFIG_COMPAT
132 .compat_ioctl = compat_sock_ioctl,
133#endif
1da177e4
LT
134 .mmap = sock_mmap,
135 .open = sock_no_open, /* special open code to disallow open via /proc */
136 .release = sock_close,
137 .fasync = sock_fasync,
5274f052
JA
138 .sendpage = sock_sendpage,
139 .splice_write = generic_splice_sendpage,
9c55e01c 140 .splice_read = sock_splice_read,
1da177e4
LT
141};
142
143/*
144 * The protocol list. Each protocol is registered in here.
145 */
146
1da177e4 147static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 148static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 149
1da177e4
LT
150/*
151 * Statistics counters of the socket lists
152 */
153
154static DEFINE_PER_CPU(int, sockets_in_use) = 0;
155
156/*
89bddce5
SH
157 * Support routines.
158 * Move socket addresses back and forth across the kernel/user
159 * divide and look after the messy bits.
1da177e4
LT
160 */
161
89bddce5 162#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
163 16 for IP, 16 for IPX,
164 24 for IPv6,
89bddce5 165 about 80 for AX.25
1da177e4
LT
166 must be at least one bigger than
167 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 168 :unix_mkname()).
1da177e4 169 */
89bddce5 170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
182int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
183{
89bddce5 184 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5
SH
209
210int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
211 int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
89bddce5
SH
216 err = get_user(len, ulen);
217 if (err)
1da177e4 218 return err;
89bddce5
SH
219 if (len > klen)
220 len = klen;
221 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 222 return -EINVAL;
89bddce5 223 if (len) {
d6fe3945
SG
224 if (audit_sockaddr(klen, kaddr))
225 return -ENOMEM;
89bddce5 226 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
227 return -EFAULT;
228 }
229 /*
89bddce5
SH
230 * "fromlen shall refer to the value before truncation.."
231 * 1003.1g
1da177e4
LT
232 */
233 return __put_user(klen, ulen);
234}
235
236#define SOCKFS_MAGIC 0x534F434B
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
247 init_waitqueue_head(&ei->socket.wait);
89bddce5 248
1da177e4
LT
249 ei->socket.fasync_list = NULL;
250 ei->socket.state = SS_UNCONNECTED;
251 ei->socket.flags = 0;
252 ei->socket.ops = NULL;
253 ei->socket.sk = NULL;
254 ei->socket.file = NULL;
1da177e4
LT
255
256 return &ei->vfs_inode;
257}
258
259static void sock_destroy_inode(struct inode *inode)
260{
261 kmem_cache_free(sock_inode_cachep,
262 container_of(inode, struct socket_alloc, vfs_inode));
263}
264
4ba9b9d0 265static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 266{
89bddce5 267 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 268
a35afb83 269 inode_init_once(&ei->vfs_inode);
1da177e4 270}
89bddce5 271
1da177e4
LT
272static int init_inodecache(void)
273{
274 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
275 sizeof(struct socket_alloc),
276 0,
277 (SLAB_HWCACHE_ALIGN |
278 SLAB_RECLAIM_ACCOUNT |
279 SLAB_MEM_SPREAD),
20c2df83 280 init_once);
1da177e4
LT
281 if (sock_inode_cachep == NULL)
282 return -ENOMEM;
283 return 0;
284}
285
286static struct super_operations sockfs_ops = {
287 .alloc_inode = sock_alloc_inode,
288 .destroy_inode =sock_destroy_inode,
289 .statfs = simple_statfs,
290};
291
454e2398 292static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
293 int flags, const char *dev_name, void *data,
294 struct vfsmount *mnt)
1da177e4 295{
454e2398
DH
296 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
297 mnt);
1da177e4
LT
298}
299
ba89966c 300static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
301
302static struct file_system_type sock_fs_type = {
303 .name = "sockfs",
304 .get_sb = sockfs_get_sb,
305 .kill_sb = kill_anon_super,
306};
89bddce5 307
1da177e4
LT
308static int sockfs_delete_dentry(struct dentry *dentry)
309{
304e61e6
ED
310 /*
311 * At creation time, we pretended this dentry was hashed
312 * (by clearing DCACHE_UNHASHED bit in d_flags)
313 * At delete time, we restore the truth : not hashed.
314 * (so that dput() can proceed correctly)
315 */
316 dentry->d_flags |= DCACHE_UNHASHED;
317 return 0;
1da177e4 318}
c23fbb6b
ED
319
320/*
321 * sockfs_dname() is called from d_path().
322 */
323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
324{
325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
326 dentry->d_inode->i_ino);
327}
328
1da177e4 329static struct dentry_operations sockfs_dentry_operations = {
89bddce5 330 .d_delete = sockfs_delete_dentry,
c23fbb6b 331 .d_dname = sockfs_dname,
1da177e4
LT
332};
333
334/*
335 * Obtains the first available file descriptor and sets it up for use.
336 *
39d8c1b6
DM
337 * These functions create file structures and maps them to fd space
338 * of the current process. On success it returns file descriptor
1da177e4
LT
339 * and file struct implicitly stored in sock->file.
340 * Note that another thread may close file descriptor before we return
341 * from this function. We use the fact that now we do not refer
342 * to socket after mapping. If one day we will need it, this
343 * function will increment ref. count on file by 1.
344 *
345 * In any case returned fd MAY BE not valid!
346 * This race condition is unavoidable
347 * with shared fd spaces, we cannot solve it inside kernel,
348 * but we take care of internal coherence yet.
349 */
350
39d8c1b6 351static int sock_alloc_fd(struct file **filep)
1da177e4
LT
352{
353 int fd;
1da177e4
LT
354
355 fd = get_unused_fd();
39d8c1b6 356 if (likely(fd >= 0)) {
1da177e4
LT
357 struct file *file = get_empty_filp();
358
39d8c1b6
DM
359 *filep = file;
360 if (unlikely(!file)) {
1da177e4 361 put_unused_fd(fd);
39d8c1b6 362 return -ENFILE;
1da177e4 363 }
39d8c1b6
DM
364 } else
365 *filep = NULL;
366 return fd;
367}
1da177e4 368
39d8c1b6
DM
369static int sock_attach_fd(struct socket *sock, struct file *file)
370{
ce8d2cdf 371 struct dentry *dentry;
c23fbb6b 372 struct qstr name = { .name = "" };
39d8c1b6 373
ce8d2cdf
DH
374 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!dentry))
39d8c1b6
DM
376 return -ENOMEM;
377
ce8d2cdf 378 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
379 /*
380 * We dont want to push this dentry into global dentry hash table.
381 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
382 * This permits a working /proc/$pid/fd/XXX on sockets
383 */
ce8d2cdf
DH
384 dentry->d_flags &= ~DCACHE_UNHASHED;
385 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
386
387 sock->file = file;
ce8d2cdf
DH
388 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
389 &socket_file_ops);
390 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
391 file->f_flags = O_RDWR;
392 file->f_pos = 0;
393 file->private_data = sock;
1da177e4 394
39d8c1b6
DM
395 return 0;
396}
397
/*
 *	Allocate a descriptor and file for @sock, attach them and install the
 *	file in the descriptor table. Returns the new descriptor, or a
 *	negative error from sock_alloc_fd() or sock_attach_fd(); on an attach
 *	failure the file and descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
505static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
506{
507 return -ENXIO;
508}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
92f37fd2
ED
596/*
597 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
598 */
599void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
600 struct sk_buff *skb)
601{
602 ktime_t kt = skb->tstamp;
603
604 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
605 struct timeval tv;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 tv = ktime_to_timeval(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
613 } else {
614 struct timespec ts;
615 /* Race occurred between timestamp enabling and packet
616 receiving. Fill in the current time for now. */
617 if (kt.tv64 == 0)
618 kt = ktime_get_real();
619 skb->tstamp = kt;
620 ts = ktime_to_timespec(kt);
621 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
622 }
623}
624
7c81fd8b
ACM
625EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
626
89bddce5 627static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
628 struct msghdr *msg, size_t size, int flags)
629{
630 int err;
631 struct sock_iocb *si = kiocb_to_siocb(iocb);
632
633 si->sock = sock;
634 si->scm = NULL;
635 si->msg = msg;
636 si->size = size;
637 si->flags = flags;
638
639 err = security_socket_recvmsg(sock, msg, size, flags);
640 if (err)
641 return err;
642
643 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
644}
645
89bddce5 646int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
647 size_t size, int flags)
648{
649 struct kiocb iocb;
650 struct sock_iocb siocb;
651 int ret;
652
89bddce5 653 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
654 iocb.private = &siocb;
655 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
656 if (-EIOCBQUEUED == ret)
657 ret = wait_on_sync_kiocb(&iocb);
658 return ret;
659}
660
89bddce5
SH
661int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
662 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
663{
664 mm_segment_t oldfs = get_fs();
665 int result;
666
667 set_fs(KERNEL_DS);
668 /*
669 * the following is safe, since for compiler definitions of kvec and
670 * iovec are identical, yielding the same in-core layout and alignment
671 */
89bddce5 672 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
673 result = sock_recvmsg(sock, msg, size, flags);
674 set_fs(oldfs);
675 return result;
676}
677
678static void sock_aio_dtor(struct kiocb *iocb)
679{
680 kfree(iocb->private);
681}
682
ce1d4d3e
CH
683static ssize_t sock_sendpage(struct file *file, struct page *page,
684 int offset, size_t size, loff_t *ppos, int more)
1da177e4 685{
1da177e4
LT
686 struct socket *sock;
687 int flags;
688
ce1d4d3e
CH
689 sock = file->private_data;
690
691 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
692 if (more)
693 flags |= MSG_MORE;
694
695 return sock->ops->sendpage(sock, page, offset, size, flags);
696}
1da177e4 697
9c55e01c
JA
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
997b37da
RDC
704 if (unlikely(!sock->ops->splice_read))
705 return -EINVAL;
706
9c55e01c
JA
707 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
708}
709
ce1d4d3e 710static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 711 struct sock_iocb *siocb)
ce1d4d3e
CH
712{
713 if (!is_sync_kiocb(iocb)) {
714 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
715 if (!siocb)
716 return NULL;
1da177e4
LT
717 iocb->ki_dtor = sock_aio_dtor;
718 }
1da177e4 719
ce1d4d3e 720 siocb->kiocb = iocb;
ce1d4d3e
CH
721 iocb->private = siocb;
722 return siocb;
1da177e4
LT
723}
724
ce1d4d3e 725static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
726 struct file *file, const struct iovec *iov,
727 unsigned long nr_segs)
ce1d4d3e
CH
728{
729 struct socket *sock = file->private_data;
730 size_t size = 0;
731 int i;
1da177e4 732
89bddce5
SH
733 for (i = 0; i < nr_segs; i++)
734 size += iov[i].iov_len;
1da177e4 735
ce1d4d3e
CH
736 msg->msg_name = NULL;
737 msg->msg_namelen = 0;
738 msg->msg_control = NULL;
739 msg->msg_controllen = 0;
89bddce5 740 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
741 msg->msg_iovlen = nr_segs;
742 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
743
744 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
745}
746
027445c3
BP
747static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
748 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
749{
750 struct sock_iocb siocb, *x;
751
1da177e4
LT
752 if (pos != 0)
753 return -ESPIPE;
027445c3
BP
754
755 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
756 return 0;
757
027445c3
BP
758
759 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
760 if (!x)
761 return -ENOMEM;
027445c3 762 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
763}
764
ce1d4d3e 765static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
766 struct file *file, const struct iovec *iov,
767 unsigned long nr_segs)
1da177e4 768{
ce1d4d3e
CH
769 struct socket *sock = file->private_data;
770 size_t size = 0;
771 int i;
1da177e4 772
89bddce5
SH
773 for (i = 0; i < nr_segs; i++)
774 size += iov[i].iov_len;
1da177e4 775
ce1d4d3e
CH
776 msg->msg_name = NULL;
777 msg->msg_namelen = 0;
778 msg->msg_control = NULL;
779 msg->msg_controllen = 0;
89bddce5 780 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
781 msg->msg_iovlen = nr_segs;
782 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
783 if (sock->type == SOCK_SEQPACKET)
784 msg->msg_flags |= MSG_EOR;
1da177e4 785
ce1d4d3e 786 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
787}
788
027445c3
BP
789static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
790 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
791{
792 struct sock_iocb siocb, *x;
1da177e4 793
ce1d4d3e
CH
794 if (pos != 0)
795 return -ESPIPE;
027445c3 796
027445c3 797 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
798 if (!x)
799 return -ENOMEM;
1da177e4 800
027445c3 801 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
802}
803
1da177e4
LT
804/*
805 * Atomic setting of ioctl hooks to avoid race
806 * with module unload.
807 */
808
4a3e2f71 809static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 810static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 811
881d966b 812void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 813{
4a3e2f71 814 mutex_lock(&br_ioctl_mutex);
1da177e4 815 br_ioctl_hook = hook;
4a3e2f71 816 mutex_unlock(&br_ioctl_mutex);
1da177e4 817}
89bddce5 818
1da177e4
LT
819EXPORT_SYMBOL(brioctl_set);
820
4a3e2f71 821static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 822static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 823
881d966b 824void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 825{
4a3e2f71 826 mutex_lock(&vlan_ioctl_mutex);
1da177e4 827 vlan_ioctl_hook = hook;
4a3e2f71 828 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 829}
89bddce5 830
1da177e4
LT
831EXPORT_SYMBOL(vlan_ioctl_set);
832
4a3e2f71 833static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 834static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 835
89bddce5 836void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 837{
4a3e2f71 838 mutex_lock(&dlci_ioctl_mutex);
1da177e4 839 dlci_ioctl_hook = hook;
4a3e2f71 840 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 841}
89bddce5 842
1da177e4
LT
843EXPORT_SYMBOL(dlci_ioctl_set);
844
845/*
846 * With an ioctl, arg may well be a user mode pointer, but we don't know
847 * what to do with it - that's up to the protocol still.
848 */
849
850static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
851{
852 struct socket *sock;
881d966b 853 struct sock *sk;
1da177e4
LT
854 void __user *argp = (void __user *)arg;
855 int pid, err;
881d966b 856 struct net *net;
1da177e4 857
b69aee04 858 sock = file->private_data;
881d966b
EB
859 sk = sock->sk;
860 net = sk->sk_net;
1da177e4 861 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 862 err = dev_ioctl(net, cmd, argp);
1da177e4 863 } else
d86b5e0e 864#ifdef CONFIG_WIRELESS_EXT
1da177e4 865 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 866 err = dev_ioctl(net, cmd, argp);
1da177e4 867 } else
89bddce5
SH
868#endif /* CONFIG_WIRELESS_EXT */
869 switch (cmd) {
1da177e4
LT
870 case FIOSETOWN:
871 case SIOCSPGRP:
872 err = -EFAULT;
873 if (get_user(pid, (int __user *)argp))
874 break;
875 err = f_setown(sock->file, pid, 1);
876 break;
877 case FIOGETOWN:
878 case SIOCGPGRP:
609d7fa9 879 err = put_user(f_getown(sock->file),
89bddce5 880 (int __user *)argp);
1da177e4
LT
881 break;
882 case SIOCGIFBR:
883 case SIOCSIFBR:
884 case SIOCBRADDBR:
885 case SIOCBRDELBR:
886 err = -ENOPKG;
887 if (!br_ioctl_hook)
888 request_module("bridge");
889
4a3e2f71 890 mutex_lock(&br_ioctl_mutex);
89bddce5 891 if (br_ioctl_hook)
881d966b 892 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 893 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
894 break;
895 case SIOCGIFVLAN:
896 case SIOCSIFVLAN:
897 err = -ENOPKG;
898 if (!vlan_ioctl_hook)
899 request_module("8021q");
900
4a3e2f71 901 mutex_lock(&vlan_ioctl_mutex);
1da177e4 902 if (vlan_ioctl_hook)
881d966b 903 err = vlan_ioctl_hook(net, argp);
4a3e2f71 904 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 905 break;
1da177e4
LT
906 case SIOCADDDLCI:
907 case SIOCDELDLCI:
908 err = -ENOPKG;
909 if (!dlci_ioctl_hook)
910 request_module("dlci");
911
912 if (dlci_ioctl_hook) {
4a3e2f71 913 mutex_lock(&dlci_ioctl_mutex);
1da177e4 914 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 915 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
916 }
917 break;
918 default:
919 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
920
921 /*
922 * If this ioctl is unknown try to hand it down
923 * to the NIC driver.
924 */
925 if (err == -ENOIOCTLCMD)
881d966b 926 err = dev_ioctl(net, cmd, argp);
1da177e4 927 break;
89bddce5 928 }
1da177e4
LT
929 return err;
930}
931
932int sock_create_lite(int family, int type, int protocol, struct socket **res)
933{
934 int err;
935 struct socket *sock = NULL;
89bddce5 936
1da177e4
LT
937 err = security_socket_create(family, type, protocol, 1);
938 if (err)
939 goto out;
940
941 sock = sock_alloc();
942 if (!sock) {
943 err = -ENOMEM;
944 goto out;
945 }
946
1da177e4 947 sock->type = type;
7420ed23
VY
948 err = security_socket_post_create(sock, family, type, protocol, 1);
949 if (err)
950 goto out_release;
951
1da177e4
LT
952out:
953 *res = sock;
954 return err;
7420ed23
VY
955out_release:
956 sock_release(sock);
957 sock = NULL;
958 goto out;
1da177e4
LT
959}
960
961/* No kernel lock held - perfect */
89bddce5 962static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
963{
964 struct socket *sock;
965
966 /*
89bddce5 967 * We can't return errors to poll, so it's either yes or no.
1da177e4 968 */
b69aee04 969 sock = file->private_data;
1da177e4
LT
970 return sock->ops->poll(file, sock, wait);
971}
972
89bddce5 973static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 974{
b69aee04 975 struct socket *sock = file->private_data;
1da177e4
LT
976
977 return sock->ops->mmap(file, sock, vma);
978}
979
20380731 980static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
981{
982 /*
89bddce5
SH
983 * It was possible the inode is NULL we were
984 * closing an unfinished socket.
1da177e4
LT
985 */
986
89bddce5 987 if (!inode) {
1da177e4
LT
988 printk(KERN_DEBUG "sock_close: NULL inode\n");
989 return 0;
990 }
991 sock_fasync(-1, filp, 0);
992 sock_release(SOCKET_I(inode));
993 return 0;
994}
995
996/*
997 * Update the socket async list
998 *
999 * Fasync_list locking strategy.
1000 *
1001 * 1. fasync_list is modified only under process context socket lock
1002 * i.e. under semaphore.
1003 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1004 * or under socket lock.
1005 * 3. fasync_list can be used from softirq context, so that
1006 * modification under socket lock have to be enhanced with
1007 * write_lock_bh(&sk->sk_callback_lock).
1008 * --ANK (990710)
1009 */
1010
1011static int sock_fasync(int fd, struct file *filp, int on)
1012{
89bddce5 1013 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1014 struct socket *sock;
1015 struct sock *sk;
1016
89bddce5 1017 if (on) {
8b3a7005 1018 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1019 if (fna == NULL)
1da177e4
LT
1020 return -ENOMEM;
1021 }
1022
b69aee04 1023 sock = filp->private_data;
1da177e4 1024
89bddce5
SH
1025 sk = sock->sk;
1026 if (sk == NULL) {
1da177e4
LT
1027 kfree(fna);
1028 return -EINVAL;
1029 }
1030
1031 lock_sock(sk);
1032
89bddce5 1033 prev = &(sock->fasync_list);
1da177e4 1034
89bddce5
SH
1035 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1036 if (fa->fa_file == filp)
1da177e4
LT
1037 break;
1038
89bddce5
SH
1039 if (on) {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 fa->fa_fd = fd;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044
1045 kfree(fna);
1046 goto out;
1047 }
89bddce5
SH
1048 fna->fa_file = filp;
1049 fna->fa_fd = fd;
1050 fna->magic = FASYNC_MAGIC;
1051 fna->fa_next = sock->fasync_list;
1da177e4 1052 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1053 sock->fasync_list = fna;
1da177e4 1054 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1055 } else {
1056 if (fa != NULL) {
1da177e4 1057 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1058 *prev = fa->fa_next;
1da177e4
LT
1059 write_unlock_bh(&sk->sk_callback_lock);
1060 kfree(fa);
1061 }
1062 }
1063
1064out:
1065 release_sock(sock->sk);
1066 return 0;
1067}
1068
1069/* This function may be called only under socket lock or callback_lock */
1070
1071int sock_wake_async(struct socket *sock, int how, int band)
1072{
1073 if (!sock || !sock->fasync_list)
1074 return -1;
89bddce5 1075 switch (how) {
8d8ad9d7 1076 case SOCK_WAKE_WAITD:
1da177e4
LT
1077 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1078 break;
1079 goto call_kill;
8d8ad9d7 1080 case SOCK_WAKE_SPACE:
1da177e4
LT
1081 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1082 break;
1083 /* fall through */
8d8ad9d7 1084 case SOCK_WAKE_IO:
89bddce5 1085call_kill:
1da177e4
LT
1086 __kill_fasync(sock->fasync_list, SIGIO, band);
1087 break;
8d8ad9d7 1088 case SOCK_WAKE_URG:
1da177e4
LT
1089 __kill_fasync(sock->fasync_list, SIGURG, band);
1090 }
1091 return 0;
1092}
1093
1b8d7ae4 1094static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1095 struct socket **res, int kern)
1da177e4
LT
1096{
1097 int err;
1098 struct socket *sock;
55737fda 1099 const struct net_proto_family *pf;
1da177e4
LT
1100
1101 /*
89bddce5 1102 * Check protocol is in range
1da177e4
LT
1103 */
1104 if (family < 0 || family >= NPROTO)
1105 return -EAFNOSUPPORT;
1106 if (type < 0 || type >= SOCK_MAX)
1107 return -EINVAL;
1108
1109 /* Compatibility.
1110
1111 This uglymoron is moved from INET layer to here to avoid
1112 deadlock in module load.
1113 */
1114 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1115 static int warned;
1da177e4
LT
1116 if (!warned) {
1117 warned = 1;
89bddce5
SH
1118 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 }
1121 family = PF_PACKET;
1122 }
1123
1124 err = security_socket_create(family, type, protocol, kern);
1125 if (err)
1126 return err;
89bddce5 1127
55737fda
SH
1128 /*
1129 * Allocate the socket and allow the family to set things up. if
1130 * the protocol is 0, the family is instructed to select an appropriate
1131 * default.
1132 */
1133 sock = sock_alloc();
1134 if (!sock) {
1135 if (net_ratelimit())
1136 printk(KERN_WARNING "socket: no more sockets\n");
1137 return -ENFILE; /* Not exactly a match, but its the
1138 closest posix thing */
1139 }
1140
1141 sock->type = type;
1142
1da177e4 1143#if defined(CONFIG_KMOD)
89bddce5
SH
1144 /* Attempt to load a protocol module if the find failed.
1145 *
1146 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1147 * requested real, full-featured networking support upon configuration.
1148 * Otherwise module support will break!
1149 */
55737fda 1150 if (net_families[family] == NULL)
89bddce5 1151 request_module("net-pf-%d", family);
1da177e4
LT
1152#endif
1153
55737fda
SH
1154 rcu_read_lock();
1155 pf = rcu_dereference(net_families[family]);
1156 err = -EAFNOSUPPORT;
1157 if (!pf)
1158 goto out_release;
1da177e4
LT
1159
1160 /*
1161 * We will call the ->create function, that possibly is in a loadable
1162 * module, so we have to bump that loadable module refcnt first.
1163 */
55737fda 1164 if (!try_module_get(pf->owner))
1da177e4
LT
1165 goto out_release;
1166
55737fda
SH
1167 /* Now protected by module ref count */
1168 rcu_read_unlock();
1169
1b8d7ae4 1170 err = pf->create(net, sock, protocol);
55737fda 1171 if (err < 0)
1da177e4 1172 goto out_module_put;
a79af59e 1173
1da177e4
LT
1174 /*
1175 * Now to bump the refcnt of the [loadable] module that owns this
1176 * socket at sock_release time we decrement its refcnt.
1177 */
55737fda
SH
1178 if (!try_module_get(sock->ops->owner))
1179 goto out_module_busy;
1180
1da177e4
LT
1181 /*
1182 * Now that we're done with the ->create function, the [loadable]
1183 * module can have its refcnt decremented
1184 */
55737fda 1185 module_put(pf->owner);
7420ed23
VY
1186 err = security_socket_post_create(sock, family, type, protocol, kern);
1187 if (err)
3b185525 1188 goto out_sock_release;
55737fda 1189 *res = sock;
1da177e4 1190
55737fda
SH
1191 return 0;
1192
1193out_module_busy:
1194 err = -EAFNOSUPPORT;
1da177e4 1195out_module_put:
55737fda
SH
1196 sock->ops = NULL;
1197 module_put(pf->owner);
1198out_sock_release:
1da177e4 1199 sock_release(sock);
55737fda
SH
1200 return err;
1201
1202out_release:
1203 rcu_read_unlock();
1204 goto out_sock_release;
1da177e4
LT
1205}
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1b8d7ae4 1209 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1210}
1211
1212int sock_create_kern(int family, int type, int protocol, struct socket **res)
1213{
1b8d7ae4 1214 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1215}
1216
1217asmlinkage long sys_socket(int family, int type, int protocol)
1218{
1219 int retval;
1220 struct socket *sock;
1221
1222 retval = sock_create(family, type, protocol, &sock);
1223 if (retval < 0)
1224 goto out;
1225
1226 retval = sock_map_fd(sock);
1227 if (retval < 0)
1228 goto out_release;
1229
1230out:
1231 /* It may be already another descriptor 8) Not kernel problem. */
1232 return retval;
1233
1234out_release:
1235 sock_release(sock);
1236 return retval;
1237}
1238
1239/*
1240 * Create a pair of connected sockets.
1241 */
1242
89bddce5
SH
1243asmlinkage long sys_socketpair(int family, int type, int protocol,
1244 int __user *usockvec)
1da177e4
LT
1245{
1246 struct socket *sock1, *sock2;
1247 int fd1, fd2, err;
db349509 1248 struct file *newfile1, *newfile2;
1da177e4
LT
1249
1250 /*
1251 * Obtain the first socket and check if the underlying protocol
1252 * supports the socketpair call.
1253 */
1254
1255 err = sock_create(family, type, protocol, &sock1);
1256 if (err < 0)
1257 goto out;
1258
1259 err = sock_create(family, type, protocol, &sock2);
1260 if (err < 0)
1261 goto out_release_1;
1262
1263 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1264 if (err < 0)
1da177e4
LT
1265 goto out_release_both;
1266
db349509 1267 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1268 if (unlikely(fd1 < 0)) {
1269 err = fd1;
db349509 1270 goto out_release_both;
bf3c23d1 1271 }
1da177e4 1272
db349509
AV
1273 fd2 = sock_alloc_fd(&newfile2);
1274 if (unlikely(fd2 < 0)) {
bf3c23d1 1275 err = fd2;
db349509
AV
1276 put_filp(newfile1);
1277 put_unused_fd(fd1);
1da177e4 1278 goto out_release_both;
db349509 1279 }
1da177e4 1280
db349509
AV
1281 err = sock_attach_fd(sock1, newfile1);
1282 if (unlikely(err < 0)) {
1283 goto out_fd2;
1284 }
1285
1286 err = sock_attach_fd(sock2, newfile2);
1287 if (unlikely(err < 0)) {
1288 fput(newfile1);
1289 goto out_fd1;
1290 }
1291
1292 err = audit_fd_pair(fd1, fd2);
1293 if (err < 0) {
1294 fput(newfile1);
1295 fput(newfile2);
1296 goto out_fd;
1297 }
1da177e4 1298
db349509
AV
1299 fd_install(fd1, newfile1);
1300 fd_install(fd2, newfile2);
1da177e4
LT
1301 /* fd1 and fd2 may be already another descriptors.
1302 * Not kernel problem.
1303 */
1304
89bddce5 1305 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1306 if (!err)
1307 err = put_user(fd2, &usockvec[1]);
1308 if (!err)
1309 return 0;
1310
1311 sys_close(fd2);
1312 sys_close(fd1);
1313 return err;
1314
1da177e4 1315out_release_both:
89bddce5 1316 sock_release(sock2);
1da177e4 1317out_release_1:
89bddce5 1318 sock_release(sock1);
1da177e4
LT
1319out:
1320 return err;
db349509
AV
1321
1322out_fd2:
1323 put_filp(newfile1);
1324 sock_release(sock1);
1325out_fd1:
1326 put_filp(newfile2);
1327 sock_release(sock2);
1328out_fd:
1329 put_unused_fd(fd1);
1330 put_unused_fd(fd2);
1331 goto out;
1da177e4
LT
1332}
1333
1da177e4
LT
1334/*
1335 * Bind a name to a socket. Nothing much to do here since it's
1336 * the protocol's responsibility to handle the local address.
1337 *
1338 * We move the socket address to kernel space before we call
1339 * the protocol layer (having also checked the address is ok).
1340 */
1341
1342asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1343{
1344 struct socket *sock;
1345 char address[MAX_SOCK_ADDR];
6cb153ca 1346 int err, fput_needed;
1da177e4 1347
89bddce5 1348 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1349 if (sock) {
89bddce5
SH
1350 err = move_addr_to_kernel(umyaddr, addrlen, address);
1351 if (err >= 0) {
1352 err = security_socket_bind(sock,
1353 (struct sockaddr *)address,
1354 addrlen);
6cb153ca
BL
1355 if (!err)
1356 err = sock->ops->bind(sock,
89bddce5
SH
1357 (struct sockaddr *)
1358 address, addrlen);
1da177e4 1359 }
6cb153ca 1360 fput_light(sock->file, fput_needed);
89bddce5 1361 }
1da177e4
LT
1362 return err;
1363}
1364
1da177e4
LT
1365/*
1366 * Perform a listen. Basically, we allow the protocol to do anything
1367 * necessary for a listen, and if that works, we mark the socket as
1368 * ready for listening.
1369 */
1370
1da177e4
LT
1371asmlinkage long sys_listen(int fd, int backlog)
1372{
1373 struct socket *sock;
6cb153ca 1374 int err, fput_needed;
b8e1f9b5 1375 int somaxconn;
89bddce5
SH
1376
1377 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1378 if (sock) {
b8e1f9b5
PE
1379 somaxconn = sock->sk->sk_net->sysctl_somaxconn;
1380 if ((unsigned)backlog > somaxconn)
1381 backlog = somaxconn;
1da177e4
LT
1382
1383 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1384 if (!err)
1385 err = sock->ops->listen(sock, backlog);
1da177e4 1386
6cb153ca 1387 fput_light(sock->file, fput_needed);
1da177e4
LT
1388 }
1389 return err;
1390}
1391
1da177e4
LT
1392/*
1393 * For accept, we attempt to create a new socket, set up the link
1394 * with the client, wake up the client, then return the new
1395 * connected fd. We collect the address of the connector in kernel
1396 * space and move it to user at the very end. This is unclean because
1397 * we open the socket then return an error.
1398 *
1399 * 1003.1g adds the ability to recvmsg() to query connection pending
1400 * status to recvmsg. We need to add that support in a way thats
1401 * clean when we restucture accept also.
1402 */
1403
89bddce5
SH
1404asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1405 int __user *upeer_addrlen)
1da177e4
LT
1406{
1407 struct socket *sock, *newsock;
39d8c1b6 1408 struct file *newfile;
6cb153ca 1409 int err, len, newfd, fput_needed;
1da177e4
LT
1410 char address[MAX_SOCK_ADDR];
1411
6cb153ca 1412 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1413 if (!sock)
1414 goto out;
1415
1416 err = -ENFILE;
89bddce5 1417 if (!(newsock = sock_alloc()))
1da177e4
LT
1418 goto out_put;
1419
1420 newsock->type = sock->type;
1421 newsock->ops = sock->ops;
1422
1da177e4
LT
1423 /*
1424 * We don't need try_module_get here, as the listening socket (sock)
1425 * has the protocol module (sock->ops->owner) held.
1426 */
1427 __module_get(newsock->ops->owner);
1428
39d8c1b6
DM
1429 newfd = sock_alloc_fd(&newfile);
1430 if (unlikely(newfd < 0)) {
1431 err = newfd;
9a1875e6
DM
1432 sock_release(newsock);
1433 goto out_put;
39d8c1b6
DM
1434 }
1435
1436 err = sock_attach_fd(newsock, newfile);
1437 if (err < 0)
79f4f642 1438 goto out_fd_simple;
39d8c1b6 1439
a79af59e
FF
1440 err = security_socket_accept(sock, newsock);
1441 if (err)
39d8c1b6 1442 goto out_fd;
a79af59e 1443
1da177e4
LT
1444 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1445 if (err < 0)
39d8c1b6 1446 goto out_fd;
1da177e4
LT
1447
1448 if (upeer_sockaddr) {
89bddce5
SH
1449 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1450 &len, 2) < 0) {
1da177e4 1451 err = -ECONNABORTED;
39d8c1b6 1452 goto out_fd;
1da177e4 1453 }
89bddce5
SH
1454 err = move_addr_to_user(address, len, upeer_sockaddr,
1455 upeer_addrlen);
1da177e4 1456 if (err < 0)
39d8c1b6 1457 goto out_fd;
1da177e4
LT
1458 }
1459
1460 /* File flags are not inherited via accept() unlike another OSes. */
1461
39d8c1b6
DM
1462 fd_install(newfd, newfile);
1463 err = newfd;
1da177e4
LT
1464
1465 security_socket_post_accept(sock, newsock);
1466
1467out_put:
6cb153ca 1468 fput_light(sock->file, fput_needed);
1da177e4
LT
1469out:
1470 return err;
79f4f642
AD
1471out_fd_simple:
1472 sock_release(newsock);
1473 put_filp(newfile);
1474 put_unused_fd(newfd);
1475 goto out_put;
39d8c1b6 1476out_fd:
9606a216 1477 fput(newfile);
39d8c1b6 1478 put_unused_fd(newfd);
1da177e4
LT
1479 goto out_put;
1480}
1481
1da177e4
LT
1482/*
1483 * Attempt to connect to a socket with the server address. The address
1484 * is in user space so we verify it is OK and move it to kernel space.
1485 *
1486 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1487 * break bindings
1488 *
1489 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1490 * other SEQPACKET protocols that take time to connect() as it doesn't
1491 * include the -EINPROGRESS status for such sockets.
1492 */
1493
89bddce5
SH
1494asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1495 int addrlen)
1da177e4
LT
1496{
1497 struct socket *sock;
1498 char address[MAX_SOCK_ADDR];
6cb153ca 1499 int err, fput_needed;
1da177e4 1500
6cb153ca 1501 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1502 if (!sock)
1503 goto out;
1504 err = move_addr_to_kernel(uservaddr, addrlen, address);
1505 if (err < 0)
1506 goto out_put;
1507
89bddce5
SH
1508 err =
1509 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1510 if (err)
1511 goto out_put;
1512
89bddce5 1513 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1514 sock->file->f_flags);
1515out_put:
6cb153ca 1516 fput_light(sock->file, fput_needed);
1da177e4
LT
1517out:
1518 return err;
1519}
1520
1521/*
1522 * Get the local address ('name') of a socket object. Move the obtained
1523 * name to user space.
1524 */
1525
89bddce5
SH
1526asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1527 int __user *usockaddr_len)
1da177e4
LT
1528{
1529 struct socket *sock;
1530 char address[MAX_SOCK_ADDR];
6cb153ca 1531 int len, err, fput_needed;
89bddce5 1532
6cb153ca 1533 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1534 if (!sock)
1535 goto out;
1536
1537 err = security_socket_getsockname(sock);
1538 if (err)
1539 goto out_put;
1540
1541 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1542 if (err)
1543 goto out_put;
1544 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1545
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the remote address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
89bddce5
SH
1557asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1558 int __user *usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
1561 char address[MAX_SOCK_ADDR];
6cb153ca 1562 int len, err, fput_needed;
1da177e4 1563
89bddce5
SH
1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1565 if (sock != NULL) {
1da177e4
LT
1566 err = security_socket_getpeername(sock);
1567 if (err) {
6cb153ca 1568 fput_light(sock->file, fput_needed);
1da177e4
LT
1569 return err;
1570 }
1571
89bddce5
SH
1572 err =
1573 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1574 1);
1da177e4 1575 if (!err)
89bddce5
SH
1576 err = move_addr_to_user(address, len, usockaddr,
1577 usockaddr_len);
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579 }
1580 return err;
1581}
1582
1583/*
1584 * Send a datagram to a given address. We move the address into kernel
1585 * space and check the user space data area is readable before invoking
1586 * the protocol.
1587 */
1588
89bddce5
SH
1589asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1590 unsigned flags, struct sockaddr __user *addr,
1591 int addr_len)
1da177e4
LT
1592{
1593 struct socket *sock;
1594 char address[MAX_SOCK_ADDR];
1595 int err;
1596 struct msghdr msg;
1597 struct iovec iov;
6cb153ca 1598 int fput_needed;
6cb153ca 1599
de0fa95c
PE
1600 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1601 if (!sock)
4387ff75 1602 goto out;
6cb153ca 1603
89bddce5
SH
1604 iov.iov_base = buff;
1605 iov.iov_len = len;
1606 msg.msg_name = NULL;
1607 msg.msg_iov = &iov;
1608 msg.msg_iovlen = 1;
1609 msg.msg_control = NULL;
1610 msg.msg_controllen = 0;
1611 msg.msg_namelen = 0;
6cb153ca 1612 if (addr) {
1da177e4
LT
1613 err = move_addr_to_kernel(addr, addr_len, address);
1614 if (err < 0)
1615 goto out_put;
89bddce5
SH
1616 msg.msg_name = address;
1617 msg.msg_namelen = addr_len;
1da177e4
LT
1618 }
1619 if (sock->file->f_flags & O_NONBLOCK)
1620 flags |= MSG_DONTWAIT;
1621 msg.msg_flags = flags;
1622 err = sock_sendmsg(sock, &msg, len);
1623
89bddce5 1624out_put:
de0fa95c 1625 fput_light(sock->file, fput_needed);
4387ff75 1626out:
1da177e4
LT
1627 return err;
1628}
1629
1630/*
89bddce5 1631 * Send a datagram down a socket.
1da177e4
LT
1632 */
1633
89bddce5 1634asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1635{
1636 return sys_sendto(fd, buff, len, flags, NULL, 0);
1637}
1638
1639/*
89bddce5 1640 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1641 * sender. We verify the buffers are writable and if needed move the
1642 * sender address from kernel to user space.
1643 */
1644
89bddce5
SH
1645asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1646 unsigned flags, struct sockaddr __user *addr,
1647 int __user *addr_len)
1da177e4
LT
1648{
1649 struct socket *sock;
1650 struct iovec iov;
1651 struct msghdr msg;
1652 char address[MAX_SOCK_ADDR];
89bddce5 1653 int err, err2;
6cb153ca
BL
1654 int fput_needed;
1655
de0fa95c 1656 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1657 if (!sock)
de0fa95c 1658 goto out;
1da177e4 1659
89bddce5
SH
1660 msg.msg_control = NULL;
1661 msg.msg_controllen = 0;
1662 msg.msg_iovlen = 1;
1663 msg.msg_iov = &iov;
1664 iov.iov_len = size;
1665 iov.iov_base = ubuf;
1666 msg.msg_name = address;
1667 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1668 if (sock->file->f_flags & O_NONBLOCK)
1669 flags |= MSG_DONTWAIT;
89bddce5 1670 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1671
89bddce5
SH
1672 if (err >= 0 && addr != NULL) {
1673 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1674 if (err2 < 0)
1675 err = err2;
1da177e4 1676 }
de0fa95c
PE
1677
1678 fput_light(sock->file, fput_needed);
4387ff75 1679out:
1da177e4
LT
1680 return err;
1681}
1682
1683/*
89bddce5 1684 * Receive a datagram from a socket.
1da177e4
LT
1685 */
1686
89bddce5
SH
1687asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1688 unsigned flags)
1da177e4
LT
1689{
1690 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1691}
1692
1693/*
1694 * Set a socket option. Because we don't know the option lengths we have
1695 * to pass the user mode parameter for the protocols to sort out.
1696 */
1697
89bddce5
SH
1698asmlinkage long sys_setsockopt(int fd, int level, int optname,
1699 char __user *optval, int optlen)
1da177e4 1700{
6cb153ca 1701 int err, fput_needed;
1da177e4
LT
1702 struct socket *sock;
1703
1704 if (optlen < 0)
1705 return -EINVAL;
89bddce5
SH
1706
1707 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1708 if (sock != NULL) {
1709 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1710 if (err)
1711 goto out_put;
1da177e4
LT
1712
1713 if (level == SOL_SOCKET)
89bddce5
SH
1714 err =
1715 sock_setsockopt(sock, level, optname, optval,
1716 optlen);
1da177e4 1717 else
89bddce5
SH
1718 err =
1719 sock->ops->setsockopt(sock, level, optname, optval,
1720 optlen);
6cb153ca
BL
1721out_put:
1722 fput_light(sock->file, fput_needed);
1da177e4
LT
1723 }
1724 return err;
1725}
1726
1727/*
1728 * Get a socket option. Because we don't know the option lengths we have
1729 * to pass a user mode parameter for the protocols to sort out.
1730 */
1731
89bddce5
SH
1732asmlinkage long sys_getsockopt(int fd, int level, int optname,
1733 char __user *optval, int __user *optlen)
1da177e4 1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
89bddce5
SH
1738 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1739 if (sock != NULL) {
6cb153ca
BL
1740 err = security_socket_getsockopt(sock, level, optname);
1741 if (err)
1742 goto out_put;
1da177e4
LT
1743
1744 if (level == SOL_SOCKET)
89bddce5
SH
1745 err =
1746 sock_getsockopt(sock, level, optname, optval,
1747 optlen);
1da177e4 1748 else
89bddce5
SH
1749 err =
1750 sock->ops->getsockopt(sock, level, optname, optval,
1751 optlen);
6cb153ca
BL
1752out_put:
1753 fput_light(sock->file, fput_needed);
1da177e4
LT
1754 }
1755 return err;
1756}
1757
1da177e4
LT
1758/*
1759 * Shutdown a socket.
1760 */
1761
1762asmlinkage long sys_shutdown(int fd, int how)
1763{
6cb153ca 1764 int err, fput_needed;
1da177e4
LT
1765 struct socket *sock;
1766
89bddce5
SH
1767 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1768 if (sock != NULL) {
1da177e4 1769 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1770 if (!err)
1771 err = sock->ops->shutdown(sock, how);
1772 fput_light(sock->file, fput_needed);
1da177e4
LT
1773 }
1774 return err;
1775}
1776
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * Each macro expects a variable named 'flags' and, next to 'msg', a pointer
 * named 'msg_compat' to be in scope at the point of use; MSG_CMSG_COMPAT in
 * 'flags' selects the compat structure.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1783
1da177e4
LT
1784/*
1785 * BSD sendmsg interface
1786 */
1787
1788asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1789{
89bddce5
SH
1790 struct compat_msghdr __user *msg_compat =
1791 (struct compat_msghdr __user *)msg;
1da177e4
LT
1792 struct socket *sock;
1793 char address[MAX_SOCK_ADDR];
1794 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1795 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1796 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1797 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1798 unsigned char *ctl_buf = ctl;
1799 struct msghdr msg_sys;
1800 int err, ctl_len, iov_size, total_len;
6cb153ca 1801 int fput_needed;
89bddce5 1802
1da177e4
LT
1803 err = -EFAULT;
1804 if (MSG_CMSG_COMPAT & flags) {
1805 if (get_compat_msghdr(&msg_sys, msg_compat))
1806 return -EFAULT;
89bddce5
SH
1807 }
1808 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1809 return -EFAULT;
1810
6cb153ca 1811 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1812 if (!sock)
1da177e4
LT
1813 goto out;
1814
1815 /* do not move before msg_sys is valid */
1816 err = -EMSGSIZE;
1817 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1818 goto out_put;
1819
89bddce5 1820 /* Check whether to allocate the iovec area */
1da177e4
LT
1821 err = -ENOMEM;
1822 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1823 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1824 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1825 if (!iov)
1826 goto out_put;
1827 }
1828
1829 /* This will also move the address data into kernel space */
1830 if (MSG_CMSG_COMPAT & flags) {
1831 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1832 } else
1833 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1834 if (err < 0)
1da177e4
LT
1835 goto out_freeiov;
1836 total_len = err;
1837
1838 err = -ENOBUFS;
1839
1840 if (msg_sys.msg_controllen > INT_MAX)
1841 goto out_freeiov;
89bddce5 1842 ctl_len = msg_sys.msg_controllen;
1da177e4 1843 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1844 err =
1845 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1846 sizeof(ctl));
1da177e4
LT
1847 if (err)
1848 goto out_freeiov;
1849 ctl_buf = msg_sys.msg_control;
8920e8f9 1850 ctl_len = msg_sys.msg_controllen;
1da177e4 1851 } else if (ctl_len) {
89bddce5 1852 if (ctl_len > sizeof(ctl)) {
1da177e4 1853 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1854 if (ctl_buf == NULL)
1da177e4
LT
1855 goto out_freeiov;
1856 }
1857 err = -EFAULT;
1858 /*
1859 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1860 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1861 * checking falls down on this.
1862 */
89bddce5
SH
1863 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1864 ctl_len))
1da177e4
LT
1865 goto out_freectl;
1866 msg_sys.msg_control = ctl_buf;
1867 }
1868 msg_sys.msg_flags = flags;
1869
1870 if (sock->file->f_flags & O_NONBLOCK)
1871 msg_sys.msg_flags |= MSG_DONTWAIT;
1872 err = sock_sendmsg(sock, &msg_sys, total_len);
1873
1874out_freectl:
89bddce5 1875 if (ctl_buf != ctl)
1da177e4
LT
1876 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1877out_freeiov:
1878 if (iov != iovstack)
1879 sock_kfree_s(sock->sk, iov, iov_size);
1880out_put:
6cb153ca 1881 fput_light(sock->file, fput_needed);
89bddce5 1882out:
1da177e4
LT
1883 return err;
1884}
1885
1886/*
1887 * BSD recvmsg interface
1888 */
1889
89bddce5
SH
1890asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1891 unsigned int flags)
1da177e4 1892{
89bddce5
SH
1893 struct compat_msghdr __user *msg_compat =
1894 (struct compat_msghdr __user *)msg;
1da177e4
LT
1895 struct socket *sock;
1896 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1897 struct iovec *iov = iovstack;
1da177e4
LT
1898 struct msghdr msg_sys;
1899 unsigned long cmsg_ptr;
1900 int err, iov_size, total_len, len;
6cb153ca 1901 int fput_needed;
1da177e4
LT
1902
1903 /* kernel mode address */
1904 char addr[MAX_SOCK_ADDR];
1905
1906 /* user mode address pointers */
1907 struct sockaddr __user *uaddr;
1908 int __user *uaddr_len;
89bddce5 1909
1da177e4
LT
1910 if (MSG_CMSG_COMPAT & flags) {
1911 if (get_compat_msghdr(&msg_sys, msg_compat))
1912 return -EFAULT;
89bddce5
SH
1913 }
1914 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1915 return -EFAULT;
1da177e4 1916
6cb153ca 1917 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1918 if (!sock)
1919 goto out;
1920
1921 err = -EMSGSIZE;
1922 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1923 goto out_put;
89bddce5
SH
1924
1925 /* Check whether to allocate the iovec area */
1da177e4
LT
1926 err = -ENOMEM;
1927 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1928 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1929 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1930 if (!iov)
1931 goto out_put;
1932 }
1933
1934 /*
89bddce5
SH
1935 * Save the user-mode address (verify_iovec will change the
1936 * kernel msghdr to use the kernel address space)
1da177e4 1937 */
89bddce5 1938
cfcabdcc 1939 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1940 uaddr_len = COMPAT_NAMELEN(msg);
1941 if (MSG_CMSG_COMPAT & flags) {
1942 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1943 } else
1944 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1945 if (err < 0)
1946 goto out_freeiov;
89bddce5 1947 total_len = err;
1da177e4
LT
1948
1949 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1950 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1951
1da177e4
LT
1952 if (sock->file->f_flags & O_NONBLOCK)
1953 flags |= MSG_DONTWAIT;
1954 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1955 if (err < 0)
1956 goto out_freeiov;
1957 len = err;
1958
1959 if (uaddr != NULL) {
89bddce5
SH
1960 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1961 uaddr_len);
1da177e4
LT
1962 if (err < 0)
1963 goto out_freeiov;
1964 }
37f7f421
DM
1965 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1966 COMPAT_FLAGS(msg));
1da177e4
LT
1967 if (err)
1968 goto out_freeiov;
1969 if (MSG_CMSG_COMPAT & flags)
89bddce5 1970 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1971 &msg_compat->msg_controllen);
1972 else
89bddce5 1973 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1974 &msg->msg_controllen);
1975 if (err)
1976 goto out_freeiov;
1977 err = len;
1978
1979out_freeiov:
1980 if (iov != iovstack)
1981 sock_kfree_s(sock->sk, iov, iov_size);
1982out_put:
6cb153ca 1983 fput_light(sock->file, fput_needed);
1da177e4
LT
1984out:
1985 return err;
1986}
1987
1988#ifdef __ARCH_WANT_SYS_SOCKETCALL
1989
/*
 * Argument list sizes for sys_socketcall, indexed by call number: the number
 * of bytes of unsigned long arguments copied from user space for that call.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	[0]  = AL(0), [1]  = AL(3), [2]  = AL(3),
	[3]  = AL(3), [4]  = AL(2), [5]  = AL(3),
	[6]  = AL(3), [7]  = AL(3), [8]  = AL(4),
	[9]  = AL(4), [10] = AL(4), [11] = AL(6),
	[12] = AL(6), [13] = AL(2), [14] = AL(5),
	[15] = AL(5), [16] = AL(3), [17] = AL(3),
};

#undef AL
1999
2000/*
89bddce5 2001 * System call vectors.
1da177e4
LT
2002 *
2003 * Argument checking cleaned up. Saved 20% in size.
2004 * This function doesn't need to set the kernel lock because
89bddce5 2005 * it is set by the callees.
1da177e4
LT
2006 */
2007
2008asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2009{
2010 unsigned long a[6];
89bddce5 2011 unsigned long a0, a1;
1da177e4
LT
2012 int err;
2013
89bddce5 2014 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2015 return -EINVAL;
2016
2017 /* copy_from_user should be SMP safe. */
2018 if (copy_from_user(a, args, nargs[call]))
2019 return -EFAULT;
3ec3b2fb 2020
89bddce5 2021 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2022 if (err)
2023 return err;
2024
89bddce5
SH
2025 a0 = a[0];
2026 a1 = a[1];
2027
2028 switch (call) {
2029 case SYS_SOCKET:
2030 err = sys_socket(a0, a1, a[2]);
2031 break;
2032 case SYS_BIND:
2033 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2034 break;
2035 case SYS_CONNECT:
2036 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2037 break;
2038 case SYS_LISTEN:
2039 err = sys_listen(a0, a1);
2040 break;
2041 case SYS_ACCEPT:
2042 err =
2043 sys_accept(a0, (struct sockaddr __user *)a1,
2044 (int __user *)a[2]);
2045 break;
2046 case SYS_GETSOCKNAME:
2047 err =
2048 sys_getsockname(a0, (struct sockaddr __user *)a1,
2049 (int __user *)a[2]);
2050 break;
2051 case SYS_GETPEERNAME:
2052 err =
2053 sys_getpeername(a0, (struct sockaddr __user *)a1,
2054 (int __user *)a[2]);
2055 break;
2056 case SYS_SOCKETPAIR:
2057 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2058 break;
2059 case SYS_SEND:
2060 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2061 break;
2062 case SYS_SENDTO:
2063 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2064 (struct sockaddr __user *)a[4], a[5]);
2065 break;
2066 case SYS_RECV:
2067 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2068 break;
2069 case SYS_RECVFROM:
2070 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2071 (struct sockaddr __user *)a[4],
2072 (int __user *)a[5]);
2073 break;
2074 case SYS_SHUTDOWN:
2075 err = sys_shutdown(a0, a1);
2076 break;
2077 case SYS_SETSOCKOPT:
2078 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2079 break;
2080 case SYS_GETSOCKOPT:
2081 err =
2082 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2083 (int __user *)a[4]);
2084 break;
2085 case SYS_SENDMSG:
2086 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2087 break;
2088 case SYS_RECVMSG:
2089 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2090 break;
2091 default:
2092 err = -EINVAL;
2093 break;
1da177e4
LT
2094 }
2095 return err;
2096}
2097
89bddce5 2098#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2099
55737fda
SH
2100/**
2101 * sock_register - add a socket protocol handler
2102 * @ops: description of protocol
2103 *
1da177e4
LT
2104 * This function is called by a protocol handler that wants to
2105 * advertise its address family, and have it linked into the
55737fda
SH
2106 * socket interface. The value ops->family coresponds to the
2107 * socket system call protocol family.
1da177e4 2108 */
f0fd27d4 2109int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2110{
2111 int err;
2112
2113 if (ops->family >= NPROTO) {
89bddce5
SH
2114 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2115 NPROTO);
1da177e4
LT
2116 return -ENOBUFS;
2117 }
55737fda
SH
2118
2119 spin_lock(&net_family_lock);
2120 if (net_families[ops->family])
2121 err = -EEXIST;
2122 else {
89bddce5 2123 net_families[ops->family] = ops;
1da177e4
LT
2124 err = 0;
2125 }
55737fda
SH
2126 spin_unlock(&net_family_lock);
2127
89bddce5 2128 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2129 return err;
2130}
2131
55737fda
SH
2132/**
2133 * sock_unregister - remove a protocol handler
2134 * @family: protocol family to remove
2135 *
1da177e4
LT
2136 * This function is called by a protocol handler that wants to
2137 * remove its address family, and have it unlinked from the
55737fda
SH
2138 * new socket creation.
2139 *
2140 * If protocol handler is a module, then it can use module reference
2141 * counts to protect against new references. If protocol handler is not
2142 * a module then it needs to provide its own protection in
2143 * the ops->create routine.
1da177e4 2144 */
f0fd27d4 2145void sock_unregister(int family)
1da177e4 2146{
f0fd27d4 2147 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2148
55737fda 2149 spin_lock(&net_family_lock);
89bddce5 2150 net_families[family] = NULL;
55737fda
SH
2151 spin_unlock(&net_family_lock);
2152
2153 synchronize_rcu();
2154
89bddce5 2155 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2156}
2157
77d76ea3 2158static int __init sock_init(void)
1da177e4
LT
2159{
2160 /*
89bddce5 2161 * Initialize sock SLAB cache.
1da177e4 2162 */
89bddce5 2163
1da177e4
LT
2164 sk_init();
2165
1da177e4 2166 /*
89bddce5 2167 * Initialize skbuff SLAB cache
1da177e4
LT
2168 */
2169 skb_init();
1da177e4
LT
2170
2171 /*
89bddce5 2172 * Initialize the protocols module.
1da177e4
LT
2173 */
2174
2175 init_inodecache();
2176 register_filesystem(&sock_fs_type);
2177 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2178
2179 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2180 */
2181
2182#ifdef CONFIG_NETFILTER
2183 netfilter_init();
2184#endif
cbeb321a
DM
2185
2186 return 0;
1da177e4
LT
2187}
2188
77d76ea3
AK
2189core_initcall(sock_init); /* early initcall */
2190
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the number of sockets in use to a seq_file.
 * @seq: seq_file to print into
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and emits one "sockets: used N" line.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2207
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 * @file: file whose private_data holds the struct socket
 * @cmd:  ioctl command
 * @arg:  ioctl argument
 *
 * Forwards to the socket's ops->compat_ioctl handler when one is set.
 * Returns -ENOIOCTLCMD when the socket has no such handler.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2221
ac5a488e
SS
2222int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2223{
2224 return sock->ops->bind(sock, addr, addrlen);
2225}
2226
2227int kernel_listen(struct socket *sock, int backlog)
2228{
2229 return sock->ops->listen(sock, backlog);
2230}
2231
2232int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2233{
2234 struct sock *sk = sock->sk;
2235 int err;
2236
2237 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2238 newsock);
2239 if (err < 0)
2240 goto done;
2241
2242 err = sock->ops->accept(sock, *newsock, flags);
2243 if (err < 0) {
2244 sock_release(*newsock);
fa8705b0 2245 *newsock = NULL;
ac5a488e
SS
2246 goto done;
2247 }
2248
2249 (*newsock)->ops = sock->ops;
2250
2251done:
2252 return err;
2253}
2254
2255int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2256 int flags)
ac5a488e
SS
2257{
2258 return sock->ops->connect(sock, addr, addrlen, flags);
2259}
2260
2261int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2262 int *addrlen)
2263{
2264 return sock->ops->getname(sock, addr, addrlen, 0);
2265}
2266
2267int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2268 int *addrlen)
2269{
2270 return sock->ops->getname(sock, addr, addrlen, 1);
2271}
2272
2273int kernel_getsockopt(struct socket *sock, int level, int optname,
2274 char *optval, int *optlen)
2275{
2276 mm_segment_t oldfs = get_fs();
2277 int err;
2278
2279 set_fs(KERNEL_DS);
2280 if (level == SOL_SOCKET)
2281 err = sock_getsockopt(sock, level, optname, optval, optlen);
2282 else
2283 err = sock->ops->getsockopt(sock, level, optname, optval,
2284 optlen);
2285 set_fs(oldfs);
2286 return err;
2287}
2288
2289int kernel_setsockopt(struct socket *sock, int level, int optname,
2290 char *optval, int optlen)
2291{
2292 mm_segment_t oldfs = get_fs();
2293 int err;
2294
2295 set_fs(KERNEL_DS);
2296 if (level == SOL_SOCKET)
2297 err = sock_setsockopt(sock, level, optname, optval, optlen);
2298 else
2299 err = sock->ops->setsockopt(sock, level, optname, optval,
2300 optlen);
2301 set_fs(oldfs);
2302 return err;
2303}
2304
2305int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2306 size_t size, int flags)
2307{
2308 if (sock->ops->sendpage)
2309 return sock->ops->sendpage(sock, page, offset, size, flags);
2310
2311 return sock_no_sendpage(sock, page, offset, size, flags);
2312}
2313
2314int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2315{
2316 mm_segment_t oldfs = get_fs();
2317 int err;
2318
2319 set_fs(KERNEL_DS);
2320 err = sock->ops->ioctl(sock, cmd, arg);
2321 set_fs(oldfs);
2322
2323 return err;
2324}
2325
91cf45f0
TM
2326int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2327{
2328 return sock->ops->shutdown(sock, how);
2329}
2330
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket core entry points */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers for in-kernel socket users */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);