[DCCP]: Add support for abortive release
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
1da177e4 118
1da177e4
LT
119/*
120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
121 * in the operation structures but are done directly via the socketcall() multiplexor.
122 */
123
da7071d7 124static const struct file_operations socket_file_ops = {
1da177e4
LT
125 .owner = THIS_MODULE,
126 .llseek = no_llseek,
127 .aio_read = sock_aio_read,
128 .aio_write = sock_aio_write,
129 .poll = sock_poll,
130 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
131#ifdef CONFIG_COMPAT
132 .compat_ioctl = compat_sock_ioctl,
133#endif
1da177e4
LT
134 .mmap = sock_mmap,
135 .open = sock_no_open, /* special open code to disallow open via /proc */
136 .release = sock_close,
137 .fasync = sock_fasync,
5274f052
JA
138 .sendpage = sock_sendpage,
139 .splice_write = generic_splice_sendpage,
9c55e01c 140 .splice_read = sock_splice_read,
1da177e4
LT
141};
142
143/*
144 * The protocol list. Each protocol is registered in here.
145 */
146
1da177e4 147static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 148static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 149
1da177e4
LT
150/*
151 * Statistics counters of the socket lists
152 */
153
154static DEFINE_PER_CPU(int, sockets_in_use) = 0;
155
156/*
89bddce5
SH
157 * Support routines.
158 * Move socket addresses back and forth across the kernel/user
159 * divide and look after the messy bits.
1da177e4
LT
160 */
161
89bddce5 162#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
163 16 for IP, 16 for IPX,
164 24 for IPv6,
89bddce5 165 about 80 for AX.25
1da177e4
LT
166 must be at least one bigger than
167 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 168 :unix_mkname()).
1da177e4 169 */
89bddce5 170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
182int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
183{
89bddce5 184 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5
SH
209
210int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
211 int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
89bddce5
SH
216 err = get_user(len, ulen);
217 if (err)
1da177e4 218 return err;
89bddce5
SH
219 if (len > klen)
220 len = klen;
221 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 222 return -EINVAL;
89bddce5 223 if (len) {
d6fe3945
SG
224 if (audit_sockaddr(klen, kaddr))
225 return -ENOMEM;
89bddce5 226 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
227 return -EFAULT;
228 }
229 /*
89bddce5
SH
230 * "fromlen shall refer to the value before truncation.."
231 * 1003.1g
1da177e4
LT
232 */
233 return __put_user(klen, ulen);
234}
235
236#define SOCKFS_MAGIC 0x534F434B
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
247 init_waitqueue_head(&ei->socket.wait);
89bddce5 248
1da177e4
LT
249 ei->socket.fasync_list = NULL;
250 ei->socket.state = SS_UNCONNECTED;
251 ei->socket.flags = 0;
252 ei->socket.ops = NULL;
253 ei->socket.sk = NULL;
254 ei->socket.file = NULL;
1da177e4
LT
255
256 return &ei->vfs_inode;
257}
258
259static void sock_destroy_inode(struct inode *inode)
260{
261 kmem_cache_free(sock_inode_cachep,
262 container_of(inode, struct socket_alloc, vfs_inode));
263}
264
4ba9b9d0 265static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 266{
89bddce5 267 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 268
a35afb83 269 inode_init_once(&ei->vfs_inode);
1da177e4 270}
89bddce5 271
1da177e4
LT
272static int init_inodecache(void)
273{
274 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
275 sizeof(struct socket_alloc),
276 0,
277 (SLAB_HWCACHE_ALIGN |
278 SLAB_RECLAIM_ACCOUNT |
279 SLAB_MEM_SPREAD),
20c2df83 280 init_once);
1da177e4
LT
281 if (sock_inode_cachep == NULL)
282 return -ENOMEM;
283 return 0;
284}
285
286static struct super_operations sockfs_ops = {
287 .alloc_inode = sock_alloc_inode,
288 .destroy_inode =sock_destroy_inode,
289 .statfs = simple_statfs,
290};
291
454e2398 292static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
293 int flags, const char *dev_name, void *data,
294 struct vfsmount *mnt)
1da177e4 295{
454e2398
DH
296 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
297 mnt);
1da177e4
LT
298}
299
ba89966c 300static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
301
302static struct file_system_type sock_fs_type = {
303 .name = "sockfs",
304 .get_sb = sockfs_get_sb,
305 .kill_sb = kill_anon_super,
306};
89bddce5 307
1da177e4
LT
308static int sockfs_delete_dentry(struct dentry *dentry)
309{
304e61e6
ED
310 /*
311 * At creation time, we pretended this dentry was hashed
312 * (by clearing DCACHE_UNHASHED bit in d_flags)
313 * At delete time, we restore the truth : not hashed.
314 * (so that dput() can proceed correctly)
315 */
316 dentry->d_flags |= DCACHE_UNHASHED;
317 return 0;
1da177e4 318}
c23fbb6b
ED
319
320/*
321 * sockfs_dname() is called from d_path().
322 */
323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
324{
325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
326 dentry->d_inode->i_ino);
327}
328
1da177e4 329static struct dentry_operations sockfs_dentry_operations = {
89bddce5 330 .d_delete = sockfs_delete_dentry,
c23fbb6b 331 .d_dname = sockfs_dname,
1da177e4
LT
332};
333
334/*
335 * Obtains the first available file descriptor and sets it up for use.
336 *
39d8c1b6
DM
337 * These functions create file structures and maps them to fd space
338 * of the current process. On success it returns file descriptor
1da177e4
LT
339 * and file struct implicitly stored in sock->file.
340 * Note that another thread may close file descriptor before we return
341 * from this function. We use the fact that now we do not refer
342 * to socket after mapping. If one day we will need it, this
343 * function will increment ref. count on file by 1.
344 *
345 * In any case returned fd MAY BE not valid!
346 * This race condition is unavoidable
347 * with shared fd spaces, we cannot solve it inside kernel,
348 * but we take care of internal coherence yet.
349 */
350
39d8c1b6 351static int sock_alloc_fd(struct file **filep)
1da177e4
LT
352{
353 int fd;
1da177e4
LT
354
355 fd = get_unused_fd();
39d8c1b6 356 if (likely(fd >= 0)) {
1da177e4
LT
357 struct file *file = get_empty_filp();
358
39d8c1b6
DM
359 *filep = file;
360 if (unlikely(!file)) {
1da177e4 361 put_unused_fd(fd);
39d8c1b6 362 return -ENFILE;
1da177e4 363 }
39d8c1b6
DM
364 } else
365 *filep = NULL;
366 return fd;
367}
1da177e4 368
39d8c1b6
DM
369static int sock_attach_fd(struct socket *sock, struct file *file)
370{
ce8d2cdf 371 struct dentry *dentry;
c23fbb6b 372 struct qstr name = { .name = "" };
39d8c1b6 373
ce8d2cdf
DH
374 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!dentry))
39d8c1b6
DM
376 return -ENOMEM;
377
ce8d2cdf 378 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
379 /*
380 * We dont want to push this dentry into global dentry hash table.
381 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
382 * This permits a working /proc/$pid/fd/XXX on sockets
383 */
ce8d2cdf
DH
384 dentry->d_flags &= ~DCACHE_UNHASHED;
385 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
386
387 sock->file = file;
ce8d2cdf
DH
388 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
389 &socket_file_ops);
390 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
391 file->f_flags = O_RDWR;
392 file->f_pos = 0;
393 file->private_data = sock;
1da177e4 394
39d8c1b6
DM
395 return 0;
396}
397
/*
 *	Allocate a descriptor and file for @sock, attach them and install the
 *	file in the current process's descriptor table.
 *
 *	Returns the new descriptor, or a negative error from sock_alloc_fd()
 *	or sock_attach_fd().  If attaching fails, both the file and the
 *	descriptor are released before returning.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
505static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
506{
507 return -ENXIO;
508}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
92f37fd2
ED
596/*
597 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
598 */
599void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
600 struct sk_buff *skb)
601{
602 ktime_t kt = skb->tstamp;
603
604 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
605 struct timeval tv;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 tv = ktime_to_timeval(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
613 } else {
614 struct timespec ts;
615 /* Race occurred between timestamp enabling and packet
616 receiving. Fill in the current time for now. */
617 if (kt.tv64 == 0)
618 kt = ktime_get_real();
619 skb->tstamp = kt;
620 ts = ktime_to_timespec(kt);
621 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
622 }
623}
624
7c81fd8b
ACM
625EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
626
89bddce5 627static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
628 struct msghdr *msg, size_t size, int flags)
629{
630 int err;
631 struct sock_iocb *si = kiocb_to_siocb(iocb);
632
633 si->sock = sock;
634 si->scm = NULL;
635 si->msg = msg;
636 si->size = size;
637 si->flags = flags;
638
639 err = security_socket_recvmsg(sock, msg, size, flags);
640 if (err)
641 return err;
642
643 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
644}
645
89bddce5 646int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
647 size_t size, int flags)
648{
649 struct kiocb iocb;
650 struct sock_iocb siocb;
651 int ret;
652
89bddce5 653 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
654 iocb.private = &siocb;
655 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
656 if (-EIOCBQUEUED == ret)
657 ret = wait_on_sync_kiocb(&iocb);
658 return ret;
659}
660
89bddce5
SH
661int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
662 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
663{
664 mm_segment_t oldfs = get_fs();
665 int result;
666
667 set_fs(KERNEL_DS);
668 /*
669 * the following is safe, since for compiler definitions of kvec and
670 * iovec are identical, yielding the same in-core layout and alignment
671 */
89bddce5 672 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
673 result = sock_recvmsg(sock, msg, size, flags);
674 set_fs(oldfs);
675 return result;
676}
677
678static void sock_aio_dtor(struct kiocb *iocb)
679{
680 kfree(iocb->private);
681}
682
ce1d4d3e
CH
683static ssize_t sock_sendpage(struct file *file, struct page *page,
684 int offset, size_t size, loff_t *ppos, int more)
1da177e4 685{
1da177e4
LT
686 struct socket *sock;
687 int flags;
688
ce1d4d3e
CH
689 sock = file->private_data;
690
691 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
692 if (more)
693 flags |= MSG_MORE;
694
695 return sock->ops->sendpage(sock, page, offset, size, flags);
696}
1da177e4 697
9c55e01c
JA
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
704 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
705}
706
ce1d4d3e 707static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 708 struct sock_iocb *siocb)
ce1d4d3e
CH
709{
710 if (!is_sync_kiocb(iocb)) {
711 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
712 if (!siocb)
713 return NULL;
1da177e4
LT
714 iocb->ki_dtor = sock_aio_dtor;
715 }
1da177e4 716
ce1d4d3e 717 siocb->kiocb = iocb;
ce1d4d3e
CH
718 iocb->private = siocb;
719 return siocb;
1da177e4
LT
720}
721
ce1d4d3e 722static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
723 struct file *file, const struct iovec *iov,
724 unsigned long nr_segs)
ce1d4d3e
CH
725{
726 struct socket *sock = file->private_data;
727 size_t size = 0;
728 int i;
1da177e4 729
89bddce5
SH
730 for (i = 0; i < nr_segs; i++)
731 size += iov[i].iov_len;
1da177e4 732
ce1d4d3e
CH
733 msg->msg_name = NULL;
734 msg->msg_namelen = 0;
735 msg->msg_control = NULL;
736 msg->msg_controllen = 0;
89bddce5 737 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
738 msg->msg_iovlen = nr_segs;
739 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
740
741 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
742}
743
027445c3
BP
744static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
745 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
746{
747 struct sock_iocb siocb, *x;
748
1da177e4
LT
749 if (pos != 0)
750 return -ESPIPE;
027445c3
BP
751
752 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
753 return 0;
754
027445c3
BP
755
756 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
757 if (!x)
758 return -ENOMEM;
027445c3 759 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
760}
761
ce1d4d3e 762static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
763 struct file *file, const struct iovec *iov,
764 unsigned long nr_segs)
1da177e4 765{
ce1d4d3e
CH
766 struct socket *sock = file->private_data;
767 size_t size = 0;
768 int i;
1da177e4 769
89bddce5
SH
770 for (i = 0; i < nr_segs; i++)
771 size += iov[i].iov_len;
1da177e4 772
ce1d4d3e
CH
773 msg->msg_name = NULL;
774 msg->msg_namelen = 0;
775 msg->msg_control = NULL;
776 msg->msg_controllen = 0;
89bddce5 777 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
778 msg->msg_iovlen = nr_segs;
779 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
780 if (sock->type == SOCK_SEQPACKET)
781 msg->msg_flags |= MSG_EOR;
1da177e4 782
ce1d4d3e 783 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
784}
785
027445c3
BP
786static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
787 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
788{
789 struct sock_iocb siocb, *x;
1da177e4 790
ce1d4d3e
CH
791 if (pos != 0)
792 return -ESPIPE;
027445c3 793
027445c3 794 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
795 if (!x)
796 return -ENOMEM;
1da177e4 797
027445c3 798 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
799}
800
1da177e4
LT
801/*
802 * Atomic setting of ioctl hooks to avoid race
803 * with module unload.
804 */
805
4a3e2f71 806static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 807static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 808
881d966b 809void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 810{
4a3e2f71 811 mutex_lock(&br_ioctl_mutex);
1da177e4 812 br_ioctl_hook = hook;
4a3e2f71 813 mutex_unlock(&br_ioctl_mutex);
1da177e4 814}
89bddce5 815
1da177e4
LT
816EXPORT_SYMBOL(brioctl_set);
817
4a3e2f71 818static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 819static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 820
881d966b 821void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 822{
4a3e2f71 823 mutex_lock(&vlan_ioctl_mutex);
1da177e4 824 vlan_ioctl_hook = hook;
4a3e2f71 825 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 826}
89bddce5 827
1da177e4
LT
828EXPORT_SYMBOL(vlan_ioctl_set);
829
4a3e2f71 830static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 831static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 832
89bddce5 833void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 834{
4a3e2f71 835 mutex_lock(&dlci_ioctl_mutex);
1da177e4 836 dlci_ioctl_hook = hook;
4a3e2f71 837 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 838}
89bddce5 839
1da177e4
LT
840EXPORT_SYMBOL(dlci_ioctl_set);
841
842/*
843 * With an ioctl, arg may well be a user mode pointer, but we don't know
844 * what to do with it - that's up to the protocol still.
845 */
846
847static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
848{
849 struct socket *sock;
881d966b 850 struct sock *sk;
1da177e4
LT
851 void __user *argp = (void __user *)arg;
852 int pid, err;
881d966b 853 struct net *net;
1da177e4 854
b69aee04 855 sock = file->private_data;
881d966b
EB
856 sk = sock->sk;
857 net = sk->sk_net;
1da177e4 858 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 859 err = dev_ioctl(net, cmd, argp);
1da177e4 860 } else
d86b5e0e 861#ifdef CONFIG_WIRELESS_EXT
1da177e4 862 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
89bddce5
SH
865#endif /* CONFIG_WIRELESS_EXT */
866 switch (cmd) {
1da177e4
LT
867 case FIOSETOWN:
868 case SIOCSPGRP:
869 err = -EFAULT;
870 if (get_user(pid, (int __user *)argp))
871 break;
872 err = f_setown(sock->file, pid, 1);
873 break;
874 case FIOGETOWN:
875 case SIOCGPGRP:
609d7fa9 876 err = put_user(f_getown(sock->file),
89bddce5 877 (int __user *)argp);
1da177e4
LT
878 break;
879 case SIOCGIFBR:
880 case SIOCSIFBR:
881 case SIOCBRADDBR:
882 case SIOCBRDELBR:
883 err = -ENOPKG;
884 if (!br_ioctl_hook)
885 request_module("bridge");
886
4a3e2f71 887 mutex_lock(&br_ioctl_mutex);
89bddce5 888 if (br_ioctl_hook)
881d966b 889 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 890 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
891 break;
892 case SIOCGIFVLAN:
893 case SIOCSIFVLAN:
894 err = -ENOPKG;
895 if (!vlan_ioctl_hook)
896 request_module("8021q");
897
4a3e2f71 898 mutex_lock(&vlan_ioctl_mutex);
1da177e4 899 if (vlan_ioctl_hook)
881d966b 900 err = vlan_ioctl_hook(net, argp);
4a3e2f71 901 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 902 break;
1da177e4
LT
903 case SIOCADDDLCI:
904 case SIOCDELDLCI:
905 err = -ENOPKG;
906 if (!dlci_ioctl_hook)
907 request_module("dlci");
908
909 if (dlci_ioctl_hook) {
4a3e2f71 910 mutex_lock(&dlci_ioctl_mutex);
1da177e4 911 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 912 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
913 }
914 break;
915 default:
916 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
917
918 /*
919 * If this ioctl is unknown try to hand it down
920 * to the NIC driver.
921 */
922 if (err == -ENOIOCTLCMD)
881d966b 923 err = dev_ioctl(net, cmd, argp);
1da177e4 924 break;
89bddce5 925 }
1da177e4
LT
926 return err;
927}
928
929int sock_create_lite(int family, int type, int protocol, struct socket **res)
930{
931 int err;
932 struct socket *sock = NULL;
89bddce5 933
1da177e4
LT
934 err = security_socket_create(family, type, protocol, 1);
935 if (err)
936 goto out;
937
938 sock = sock_alloc();
939 if (!sock) {
940 err = -ENOMEM;
941 goto out;
942 }
943
1da177e4 944 sock->type = type;
7420ed23
VY
945 err = security_socket_post_create(sock, family, type, protocol, 1);
946 if (err)
947 goto out_release;
948
1da177e4
LT
949out:
950 *res = sock;
951 return err;
7420ed23
VY
952out_release:
953 sock_release(sock);
954 sock = NULL;
955 goto out;
1da177e4
LT
956}
957
958/* No kernel lock held - perfect */
89bddce5 959static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
960{
961 struct socket *sock;
962
963 /*
89bddce5 964 * We can't return errors to poll, so it's either yes or no.
1da177e4 965 */
b69aee04 966 sock = file->private_data;
1da177e4
LT
967 return sock->ops->poll(file, sock, wait);
968}
969
89bddce5 970static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 971{
b69aee04 972 struct socket *sock = file->private_data;
1da177e4
LT
973
974 return sock->ops->mmap(file, sock, vma);
975}
976
20380731 977static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
978{
979 /*
89bddce5
SH
980 * It was possible the inode is NULL we were
981 * closing an unfinished socket.
1da177e4
LT
982 */
983
89bddce5 984 if (!inode) {
1da177e4
LT
985 printk(KERN_DEBUG "sock_close: NULL inode\n");
986 return 0;
987 }
988 sock_fasync(-1, filp, 0);
989 sock_release(SOCKET_I(inode));
990 return 0;
991}
992
993/*
994 * Update the socket async list
995 *
996 * Fasync_list locking strategy.
997 *
998 * 1. fasync_list is modified only under process context socket lock
999 * i.e. under semaphore.
1000 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1001 * or under socket lock.
1002 * 3. fasync_list can be used from softirq context, so that
1003 * modification under socket lock have to be enhanced with
1004 * write_lock_bh(&sk->sk_callback_lock).
1005 * --ANK (990710)
1006 */
1007
1008static int sock_fasync(int fd, struct file *filp, int on)
1009{
89bddce5 1010 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1011 struct socket *sock;
1012 struct sock *sk;
1013
89bddce5 1014 if (on) {
8b3a7005 1015 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1016 if (fna == NULL)
1da177e4
LT
1017 return -ENOMEM;
1018 }
1019
b69aee04 1020 sock = filp->private_data;
1da177e4 1021
89bddce5
SH
1022 sk = sock->sk;
1023 if (sk == NULL) {
1da177e4
LT
1024 kfree(fna);
1025 return -EINVAL;
1026 }
1027
1028 lock_sock(sk);
1029
89bddce5 1030 prev = &(sock->fasync_list);
1da177e4 1031
89bddce5
SH
1032 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1033 if (fa->fa_file == filp)
1da177e4
LT
1034 break;
1035
89bddce5
SH
1036 if (on) {
1037 if (fa != NULL) {
1da177e4 1038 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1039 fa->fa_fd = fd;
1da177e4
LT
1040 write_unlock_bh(&sk->sk_callback_lock);
1041
1042 kfree(fna);
1043 goto out;
1044 }
89bddce5
SH
1045 fna->fa_file = filp;
1046 fna->fa_fd = fd;
1047 fna->magic = FASYNC_MAGIC;
1048 fna->fa_next = sock->fasync_list;
1da177e4 1049 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1050 sock->fasync_list = fna;
1da177e4 1051 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1052 } else {
1053 if (fa != NULL) {
1da177e4 1054 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1055 *prev = fa->fa_next;
1da177e4
LT
1056 write_unlock_bh(&sk->sk_callback_lock);
1057 kfree(fa);
1058 }
1059 }
1060
1061out:
1062 release_sock(sock->sk);
1063 return 0;
1064}
1065
1066/* This function may be called only under socket lock or callback_lock */
1067
1068int sock_wake_async(struct socket *sock, int how, int band)
1069{
1070 if (!sock || !sock->fasync_list)
1071 return -1;
89bddce5 1072 switch (how) {
1da177e4 1073 case 1:
89bddce5 1074
1da177e4
LT
1075 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1076 break;
1077 goto call_kill;
1078 case 2:
1079 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1080 break;
1081 /* fall through */
1082 case 0:
89bddce5 1083call_kill:
1da177e4
LT
1084 __kill_fasync(sock->fasync_list, SIGIO, band);
1085 break;
1086 case 3:
1087 __kill_fasync(sock->fasync_list, SIGURG, band);
1088 }
1089 return 0;
1090}
1091
1b8d7ae4 1092static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1093 struct socket **res, int kern)
1da177e4
LT
1094{
1095 int err;
1096 struct socket *sock;
55737fda 1097 const struct net_proto_family *pf;
1da177e4
LT
1098
1099 /*
89bddce5 1100 * Check protocol is in range
1da177e4
LT
1101 */
1102 if (family < 0 || family >= NPROTO)
1103 return -EAFNOSUPPORT;
1104 if (type < 0 || type >= SOCK_MAX)
1105 return -EINVAL;
1106
1107 /* Compatibility.
1108
1109 This uglymoron is moved from INET layer to here to avoid
1110 deadlock in module load.
1111 */
1112 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1113 static int warned;
1da177e4
LT
1114 if (!warned) {
1115 warned = 1;
89bddce5
SH
1116 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1117 current->comm);
1da177e4
LT
1118 }
1119 family = PF_PACKET;
1120 }
1121
1122 err = security_socket_create(family, type, protocol, kern);
1123 if (err)
1124 return err;
89bddce5 1125
55737fda
SH
1126 /*
1127 * Allocate the socket and allow the family to set things up. if
1128 * the protocol is 0, the family is instructed to select an appropriate
1129 * default.
1130 */
1131 sock = sock_alloc();
1132 if (!sock) {
1133 if (net_ratelimit())
1134 printk(KERN_WARNING "socket: no more sockets\n");
1135 return -ENFILE; /* Not exactly a match, but its the
1136 closest posix thing */
1137 }
1138
1139 sock->type = type;
1140
1da177e4 1141#if defined(CONFIG_KMOD)
89bddce5
SH
1142 /* Attempt to load a protocol module if the find failed.
1143 *
1144 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1145 * requested real, full-featured networking support upon configuration.
1146 * Otherwise module support will break!
1147 */
55737fda 1148 if (net_families[family] == NULL)
89bddce5 1149 request_module("net-pf-%d", family);
1da177e4
LT
1150#endif
1151
55737fda
SH
1152 rcu_read_lock();
1153 pf = rcu_dereference(net_families[family]);
1154 err = -EAFNOSUPPORT;
1155 if (!pf)
1156 goto out_release;
1da177e4
LT
1157
1158 /*
1159 * We will call the ->create function, that possibly is in a loadable
1160 * module, so we have to bump that loadable module refcnt first.
1161 */
55737fda 1162 if (!try_module_get(pf->owner))
1da177e4
LT
1163 goto out_release;
1164
55737fda
SH
1165 /* Now protected by module ref count */
1166 rcu_read_unlock();
1167
1b8d7ae4 1168 err = pf->create(net, sock, protocol);
55737fda 1169 if (err < 0)
1da177e4 1170 goto out_module_put;
a79af59e 1171
1da177e4
LT
1172 /*
1173 * Now to bump the refcnt of the [loadable] module that owns this
1174 * socket at sock_release time we decrement its refcnt.
1175 */
55737fda
SH
1176 if (!try_module_get(sock->ops->owner))
1177 goto out_module_busy;
1178
1da177e4
LT
1179 /*
1180 * Now that we're done with the ->create function, the [loadable]
1181 * module can have its refcnt decremented
1182 */
55737fda 1183 module_put(pf->owner);
7420ed23
VY
1184 err = security_socket_post_create(sock, family, type, protocol, kern);
1185 if (err)
3b185525 1186 goto out_sock_release;
55737fda 1187 *res = sock;
1da177e4 1188
55737fda
SH
1189 return 0;
1190
1191out_module_busy:
1192 err = -EAFNOSUPPORT;
1da177e4 1193out_module_put:
55737fda
SH
1194 sock->ops = NULL;
1195 module_put(pf->owner);
1196out_sock_release:
1da177e4 1197 sock_release(sock);
55737fda
SH
1198 return err;
1199
1200out_release:
1201 rcu_read_unlock();
1202 goto out_sock_release;
1da177e4
LT
1203}
1204
1205int sock_create(int family, int type, int protocol, struct socket **res)
1206{
1b8d7ae4 1207 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1208}
1209
1210int sock_create_kern(int family, int type, int protocol, struct socket **res)
1211{
1b8d7ae4 1212 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1213}
1214
1215asmlinkage long sys_socket(int family, int type, int protocol)
1216{
1217 int retval;
1218 struct socket *sock;
1219
1220 retval = sock_create(family, type, protocol, &sock);
1221 if (retval < 0)
1222 goto out;
1223
1224 retval = sock_map_fd(sock);
1225 if (retval < 0)
1226 goto out_release;
1227
1228out:
1229 /* It may be already another descriptor 8) Not kernel problem. */
1230 return retval;
1231
1232out_release:
1233 sock_release(sock);
1234 return retval;
1235}
1236
1237/*
1238 * Create a pair of connected sockets.
1239 */
1240
89bddce5
SH
1241asmlinkage long sys_socketpair(int family, int type, int protocol,
1242 int __user *usockvec)
1da177e4
LT
1243{
1244 struct socket *sock1, *sock2;
1245 int fd1, fd2, err;
db349509 1246 struct file *newfile1, *newfile2;
1da177e4
LT
1247
1248 /*
1249 * Obtain the first socket and check if the underlying protocol
1250 * supports the socketpair call.
1251 */
1252
1253 err = sock_create(family, type, protocol, &sock1);
1254 if (err < 0)
1255 goto out;
1256
1257 err = sock_create(family, type, protocol, &sock2);
1258 if (err < 0)
1259 goto out_release_1;
1260
1261 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1262 if (err < 0)
1da177e4
LT
1263 goto out_release_both;
1264
db349509 1265 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1266 if (unlikely(fd1 < 0)) {
1267 err = fd1;
db349509 1268 goto out_release_both;
bf3c23d1 1269 }
1da177e4 1270
db349509
AV
1271 fd2 = sock_alloc_fd(&newfile2);
1272 if (unlikely(fd2 < 0)) {
bf3c23d1 1273 err = fd2;
db349509
AV
1274 put_filp(newfile1);
1275 put_unused_fd(fd1);
1da177e4 1276 goto out_release_both;
db349509 1277 }
1da177e4 1278
db349509
AV
1279 err = sock_attach_fd(sock1, newfile1);
1280 if (unlikely(err < 0)) {
1281 goto out_fd2;
1282 }
1283
1284 err = sock_attach_fd(sock2, newfile2);
1285 if (unlikely(err < 0)) {
1286 fput(newfile1);
1287 goto out_fd1;
1288 }
1289
1290 err = audit_fd_pair(fd1, fd2);
1291 if (err < 0) {
1292 fput(newfile1);
1293 fput(newfile2);
1294 goto out_fd;
1295 }
1da177e4 1296
db349509
AV
1297 fd_install(fd1, newfile1);
1298 fd_install(fd2, newfile2);
1da177e4
LT
1299 /* fd1 and fd2 may be already another descriptors.
1300 * Not kernel problem.
1301 */
1302
89bddce5 1303 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1304 if (!err)
1305 err = put_user(fd2, &usockvec[1]);
1306 if (!err)
1307 return 0;
1308
1309 sys_close(fd2);
1310 sys_close(fd1);
1311 return err;
1312
1da177e4 1313out_release_both:
89bddce5 1314 sock_release(sock2);
1da177e4 1315out_release_1:
89bddce5 1316 sock_release(sock1);
1da177e4
LT
1317out:
1318 return err;
db349509
AV
1319
1320out_fd2:
1321 put_filp(newfile1);
1322 sock_release(sock1);
1323out_fd1:
1324 put_filp(newfile2);
1325 sock_release(sock2);
1326out_fd:
1327 put_unused_fd(fd1);
1328 put_unused_fd(fd2);
1329 goto out;
1da177e4
LT
1330}
1331
1da177e4
LT
1332/*
1333 * Bind a name to a socket. Nothing much to do here since it's
1334 * the protocol's responsibility to handle the local address.
1335 *
1336 * We move the socket address to kernel space before we call
1337 * the protocol layer (having also checked the address is ok).
1338 */
1339
1340asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1341{
1342 struct socket *sock;
1343 char address[MAX_SOCK_ADDR];
6cb153ca 1344 int err, fput_needed;
1da177e4 1345
89bddce5 1346 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1347 if (sock) {
89bddce5
SH
1348 err = move_addr_to_kernel(umyaddr, addrlen, address);
1349 if (err >= 0) {
1350 err = security_socket_bind(sock,
1351 (struct sockaddr *)address,
1352 addrlen);
6cb153ca
BL
1353 if (!err)
1354 err = sock->ops->bind(sock,
89bddce5
SH
1355 (struct sockaddr *)
1356 address, addrlen);
1da177e4 1357 }
6cb153ca 1358 fput_light(sock->file, fput_needed);
89bddce5 1359 }
1da177e4
LT
1360 return err;
1361}
1362
1da177e4
LT
1363/*
1364 * Perform a listen. Basically, we allow the protocol to do anything
1365 * necessary for a listen, and if that works, we mark the socket as
1366 * ready for listening.
1367 */
1368
7a42c217 1369int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1370
1371asmlinkage long sys_listen(int fd, int backlog)
1372{
1373 struct socket *sock;
6cb153ca 1374 int err, fput_needed;
89bddce5
SH
1375
1376 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1377 if (sock) {
1378 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1379 backlog = sysctl_somaxconn;
1380
1381 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1382 if (!err)
1383 err = sock->ops->listen(sock, backlog);
1da177e4 1384
6cb153ca 1385 fput_light(sock->file, fput_needed);
1da177e4
LT
1386 }
1387 return err;
1388}
1389
1da177e4
LT
1390/*
1391 * For accept, we attempt to create a new socket, set up the link
1392 * with the client, wake up the client, then return the new
1393 * connected fd. We collect the address of the connector in kernel
1394 * space and move it to user at the very end. This is unclean because
1395 * we open the socket then return an error.
1396 *
1397 * 1003.1g adds the ability to recvmsg() to query connection pending
1398 * status to recvmsg. We need to add that support in a way that's
1399 * clean when we restructure accept also.
1400 */
1401
89bddce5
SH
1402asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1403 int __user *upeer_addrlen)
1da177e4
LT
1404{
1405 struct socket *sock, *newsock;
39d8c1b6 1406 struct file *newfile;
6cb153ca 1407 int err, len, newfd, fput_needed;
1da177e4
LT
1408 char address[MAX_SOCK_ADDR];
1409
6cb153ca 1410 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1411 if (!sock)
1412 goto out;
1413
1414 err = -ENFILE;
89bddce5 1415 if (!(newsock = sock_alloc()))
1da177e4
LT
1416 goto out_put;
1417
1418 newsock->type = sock->type;
1419 newsock->ops = sock->ops;
1420
1da177e4
LT
1421 /*
1422 * We don't need try_module_get here, as the listening socket (sock)
1423 * has the protocol module (sock->ops->owner) held.
1424 */
1425 __module_get(newsock->ops->owner);
1426
39d8c1b6
DM
1427 newfd = sock_alloc_fd(&newfile);
1428 if (unlikely(newfd < 0)) {
1429 err = newfd;
9a1875e6
DM
1430 sock_release(newsock);
1431 goto out_put;
39d8c1b6
DM
1432 }
1433
1434 err = sock_attach_fd(newsock, newfile);
1435 if (err < 0)
79f4f642 1436 goto out_fd_simple;
39d8c1b6 1437
a79af59e
FF
1438 err = security_socket_accept(sock, newsock);
1439 if (err)
39d8c1b6 1440 goto out_fd;
a79af59e 1441
1da177e4
LT
1442 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1443 if (err < 0)
39d8c1b6 1444 goto out_fd;
1da177e4
LT
1445
1446 if (upeer_sockaddr) {
89bddce5
SH
1447 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1448 &len, 2) < 0) {
1da177e4 1449 err = -ECONNABORTED;
39d8c1b6 1450 goto out_fd;
1da177e4 1451 }
89bddce5
SH
1452 err = move_addr_to_user(address, len, upeer_sockaddr,
1453 upeer_addrlen);
1da177e4 1454 if (err < 0)
39d8c1b6 1455 goto out_fd;
1da177e4
LT
1456 }
1457
1458 /* File flags are not inherited via accept() unlike another OSes. */
1459
39d8c1b6
DM
1460 fd_install(newfd, newfile);
1461 err = newfd;
1da177e4
LT
1462
1463 security_socket_post_accept(sock, newsock);
1464
1465out_put:
6cb153ca 1466 fput_light(sock->file, fput_needed);
1da177e4
LT
1467out:
1468 return err;
79f4f642
AD
1469out_fd_simple:
1470 sock_release(newsock);
1471 put_filp(newfile);
1472 put_unused_fd(newfd);
1473 goto out_put;
39d8c1b6 1474out_fd:
9606a216 1475 fput(newfile);
39d8c1b6 1476 put_unused_fd(newfd);
1da177e4
LT
1477 goto out_put;
1478}
1479
1da177e4
LT
1480/*
1481 * Attempt to connect to a socket with the server address. The address
1482 * is in user space so we verify it is OK and move it to kernel space.
1483 *
1484 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1485 * break bindings
1486 *
1487 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1488 * other SEQPACKET protocols that take time to connect() as it doesn't
1489 * include the -EINPROGRESS status for such sockets.
1490 */
1491
89bddce5
SH
1492asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1493 int addrlen)
1da177e4
LT
1494{
1495 struct socket *sock;
1496 char address[MAX_SOCK_ADDR];
6cb153ca 1497 int err, fput_needed;
1da177e4 1498
6cb153ca 1499 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1500 if (!sock)
1501 goto out;
1502 err = move_addr_to_kernel(uservaddr, addrlen, address);
1503 if (err < 0)
1504 goto out_put;
1505
89bddce5
SH
1506 err =
1507 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1508 if (err)
1509 goto out_put;
1510
89bddce5 1511 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1512 sock->file->f_flags);
1513out_put:
6cb153ca 1514 fput_light(sock->file, fput_needed);
1da177e4
LT
1515out:
1516 return err;
1517}
1518
1519/*
1520 * Get the local address ('name') of a socket object. Move the obtained
1521 * name to user space.
1522 */
1523
89bddce5
SH
1524asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1525 int __user *usockaddr_len)
1da177e4
LT
1526{
1527 struct socket *sock;
1528 char address[MAX_SOCK_ADDR];
6cb153ca 1529 int len, err, fput_needed;
89bddce5 1530
6cb153ca 1531 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1532 if (!sock)
1533 goto out;
1534
1535 err = security_socket_getsockname(sock);
1536 if (err)
1537 goto out_put;
1538
1539 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1540 if (err)
1541 goto out_put;
1542 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1543
1544out_put:
6cb153ca 1545 fput_light(sock->file, fput_needed);
1da177e4
LT
1546out:
1547 return err;
1548}
1549
1550/*
1551 * Get the remote address ('name') of a socket object. Move the obtained
1552 * name to user space.
1553 */
1554
89bddce5
SH
1555asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1556 int __user *usockaddr_len)
1da177e4
LT
1557{
1558 struct socket *sock;
1559 char address[MAX_SOCK_ADDR];
6cb153ca 1560 int len, err, fput_needed;
1da177e4 1561
89bddce5
SH
1562 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1563 if (sock != NULL) {
1da177e4
LT
1564 err = security_socket_getpeername(sock);
1565 if (err) {
6cb153ca 1566 fput_light(sock->file, fput_needed);
1da177e4
LT
1567 return err;
1568 }
1569
89bddce5
SH
1570 err =
1571 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1572 1);
1da177e4 1573 if (!err)
89bddce5
SH
1574 err = move_addr_to_user(address, len, usockaddr,
1575 usockaddr_len);
6cb153ca 1576 fput_light(sock->file, fput_needed);
1da177e4
LT
1577 }
1578 return err;
1579}
1580
1581/*
1582 * Send a datagram to a given address. We move the address into kernel
1583 * space and check the user space data area is readable before invoking
1584 * the protocol.
1585 */
1586
89bddce5
SH
1587asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1588 unsigned flags, struct sockaddr __user *addr,
1589 int addr_len)
1da177e4
LT
1590{
1591 struct socket *sock;
1592 char address[MAX_SOCK_ADDR];
1593 int err;
1594 struct msghdr msg;
1595 struct iovec iov;
6cb153ca 1596 int fput_needed;
6cb153ca 1597
de0fa95c
PE
1598 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1599 if (!sock)
4387ff75 1600 goto out;
6cb153ca 1601
89bddce5
SH
1602 iov.iov_base = buff;
1603 iov.iov_len = len;
1604 msg.msg_name = NULL;
1605 msg.msg_iov = &iov;
1606 msg.msg_iovlen = 1;
1607 msg.msg_control = NULL;
1608 msg.msg_controllen = 0;
1609 msg.msg_namelen = 0;
6cb153ca 1610 if (addr) {
1da177e4
LT
1611 err = move_addr_to_kernel(addr, addr_len, address);
1612 if (err < 0)
1613 goto out_put;
89bddce5
SH
1614 msg.msg_name = address;
1615 msg.msg_namelen = addr_len;
1da177e4
LT
1616 }
1617 if (sock->file->f_flags & O_NONBLOCK)
1618 flags |= MSG_DONTWAIT;
1619 msg.msg_flags = flags;
1620 err = sock_sendmsg(sock, &msg, len);
1621
89bddce5 1622out_put:
de0fa95c 1623 fput_light(sock->file, fput_needed);
4387ff75 1624out:
1da177e4
LT
1625 return err;
1626}
1627
1628/*
89bddce5 1629 * Send a datagram down a socket.
1da177e4
LT
1630 */
1631
89bddce5 1632asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1633{
1634 return sys_sendto(fd, buff, len, flags, NULL, 0);
1635}
1636
1637/*
89bddce5 1638 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1639 * sender. We verify the buffers are writable and if needed move the
1640 * sender address from kernel to user space.
1641 */
1642
89bddce5
SH
1643asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1644 unsigned flags, struct sockaddr __user *addr,
1645 int __user *addr_len)
1da177e4
LT
1646{
1647 struct socket *sock;
1648 struct iovec iov;
1649 struct msghdr msg;
1650 char address[MAX_SOCK_ADDR];
89bddce5 1651 int err, err2;
6cb153ca
BL
1652 int fput_needed;
1653
de0fa95c 1654 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1655 if (!sock)
de0fa95c 1656 goto out;
1da177e4 1657
89bddce5
SH
1658 msg.msg_control = NULL;
1659 msg.msg_controllen = 0;
1660 msg.msg_iovlen = 1;
1661 msg.msg_iov = &iov;
1662 iov.iov_len = size;
1663 iov.iov_base = ubuf;
1664 msg.msg_name = address;
1665 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1666 if (sock->file->f_flags & O_NONBLOCK)
1667 flags |= MSG_DONTWAIT;
89bddce5 1668 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1669
89bddce5
SH
1670 if (err >= 0 && addr != NULL) {
1671 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1672 if (err2 < 0)
1673 err = err2;
1da177e4 1674 }
de0fa95c
PE
1675
1676 fput_light(sock->file, fput_needed);
4387ff75 1677out:
1da177e4
LT
1678 return err;
1679}
1680
1681/*
89bddce5 1682 * Receive a datagram from a socket.
1da177e4
LT
1683 */
1684
89bddce5
SH
1685asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1686 unsigned flags)
1da177e4
LT
1687{
1688 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1689}
1690
1691/*
1692 * Set a socket option. Because we don't know the option lengths we have
1693 * to pass the user mode parameter for the protocols to sort out.
1694 */
1695
89bddce5
SH
1696asmlinkage long sys_setsockopt(int fd, int level, int optname,
1697 char __user *optval, int optlen)
1da177e4 1698{
6cb153ca 1699 int err, fput_needed;
1da177e4
LT
1700 struct socket *sock;
1701
1702 if (optlen < 0)
1703 return -EINVAL;
89bddce5
SH
1704
1705 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1706 if (sock != NULL) {
1707 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1708 if (err)
1709 goto out_put;
1da177e4
LT
1710
1711 if (level == SOL_SOCKET)
89bddce5
SH
1712 err =
1713 sock_setsockopt(sock, level, optname, optval,
1714 optlen);
1da177e4 1715 else
89bddce5
SH
1716 err =
1717 sock->ops->setsockopt(sock, level, optname, optval,
1718 optlen);
6cb153ca
BL
1719out_put:
1720 fput_light(sock->file, fput_needed);
1da177e4
LT
1721 }
1722 return err;
1723}
1724
1725/*
1726 * Get a socket option. Because we don't know the option lengths we have
1727 * to pass a user mode parameter for the protocols to sort out.
1728 */
1729
89bddce5
SH
1730asmlinkage long sys_getsockopt(int fd, int level, int optname,
1731 char __user *optval, int __user *optlen)
1da177e4 1732{
6cb153ca 1733 int err, fput_needed;
1da177e4
LT
1734 struct socket *sock;
1735
89bddce5
SH
1736 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1737 if (sock != NULL) {
6cb153ca
BL
1738 err = security_socket_getsockopt(sock, level, optname);
1739 if (err)
1740 goto out_put;
1da177e4
LT
1741
1742 if (level == SOL_SOCKET)
89bddce5
SH
1743 err =
1744 sock_getsockopt(sock, level, optname, optval,
1745 optlen);
1da177e4 1746 else
89bddce5
SH
1747 err =
1748 sock->ops->getsockopt(sock, level, optname, optval,
1749 optlen);
6cb153ca
BL
1750out_put:
1751 fput_light(sock->file, fput_needed);
1da177e4
LT
1752 }
1753 return err;
1754}
1755
1da177e4
LT
1756/*
1757 * Shutdown a socket.
1758 */
1759
1760asmlinkage long sys_shutdown(int fd, int how)
1761{
6cb153ca 1762 int err, fput_needed;
1da177e4
LT
1763 struct socket *sock;
1764
89bddce5
SH
1765 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1766 if (sock != NULL) {
1da177e4 1767 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1768 if (!err)
1769 err = sock->ops->shutdown(sock, how);
1770 fput_light(sock->file, fput_needed);
1da177e4
LT
1771 }
1772 return err;
1773}
1774
89bddce5 1775/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1776 * fields which are the same type (int / unsigned) on our platforms.
1777 */
/*
 * These expand in the caller's scope: they expect a local `flags` and,
 * next to the `msg` pointer passed in, a `<msg>_compat` pointer.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1781
1da177e4
LT
1782/*
1783 * BSD sendmsg interface
1784 */
1785
1786asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1787{
89bddce5
SH
1788 struct compat_msghdr __user *msg_compat =
1789 (struct compat_msghdr __user *)msg;
1da177e4
LT
1790 struct socket *sock;
1791 char address[MAX_SOCK_ADDR];
1792 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1793 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1794 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1795 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1796 unsigned char *ctl_buf = ctl;
1797 struct msghdr msg_sys;
1798 int err, ctl_len, iov_size, total_len;
6cb153ca 1799 int fput_needed;
89bddce5 1800
1da177e4
LT
1801 err = -EFAULT;
1802 if (MSG_CMSG_COMPAT & flags) {
1803 if (get_compat_msghdr(&msg_sys, msg_compat))
1804 return -EFAULT;
89bddce5
SH
1805 }
1806 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1807 return -EFAULT;
1808
6cb153ca 1809 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1810 if (!sock)
1da177e4
LT
1811 goto out;
1812
1813 /* do not move before msg_sys is valid */
1814 err = -EMSGSIZE;
1815 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1816 goto out_put;
1817
89bddce5 1818 /* Check whether to allocate the iovec area */
1da177e4
LT
1819 err = -ENOMEM;
1820 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1821 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1822 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1823 if (!iov)
1824 goto out_put;
1825 }
1826
1827 /* This will also move the address data into kernel space */
1828 if (MSG_CMSG_COMPAT & flags) {
1829 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1830 } else
1831 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1832 if (err < 0)
1da177e4
LT
1833 goto out_freeiov;
1834 total_len = err;
1835
1836 err = -ENOBUFS;
1837
1838 if (msg_sys.msg_controllen > INT_MAX)
1839 goto out_freeiov;
89bddce5 1840 ctl_len = msg_sys.msg_controllen;
1da177e4 1841 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1842 err =
1843 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1844 sizeof(ctl));
1da177e4
LT
1845 if (err)
1846 goto out_freeiov;
1847 ctl_buf = msg_sys.msg_control;
8920e8f9 1848 ctl_len = msg_sys.msg_controllen;
1da177e4 1849 } else if (ctl_len) {
89bddce5 1850 if (ctl_len > sizeof(ctl)) {
1da177e4 1851 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1852 if (ctl_buf == NULL)
1da177e4
LT
1853 goto out_freeiov;
1854 }
1855 err = -EFAULT;
1856 /*
1857 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1858 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1859 * checking falls down on this.
1860 */
89bddce5
SH
1861 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1862 ctl_len))
1da177e4
LT
1863 goto out_freectl;
1864 msg_sys.msg_control = ctl_buf;
1865 }
1866 msg_sys.msg_flags = flags;
1867
1868 if (sock->file->f_flags & O_NONBLOCK)
1869 msg_sys.msg_flags |= MSG_DONTWAIT;
1870 err = sock_sendmsg(sock, &msg_sys, total_len);
1871
1872out_freectl:
89bddce5 1873 if (ctl_buf != ctl)
1da177e4
LT
1874 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1875out_freeiov:
1876 if (iov != iovstack)
1877 sock_kfree_s(sock->sk, iov, iov_size);
1878out_put:
6cb153ca 1879 fput_light(sock->file, fput_needed);
89bddce5 1880out:
1da177e4
LT
1881 return err;
1882}
1883
1884/*
1885 * BSD recvmsg interface
1886 */
1887
89bddce5
SH
1888asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1889 unsigned int flags)
1da177e4 1890{
89bddce5
SH
1891 struct compat_msghdr __user *msg_compat =
1892 (struct compat_msghdr __user *)msg;
1da177e4
LT
1893 struct socket *sock;
1894 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1895 struct iovec *iov = iovstack;
1da177e4
LT
1896 struct msghdr msg_sys;
1897 unsigned long cmsg_ptr;
1898 int err, iov_size, total_len, len;
6cb153ca 1899 int fput_needed;
1da177e4
LT
1900
1901 /* kernel mode address */
1902 char addr[MAX_SOCK_ADDR];
1903
1904 /* user mode address pointers */
1905 struct sockaddr __user *uaddr;
1906 int __user *uaddr_len;
89bddce5 1907
1da177e4
LT
1908 if (MSG_CMSG_COMPAT & flags) {
1909 if (get_compat_msghdr(&msg_sys, msg_compat))
1910 return -EFAULT;
89bddce5
SH
1911 }
1912 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1913 return -EFAULT;
1da177e4 1914
6cb153ca 1915 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1916 if (!sock)
1917 goto out;
1918
1919 err = -EMSGSIZE;
1920 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1921 goto out_put;
89bddce5
SH
1922
1923 /* Check whether to allocate the iovec area */
1da177e4
LT
1924 err = -ENOMEM;
1925 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1926 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1927 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1928 if (!iov)
1929 goto out_put;
1930 }
1931
1932 /*
89bddce5
SH
1933 * Save the user-mode address (verify_iovec will change the
1934 * kernel msghdr to use the kernel address space)
1da177e4 1935 */
89bddce5 1936
cfcabdcc 1937 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1938 uaddr_len = COMPAT_NAMELEN(msg);
1939 if (MSG_CMSG_COMPAT & flags) {
1940 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1941 } else
1942 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1943 if (err < 0)
1944 goto out_freeiov;
89bddce5 1945 total_len = err;
1da177e4
LT
1946
1947 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1948 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1949
1da177e4
LT
1950 if (sock->file->f_flags & O_NONBLOCK)
1951 flags |= MSG_DONTWAIT;
1952 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1953 if (err < 0)
1954 goto out_freeiov;
1955 len = err;
1956
1957 if (uaddr != NULL) {
89bddce5
SH
1958 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1959 uaddr_len);
1da177e4
LT
1960 if (err < 0)
1961 goto out_freeiov;
1962 }
37f7f421
DM
1963 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1964 COMPAT_FLAGS(msg));
1da177e4
LT
1965 if (err)
1966 goto out_freeiov;
1967 if (MSG_CMSG_COMPAT & flags)
89bddce5 1968 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1969 &msg_compat->msg_controllen);
1970 else
89bddce5 1971 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1972 &msg->msg_controllen);
1973 if (err)
1974 goto out_freeiov;
1975 err = len;
1976
1977out_freeiov:
1978 if (iov != iovstack)
1979 sock_kfree_s(sock->sk, iov, iov_size);
1980out_put:
6cb153ca 1981 fput_light(sock->file, fput_needed);
1da177e4
LT
1982out:
1983 return err;
1984}
1985
1986#ifdef __ARCH_WANT_SYS_SOCKETCALL
1987
1988/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument block holding n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0),	/* call  0 */
	AL(3),	/* call  1 */
	AL(3),	/* call  2 */
	AL(3),	/* call  3 */
	AL(2),	/* call  4 */
	AL(3),	/* call  5 */
	AL(3),	/* call  6 */
	AL(3),	/* call  7 */
	AL(4),	/* call  8 */
	AL(4),	/* call  9 */
	AL(4),	/* call 10 */
	AL(6),	/* call 11 */
	AL(6),	/* call 12 */
	AL(2),	/* call 13 */
	AL(5),	/* call 14 */
	AL(5),	/* call 15 */
	AL(3),	/* call 16 */
	AL(3)	/* call 17 */
};

#undef AL
1997
1998/*
89bddce5 1999 * System call vectors.
1da177e4
LT
2000 *
2001 * Argument checking cleaned up. Saved 20% in size.
2002 * This function doesn't need to set the kernel lock because
89bddce5 2003 * it is set by the callees.
1da177e4
LT
2004 */
2005
2006asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2007{
2008 unsigned long a[6];
89bddce5 2009 unsigned long a0, a1;
1da177e4
LT
2010 int err;
2011
89bddce5 2012 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2013 return -EINVAL;
2014
2015 /* copy_from_user should be SMP safe. */
2016 if (copy_from_user(a, args, nargs[call]))
2017 return -EFAULT;
3ec3b2fb 2018
89bddce5 2019 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2020 if (err)
2021 return err;
2022
89bddce5
SH
2023 a0 = a[0];
2024 a1 = a[1];
2025
2026 switch (call) {
2027 case SYS_SOCKET:
2028 err = sys_socket(a0, a1, a[2]);
2029 break;
2030 case SYS_BIND:
2031 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2032 break;
2033 case SYS_CONNECT:
2034 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2035 break;
2036 case SYS_LISTEN:
2037 err = sys_listen(a0, a1);
2038 break;
2039 case SYS_ACCEPT:
2040 err =
2041 sys_accept(a0, (struct sockaddr __user *)a1,
2042 (int __user *)a[2]);
2043 break;
2044 case SYS_GETSOCKNAME:
2045 err =
2046 sys_getsockname(a0, (struct sockaddr __user *)a1,
2047 (int __user *)a[2]);
2048 break;
2049 case SYS_GETPEERNAME:
2050 err =
2051 sys_getpeername(a0, (struct sockaddr __user *)a1,
2052 (int __user *)a[2]);
2053 break;
2054 case SYS_SOCKETPAIR:
2055 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2056 break;
2057 case SYS_SEND:
2058 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2059 break;
2060 case SYS_SENDTO:
2061 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2062 (struct sockaddr __user *)a[4], a[5]);
2063 break;
2064 case SYS_RECV:
2065 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2066 break;
2067 case SYS_RECVFROM:
2068 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2069 (struct sockaddr __user *)a[4],
2070 (int __user *)a[5]);
2071 break;
2072 case SYS_SHUTDOWN:
2073 err = sys_shutdown(a0, a1);
2074 break;
2075 case SYS_SETSOCKOPT:
2076 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2077 break;
2078 case SYS_GETSOCKOPT:
2079 err =
2080 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2081 (int __user *)a[4]);
2082 break;
2083 case SYS_SENDMSG:
2084 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2085 break;
2086 case SYS_RECVMSG:
2087 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2088 break;
2089 default:
2090 err = -EINVAL;
2091 break;
1da177e4
LT
2092 }
2093 return err;
2094}
2095
89bddce5 2096#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2097
55737fda
SH
2098/**
2099 * sock_register - add a socket protocol handler
2100 * @ops: description of protocol
2101 *
1da177e4
LT
2102 * This function is called by a protocol handler that wants to
2103 * advertise its address family, and have it linked into the
55737fda
SH
2104 * socket interface. The value ops->family coresponds to the
2105 * socket system call protocol family.
1da177e4 2106 */
f0fd27d4 2107int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2108{
2109 int err;
2110
2111 if (ops->family >= NPROTO) {
89bddce5
SH
2112 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2113 NPROTO);
1da177e4
LT
2114 return -ENOBUFS;
2115 }
55737fda
SH
2116
2117 spin_lock(&net_family_lock);
2118 if (net_families[ops->family])
2119 err = -EEXIST;
2120 else {
89bddce5 2121 net_families[ops->family] = ops;
1da177e4
LT
2122 err = 0;
2123 }
55737fda
SH
2124 spin_unlock(&net_family_lock);
2125
89bddce5 2126 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2127 return err;
2128}
2129
55737fda
SH
2130/**
2131 * sock_unregister - remove a protocol handler
2132 * @family: protocol family to remove
2133 *
1da177e4
LT
2134 * This function is called by a protocol handler that wants to
2135 * remove its address family, and have it unlinked from the
55737fda
SH
2136 * new socket creation.
2137 *
2138 * If protocol handler is a module, then it can use module reference
2139 * counts to protect against new references. If protocol handler is not
2140 * a module then it needs to provide its own protection in
2141 * the ops->create routine.
1da177e4 2142 */
f0fd27d4 2143void sock_unregister(int family)
1da177e4 2144{
f0fd27d4 2145 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2146
55737fda 2147 spin_lock(&net_family_lock);
89bddce5 2148 net_families[family] = NULL;
55737fda
SH
2149 spin_unlock(&net_family_lock);
2150
2151 synchronize_rcu();
2152
89bddce5 2153 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2154}
2155
77d76ea3 2156static int __init sock_init(void)
1da177e4
LT
2157{
2158 /*
89bddce5 2159 * Initialize sock SLAB cache.
1da177e4 2160 */
89bddce5 2161
1da177e4
LT
2162 sk_init();
2163
1da177e4 2164 /*
89bddce5 2165 * Initialize skbuff SLAB cache
1da177e4
LT
2166 */
2167 skb_init();
1da177e4
LT
2168
2169 /*
89bddce5 2170 * Initialize the protocols module.
1da177e4
LT
2171 */
2172
2173 init_inodecache();
2174 register_filesystem(&sock_fs_type);
2175 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2176
2177 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2178 */
2179
2180#ifdef CONFIG_NETFILTER
2181 netfilter_init();
2182#endif
cbeb321a
DM
2183
2184 return 0;
1da177e4
LT
2185}
2186
77d76ea3
AK
2187core_initcall(sock_init); /* early initcall */
2188
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - emit the "sockets: used" line into a seq_file.
 * @seq: seq_file to print into
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and prints the total, reporting 0 if the sum comes out negative.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		in_use += per_cpu(sockets_in_use, cpu);

	/* The sum can be negative; never show less than zero. */
	seq_printf(seq, "sockets: used %d\n", in_use < 0 ? 0 : in_use);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2205
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's protocol ops.
 * @file: file whose private_data holds the struct socket
 * @cmd:  ioctl command
 * @arg:  ioctl argument
 *
 * Returns whatever the protocol's compat_ioctl handler returns, or
 * -ENOIOCTLCMD when the protocol does not provide one.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2219
ac5a488e
SS
2220int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2221{
2222 return sock->ops->bind(sock, addr, addrlen);
2223}
2224
2225int kernel_listen(struct socket *sock, int backlog)
2226{
2227 return sock->ops->listen(sock, backlog);
2228}
2229
2230int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2231{
2232 struct sock *sk = sock->sk;
2233 int err;
2234
2235 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2236 newsock);
2237 if (err < 0)
2238 goto done;
2239
2240 err = sock->ops->accept(sock, *newsock, flags);
2241 if (err < 0) {
2242 sock_release(*newsock);
fa8705b0 2243 *newsock = NULL;
ac5a488e
SS
2244 goto done;
2245 }
2246
2247 (*newsock)->ops = sock->ops;
2248
2249done:
2250 return err;
2251}
2252
2253int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2254 int flags)
ac5a488e
SS
2255{
2256 return sock->ops->connect(sock, addr, addrlen, flags);
2257}
2258
2259int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2260 int *addrlen)
2261{
2262 return sock->ops->getname(sock, addr, addrlen, 0);
2263}
2264
2265int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2266 int *addrlen)
2267{
2268 return sock->ops->getname(sock, addr, addrlen, 1);
2269}
2270
2271int kernel_getsockopt(struct socket *sock, int level, int optname,
2272 char *optval, int *optlen)
2273{
2274 mm_segment_t oldfs = get_fs();
2275 int err;
2276
2277 set_fs(KERNEL_DS);
2278 if (level == SOL_SOCKET)
2279 err = sock_getsockopt(sock, level, optname, optval, optlen);
2280 else
2281 err = sock->ops->getsockopt(sock, level, optname, optval,
2282 optlen);
2283 set_fs(oldfs);
2284 return err;
2285}
2286
2287int kernel_setsockopt(struct socket *sock, int level, int optname,
2288 char *optval, int optlen)
2289{
2290 mm_segment_t oldfs = get_fs();
2291 int err;
2292
2293 set_fs(KERNEL_DS);
2294 if (level == SOL_SOCKET)
2295 err = sock_setsockopt(sock, level, optname, optval, optlen);
2296 else
2297 err = sock->ops->setsockopt(sock, level, optname, optval,
2298 optlen);
2299 set_fs(oldfs);
2300 return err;
2301}
2302
2303int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2304 size_t size, int flags)
2305{
2306 if (sock->ops->sendpage)
2307 return sock->ops->sendpage(sock, page, offset, size, flags);
2308
2309 return sock_no_sendpage(sock, page, offset, size, flags);
2310}
2311
2312int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2313{
2314 mm_segment_t oldfs = get_fs();
2315 int err;
2316
2317 set_fs(KERNEL_DS);
2318 err = sock->ops->ioctl(sock, cmd, arg);
2319 set_fs(oldfs);
2320
2321 return err;
2322}
2323
91cf45f0
TM
2324int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2325{
2326 return sock->ops->shutdown(sock, how);
2327}
2328
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Core socket helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);