[PATCH] Vectorize aio_read/aio_write fileop methods
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
80#include <linux/divert.h>
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4
LT
112static int sock_fasync(int fd, struct file *filp, int on);
113static ssize_t sock_readv(struct file *file, const struct iovec *vector,
114 unsigned long count, loff_t *ppos);
115static ssize_t sock_writev(struct file *file, const struct iovec *vector,
89bddce5 116 unsigned long count, loff_t *ppos);
1da177e4
LT
117static ssize_t sock_sendpage(struct file *file, struct page *page,
118 int offset, size_t size, loff_t *ppos, int more);
119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
125static struct file_operations socket_file_ops = {
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
139 .readv = sock_readv,
140 .writev = sock_writev,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
1da177e4
LT
143};
144
145/*
146 * The protocol list. Each protocol is registered in here.
147 */
148
1da177e4 149static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 150static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 151
1da177e4
LT
152/*
153 * Statistics counters of the socket lists
154 */
155
156static DEFINE_PER_CPU(int, sockets_in_use) = 0;
157
158/*
89bddce5
SH
159 * Support routines.
160 * Move socket addresses back and forth across the kernel/user
161 * divide and look after the messy bits.
1da177e4
LT
162 */
163
/*
 * Upper bound, in bytes, on a socket address moved across the
 * kernel/user boundary by the helpers below.
 *
 * Sizes noted by the original author: 108 for Unix domain, 16 for IP,
 * 16 for IPX, 24 for IPv6, about 80 for AX.25.  The limit must be at
 * least one bigger than the AF_UNIX size (see net/unix/af_unix.c
 * :unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 172
1da177e4
LT
173/**
174 * move_addr_to_kernel - copy a socket address into kernel space
175 * @uaddr: Address in user space
176 * @kaddr: Address in kernel space
177 * @ulen: Length in user space
178 *
179 * The address is copied into kernel space. If the provided address is
180 * too long an error code of -EINVAL is returned. If the copy gives
181 * invalid addresses -EFAULT is returned. On a success 0 is returned.
182 */
183
184int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
185{
89bddce5 186 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 187 return -EINVAL;
89bddce5 188 if (ulen == 0)
1da177e4 189 return 0;
89bddce5 190 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 191 return -EFAULT;
3ec3b2fb 192 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
193}
194
195/**
196 * move_addr_to_user - copy an address to user space
197 * @kaddr: kernel space address
198 * @klen: length of address in kernel
199 * @uaddr: user space address
200 * @ulen: pointer to user length field
201 *
202 * The value pointed to by ulen on entry is the buffer length available.
203 * This is overwritten with the buffer space used. -EINVAL is returned
204 * if an overlong buffer is specified or a negative buffer size. -EFAULT
205 * is returned if either the buffer or the length field are not
206 * accessible.
207 * After copying the data up to the limit the user specifies, the true
208 * length of the data is written over the length limit the user
209 * specified. Zero is returned for a success.
210 */
89bddce5
SH
211
212int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
213 int __user *ulen)
1da177e4
LT
214{
215 int err;
216 int len;
217
89bddce5
SH
218 err = get_user(len, ulen);
219 if (err)
1da177e4 220 return err;
89bddce5
SH
221 if (len > klen)
222 len = klen;
223 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 224 return -EINVAL;
89bddce5 225 if (len) {
d6fe3945
SG
226 if (audit_sockaddr(klen, kaddr))
227 return -ENOMEM;
89bddce5 228 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
229 return -EFAULT;
230 }
231 /*
89bddce5
SH
232 * "fromlen shall refer to the value before truncation.."
233 * 1003.1g
1da177e4
LT
234 */
235 return __put_user(klen, ulen);
236}
237
238#define SOCKFS_MAGIC 0x534F434B
239
89bddce5 240static kmem_cache_t *sock_inode_cachep __read_mostly;
1da177e4
LT
241
242static struct inode *sock_alloc_inode(struct super_block *sb)
243{
244 struct socket_alloc *ei;
89bddce5
SH
245
246 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
1da177e4
LT
247 if (!ei)
248 return NULL;
249 init_waitqueue_head(&ei->socket.wait);
89bddce5 250
1da177e4
LT
251 ei->socket.fasync_list = NULL;
252 ei->socket.state = SS_UNCONNECTED;
253 ei->socket.flags = 0;
254 ei->socket.ops = NULL;
255 ei->socket.sk = NULL;
256 ei->socket.file = NULL;
1da177e4
LT
257
258 return &ei->vfs_inode;
259}
260
261static void sock_destroy_inode(struct inode *inode)
262{
263 kmem_cache_free(sock_inode_cachep,
264 container_of(inode, struct socket_alloc, vfs_inode));
265}
266
89bddce5 267static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1da177e4 268{
89bddce5 269 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 270
89bddce5
SH
271 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
272 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
273 inode_init_once(&ei->vfs_inode);
274}
89bddce5 275
1da177e4
LT
276static int init_inodecache(void)
277{
278 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
279 sizeof(struct socket_alloc),
280 0,
281 (SLAB_HWCACHE_ALIGN |
282 SLAB_RECLAIM_ACCOUNT |
283 SLAB_MEM_SPREAD),
284 init_once,
285 NULL);
1da177e4
LT
286 if (sock_inode_cachep == NULL)
287 return -ENOMEM;
288 return 0;
289}
290
291static struct super_operations sockfs_ops = {
292 .alloc_inode = sock_alloc_inode,
293 .destroy_inode =sock_destroy_inode,
294 .statfs = simple_statfs,
295};
296
454e2398 297static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
298 int flags, const char *dev_name, void *data,
299 struct vfsmount *mnt)
1da177e4 300{
454e2398
DH
301 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
302 mnt);
1da177e4
LT
303}
304
ba89966c 305static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
306
307static struct file_system_type sock_fs_type = {
308 .name = "sockfs",
309 .get_sb = sockfs_get_sb,
310 .kill_sb = kill_anon_super,
311};
89bddce5 312
1da177e4
LT
/* d_delete hook for socket dentries: unconditionally returns 1. */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	const int always_delete = 1;

	return always_delete;
}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and map them into the fd space
325 * of the current process. On success they return the file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
363 this.hash = SOCK_INODE(sock)->i_ino;
364
365 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_dentry))
367 return -ENOMEM;
368
369 file->f_dentry->d_op = &sockfs_dentry_operations;
370 d_add(file->f_dentry, SOCK_INODE(sock));
371 file->f_vfsmnt = mntget(sock_mnt);
372 file->f_mapping = file->f_dentry->d_inode->i_mapping;
373
374 sock->file = file;
375 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
376 file->f_mode = FMODE_READ | FMODE_WRITE;
377 file->f_flags = O_RDWR;
378 file->f_pos = 0;
379 file->private_data = sock;
1da177e4 380
39d8c1b6
DM
381 return 0;
382}
383
/*
 * Allocate a descriptor and file for @sock, attach them, and install the
 * file in the descriptor table.  Returns the descriptor, or a negative
 * errno from sock_alloc_fd() or sock_attach_fd(); on an attach failure
 * both the file and the descriptor are released.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
401
6cb153ca
BL
402static struct socket *sock_from_file(struct file *file, int *err)
403{
404 struct inode *inode;
405 struct socket *sock;
406
407 if (file->f_op == &socket_file_ops)
408 return file->private_data; /* set in sock_map_fd */
409
410 inode = file->f_dentry->d_inode;
411 if (!S_ISSOCK(inode->i_mode)) {
412 *err = -ENOTSOCK;
413 return NULL;
414 }
415
416 sock = SOCKET_I(inode);
417 if (sock->file != file) {
418 printk(KERN_ERR "socki_lookup: socket file changed!\n");
419 sock->file = file;
420 }
421 return sock;
422}
423
1da177e4
LT
424/**
425 * sockfd_lookup - Go from a file number to its socket slot
426 * @fd: file handle
427 * @err: pointer to an error code return
428 *
429 * The file handle passed in is locked and the socket it is bound
430 * too is returned. If an error occurs the err pointer is overwritten
431 * with a negative errno code and NULL is returned. The function checks
432 * for both invalid handles and passing a handle which is not a socket.
433 *
434 * On a success the socket object pointer is returned.
435 */
436
437struct socket *sockfd_lookup(int fd, int *err)
438{
439 struct file *file;
1da177e4
LT
440 struct socket *sock;
441
89bddce5
SH
442 file = fget(fd);
443 if (!file) {
1da177e4
LT
444 *err = -EBADF;
445 return NULL;
446 }
89bddce5 447
6cb153ca
BL
448 sock = sock_from_file(file, err);
449 if (!sock)
1da177e4 450 fput(file);
6cb153ca
BL
451 return sock;
452}
1da177e4 453
6cb153ca
BL
454static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
455{
456 struct file *file;
457 struct socket *sock;
458
3672558c 459 *err = -EBADF;
6cb153ca
BL
460 file = fget_light(fd, fput_needed);
461 if (file) {
462 sock = sock_from_file(file, err);
463 if (sock)
464 return sock;
465 fput_light(file, *fput_needed);
1da177e4 466 }
6cb153ca 467 return NULL;
1da177e4
LT
468}
469
470/**
471 * sock_alloc - allocate a socket
89bddce5 472 *
1da177e4
LT
473 * Allocate a new inode and socket object. The two are bound together
474 * and initialised. The socket is then returned. If we are out of inodes
475 * NULL is returned.
476 */
477
478static struct socket *sock_alloc(void)
479{
89bddce5
SH
480 struct inode *inode;
481 struct socket *sock;
1da177e4
LT
482
483 inode = new_inode(sock_mnt->mnt_sb);
484 if (!inode)
485 return NULL;
486
487 sock = SOCKET_I(inode);
488
89bddce5 489 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
490 inode->i_uid = current->fsuid;
491 inode->i_gid = current->fsgid;
492
493 get_cpu_var(sockets_in_use)++;
494 put_cpu_var(sockets_in_use);
495 return sock;
496}
497
/*
 *	Socket inodes are not meant to be opened, but /proc offers a back
 *	door to them.  This open handler keeps that door shut by refusing
 *	every attempt with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
508
4b6f5d20 509const struct file_operations bad_sock_fops = {
1da177e4
LT
510 .owner = THIS_MODULE,
511 .open = sock_no_open,
512};
513
514/**
515 * sock_release - close a socket
516 * @sock: socket to close
517 *
518 * The socket is released from the protocol stack if it has a release
519 * callback, and the inode is then released if the socket is bound to
89bddce5 520 * an inode not a file.
1da177e4 521 */
89bddce5 522
1da177e4
LT
523void sock_release(struct socket *sock)
524{
525 if (sock->ops) {
526 struct module *owner = sock->ops->owner;
527
528 sock->ops->release(sock);
529 sock->ops = NULL;
530 module_put(owner);
531 }
532
533 if (sock->fasync_list)
534 printk(KERN_ERR "sock_release: fasync list not empty!\n");
535
536 get_cpu_var(sockets_in_use)--;
537 put_cpu_var(sockets_in_use);
538 if (!sock->file) {
539 iput(SOCK_INODE(sock));
540 return;
541 }
89bddce5 542 sock->file = NULL;
1da177e4
LT
543}
544
89bddce5 545static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
546 struct msghdr *msg, size_t size)
547{
548 struct sock_iocb *si = kiocb_to_siocb(iocb);
549 int err;
550
551 si->sock = sock;
552 si->scm = NULL;
553 si->msg = msg;
554 si->size = size;
555
556 err = security_socket_sendmsg(sock, msg, size);
557 if (err)
558 return err;
559
560 return sock->ops->sendmsg(iocb, sock, msg, size);
561}
562
563int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
564{
565 struct kiocb iocb;
566 struct sock_iocb siocb;
567 int ret;
568
569 init_sync_kiocb(&iocb, NULL);
570 iocb.private = &siocb;
571 ret = __sock_sendmsg(&iocb, sock, msg, size);
572 if (-EIOCBQUEUED == ret)
573 ret = wait_on_sync_kiocb(&iocb);
574 return ret;
575}
576
577int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
578 struct kvec *vec, size_t num, size_t size)
579{
580 mm_segment_t oldfs = get_fs();
581 int result;
582
583 set_fs(KERNEL_DS);
584 /*
585 * the following is safe, since for compiler definitions of kvec and
586 * iovec are identical, yielding the same in-core layout and alignment
587 */
89bddce5 588 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
589 msg->msg_iovlen = num;
590 result = sock_sendmsg(sock, msg, size);
591 set_fs(oldfs);
592 return result;
593}
594
89bddce5 595static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
596 struct msghdr *msg, size_t size, int flags)
597{
598 int err;
599 struct sock_iocb *si = kiocb_to_siocb(iocb);
600
601 si->sock = sock;
602 si->scm = NULL;
603 si->msg = msg;
604 si->size = size;
605 si->flags = flags;
606
607 err = security_socket_recvmsg(sock, msg, size, flags);
608 if (err)
609 return err;
610
611 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
612}
613
89bddce5 614int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
615 size_t size, int flags)
616{
617 struct kiocb iocb;
618 struct sock_iocb siocb;
619 int ret;
620
89bddce5 621 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
622 iocb.private = &siocb;
623 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
624 if (-EIOCBQUEUED == ret)
625 ret = wait_on_sync_kiocb(&iocb);
626 return ret;
627}
628
89bddce5
SH
629int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
630 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
631{
632 mm_segment_t oldfs = get_fs();
633 int result;
634
635 set_fs(KERNEL_DS);
636 /*
637 * the following is safe, since for compiler definitions of kvec and
638 * iovec are identical, yielding the same in-core layout and alignment
639 */
89bddce5 640 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
641 result = sock_recvmsg(sock, msg, size, flags);
642 set_fs(oldfs);
643 return result;
644}
645
646static void sock_aio_dtor(struct kiocb *iocb)
647{
648 kfree(iocb->private);
649}
650
ce1d4d3e
CH
651static ssize_t sock_sendpage(struct file *file, struct page *page,
652 int offset, size_t size, loff_t *ppos, int more)
1da177e4 653{
1da177e4
LT
654 struct socket *sock;
655 int flags;
656
ce1d4d3e
CH
657 sock = file->private_data;
658
659 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
660 if (more)
661 flags |= MSG_MORE;
662
663 return sock->ops->sendpage(sock, page, offset, size, flags);
664}
1da177e4 665
ce1d4d3e 666static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 667 struct sock_iocb *siocb)
ce1d4d3e
CH
668{
669 if (!is_sync_kiocb(iocb)) {
670 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
671 if (!siocb)
672 return NULL;
1da177e4
LT
673 iocb->ki_dtor = sock_aio_dtor;
674 }
1da177e4 675
ce1d4d3e 676 siocb->kiocb = iocb;
ce1d4d3e
CH
677 iocb->private = siocb;
678 return siocb;
1da177e4
LT
679}
680
ce1d4d3e 681static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
682 struct file *file, const struct iovec *iov,
683 unsigned long nr_segs)
ce1d4d3e
CH
684{
685 struct socket *sock = file->private_data;
686 size_t size = 0;
687 int i;
1da177e4 688
89bddce5
SH
689 for (i = 0; i < nr_segs; i++)
690 size += iov[i].iov_len;
1da177e4 691
ce1d4d3e
CH
692 msg->msg_name = NULL;
693 msg->msg_namelen = 0;
694 msg->msg_control = NULL;
695 msg->msg_controllen = 0;
89bddce5 696 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
697 msg->msg_iovlen = nr_segs;
698 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
699
700 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
701}
702
703static ssize_t sock_readv(struct file *file, const struct iovec *iov,
704 unsigned long nr_segs, loff_t *ppos)
1da177e4 705{
ce1d4d3e
CH
706 struct kiocb iocb;
707 struct sock_iocb siocb;
708 struct msghdr msg;
709 int ret;
710
89bddce5 711 init_sync_kiocb(&iocb, NULL);
ce1d4d3e
CH
712 iocb.private = &siocb;
713
027445c3 714 ret = do_sock_read(&msg, &iocb, file, iov, nr_segs);
ce1d4d3e
CH
715 if (-EIOCBQUEUED == ret)
716 ret = wait_on_sync_kiocb(&iocb);
717 return ret;
718}
719
027445c3
BP
720static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
721 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
722{
723 struct sock_iocb siocb, *x;
724
1da177e4
LT
725 if (pos != 0)
726 return -ESPIPE;
027445c3
BP
727
728 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
729 return 0;
730
027445c3
BP
731
732 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
733 if (!x)
734 return -ENOMEM;
027445c3 735 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
736}
737
ce1d4d3e 738static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
739 struct file *file, const struct iovec *iov,
740 unsigned long nr_segs)
1da177e4 741{
ce1d4d3e
CH
742 struct socket *sock = file->private_data;
743 size_t size = 0;
744 int i;
1da177e4 745
89bddce5
SH
746 for (i = 0; i < nr_segs; i++)
747 size += iov[i].iov_len;
1da177e4 748
ce1d4d3e
CH
749 msg->msg_name = NULL;
750 msg->msg_namelen = 0;
751 msg->msg_control = NULL;
752 msg->msg_controllen = 0;
89bddce5 753 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
754 msg->msg_iovlen = nr_segs;
755 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
756 if (sock->type == SOCK_SEQPACKET)
757 msg->msg_flags |= MSG_EOR;
1da177e4 758
ce1d4d3e 759 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
760}
761
ce1d4d3e
CH
762static ssize_t sock_writev(struct file *file, const struct iovec *iov,
763 unsigned long nr_segs, loff_t *ppos)
1da177e4
LT
764{
765 struct msghdr msg;
ce1d4d3e
CH
766 struct kiocb iocb;
767 struct sock_iocb siocb;
768 int ret;
1da177e4 769
ce1d4d3e
CH
770 init_sync_kiocb(&iocb, NULL);
771 iocb.private = &siocb;
1da177e4 772
027445c3 773 ret = do_sock_write(&msg, &iocb, file, iov, nr_segs);
ce1d4d3e
CH
774 if (-EIOCBQUEUED == ret)
775 ret = wait_on_sync_kiocb(&iocb);
776 return ret;
777}
1da177e4 778
027445c3
BP
779static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
780 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
781{
782 struct sock_iocb siocb, *x;
1da177e4 783
ce1d4d3e
CH
784 if (pos != 0)
785 return -ESPIPE;
027445c3
BP
786
787 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 788 return 0;
1da177e4 789
027445c3 790 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
791 if (!x)
792 return -ENOMEM;
1da177e4 793
027445c3 794 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
795}
796
1da177e4
LT
797/*
798 * Atomic setting of ioctl hooks to avoid race
799 * with module unload.
800 */
801
4a3e2f71 802static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 803static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 804
89bddce5 805void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 806{
4a3e2f71 807 mutex_lock(&br_ioctl_mutex);
1da177e4 808 br_ioctl_hook = hook;
4a3e2f71 809 mutex_unlock(&br_ioctl_mutex);
1da177e4 810}
89bddce5 811
1da177e4
LT
812EXPORT_SYMBOL(brioctl_set);
813
4a3e2f71 814static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 815static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 816
89bddce5 817void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 818{
4a3e2f71 819 mutex_lock(&vlan_ioctl_mutex);
1da177e4 820 vlan_ioctl_hook = hook;
4a3e2f71 821 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 822}
89bddce5 823
1da177e4
LT
824EXPORT_SYMBOL(vlan_ioctl_set);
825
4a3e2f71 826static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 827static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 828
89bddce5 829void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 830{
4a3e2f71 831 mutex_lock(&dlci_ioctl_mutex);
1da177e4 832 dlci_ioctl_hook = hook;
4a3e2f71 833 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 834}
89bddce5 835
1da177e4
LT
836EXPORT_SYMBOL(dlci_ioctl_set);
837
838/*
839 * With an ioctl, arg may well be a user mode pointer, but we don't know
840 * what to do with it - that's up to the protocol still.
841 */
842
843static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
844{
845 struct socket *sock;
846 void __user *argp = (void __user *)arg;
847 int pid, err;
848
b69aee04 849 sock = file->private_data;
1da177e4
LT
850 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
851 err = dev_ioctl(cmd, argp);
852 } else
d86b5e0e 853#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
854 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
855 err = dev_ioctl(cmd, argp);
856 } else
89bddce5
SH
857#endif /* CONFIG_WIRELESS_EXT */
858 switch (cmd) {
1da177e4
LT
859 case FIOSETOWN:
860 case SIOCSPGRP:
861 err = -EFAULT;
862 if (get_user(pid, (int __user *)argp))
863 break;
864 err = f_setown(sock->file, pid, 1);
865 break;
866 case FIOGETOWN:
867 case SIOCGPGRP:
89bddce5
SH
868 err = put_user(sock->file->f_owner.pid,
869 (int __user *)argp);
1da177e4
LT
870 break;
871 case SIOCGIFBR:
872 case SIOCSIFBR:
873 case SIOCBRADDBR:
874 case SIOCBRDELBR:
875 err = -ENOPKG;
876 if (!br_ioctl_hook)
877 request_module("bridge");
878
4a3e2f71 879 mutex_lock(&br_ioctl_mutex);
89bddce5 880 if (br_ioctl_hook)
1da177e4 881 err = br_ioctl_hook(cmd, argp);
4a3e2f71 882 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
883 break;
884 case SIOCGIFVLAN:
885 case SIOCSIFVLAN:
886 err = -ENOPKG;
887 if (!vlan_ioctl_hook)
888 request_module("8021q");
889
4a3e2f71 890 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
891 if (vlan_ioctl_hook)
892 err = vlan_ioctl_hook(argp);
4a3e2f71 893 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
894 break;
895 case SIOCGIFDIVERT:
896 case SIOCSIFDIVERT:
89bddce5 897 /* Convert this to call through a hook */
1da177e4
LT
898 err = divert_ioctl(cmd, argp);
899 break;
900 case SIOCADDDLCI:
901 case SIOCDELDLCI:
902 err = -ENOPKG;
903 if (!dlci_ioctl_hook)
904 request_module("dlci");
905
906 if (dlci_ioctl_hook) {
4a3e2f71 907 mutex_lock(&dlci_ioctl_mutex);
1da177e4 908 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 909 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
910 }
911 break;
912 default:
913 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
914
915 /*
916 * If this ioctl is unknown try to hand it down
917 * to the NIC driver.
918 */
919 if (err == -ENOIOCTLCMD)
920 err = dev_ioctl(cmd, argp);
1da177e4 921 break;
89bddce5 922 }
1da177e4
LT
923 return err;
924}
925
926int sock_create_lite(int family, int type, int protocol, struct socket **res)
927{
928 int err;
929 struct socket *sock = NULL;
89bddce5 930
1da177e4
LT
931 err = security_socket_create(family, type, protocol, 1);
932 if (err)
933 goto out;
934
935 sock = sock_alloc();
936 if (!sock) {
937 err = -ENOMEM;
938 goto out;
939 }
940
1da177e4 941 sock->type = type;
7420ed23
VY
942 err = security_socket_post_create(sock, family, type, protocol, 1);
943 if (err)
944 goto out_release;
945
1da177e4
LT
946out:
947 *res = sock;
948 return err;
7420ed23
VY
949out_release:
950 sock_release(sock);
951 sock = NULL;
952 goto out;
1da177e4
LT
953}
954
955/* No kernel lock held - perfect */
89bddce5 956static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
957{
958 struct socket *sock;
959
960 /*
89bddce5 961 * We can't return errors to poll, so it's either yes or no.
1da177e4 962 */
b69aee04 963 sock = file->private_data;
1da177e4
LT
964 return sock->ops->poll(file, sock, wait);
965}
966
89bddce5 967static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 968{
b69aee04 969 struct socket *sock = file->private_data;
1da177e4
LT
970
971 return sock->ops->mmap(file, sock, vma);
972}
973
20380731 974static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
975{
976 /*
89bddce5
SH
977 * It was possible the inode is NULL we were
978 * closing an unfinished socket.
1da177e4
LT
979 */
980
89bddce5 981 if (!inode) {
1da177e4
LT
982 printk(KERN_DEBUG "sock_close: NULL inode\n");
983 return 0;
984 }
985 sock_fasync(-1, filp, 0);
986 sock_release(SOCKET_I(inode));
987 return 0;
988}
989
990/*
991 * Update the socket async list
992 *
993 * Fasync_list locking strategy.
994 *
995 * 1. fasync_list is modified only under process context socket lock
996 * i.e. under semaphore.
997 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
998 * or under socket lock.
999 * 3. fasync_list can be used from softirq context, so that
1000 * modification under socket lock have to be enhanced with
1001 * write_lock_bh(&sk->sk_callback_lock).
1002 * --ANK (990710)
1003 */
1004
1005static int sock_fasync(int fd, struct file *filp, int on)
1006{
89bddce5 1007 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1008 struct socket *sock;
1009 struct sock *sk;
1010
89bddce5 1011 if (on) {
8b3a7005 1012 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1013 if (fna == NULL)
1da177e4
LT
1014 return -ENOMEM;
1015 }
1016
b69aee04 1017 sock = filp->private_data;
1da177e4 1018
89bddce5
SH
1019 sk = sock->sk;
1020 if (sk == NULL) {
1da177e4
LT
1021 kfree(fna);
1022 return -EINVAL;
1023 }
1024
1025 lock_sock(sk);
1026
89bddce5 1027 prev = &(sock->fasync_list);
1da177e4 1028
89bddce5
SH
1029 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1030 if (fa->fa_file == filp)
1da177e4
LT
1031 break;
1032
89bddce5
SH
1033 if (on) {
1034 if (fa != NULL) {
1da177e4 1035 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1036 fa->fa_fd = fd;
1da177e4
LT
1037 write_unlock_bh(&sk->sk_callback_lock);
1038
1039 kfree(fna);
1040 goto out;
1041 }
89bddce5
SH
1042 fna->fa_file = filp;
1043 fna->fa_fd = fd;
1044 fna->magic = FASYNC_MAGIC;
1045 fna->fa_next = sock->fasync_list;
1da177e4 1046 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1047 sock->fasync_list = fna;
1da177e4 1048 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1049 } else {
1050 if (fa != NULL) {
1da177e4 1051 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1052 *prev = fa->fa_next;
1da177e4
LT
1053 write_unlock_bh(&sk->sk_callback_lock);
1054 kfree(fa);
1055 }
1056 }
1057
1058out:
1059 release_sock(sock->sk);
1060 return 0;
1061}
1062
1063/* This function may be called only under socket lock or callback_lock */
1064
1065int sock_wake_async(struct socket *sock, int how, int band)
1066{
1067 if (!sock || !sock->fasync_list)
1068 return -1;
89bddce5 1069 switch (how) {
1da177e4 1070 case 1:
89bddce5 1071
1da177e4
LT
1072 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1073 break;
1074 goto call_kill;
1075 case 2:
1076 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1077 break;
1078 /* fall through */
1079 case 0:
89bddce5 1080call_kill:
1da177e4
LT
1081 __kill_fasync(sock->fasync_list, SIGIO, band);
1082 break;
1083 case 3:
1084 __kill_fasync(sock->fasync_list, SIGURG, band);
1085 }
1086 return 0;
1087}
1088
89bddce5
SH
1089static int __sock_create(int family, int type, int protocol,
1090 struct socket **res, int kern)
1da177e4
LT
1091{
1092 int err;
1093 struct socket *sock;
55737fda 1094 const struct net_proto_family *pf;
1da177e4
LT
1095
1096 /*
89bddce5 1097 * Check protocol is in range
1da177e4
LT
1098 */
1099 if (family < 0 || family >= NPROTO)
1100 return -EAFNOSUPPORT;
1101 if (type < 0 || type >= SOCK_MAX)
1102 return -EINVAL;
1103
1104 /* Compatibility.
1105
1106 This uglymoron is moved from INET layer to here to avoid
1107 deadlock in module load.
1108 */
1109 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1110 static int warned;
1da177e4
LT
1111 if (!warned) {
1112 warned = 1;
89bddce5
SH
1113 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1114 current->comm);
1da177e4
LT
1115 }
1116 family = PF_PACKET;
1117 }
1118
1119 err = security_socket_create(family, type, protocol, kern);
1120 if (err)
1121 return err;
89bddce5 1122
55737fda
SH
1123 /*
1124 * Allocate the socket and allow the family to set things up. if
1125 * the protocol is 0, the family is instructed to select an appropriate
1126 * default.
1127 */
1128 sock = sock_alloc();
1129 if (!sock) {
1130 if (net_ratelimit())
1131 printk(KERN_WARNING "socket: no more sockets\n");
1132 return -ENFILE; /* Not exactly a match, but its the
1133 closest posix thing */
1134 }
1135
1136 sock->type = type;
1137
1da177e4 1138#if defined(CONFIG_KMOD)
89bddce5
SH
1139 /* Attempt to load a protocol module if the find failed.
1140 *
1141 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1142 * requested real, full-featured networking support upon configuration.
1143 * Otherwise module support will break!
1144 */
55737fda 1145 if (net_families[family] == NULL)
89bddce5 1146 request_module("net-pf-%d", family);
1da177e4
LT
1147#endif
1148
55737fda
SH
1149 rcu_read_lock();
1150 pf = rcu_dereference(net_families[family]);
1151 err = -EAFNOSUPPORT;
1152 if (!pf)
1153 goto out_release;
1da177e4
LT
1154
1155 /*
1156 * We will call the ->create function, that possibly is in a loadable
1157 * module, so we have to bump that loadable module refcnt first.
1158 */
55737fda 1159 if (!try_module_get(pf->owner))
1da177e4
LT
1160 goto out_release;
1161
55737fda
SH
1162 /* Now protected by module ref count */
1163 rcu_read_unlock();
1164
1165 err = pf->create(sock, protocol);
1166 if (err < 0)
1da177e4 1167 goto out_module_put;
a79af59e 1168
1da177e4
LT
1169 /*
1170 * Now to bump the refcnt of the [loadable] module that owns this
1171 * socket at sock_release time we decrement its refcnt.
1172 */
55737fda
SH
1173 if (!try_module_get(sock->ops->owner))
1174 goto out_module_busy;
1175
1da177e4
LT
1176 /*
1177 * Now that we're done with the ->create function, the [loadable]
1178 * module can have its refcnt decremented
1179 */
55737fda 1180 module_put(pf->owner);
7420ed23
VY
1181 err = security_socket_post_create(sock, family, type, protocol, kern);
1182 if (err)
1183 goto out_release;
55737fda 1184 *res = sock;
1da177e4 1185
55737fda
SH
1186 return 0;
1187
1188out_module_busy:
1189 err = -EAFNOSUPPORT;
1da177e4 1190out_module_put:
55737fda
SH
1191 sock->ops = NULL;
1192 module_put(pf->owner);
1193out_sock_release:
1da177e4 1194 sock_release(sock);
55737fda
SH
1195 return err;
1196
1197out_release:
1198 rcu_read_unlock();
1199 goto out_sock_release;
1da177e4
LT
1200}
1201
/* Create a socket through __sock_create() with the 'kern' argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1206
/* Create a socket through __sock_create() with the 'kern' argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1211
1212asmlinkage long sys_socket(int family, int type, int protocol)
1213{
1214 int retval;
1215 struct socket *sock;
1216
1217 retval = sock_create(family, type, protocol, &sock);
1218 if (retval < 0)
1219 goto out;
1220
1221 retval = sock_map_fd(sock);
1222 if (retval < 0)
1223 goto out_release;
1224
1225out:
1226 /* It may be already another descriptor 8) Not kernel problem. */
1227 return retval;
1228
1229out_release:
1230 sock_release(sock);
1231 return retval;
1232}
1233
1234/*
1235 * Create a pair of connected sockets.
1236 */
1237
89bddce5
SH
1238asmlinkage long sys_socketpair(int family, int type, int protocol,
1239 int __user *usockvec)
1da177e4
LT
1240{
1241 struct socket *sock1, *sock2;
1242 int fd1, fd2, err;
1243
1244 /*
1245 * Obtain the first socket and check if the underlying protocol
1246 * supports the socketpair call.
1247 */
1248
1249 err = sock_create(family, type, protocol, &sock1);
1250 if (err < 0)
1251 goto out;
1252
1253 err = sock_create(family, type, protocol, &sock2);
1254 if (err < 0)
1255 goto out_release_1;
1256
1257 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1258 if (err < 0)
1da177e4
LT
1259 goto out_release_both;
1260
1261 fd1 = fd2 = -1;
1262
1263 err = sock_map_fd(sock1);
1264 if (err < 0)
1265 goto out_release_both;
1266 fd1 = err;
1267
1268 err = sock_map_fd(sock2);
1269 if (err < 0)
1270 goto out_close_1;
1271 fd2 = err;
1272
1273 /* fd1 and fd2 may be already another descriptors.
1274 * Not kernel problem.
1275 */
1276
89bddce5 1277 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1278 if (!err)
1279 err = put_user(fd2, &usockvec[1]);
1280 if (!err)
1281 return 0;
1282
1283 sys_close(fd2);
1284 sys_close(fd1);
1285 return err;
1286
1287out_close_1:
89bddce5 1288 sock_release(sock2);
1da177e4
LT
1289 sys_close(fd1);
1290 return err;
1291
1292out_release_both:
89bddce5 1293 sock_release(sock2);
1da177e4 1294out_release_1:
89bddce5 1295 sock_release(sock1);
1da177e4
LT
1296out:
1297 return err;
1298}
1299
1da177e4
LT
1300/*
1301 * Bind a name to a socket. Nothing much to do here since it's
1302 * the protocol's responsibility to handle the local address.
1303 *
1304 * We move the socket address to kernel space before we call
1305 * the protocol layer (having also checked the address is ok).
1306 */
1307
1308asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1309{
1310 struct socket *sock;
1311 char address[MAX_SOCK_ADDR];
6cb153ca 1312 int err, fput_needed;
1da177e4 1313
89bddce5
SH
1314 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1315 if(sock) {
1316 err = move_addr_to_kernel(umyaddr, addrlen, address);
1317 if (err >= 0) {
1318 err = security_socket_bind(sock,
1319 (struct sockaddr *)address,
1320 addrlen);
6cb153ca
BL
1321 if (!err)
1322 err = sock->ops->bind(sock,
89bddce5
SH
1323 (struct sockaddr *)
1324 address, addrlen);
1da177e4 1325 }
6cb153ca 1326 fput_light(sock->file, fput_needed);
89bddce5 1327 }
1da177e4
LT
1328 return err;
1329}
1330
1da177e4
LT
1331/*
1332 * Perform a listen. Basically, we allow the protocol to do anything
1333 * necessary for a listen, and if that works, we mark the socket as
1334 * ready for listening.
1335 */
1336
7a42c217 1337int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1338
1339asmlinkage long sys_listen(int fd, int backlog)
1340{
1341 struct socket *sock;
6cb153ca 1342 int err, fput_needed;
89bddce5
SH
1343
1344 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1345 if (sock) {
1346 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1347 backlog = sysctl_somaxconn;
1348
1349 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1350 if (!err)
1351 err = sock->ops->listen(sock, backlog);
1da177e4 1352
6cb153ca 1353 fput_light(sock->file, fput_needed);
1da177e4
LT
1354 }
1355 return err;
1356}
1357
1da177e4
LT
1358/*
1359 * For accept, we attempt to create a new socket, set up the link
1360 * with the client, wake up the client, then return the new
1361 * connected fd. We collect the address of the connector in kernel
1362 * space and move it to user at the very end. This is unclean because
1363 * we open the socket then return an error.
1364 *
1365 * 1003.1g adds the ability to recvmsg() to query connection pending
1366 * status to recvmsg. We need to add that support in a way thats
1367 * clean when we restucture accept also.
1368 */
1369
89bddce5
SH
1370asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1371 int __user *upeer_addrlen)
1da177e4
LT
1372{
1373 struct socket *sock, *newsock;
39d8c1b6 1374 struct file *newfile;
6cb153ca 1375 int err, len, newfd, fput_needed;
1da177e4
LT
1376 char address[MAX_SOCK_ADDR];
1377
6cb153ca 1378 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1379 if (!sock)
1380 goto out;
1381
1382 err = -ENFILE;
89bddce5 1383 if (!(newsock = sock_alloc()))
1da177e4
LT
1384 goto out_put;
1385
1386 newsock->type = sock->type;
1387 newsock->ops = sock->ops;
1388
1da177e4
LT
1389 /*
1390 * We don't need try_module_get here, as the listening socket (sock)
1391 * has the protocol module (sock->ops->owner) held.
1392 */
1393 __module_get(newsock->ops->owner);
1394
39d8c1b6
DM
1395 newfd = sock_alloc_fd(&newfile);
1396 if (unlikely(newfd < 0)) {
1397 err = newfd;
9a1875e6
DM
1398 sock_release(newsock);
1399 goto out_put;
39d8c1b6
DM
1400 }
1401
1402 err = sock_attach_fd(newsock, newfile);
1403 if (err < 0)
1404 goto out_fd;
1405
a79af59e
FF
1406 err = security_socket_accept(sock, newsock);
1407 if (err)
39d8c1b6 1408 goto out_fd;
a79af59e 1409
1da177e4
LT
1410 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1411 if (err < 0)
39d8c1b6 1412 goto out_fd;
1da177e4
LT
1413
1414 if (upeer_sockaddr) {
89bddce5
SH
1415 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1416 &len, 2) < 0) {
1da177e4 1417 err = -ECONNABORTED;
39d8c1b6 1418 goto out_fd;
1da177e4 1419 }
89bddce5
SH
1420 err = move_addr_to_user(address, len, upeer_sockaddr,
1421 upeer_addrlen);
1da177e4 1422 if (err < 0)
39d8c1b6 1423 goto out_fd;
1da177e4
LT
1424 }
1425
1426 /* File flags are not inherited via accept() unlike another OSes. */
1427
39d8c1b6
DM
1428 fd_install(newfd, newfile);
1429 err = newfd;
1da177e4
LT
1430
1431 security_socket_post_accept(sock, newsock);
1432
1433out_put:
6cb153ca 1434 fput_light(sock->file, fput_needed);
1da177e4
LT
1435out:
1436 return err;
39d8c1b6 1437out_fd:
9606a216 1438 fput(newfile);
39d8c1b6 1439 put_unused_fd(newfd);
1da177e4
LT
1440 goto out_put;
1441}
1442
1da177e4
LT
1443/*
1444 * Attempt to connect to a socket with the server address. The address
1445 * is in user space so we verify it is OK and move it to kernel space.
1446 *
1447 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1448 * break bindings
1449 *
1450 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1451 * other SEQPACKET protocols that take time to connect() as it doesn't
1452 * include the -EINPROGRESS status for such sockets.
1453 */
1454
89bddce5
SH
1455asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1456 int addrlen)
1da177e4
LT
1457{
1458 struct socket *sock;
1459 char address[MAX_SOCK_ADDR];
6cb153ca 1460 int err, fput_needed;
1da177e4 1461
6cb153ca 1462 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1463 if (!sock)
1464 goto out;
1465 err = move_addr_to_kernel(uservaddr, addrlen, address);
1466 if (err < 0)
1467 goto out_put;
1468
89bddce5
SH
1469 err =
1470 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1471 if (err)
1472 goto out_put;
1473
89bddce5 1474 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1475 sock->file->f_flags);
1476out_put:
6cb153ca 1477 fput_light(sock->file, fput_needed);
1da177e4
LT
1478out:
1479 return err;
1480}
1481
1482/*
1483 * Get the local address ('name') of a socket object. Move the obtained
1484 * name to user space.
1485 */
1486
89bddce5
SH
1487asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1488 int __user *usockaddr_len)
1da177e4
LT
1489{
1490 struct socket *sock;
1491 char address[MAX_SOCK_ADDR];
6cb153ca 1492 int len, err, fput_needed;
89bddce5 1493
6cb153ca 1494 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1495 if (!sock)
1496 goto out;
1497
1498 err = security_socket_getsockname(sock);
1499 if (err)
1500 goto out_put;
1501
1502 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1503 if (err)
1504 goto out_put;
1505 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1506
1507out_put:
6cb153ca 1508 fput_light(sock->file, fput_needed);
1da177e4
LT
1509out:
1510 return err;
1511}
1512
1513/*
1514 * Get the remote address ('name') of a socket object. Move the obtained
1515 * name to user space.
1516 */
1517
89bddce5
SH
1518asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1519 int __user *usockaddr_len)
1da177e4
LT
1520{
1521 struct socket *sock;
1522 char address[MAX_SOCK_ADDR];
6cb153ca 1523 int len, err, fput_needed;
1da177e4 1524
89bddce5
SH
1525 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1526 if (sock != NULL) {
1da177e4
LT
1527 err = security_socket_getpeername(sock);
1528 if (err) {
6cb153ca 1529 fput_light(sock->file, fput_needed);
1da177e4
LT
1530 return err;
1531 }
1532
89bddce5
SH
1533 err =
1534 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1535 1);
1da177e4 1536 if (!err)
89bddce5
SH
1537 err = move_addr_to_user(address, len, usockaddr,
1538 usockaddr_len);
6cb153ca 1539 fput_light(sock->file, fput_needed);
1da177e4
LT
1540 }
1541 return err;
1542}
1543
1544/*
1545 * Send a datagram to a given address. We move the address into kernel
1546 * space and check the user space data area is readable before invoking
1547 * the protocol.
1548 */
1549
89bddce5
SH
1550asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1551 unsigned flags, struct sockaddr __user *addr,
1552 int addr_len)
1da177e4
LT
1553{
1554 struct socket *sock;
1555 char address[MAX_SOCK_ADDR];
1556 int err;
1557 struct msghdr msg;
1558 struct iovec iov;
6cb153ca
BL
1559 int fput_needed;
1560 struct file *sock_file;
1561
1562 sock_file = fget_light(fd, &fput_needed);
1563 if (!sock_file)
1564 return -EBADF;
1565
1566 sock = sock_from_file(sock_file, &err);
1da177e4 1567 if (!sock)
6cb153ca 1568 goto out_put;
89bddce5
SH
1569 iov.iov_base = buff;
1570 iov.iov_len = len;
1571 msg.msg_name = NULL;
1572 msg.msg_iov = &iov;
1573 msg.msg_iovlen = 1;
1574 msg.msg_control = NULL;
1575 msg.msg_controllen = 0;
1576 msg.msg_namelen = 0;
6cb153ca 1577 if (addr) {
1da177e4
LT
1578 err = move_addr_to_kernel(addr, addr_len, address);
1579 if (err < 0)
1580 goto out_put;
89bddce5
SH
1581 msg.msg_name = address;
1582 msg.msg_namelen = addr_len;
1da177e4
LT
1583 }
1584 if (sock->file->f_flags & O_NONBLOCK)
1585 flags |= MSG_DONTWAIT;
1586 msg.msg_flags = flags;
1587 err = sock_sendmsg(sock, &msg, len);
1588
89bddce5 1589out_put:
6cb153ca 1590 fput_light(sock_file, fput_needed);
1da177e4
LT
1591 return err;
1592}
1593
1594/*
89bddce5 1595 * Send a datagram down a socket.
1da177e4
LT
1596 */
1597
89bddce5 1598asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1599{
1600 return sys_sendto(fd, buff, len, flags, NULL, 0);
1601}
1602
1603/*
89bddce5 1604 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1605 * sender. We verify the buffers are writable and if needed move the
1606 * sender address from kernel to user space.
1607 */
1608
89bddce5
SH
1609asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1610 unsigned flags, struct sockaddr __user *addr,
1611 int __user *addr_len)
1da177e4
LT
1612{
1613 struct socket *sock;
1614 struct iovec iov;
1615 struct msghdr msg;
1616 char address[MAX_SOCK_ADDR];
89bddce5 1617 int err, err2;
6cb153ca
BL
1618 struct file *sock_file;
1619 int fput_needed;
1620
1621 sock_file = fget_light(fd, &fput_needed);
1622 if (!sock_file)
1623 return -EBADF;
1da177e4 1624
6cb153ca 1625 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1626 if (!sock)
1627 goto out;
1628
89bddce5
SH
1629 msg.msg_control = NULL;
1630 msg.msg_controllen = 0;
1631 msg.msg_iovlen = 1;
1632 msg.msg_iov = &iov;
1633 iov.iov_len = size;
1634 iov.iov_base = ubuf;
1635 msg.msg_name = address;
1636 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1637 if (sock->file->f_flags & O_NONBLOCK)
1638 flags |= MSG_DONTWAIT;
89bddce5 1639 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1640
89bddce5
SH
1641 if (err >= 0 && addr != NULL) {
1642 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1643 if (err2 < 0)
1644 err = err2;
1da177e4 1645 }
1da177e4 1646out:
6cb153ca 1647 fput_light(sock_file, fput_needed);
1da177e4
LT
1648 return err;
1649}
1650
1651/*
89bddce5 1652 * Receive a datagram from a socket.
1da177e4
LT
1653 */
1654
89bddce5
SH
1655asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1656 unsigned flags)
1da177e4
LT
1657{
1658 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1659}
1660
1661/*
1662 * Set a socket option. Because we don't know the option lengths we have
1663 * to pass the user mode parameter for the protocols to sort out.
1664 */
1665
89bddce5
SH
1666asmlinkage long sys_setsockopt(int fd, int level, int optname,
1667 char __user *optval, int optlen)
1da177e4 1668{
6cb153ca 1669 int err, fput_needed;
1da177e4
LT
1670 struct socket *sock;
1671
1672 if (optlen < 0)
1673 return -EINVAL;
89bddce5
SH
1674
1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1676 if (sock != NULL) {
1677 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1678 if (err)
1679 goto out_put;
1da177e4
LT
1680
1681 if (level == SOL_SOCKET)
89bddce5
SH
1682 err =
1683 sock_setsockopt(sock, level, optname, optval,
1684 optlen);
1da177e4 1685 else
89bddce5
SH
1686 err =
1687 sock->ops->setsockopt(sock, level, optname, optval,
1688 optlen);
6cb153ca
BL
1689out_put:
1690 fput_light(sock->file, fput_needed);
1da177e4
LT
1691 }
1692 return err;
1693}
1694
1695/*
1696 * Get a socket option. Because we don't know the option lengths we have
1697 * to pass a user mode parameter for the protocols to sort out.
1698 */
1699
89bddce5
SH
1700asmlinkage long sys_getsockopt(int fd, int level, int optname,
1701 char __user *optval, int __user *optlen)
1da177e4 1702{
6cb153ca 1703 int err, fput_needed;
1da177e4
LT
1704 struct socket *sock;
1705
89bddce5
SH
1706 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1707 if (sock != NULL) {
6cb153ca
BL
1708 err = security_socket_getsockopt(sock, level, optname);
1709 if (err)
1710 goto out_put;
1da177e4
LT
1711
1712 if (level == SOL_SOCKET)
89bddce5
SH
1713 err =
1714 sock_getsockopt(sock, level, optname, optval,
1715 optlen);
1da177e4 1716 else
89bddce5
SH
1717 err =
1718 sock->ops->getsockopt(sock, level, optname, optval,
1719 optlen);
6cb153ca
BL
1720out_put:
1721 fput_light(sock->file, fput_needed);
1da177e4
LT
1722 }
1723 return err;
1724}
1725
1da177e4
LT
1726/*
1727 * Shutdown a socket.
1728 */
1729
1730asmlinkage long sys_shutdown(int fd, int how)
1731{
6cb153ca 1732 int err, fput_needed;
1da177e4
LT
1733 struct socket *sock;
1734
89bddce5
SH
1735 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1736 if (sock != NULL) {
1da177e4 1737 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1738 if (!err)
1739 err = sock->ops->shutdown(sock, how);
1740 fput_light(sock->file, fput_needed);
1da177e4
LT
1741 }
1742 return err;
1743}
1744
89bddce5 1745/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1746 * fields which are the same type (int / unsigned) on our platforms.
1747 */
1748#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1749#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1750#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1751
1da177e4
LT
1752/*
1753 * BSD sendmsg interface
1754 */
1755
1756asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1757{
89bddce5
SH
1758 struct compat_msghdr __user *msg_compat =
1759 (struct compat_msghdr __user *)msg;
1da177e4
LT
1760 struct socket *sock;
1761 char address[MAX_SOCK_ADDR];
1762 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1763 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1764 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1765 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1766 unsigned char *ctl_buf = ctl;
1767 struct msghdr msg_sys;
1768 int err, ctl_len, iov_size, total_len;
6cb153ca 1769 int fput_needed;
89bddce5 1770
1da177e4
LT
1771 err = -EFAULT;
1772 if (MSG_CMSG_COMPAT & flags) {
1773 if (get_compat_msghdr(&msg_sys, msg_compat))
1774 return -EFAULT;
89bddce5
SH
1775 }
1776 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1777 return -EFAULT;
1778
6cb153ca 1779 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1780 if (!sock)
1da177e4
LT
1781 goto out;
1782
1783 /* do not move before msg_sys is valid */
1784 err = -EMSGSIZE;
1785 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1786 goto out_put;
1787
89bddce5 1788 /* Check whether to allocate the iovec area */
1da177e4
LT
1789 err = -ENOMEM;
1790 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1791 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1792 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1793 if (!iov)
1794 goto out_put;
1795 }
1796
1797 /* This will also move the address data into kernel space */
1798 if (MSG_CMSG_COMPAT & flags) {
1799 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1800 } else
1801 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1802 if (err < 0)
1da177e4
LT
1803 goto out_freeiov;
1804 total_len = err;
1805
1806 err = -ENOBUFS;
1807
1808 if (msg_sys.msg_controllen > INT_MAX)
1809 goto out_freeiov;
89bddce5 1810 ctl_len = msg_sys.msg_controllen;
1da177e4 1811 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1812 err =
1813 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1814 sizeof(ctl));
1da177e4
LT
1815 if (err)
1816 goto out_freeiov;
1817 ctl_buf = msg_sys.msg_control;
8920e8f9 1818 ctl_len = msg_sys.msg_controllen;
1da177e4 1819 } else if (ctl_len) {
89bddce5 1820 if (ctl_len > sizeof(ctl)) {
1da177e4 1821 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1822 if (ctl_buf == NULL)
1da177e4
LT
1823 goto out_freeiov;
1824 }
1825 err = -EFAULT;
1826 /*
1827 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1828 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1829 * checking falls down on this.
1830 */
89bddce5
SH
1831 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1832 ctl_len))
1da177e4
LT
1833 goto out_freectl;
1834 msg_sys.msg_control = ctl_buf;
1835 }
1836 msg_sys.msg_flags = flags;
1837
1838 if (sock->file->f_flags & O_NONBLOCK)
1839 msg_sys.msg_flags |= MSG_DONTWAIT;
1840 err = sock_sendmsg(sock, &msg_sys, total_len);
1841
1842out_freectl:
89bddce5 1843 if (ctl_buf != ctl)
1da177e4
LT
1844 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1845out_freeiov:
1846 if (iov != iovstack)
1847 sock_kfree_s(sock->sk, iov, iov_size);
1848out_put:
6cb153ca 1849 fput_light(sock->file, fput_needed);
89bddce5 1850out:
1da177e4
LT
1851 return err;
1852}
1853
1854/*
1855 * BSD recvmsg interface
1856 */
1857
89bddce5
SH
1858asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1859 unsigned int flags)
1da177e4 1860{
89bddce5
SH
1861 struct compat_msghdr __user *msg_compat =
1862 (struct compat_msghdr __user *)msg;
1da177e4
LT
1863 struct socket *sock;
1864 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1865 struct iovec *iov = iovstack;
1da177e4
LT
1866 struct msghdr msg_sys;
1867 unsigned long cmsg_ptr;
1868 int err, iov_size, total_len, len;
6cb153ca 1869 int fput_needed;
1da177e4
LT
1870
1871 /* kernel mode address */
1872 char addr[MAX_SOCK_ADDR];
1873
1874 /* user mode address pointers */
1875 struct sockaddr __user *uaddr;
1876 int __user *uaddr_len;
89bddce5 1877
1da177e4
LT
1878 if (MSG_CMSG_COMPAT & flags) {
1879 if (get_compat_msghdr(&msg_sys, msg_compat))
1880 return -EFAULT;
89bddce5
SH
1881 }
1882 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1883 return -EFAULT;
1da177e4 1884
6cb153ca 1885 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1886 if (!sock)
1887 goto out;
1888
1889 err = -EMSGSIZE;
1890 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1891 goto out_put;
89bddce5
SH
1892
1893 /* Check whether to allocate the iovec area */
1da177e4
LT
1894 err = -ENOMEM;
1895 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1896 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1897 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1898 if (!iov)
1899 goto out_put;
1900 }
1901
1902 /*
89bddce5
SH
1903 * Save the user-mode address (verify_iovec will change the
1904 * kernel msghdr to use the kernel address space)
1da177e4 1905 */
89bddce5
SH
1906
1907 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1908 uaddr_len = COMPAT_NAMELEN(msg);
1909 if (MSG_CMSG_COMPAT & flags) {
1910 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1911 } else
1912 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1913 if (err < 0)
1914 goto out_freeiov;
89bddce5 1915 total_len = err;
1da177e4
LT
1916
1917 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1918 msg_sys.msg_flags = 0;
1919 if (MSG_CMSG_COMPAT & flags)
1920 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1921
1da177e4
LT
1922 if (sock->file->f_flags & O_NONBLOCK)
1923 flags |= MSG_DONTWAIT;
1924 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1925 if (err < 0)
1926 goto out_freeiov;
1927 len = err;
1928
1929 if (uaddr != NULL) {
89bddce5
SH
1930 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1931 uaddr_len);
1da177e4
LT
1932 if (err < 0)
1933 goto out_freeiov;
1934 }
37f7f421
DM
1935 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1936 COMPAT_FLAGS(msg));
1da177e4
LT
1937 if (err)
1938 goto out_freeiov;
1939 if (MSG_CMSG_COMPAT & flags)
89bddce5 1940 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1941 &msg_compat->msg_controllen);
1942 else
89bddce5 1943 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1944 &msg->msg_controllen);
1945 if (err)
1946 goto out_freeiov;
1947 err = len;
1948
1949out_freeiov:
1950 if (iov != iovstack)
1951 sock_kfree_s(sock->sk, iov, iov_size);
1952out_put:
6cb153ca 1953 fput_light(sock->file, fput_needed);
1da177e4
LT
1954out:
1955 return err;
1956}
1957
1958#ifdef __ARCH_WANT_SYS_SOCKETCALL
1959
1960/* Argument list sizes for sys_socketcall */
1961#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
1962static const unsigned char nargs[18]={
1963 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1964 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1965 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1966};
1967
1da177e4
LT
1968#undef AL
1969
1970/*
89bddce5 1971 * System call vectors.
1da177e4
LT
1972 *
1973 * Argument checking cleaned up. Saved 20% in size.
1974 * This function doesn't need to set the kernel lock because
89bddce5 1975 * it is set by the callees.
1da177e4
LT
1976 */
1977
1978asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1979{
1980 unsigned long a[6];
89bddce5 1981 unsigned long a0, a1;
1da177e4
LT
1982 int err;
1983
89bddce5 1984 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1985 return -EINVAL;
1986
1987 /* copy_from_user should be SMP safe. */
1988 if (copy_from_user(a, args, nargs[call]))
1989 return -EFAULT;
3ec3b2fb 1990
89bddce5 1991 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1992 if (err)
1993 return err;
1994
89bddce5
SH
1995 a0 = a[0];
1996 a1 = a[1];
1997
1998 switch (call) {
1999 case SYS_SOCKET:
2000 err = sys_socket(a0, a1, a[2]);
2001 break;
2002 case SYS_BIND:
2003 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2004 break;
2005 case SYS_CONNECT:
2006 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2007 break;
2008 case SYS_LISTEN:
2009 err = sys_listen(a0, a1);
2010 break;
2011 case SYS_ACCEPT:
2012 err =
2013 sys_accept(a0, (struct sockaddr __user *)a1,
2014 (int __user *)a[2]);
2015 break;
2016 case SYS_GETSOCKNAME:
2017 err =
2018 sys_getsockname(a0, (struct sockaddr __user *)a1,
2019 (int __user *)a[2]);
2020 break;
2021 case SYS_GETPEERNAME:
2022 err =
2023 sys_getpeername(a0, (struct sockaddr __user *)a1,
2024 (int __user *)a[2]);
2025 break;
2026 case SYS_SOCKETPAIR:
2027 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2028 break;
2029 case SYS_SEND:
2030 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2031 break;
2032 case SYS_SENDTO:
2033 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2034 (struct sockaddr __user *)a[4], a[5]);
2035 break;
2036 case SYS_RECV:
2037 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2038 break;
2039 case SYS_RECVFROM:
2040 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2041 (struct sockaddr __user *)a[4],
2042 (int __user *)a[5]);
2043 break;
2044 case SYS_SHUTDOWN:
2045 err = sys_shutdown(a0, a1);
2046 break;
2047 case SYS_SETSOCKOPT:
2048 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2049 break;
2050 case SYS_GETSOCKOPT:
2051 err =
2052 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2053 (int __user *)a[4]);
2054 break;
2055 case SYS_SENDMSG:
2056 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2057 break;
2058 case SYS_RECVMSG:
2059 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2060 break;
2061 default:
2062 err = -EINVAL;
2063 break;
1da177e4
LT
2064 }
2065 return err;
2066}
2067
89bddce5 2068#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2069
55737fda
SH
2070/**
2071 * sock_register - add a socket protocol handler
2072 * @ops: description of protocol
2073 *
1da177e4
LT
2074 * This function is called by a protocol handler that wants to
2075 * advertise its address family, and have it linked into the
55737fda
SH
2076 * socket interface. The value ops->family coresponds to the
2077 * socket system call protocol family.
1da177e4 2078 */
f0fd27d4 2079int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2080{
2081 int err;
2082
2083 if (ops->family >= NPROTO) {
89bddce5
SH
2084 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2085 NPROTO);
1da177e4
LT
2086 return -ENOBUFS;
2087 }
55737fda
SH
2088
2089 spin_lock(&net_family_lock);
2090 if (net_families[ops->family])
2091 err = -EEXIST;
2092 else {
89bddce5 2093 net_families[ops->family] = ops;
1da177e4
LT
2094 err = 0;
2095 }
55737fda
SH
2096 spin_unlock(&net_family_lock);
2097
89bddce5 2098 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2099 return err;
2100}
2101
55737fda
SH
2102/**
2103 * sock_unregister - remove a protocol handler
2104 * @family: protocol family to remove
2105 *
1da177e4
LT
2106 * This function is called by a protocol handler that wants to
2107 * remove its address family, and have it unlinked from the
55737fda
SH
2108 * new socket creation.
2109 *
2110 * If protocol handler is a module, then it can use module reference
2111 * counts to protect against new references. If protocol handler is not
2112 * a module then it needs to provide its own protection in
2113 * the ops->create routine.
1da177e4 2114 */
f0fd27d4 2115void sock_unregister(int family)
1da177e4 2116{
f0fd27d4 2117 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2118
55737fda 2119 spin_lock(&net_family_lock);
89bddce5 2120 net_families[family] = NULL;
55737fda
SH
2121 spin_unlock(&net_family_lock);
2122
2123 synchronize_rcu();
2124
89bddce5 2125 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2126}
2127
77d76ea3 2128static int __init sock_init(void)
1da177e4
LT
2129{
2130 /*
89bddce5 2131 * Initialize sock SLAB cache.
1da177e4 2132 */
89bddce5 2133
1da177e4
LT
2134 sk_init();
2135
1da177e4 2136 /*
89bddce5 2137 * Initialize skbuff SLAB cache
1da177e4
LT
2138 */
2139 skb_init();
1da177e4
LT
2140
2141 /*
89bddce5 2142 * Initialize the protocols module.
1da177e4
LT
2143 */
2144
2145 init_inodecache();
2146 register_filesystem(&sock_fs_type);
2147 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2148
2149 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2150 */
2151
2152#ifdef CONFIG_NETFILTER
2153 netfilter_init();
2154#endif
cbeb321a
DM
2155
2156 return 0;
1da177e4
LT
2157}
2158
77d76ea3
AK
2159core_initcall(sock_init); /* early initcall */
2160
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Write the "sockets: used N" line to @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2177
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for socket files.  Forwards the request to
 * the socket's compat_ioctl operation when the protocol provides one,
 * and returns -ENOIOCTLCMD when it does not.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2191
ac5a488e
SS
2192int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2193{
2194 return sock->ops->bind(sock, addr, addrlen);
2195}
2196
2197int kernel_listen(struct socket *sock, int backlog)
2198{
2199 return sock->ops->listen(sock, backlog);
2200}
2201
2202int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2203{
2204 struct sock *sk = sock->sk;
2205 int err;
2206
2207 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2208 newsock);
2209 if (err < 0)
2210 goto done;
2211
2212 err = sock->ops->accept(sock, *newsock, flags);
2213 if (err < 0) {
2214 sock_release(*newsock);
2215 goto done;
2216 }
2217
2218 (*newsock)->ops = sock->ops;
2219
2220done:
2221 return err;
2222}
2223
2224int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2225 int flags)
2226{
2227 return sock->ops->connect(sock, addr, addrlen, flags);
2228}
2229
2230int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2231 int *addrlen)
2232{
2233 return sock->ops->getname(sock, addr, addrlen, 0);
2234}
2235
2236int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2237 int *addrlen)
2238{
2239 return sock->ops->getname(sock, addr, addrlen, 1);
2240}
2241
2242int kernel_getsockopt(struct socket *sock, int level, int optname,
2243 char *optval, int *optlen)
2244{
2245 mm_segment_t oldfs = get_fs();
2246 int err;
2247
2248 set_fs(KERNEL_DS);
2249 if (level == SOL_SOCKET)
2250 err = sock_getsockopt(sock, level, optname, optval, optlen);
2251 else
2252 err = sock->ops->getsockopt(sock, level, optname, optval,
2253 optlen);
2254 set_fs(oldfs);
2255 return err;
2256}
2257
2258int kernel_setsockopt(struct socket *sock, int level, int optname,
2259 char *optval, int optlen)
2260{
2261 mm_segment_t oldfs = get_fs();
2262 int err;
2263
2264 set_fs(KERNEL_DS);
2265 if (level == SOL_SOCKET)
2266 err = sock_setsockopt(sock, level, optname, optval, optlen);
2267 else
2268 err = sock->ops->setsockopt(sock, level, optname, optval,
2269 optlen);
2270 set_fs(oldfs);
2271 return err;
2272}
2273
2274int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2275 size_t size, int flags)
2276{
2277 if (sock->ops->sendpage)
2278 return sock->ops->sendpage(sock, page, offset, size, flags);
2279
2280 return sock_no_sendpage(sock, page, offset, size, flags);
2281}
2282
2283int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2284{
2285 mm_segment_t oldfs = get_fs();
2286 int err;
2287
2288 set_fs(KERNEL_DS);
2289 err = sock->ops->ioctl(sock, cmd, arg);
2290 set_fs(oldfs);
2291
2292 return err;
2293}
2294
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, lookup, registration and message helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);