[PATCH] vt: Make vt_pid a struct pid (making it pid wrap around safe).
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
80#include <linux/divert.h>
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
115
1da177e4
LT
116/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
118 * in the operation structures but are done directly via the socketcall() multiplexor.
119 */
120
121static struct file_operations socket_file_ops = {
122 .owner = THIS_MODULE,
123 .llseek = no_llseek,
124 .aio_read = sock_aio_read,
125 .aio_write = sock_aio_write,
126 .poll = sock_poll,
127 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
128#ifdef CONFIG_COMPAT
129 .compat_ioctl = compat_sock_ioctl,
130#endif
1da177e4
LT
131 .mmap = sock_mmap,
132 .open = sock_no_open, /* special open code to disallow open via /proc */
133 .release = sock_close,
134 .fasync = sock_fasync,
5274f052
JA
135 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage,
1da177e4
LT
137};
138
139/*
140 * The protocol list. Each protocol is registered in here.
141 */
142
1da177e4 143static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 144static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 145
1da177e4
LT
146/*
147 * Statistics counters of the socket lists
148 */
149
150static DEFINE_PER_CPU(int, sockets_in_use) = 0;
151
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
157
/*
 * Largest socket address accepted from or returned to user space:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6, about 80
 * for AX.25.  Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
178int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
179{
89bddce5 180 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5
SH
205
206int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
207 int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
217 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
232#define SOCKFS_MAGIC 0x534F434B
233
89bddce5 234static kmem_cache_t *sock_inode_cachep __read_mostly;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5
SH
239
240 ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
243 init_waitqueue_head(&ei->socket.wait);
89bddce5 244
1da177e4
LT
245 ei->socket.fasync_list = NULL;
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
255static void sock_destroy_inode(struct inode *inode)
256{
257 kmem_cache_free(sock_inode_cachep,
258 container_of(inode, struct socket_alloc, vfs_inode));
259}
260
89bddce5 261static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
1da177e4 262{
89bddce5 263 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 264
89bddce5
SH
265 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
266 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
267 inode_init_once(&ei->vfs_inode);
268}
89bddce5 269
1da177e4
LT
270static int init_inodecache(void)
271{
272 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
273 sizeof(struct socket_alloc),
274 0,
275 (SLAB_HWCACHE_ALIGN |
276 SLAB_RECLAIM_ACCOUNT |
277 SLAB_MEM_SPREAD),
278 init_once,
279 NULL);
1da177e4
LT
280 if (sock_inode_cachep == NULL)
281 return -ENOMEM;
282 return 0;
283}
284
285static struct super_operations sockfs_ops = {
286 .alloc_inode = sock_alloc_inode,
287 .destroy_inode =sock_destroy_inode,
288 .statfs = simple_statfs,
289};
290
454e2398 291static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
292 int flags, const char *dev_name, void *data,
293 struct vfsmount *mnt)
1da177e4 294{
454e2398
DH
295 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
296 mnt);
1da177e4
LT
297}
298
ba89966c 299static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
300
301static struct file_system_type sock_fs_type = {
302 .name = "sockfs",
303 .get_sb = sockfs_get_sb,
304 .kill_sb = kill_anon_super,
305};
89bddce5 306
1da177e4
LT
/* d_delete callback for socket dentries: unconditionally returns 1. */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	const int delete_it = 1;

	return delete_it;
}
311static struct dentry_operations sockfs_dentry_operations = {
89bddce5 312 .d_delete = sockfs_delete_dentry,
1da177e4
LT
313};
314
315/*
316 * Obtains the first available file descriptor and sets it up for use.
317 *
39d8c1b6
DM
318 * These functions create file structures and maps them to fd space
319 * of the current process. On success it returns file descriptor
1da177e4
LT
320 * and file struct implicitly stored in sock->file.
321 * Note that another thread may close file descriptor before we return
322 * from this function. We use the fact that now we do not refer
323 * to socket after mapping. If one day we will need it, this
324 * function will increment ref. count on file by 1.
325 *
326 * In any case returned fd MAY BE not valid!
327 * This race condition is unavoidable
328 * with shared fd spaces, we cannot solve it inside kernel,
329 * but we take care of internal coherence yet.
330 */
331
39d8c1b6 332static int sock_alloc_fd(struct file **filep)
1da177e4
LT
333{
334 int fd;
1da177e4
LT
335
336 fd = get_unused_fd();
39d8c1b6 337 if (likely(fd >= 0)) {
1da177e4
LT
338 struct file *file = get_empty_filp();
339
39d8c1b6
DM
340 *filep = file;
341 if (unlikely(!file)) {
1da177e4 342 put_unused_fd(fd);
39d8c1b6 343 return -ENFILE;
1da177e4 344 }
39d8c1b6
DM
345 } else
346 *filep = NULL;
347 return fd;
348}
1da177e4 349
39d8c1b6
DM
350static int sock_attach_fd(struct socket *sock, struct file *file)
351{
352 struct qstr this;
353 char name[32];
354
355 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
356 this.name = name;
357 this.hash = SOCK_INODE(sock)->i_ino;
358
359 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
360 if (unlikely(!file->f_dentry))
361 return -ENOMEM;
362
363 file->f_dentry->d_op = &sockfs_dentry_operations;
364 d_add(file->f_dentry, SOCK_INODE(sock));
365 file->f_vfsmnt = mntget(sock_mnt);
366 file->f_mapping = file->f_dentry->d_inode->i_mapping;
367
368 sock->file = file;
369 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
370 file->f_mode = FMODE_READ | FMODE_WRITE;
371 file->f_flags = O_RDWR;
372 file->f_pos = 0;
373 file->private_data = sock;
1da177e4 374
39d8c1b6
DM
375 return 0;
376}
377
/*
 * Allocate a file descriptor and file structure for @sock, attach the
 * socket to the file and install it in the descriptor table.
 *
 * Returns the new descriptor, or a negative error from
 * sock_alloc_fd() or sock_attach_fd().
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int fd = sock_alloc_fd(&newfile);
	int err;

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		/* Undo both allocations made by sock_alloc_fd(). */
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
395
6cb153ca
BL
396static struct socket *sock_from_file(struct file *file, int *err)
397{
398 struct inode *inode;
399 struct socket *sock;
400
401 if (file->f_op == &socket_file_ops)
402 return file->private_data; /* set in sock_map_fd */
403
404 inode = file->f_dentry->d_inode;
405 if (!S_ISSOCK(inode->i_mode)) {
406 *err = -ENOTSOCK;
407 return NULL;
408 }
409
410 sock = SOCKET_I(inode);
411 if (sock->file != file) {
412 printk(KERN_ERR "socki_lookup: socket file changed!\n");
413 sock->file = file;
414 }
415 return sock;
416}
417
1da177e4
LT
418/**
419 * sockfd_lookup - Go from a file number to its socket slot
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
1da177e4 447
6cb153ca
BL
448static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
449{
450 struct file *file;
451 struct socket *sock;
452
3672558c 453 *err = -EBADF;
6cb153ca
BL
454 file = fget_light(fd, fput_needed);
455 if (file) {
456 sock = sock_from_file(file, err);
457 if (sock)
458 return sock;
459 fput_light(file, *fput_needed);
1da177e4 460 }
6cb153ca 461 return NULL;
1da177e4
LT
462}
463
464/**
465 * sock_alloc - allocate a socket
89bddce5 466 *
1da177e4
LT
467 * Allocate a new inode and socket object. The two are bound together
468 * and initialised. The socket is then returned. If we are out of inodes
469 * NULL is returned.
470 */
471
472static struct socket *sock_alloc(void)
473{
89bddce5
SH
474 struct inode *inode;
475 struct socket *sock;
1da177e4
LT
476
477 inode = new_inode(sock_mnt->mnt_sb);
478 if (!inode)
479 return NULL;
480
481 sock = SOCKET_I(inode);
482
89bddce5 483 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
484 inode->i_uid = current->fsuid;
485 inode->i_gid = current->fsgid;
486
487 get_cpu_var(sockets_in_use)++;
488 put_cpu_var(sockets_in_use);
489 return sock;
490}
491
492/*
493 * In theory you can't get an open on this inode, but /proc provides
494 * a back door. Remember to keep it shut otherwise you'll let the
495 * creepy crawlies in.
496 */
89bddce5 497
1da177e4
LT
498static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
499{
500 return -ENXIO;
501}
502
4b6f5d20 503const struct file_operations bad_sock_fops = {
1da177e4
LT
504 .owner = THIS_MODULE,
505 .open = sock_no_open,
506};
507
508/**
509 * sock_release - close a socket
510 * @sock: socket to close
511 *
512 * The socket is released from the protocol stack if it has a release
513 * callback, and the inode is then released if the socket is bound to
89bddce5 514 * an inode not a file.
1da177e4 515 */
89bddce5 516
1da177e4
LT
517void sock_release(struct socket *sock)
518{
519 if (sock->ops) {
520 struct module *owner = sock->ops->owner;
521
522 sock->ops->release(sock);
523 sock->ops = NULL;
524 module_put(owner);
525 }
526
527 if (sock->fasync_list)
528 printk(KERN_ERR "sock_release: fasync list not empty!\n");
529
530 get_cpu_var(sockets_in_use)--;
531 put_cpu_var(sockets_in_use);
532 if (!sock->file) {
533 iput(SOCK_INODE(sock));
534 return;
535 }
89bddce5 536 sock->file = NULL;
1da177e4
LT
537}
538
89bddce5 539static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
540 struct msghdr *msg, size_t size)
541{
542 struct sock_iocb *si = kiocb_to_siocb(iocb);
543 int err;
544
545 si->sock = sock;
546 si->scm = NULL;
547 si->msg = msg;
548 si->size = size;
549
550 err = security_socket_sendmsg(sock, msg, size);
551 if (err)
552 return err;
553
554 return sock->ops->sendmsg(iocb, sock, msg, size);
555}
556
557int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
558{
559 struct kiocb iocb;
560 struct sock_iocb siocb;
561 int ret;
562
563 init_sync_kiocb(&iocb, NULL);
564 iocb.private = &siocb;
565 ret = __sock_sendmsg(&iocb, sock, msg, size);
566 if (-EIOCBQUEUED == ret)
567 ret = wait_on_sync_kiocb(&iocb);
568 return ret;
569}
570
571int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
572 struct kvec *vec, size_t num, size_t size)
573{
574 mm_segment_t oldfs = get_fs();
575 int result;
576
577 set_fs(KERNEL_DS);
578 /*
579 * the following is safe, since for compiler definitions of kvec and
580 * iovec are identical, yielding the same in-core layout and alignment
581 */
89bddce5 582 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
583 msg->msg_iovlen = num;
584 result = sock_sendmsg(sock, msg, size);
585 set_fs(oldfs);
586 return result;
587}
588
89bddce5 589static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
590 struct msghdr *msg, size_t size, int flags)
591{
592 int err;
593 struct sock_iocb *si = kiocb_to_siocb(iocb);
594
595 si->sock = sock;
596 si->scm = NULL;
597 si->msg = msg;
598 si->size = size;
599 si->flags = flags;
600
601 err = security_socket_recvmsg(sock, msg, size, flags);
602 if (err)
603 return err;
604
605 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
606}
607
89bddce5 608int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
609 size_t size, int flags)
610{
611 struct kiocb iocb;
612 struct sock_iocb siocb;
613 int ret;
614
89bddce5 615 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
616 iocb.private = &siocb;
617 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
618 if (-EIOCBQUEUED == ret)
619 ret = wait_on_sync_kiocb(&iocb);
620 return ret;
621}
622
89bddce5
SH
623int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
624 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
625{
626 mm_segment_t oldfs = get_fs();
627 int result;
628
629 set_fs(KERNEL_DS);
630 /*
631 * the following is safe, since for compiler definitions of kvec and
632 * iovec are identical, yielding the same in-core layout and alignment
633 */
89bddce5 634 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
635 result = sock_recvmsg(sock, msg, size, flags);
636 set_fs(oldfs);
637 return result;
638}
639
640static void sock_aio_dtor(struct kiocb *iocb)
641{
642 kfree(iocb->private);
643}
644
ce1d4d3e
CH
645static ssize_t sock_sendpage(struct file *file, struct page *page,
646 int offset, size_t size, loff_t *ppos, int more)
1da177e4 647{
1da177e4
LT
648 struct socket *sock;
649 int flags;
650
ce1d4d3e
CH
651 sock = file->private_data;
652
653 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
654 if (more)
655 flags |= MSG_MORE;
656
657 return sock->ops->sendpage(sock, page, offset, size, flags);
658}
1da177e4 659
ce1d4d3e 660static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 661 struct sock_iocb *siocb)
ce1d4d3e
CH
662{
663 if (!is_sync_kiocb(iocb)) {
664 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
665 if (!siocb)
666 return NULL;
1da177e4
LT
667 iocb->ki_dtor = sock_aio_dtor;
668 }
1da177e4 669
ce1d4d3e 670 siocb->kiocb = iocb;
ce1d4d3e
CH
671 iocb->private = siocb;
672 return siocb;
1da177e4
LT
673}
674
ce1d4d3e 675static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
676 struct file *file, const struct iovec *iov,
677 unsigned long nr_segs)
ce1d4d3e
CH
678{
679 struct socket *sock = file->private_data;
680 size_t size = 0;
681 int i;
1da177e4 682
89bddce5
SH
683 for (i = 0; i < nr_segs; i++)
684 size += iov[i].iov_len;
1da177e4 685
ce1d4d3e
CH
686 msg->msg_name = NULL;
687 msg->msg_namelen = 0;
688 msg->msg_control = NULL;
689 msg->msg_controllen = 0;
89bddce5 690 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
691 msg->msg_iovlen = nr_segs;
692 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
693
694 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
695}
696
027445c3
BP
697static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
698 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
699{
700 struct sock_iocb siocb, *x;
701
1da177e4
LT
702 if (pos != 0)
703 return -ESPIPE;
027445c3
BP
704
705 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
706 return 0;
707
027445c3
BP
708
709 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
710 if (!x)
711 return -ENOMEM;
027445c3 712 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
713}
714
ce1d4d3e 715static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
716 struct file *file, const struct iovec *iov,
717 unsigned long nr_segs)
1da177e4 718{
ce1d4d3e
CH
719 struct socket *sock = file->private_data;
720 size_t size = 0;
721 int i;
1da177e4 722
89bddce5
SH
723 for (i = 0; i < nr_segs; i++)
724 size += iov[i].iov_len;
1da177e4 725
ce1d4d3e
CH
726 msg->msg_name = NULL;
727 msg->msg_namelen = 0;
728 msg->msg_control = NULL;
729 msg->msg_controllen = 0;
89bddce5 730 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
731 msg->msg_iovlen = nr_segs;
732 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
733 if (sock->type == SOCK_SEQPACKET)
734 msg->msg_flags |= MSG_EOR;
1da177e4 735
ce1d4d3e 736 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
737}
738
027445c3
BP
739static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
740 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
741{
742 struct sock_iocb siocb, *x;
1da177e4 743
ce1d4d3e
CH
744 if (pos != 0)
745 return -ESPIPE;
027445c3
BP
746
747 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 748 return 0;
1da177e4 749
027445c3 750 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
751 if (!x)
752 return -ENOMEM;
1da177e4 753
027445c3 754 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
755}
756
1da177e4
LT
/*
 * Atomic setting of ioctl hooks to avoid race
 * with module unload.
 */
761
4a3e2f71 762static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 763static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 764
89bddce5 765void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 766{
4a3e2f71 767 mutex_lock(&br_ioctl_mutex);
1da177e4 768 br_ioctl_hook = hook;
4a3e2f71 769 mutex_unlock(&br_ioctl_mutex);
1da177e4 770}
89bddce5 771
1da177e4
LT
772EXPORT_SYMBOL(brioctl_set);
773
4a3e2f71 774static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 775static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 776
89bddce5 777void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 778{
4a3e2f71 779 mutex_lock(&vlan_ioctl_mutex);
1da177e4 780 vlan_ioctl_hook = hook;
4a3e2f71 781 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 782}
89bddce5 783
1da177e4
LT
784EXPORT_SYMBOL(vlan_ioctl_set);
785
4a3e2f71 786static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 787static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 788
89bddce5 789void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 790{
4a3e2f71 791 mutex_lock(&dlci_ioctl_mutex);
1da177e4 792 dlci_ioctl_hook = hook;
4a3e2f71 793 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 794}
89bddce5 795
1da177e4
LT
796EXPORT_SYMBOL(dlci_ioctl_set);
797
798/*
799 * With an ioctl, arg may well be a user mode pointer, but we don't know
800 * what to do with it - that's up to the protocol still.
801 */
802
803static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
804{
805 struct socket *sock;
806 void __user *argp = (void __user *)arg;
807 int pid, err;
808
b69aee04 809 sock = file->private_data;
1da177e4
LT
810 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
811 err = dev_ioctl(cmd, argp);
812 } else
d86b5e0e 813#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
814 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
815 err = dev_ioctl(cmd, argp);
816 } else
89bddce5
SH
817#endif /* CONFIG_WIRELESS_EXT */
818 switch (cmd) {
1da177e4
LT
819 case FIOSETOWN:
820 case SIOCSPGRP:
821 err = -EFAULT;
822 if (get_user(pid, (int __user *)argp))
823 break;
824 err = f_setown(sock->file, pid, 1);
825 break;
826 case FIOGETOWN:
827 case SIOCGPGRP:
89bddce5
SH
828 err = put_user(sock->file->f_owner.pid,
829 (int __user *)argp);
1da177e4
LT
830 break;
831 case SIOCGIFBR:
832 case SIOCSIFBR:
833 case SIOCBRADDBR:
834 case SIOCBRDELBR:
835 err = -ENOPKG;
836 if (!br_ioctl_hook)
837 request_module("bridge");
838
4a3e2f71 839 mutex_lock(&br_ioctl_mutex);
89bddce5 840 if (br_ioctl_hook)
1da177e4 841 err = br_ioctl_hook(cmd, argp);
4a3e2f71 842 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
843 break;
844 case SIOCGIFVLAN:
845 case SIOCSIFVLAN:
846 err = -ENOPKG;
847 if (!vlan_ioctl_hook)
848 request_module("8021q");
849
4a3e2f71 850 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
851 if (vlan_ioctl_hook)
852 err = vlan_ioctl_hook(argp);
4a3e2f71 853 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
854 break;
855 case SIOCGIFDIVERT:
856 case SIOCSIFDIVERT:
89bddce5 857 /* Convert this to call through a hook */
1da177e4
LT
858 err = divert_ioctl(cmd, argp);
859 break;
860 case SIOCADDDLCI:
861 case SIOCDELDLCI:
862 err = -ENOPKG;
863 if (!dlci_ioctl_hook)
864 request_module("dlci");
865
866 if (dlci_ioctl_hook) {
4a3e2f71 867 mutex_lock(&dlci_ioctl_mutex);
1da177e4 868 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 869 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
870 }
871 break;
872 default:
873 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
874
875 /*
876 * If this ioctl is unknown try to hand it down
877 * to the NIC driver.
878 */
879 if (err == -ENOIOCTLCMD)
880 err = dev_ioctl(cmd, argp);
1da177e4 881 break;
89bddce5 882 }
1da177e4
LT
883 return err;
884}
885
886int sock_create_lite(int family, int type, int protocol, struct socket **res)
887{
888 int err;
889 struct socket *sock = NULL;
89bddce5 890
1da177e4
LT
891 err = security_socket_create(family, type, protocol, 1);
892 if (err)
893 goto out;
894
895 sock = sock_alloc();
896 if (!sock) {
897 err = -ENOMEM;
898 goto out;
899 }
900
1da177e4 901 sock->type = type;
7420ed23
VY
902 err = security_socket_post_create(sock, family, type, protocol, 1);
903 if (err)
904 goto out_release;
905
1da177e4
LT
906out:
907 *res = sock;
908 return err;
7420ed23
VY
909out_release:
910 sock_release(sock);
911 sock = NULL;
912 goto out;
1da177e4
LT
913}
914
915/* No kernel lock held - perfect */
89bddce5 916static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
917{
918 struct socket *sock;
919
920 /*
89bddce5 921 * We can't return errors to poll, so it's either yes or no.
1da177e4 922 */
b69aee04 923 sock = file->private_data;
1da177e4
LT
924 return sock->ops->poll(file, sock, wait);
925}
926
89bddce5 927static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 928{
b69aee04 929 struct socket *sock = file->private_data;
1da177e4
LT
930
931 return sock->ops->mmap(file, sock, vma);
932}
933
20380731 934static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
935{
936 /*
89bddce5
SH
937 * It was possible the inode is NULL we were
938 * closing an unfinished socket.
1da177e4
LT
939 */
940
89bddce5 941 if (!inode) {
1da177e4
LT
942 printk(KERN_DEBUG "sock_close: NULL inode\n");
943 return 0;
944 }
945 sock_fasync(-1, filp, 0);
946 sock_release(SOCKET_I(inode));
947 return 0;
948}
949
950/*
951 * Update the socket async list
952 *
953 * Fasync_list locking strategy.
954 *
955 * 1. fasync_list is modified only under process context socket lock
956 * i.e. under semaphore.
957 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
958 * or under socket lock.
959 * 3. fasync_list can be used from softirq context, so that
960 * modification under socket lock have to be enhanced with
961 * write_lock_bh(&sk->sk_callback_lock).
962 * --ANK (990710)
963 */
964
965static int sock_fasync(int fd, struct file *filp, int on)
966{
89bddce5 967 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
968 struct socket *sock;
969 struct sock *sk;
970
89bddce5 971 if (on) {
8b3a7005 972 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 973 if (fna == NULL)
1da177e4
LT
974 return -ENOMEM;
975 }
976
b69aee04 977 sock = filp->private_data;
1da177e4 978
89bddce5
SH
979 sk = sock->sk;
980 if (sk == NULL) {
1da177e4
LT
981 kfree(fna);
982 return -EINVAL;
983 }
984
985 lock_sock(sk);
986
89bddce5 987 prev = &(sock->fasync_list);
1da177e4 988
89bddce5
SH
989 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
990 if (fa->fa_file == filp)
1da177e4
LT
991 break;
992
89bddce5
SH
993 if (on) {
994 if (fa != NULL) {
1da177e4 995 write_lock_bh(&sk->sk_callback_lock);
89bddce5 996 fa->fa_fd = fd;
1da177e4
LT
997 write_unlock_bh(&sk->sk_callback_lock);
998
999 kfree(fna);
1000 goto out;
1001 }
89bddce5
SH
1002 fna->fa_file = filp;
1003 fna->fa_fd = fd;
1004 fna->magic = FASYNC_MAGIC;
1005 fna->fa_next = sock->fasync_list;
1da177e4 1006 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1007 sock->fasync_list = fna;
1da177e4 1008 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1009 } else {
1010 if (fa != NULL) {
1da177e4 1011 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1012 *prev = fa->fa_next;
1da177e4
LT
1013 write_unlock_bh(&sk->sk_callback_lock);
1014 kfree(fa);
1015 }
1016 }
1017
1018out:
1019 release_sock(sock->sk);
1020 return 0;
1021}
1022
1023/* This function may be called only under socket lock or callback_lock */
1024
1025int sock_wake_async(struct socket *sock, int how, int band)
1026{
1027 if (!sock || !sock->fasync_list)
1028 return -1;
89bddce5 1029 switch (how) {
1da177e4 1030 case 1:
89bddce5 1031
1da177e4
LT
1032 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1033 break;
1034 goto call_kill;
1035 case 2:
1036 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1037 break;
1038 /* fall through */
1039 case 0:
89bddce5 1040call_kill:
1da177e4
LT
1041 __kill_fasync(sock->fasync_list, SIGIO, band);
1042 break;
1043 case 3:
1044 __kill_fasync(sock->fasync_list, SIGURG, band);
1045 }
1046 return 0;
1047}
1048
89bddce5
SH
1049static int __sock_create(int family, int type, int protocol,
1050 struct socket **res, int kern)
1da177e4
LT
1051{
1052 int err;
1053 struct socket *sock;
55737fda 1054 const struct net_proto_family *pf;
1da177e4
LT
1055
1056 /*
89bddce5 1057 * Check protocol is in range
1da177e4
LT
1058 */
1059 if (family < 0 || family >= NPROTO)
1060 return -EAFNOSUPPORT;
1061 if (type < 0 || type >= SOCK_MAX)
1062 return -EINVAL;
1063
1064 /* Compatibility.
1065
1066 This uglymoron is moved from INET layer to here to avoid
1067 deadlock in module load.
1068 */
1069 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1070 static int warned;
1da177e4
LT
1071 if (!warned) {
1072 warned = 1;
89bddce5
SH
1073 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1074 current->comm);
1da177e4
LT
1075 }
1076 family = PF_PACKET;
1077 }
1078
1079 err = security_socket_create(family, type, protocol, kern);
1080 if (err)
1081 return err;
89bddce5 1082
55737fda
SH
1083 /*
1084 * Allocate the socket and allow the family to set things up. if
1085 * the protocol is 0, the family is instructed to select an appropriate
1086 * default.
1087 */
1088 sock = sock_alloc();
1089 if (!sock) {
1090 if (net_ratelimit())
1091 printk(KERN_WARNING "socket: no more sockets\n");
1092 return -ENFILE; /* Not exactly a match, but its the
1093 closest posix thing */
1094 }
1095
1096 sock->type = type;
1097
1da177e4 1098#if defined(CONFIG_KMOD)
89bddce5
SH
1099 /* Attempt to load a protocol module if the find failed.
1100 *
1101 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1102 * requested real, full-featured networking support upon configuration.
1103 * Otherwise module support will break!
1104 */
55737fda 1105 if (net_families[family] == NULL)
89bddce5 1106 request_module("net-pf-%d", family);
1da177e4
LT
1107#endif
1108
55737fda
SH
1109 rcu_read_lock();
1110 pf = rcu_dereference(net_families[family]);
1111 err = -EAFNOSUPPORT;
1112 if (!pf)
1113 goto out_release;
1da177e4
LT
1114
1115 /*
1116 * We will call the ->create function, that possibly is in a loadable
1117 * module, so we have to bump that loadable module refcnt first.
1118 */
55737fda 1119 if (!try_module_get(pf->owner))
1da177e4
LT
1120 goto out_release;
1121
55737fda
SH
1122 /* Now protected by module ref count */
1123 rcu_read_unlock();
1124
1125 err = pf->create(sock, protocol);
1126 if (err < 0)
1da177e4 1127 goto out_module_put;
a79af59e 1128
1da177e4
LT
1129 /*
1130 * Now to bump the refcnt of the [loadable] module that owns this
1131 * socket at sock_release time we decrement its refcnt.
1132 */
55737fda
SH
1133 if (!try_module_get(sock->ops->owner))
1134 goto out_module_busy;
1135
1da177e4
LT
1136 /*
1137 * Now that we're done with the ->create function, the [loadable]
1138 * module can have its refcnt decremented
1139 */
55737fda 1140 module_put(pf->owner);
7420ed23
VY
1141 err = security_socket_post_create(sock, family, type, protocol, kern);
1142 if (err)
1143 goto out_release;
55737fda 1144 *res = sock;
1da177e4 1145
55737fda
SH
1146 return 0;
1147
1148out_module_busy:
1149 err = -EAFNOSUPPORT;
1da177e4 1150out_module_put:
55737fda
SH
1151 sock->ops = NULL;
1152 module_put(pf->owner);
1153out_sock_release:
1da177e4 1154 sock_release(sock);
55737fda
SH
1155 return err;
1156
1157out_release:
1158 rcu_read_unlock();
1159 goto out_sock_release;
1da177e4
LT
1160}
1161
/* Create a socket via __sock_create() with the kern flag clear. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1166
/* Create a socket via __sock_create() with the kern flag set. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1171
1172asmlinkage long sys_socket(int family, int type, int protocol)
1173{
1174 int retval;
1175 struct socket *sock;
1176
1177 retval = sock_create(family, type, protocol, &sock);
1178 if (retval < 0)
1179 goto out;
1180
1181 retval = sock_map_fd(sock);
1182 if (retval < 0)
1183 goto out_release;
1184
1185out:
1186 /* It may be already another descriptor 8) Not kernel problem. */
1187 return retval;
1188
1189out_release:
1190 sock_release(sock);
1191 return retval;
1192}
1193
1194/*
1195 * Create a pair of connected sockets.
1196 */
1197
89bddce5
SH
1198asmlinkage long sys_socketpair(int family, int type, int protocol,
1199 int __user *usockvec)
1da177e4
LT
1200{
1201 struct socket *sock1, *sock2;
1202 int fd1, fd2, err;
1203
1204 /*
1205 * Obtain the first socket and check if the underlying protocol
1206 * supports the socketpair call.
1207 */
1208
1209 err = sock_create(family, type, protocol, &sock1);
1210 if (err < 0)
1211 goto out;
1212
1213 err = sock_create(family, type, protocol, &sock2);
1214 if (err < 0)
1215 goto out_release_1;
1216
1217 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1218 if (err < 0)
1da177e4
LT
1219 goto out_release_both;
1220
1221 fd1 = fd2 = -1;
1222
1223 err = sock_map_fd(sock1);
1224 if (err < 0)
1225 goto out_release_both;
1226 fd1 = err;
1227
1228 err = sock_map_fd(sock2);
1229 if (err < 0)
1230 goto out_close_1;
1231 fd2 = err;
1232
1233 /* fd1 and fd2 may be already another descriptors.
1234 * Not kernel problem.
1235 */
1236
89bddce5 1237 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1238 if (!err)
1239 err = put_user(fd2, &usockvec[1]);
1240 if (!err)
1241 return 0;
1242
1243 sys_close(fd2);
1244 sys_close(fd1);
1245 return err;
1246
1247out_close_1:
89bddce5 1248 sock_release(sock2);
1da177e4
LT
1249 sys_close(fd1);
1250 return err;
1251
1252out_release_both:
89bddce5 1253 sock_release(sock2);
1da177e4 1254out_release_1:
89bddce5 1255 sock_release(sock1);
1da177e4
LT
1256out:
1257 return err;
1258}
1259
1da177e4
LT
1260/*
1261 * Bind a name to a socket. Nothing much to do here since it's
1262 * the protocol's responsibility to handle the local address.
1263 *
1264 * We move the socket address to kernel space before we call
1265 * the protocol layer (having also checked the address is ok).
1266 */
1267
1268asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1269{
1270 struct socket *sock;
1271 char address[MAX_SOCK_ADDR];
6cb153ca 1272 int err, fput_needed;
1da177e4 1273
89bddce5
SH
1274 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1275 if(sock) {
1276 err = move_addr_to_kernel(umyaddr, addrlen, address);
1277 if (err >= 0) {
1278 err = security_socket_bind(sock,
1279 (struct sockaddr *)address,
1280 addrlen);
6cb153ca
BL
1281 if (!err)
1282 err = sock->ops->bind(sock,
89bddce5
SH
1283 (struct sockaddr *)
1284 address, addrlen);
1da177e4 1285 }
6cb153ca 1286 fput_light(sock->file, fput_needed);
89bddce5 1287 }
1da177e4
LT
1288 return err;
1289}
1290
1da177e4
LT
1291/*
1292 * Perform a listen. Basically, we allow the protocol to do anything
1293 * necessary for a listen, and if that works, we mark the socket as
1294 * ready for listening.
1295 */
1296
/* Upper bound that sys_listen() applies to the caller-supplied backlog. */
7a42c217 1297int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1298
1299asmlinkage long sys_listen(int fd, int backlog)
1300{
1301 struct socket *sock;
6cb153ca 1302 int err, fput_needed;
89bddce5
SH
1303
1304 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1305 if (sock) {
1306 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1307 backlog = sysctl_somaxconn;
1308
1309 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1310 if (!err)
1311 err = sock->ops->listen(sock, backlog);
1da177e4 1312
6cb153ca 1313 fput_light(sock->file, fput_needed);
1da177e4
LT
1314 }
1315 return err;
1316}
1317
1da177e4
LT
1318/*
1319 * For accept, we attempt to create a new socket, set up the link
1320 * with the client, wake up the client, then return the new
1321 * connected fd. We collect the address of the connector in kernel
1322 * space and move it to user at the very end. This is unclean because
1323 * we open the socket then return an error.
1324 *
1325 * 1003.1g adds the ability to recvmsg() to query connection pending
1326 * status to recvmsg. We need to add that support in a way that's
1327 * clean when we restructure accept also.
1328 */
1329
89bddce5
SH
1330asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1331 int __user *upeer_addrlen)
1da177e4
LT
1332{
1333 struct socket *sock, *newsock;
39d8c1b6 1334 struct file *newfile;
6cb153ca 1335 int err, len, newfd, fput_needed;
1da177e4
LT
1336 char address[MAX_SOCK_ADDR];
1337
6cb153ca 1338 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1339 if (!sock)
1340 goto out;
1341
1342 err = -ENFILE;
89bddce5 1343 if (!(newsock = sock_alloc()))
1da177e4
LT
1344 goto out_put;
1345
1346 newsock->type = sock->type;
1347 newsock->ops = sock->ops;
1348
1da177e4
LT
1349 /*
1350 * We don't need try_module_get here, as the listening socket (sock)
1351 * has the protocol module (sock->ops->owner) held.
1352 */
1353 __module_get(newsock->ops->owner);
1354
39d8c1b6
DM
1355 newfd = sock_alloc_fd(&newfile);
1356 if (unlikely(newfd < 0)) {
1357 err = newfd;
9a1875e6
DM
1358 sock_release(newsock);
1359 goto out_put;
39d8c1b6
DM
1360 }
1361
1362 err = sock_attach_fd(newsock, newfile);
1363 if (err < 0)
1364 goto out_fd;
1365
a79af59e
FF
1366 err = security_socket_accept(sock, newsock);
1367 if (err)
39d8c1b6 1368 goto out_fd;
a79af59e 1369
1da177e4
LT
1370 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1371 if (err < 0)
39d8c1b6 1372 goto out_fd;
1da177e4
LT
1373
1374 if (upeer_sockaddr) {
89bddce5
SH
1375 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1376 &len, 2) < 0) {
1da177e4 1377 err = -ECONNABORTED;
39d8c1b6 1378 goto out_fd;
1da177e4 1379 }
89bddce5
SH
1380 err = move_addr_to_user(address, len, upeer_sockaddr,
1381 upeer_addrlen);
1da177e4 1382 if (err < 0)
39d8c1b6 1383 goto out_fd;
1da177e4
LT
1384 }
1385
1386 /* File flags are not inherited via accept() unlike another OSes. */
1387
39d8c1b6
DM
1388 fd_install(newfd, newfile);
1389 err = newfd;
1da177e4
LT
1390
1391 security_socket_post_accept(sock, newsock);
1392
1393out_put:
6cb153ca 1394 fput_light(sock->file, fput_needed);
1da177e4
LT
1395out:
1396 return err;
39d8c1b6 1397out_fd:
9606a216 1398 fput(newfile);
39d8c1b6 1399 put_unused_fd(newfd);
1da177e4
LT
1400 goto out_put;
1401}
1402
1da177e4
LT
1403/*
1404 * Attempt to connect to a socket with the server address. The address
1405 * is in user space so we verify it is OK and move it to kernel space.
1406 *
1407 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1408 * break bindings
1409 *
1410 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1411 * other SEQPACKET protocols that take time to connect() as it doesn't
1412 * include the -EINPROGRESS status for such sockets.
1413 */
1414
89bddce5
SH
1415asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1416 int addrlen)
1da177e4
LT
1417{
1418 struct socket *sock;
1419 char address[MAX_SOCK_ADDR];
6cb153ca 1420 int err, fput_needed;
1da177e4 1421
6cb153ca 1422 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1423 if (!sock)
1424 goto out;
1425 err = move_addr_to_kernel(uservaddr, addrlen, address);
1426 if (err < 0)
1427 goto out_put;
1428
89bddce5
SH
1429 err =
1430 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1431 if (err)
1432 goto out_put;
1433
89bddce5 1434 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1435 sock->file->f_flags);
1436out_put:
6cb153ca 1437 fput_light(sock->file, fput_needed);
1da177e4
LT
1438out:
1439 return err;
1440}
1441
1442/*
1443 * Get the local address ('name') of a socket object. Move the obtained
1444 * name to user space.
1445 */
1446
89bddce5
SH
1447asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1448 int __user *usockaddr_len)
1da177e4
LT
1449{
1450 struct socket *sock;
1451 char address[MAX_SOCK_ADDR];
6cb153ca 1452 int len, err, fput_needed;
89bddce5 1453
6cb153ca 1454 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1455 if (!sock)
1456 goto out;
1457
1458 err = security_socket_getsockname(sock);
1459 if (err)
1460 goto out_put;
1461
1462 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1463 if (err)
1464 goto out_put;
1465 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1466
1467out_put:
6cb153ca 1468 fput_light(sock->file, fput_needed);
1da177e4
LT
1469out:
1470 return err;
1471}
1472
1473/*
1474 * Get the remote address ('name') of a socket object. Move the obtained
1475 * name to user space.
1476 */
1477
89bddce5
SH
1478asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1479 int __user *usockaddr_len)
1da177e4
LT
1480{
1481 struct socket *sock;
1482 char address[MAX_SOCK_ADDR];
6cb153ca 1483 int len, err, fput_needed;
1da177e4 1484
89bddce5
SH
1485 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1486 if (sock != NULL) {
1da177e4
LT
1487 err = security_socket_getpeername(sock);
1488 if (err) {
6cb153ca 1489 fput_light(sock->file, fput_needed);
1da177e4
LT
1490 return err;
1491 }
1492
89bddce5
SH
1493 err =
1494 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1495 1);
1da177e4 1496 if (!err)
89bddce5
SH
1497 err = move_addr_to_user(address, len, usockaddr,
1498 usockaddr_len);
6cb153ca 1499 fput_light(sock->file, fput_needed);
1da177e4
LT
1500 }
1501 return err;
1502}
1503
1504/*
1505 * Send a datagram to a given address. We move the address into kernel
1506 * space and check the user space data area is readable before invoking
1507 * the protocol.
1508 */
1509
89bddce5
SH
1510asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1511 unsigned flags, struct sockaddr __user *addr,
1512 int addr_len)
1da177e4
LT
1513{
1514 struct socket *sock;
1515 char address[MAX_SOCK_ADDR];
1516 int err;
1517 struct msghdr msg;
1518 struct iovec iov;
6cb153ca
BL
1519 int fput_needed;
1520 struct file *sock_file;
1521
1522 sock_file = fget_light(fd, &fput_needed);
1523 if (!sock_file)
1524 return -EBADF;
1525
1526 sock = sock_from_file(sock_file, &err);
1da177e4 1527 if (!sock)
6cb153ca 1528 goto out_put;
89bddce5
SH
1529 iov.iov_base = buff;
1530 iov.iov_len = len;
1531 msg.msg_name = NULL;
1532 msg.msg_iov = &iov;
1533 msg.msg_iovlen = 1;
1534 msg.msg_control = NULL;
1535 msg.msg_controllen = 0;
1536 msg.msg_namelen = 0;
6cb153ca 1537 if (addr) {
1da177e4
LT
1538 err = move_addr_to_kernel(addr, addr_len, address);
1539 if (err < 0)
1540 goto out_put;
89bddce5
SH
1541 msg.msg_name = address;
1542 msg.msg_namelen = addr_len;
1da177e4
LT
1543 }
1544 if (sock->file->f_flags & O_NONBLOCK)
1545 flags |= MSG_DONTWAIT;
1546 msg.msg_flags = flags;
1547 err = sock_sendmsg(sock, &msg, len);
1548
89bddce5 1549out_put:
6cb153ca 1550 fput_light(sock_file, fput_needed);
1da177e4
LT
1551 return err;
1552}
1553
1554/*
89bddce5 1555 * Send a datagram down a socket.
1da177e4
LT
1556 */
1557
89bddce5 1558asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1559{
1560 return sys_sendto(fd, buff, len, flags, NULL, 0);
1561}
1562
1563/*
89bddce5 1564 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1565 * sender. We verify the buffers are writable and if needed move the
1566 * sender address from kernel to user space.
1567 */
1568
89bddce5
SH
1569asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1570 unsigned flags, struct sockaddr __user *addr,
1571 int __user *addr_len)
1da177e4
LT
1572{
1573 struct socket *sock;
1574 struct iovec iov;
1575 struct msghdr msg;
1576 char address[MAX_SOCK_ADDR];
89bddce5 1577 int err, err2;
6cb153ca
BL
1578 struct file *sock_file;
1579 int fput_needed;
1580
1581 sock_file = fget_light(fd, &fput_needed);
1582 if (!sock_file)
1583 return -EBADF;
1da177e4 1584
6cb153ca 1585 sock = sock_from_file(sock_file, &err);
1da177e4
LT
1586 if (!sock)
1587 goto out;
1588
89bddce5
SH
1589 msg.msg_control = NULL;
1590 msg.msg_controllen = 0;
1591 msg.msg_iovlen = 1;
1592 msg.msg_iov = &iov;
1593 iov.iov_len = size;
1594 iov.iov_base = ubuf;
1595 msg.msg_name = address;
1596 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1597 if (sock->file->f_flags & O_NONBLOCK)
1598 flags |= MSG_DONTWAIT;
89bddce5 1599 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1600
89bddce5
SH
1601 if (err >= 0 && addr != NULL) {
1602 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1603 if (err2 < 0)
1604 err = err2;
1da177e4 1605 }
1da177e4 1606out:
6cb153ca 1607 fput_light(sock_file, fput_needed);
1da177e4
LT
1608 return err;
1609}
1610
1611/*
89bddce5 1612 * Receive a datagram from a socket.
1da177e4
LT
1613 */
1614
89bddce5
SH
1615asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1616 unsigned flags)
1da177e4
LT
1617{
1618 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1619}
1620
1621/*
1622 * Set a socket option. Because we don't know the option lengths we have
1623 * to pass the user mode parameter for the protocols to sort out.
1624 */
1625
89bddce5
SH
1626asmlinkage long sys_setsockopt(int fd, int level, int optname,
1627 char __user *optval, int optlen)
1da177e4 1628{
6cb153ca 1629 int err, fput_needed;
1da177e4
LT
1630 struct socket *sock;
1631
1632 if (optlen < 0)
1633 return -EINVAL;
89bddce5
SH
1634
1635 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1636 if (sock != NULL) {
1637 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1638 if (err)
1639 goto out_put;
1da177e4
LT
1640
1641 if (level == SOL_SOCKET)
89bddce5
SH
1642 err =
1643 sock_setsockopt(sock, level, optname, optval,
1644 optlen);
1da177e4 1645 else
89bddce5
SH
1646 err =
1647 sock->ops->setsockopt(sock, level, optname, optval,
1648 optlen);
6cb153ca
BL
1649out_put:
1650 fput_light(sock->file, fput_needed);
1da177e4
LT
1651 }
1652 return err;
1653}
1654
1655/*
1656 * Get a socket option. Because we don't know the option lengths we have
1657 * to pass a user mode parameter for the protocols to sort out.
1658 */
1659
89bddce5
SH
1660asmlinkage long sys_getsockopt(int fd, int level, int optname,
1661 char __user *optval, int __user *optlen)
1da177e4 1662{
6cb153ca 1663 int err, fput_needed;
1da177e4
LT
1664 struct socket *sock;
1665
89bddce5
SH
1666 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1667 if (sock != NULL) {
6cb153ca
BL
1668 err = security_socket_getsockopt(sock, level, optname);
1669 if (err)
1670 goto out_put;
1da177e4
LT
1671
1672 if (level == SOL_SOCKET)
89bddce5
SH
1673 err =
1674 sock_getsockopt(sock, level, optname, optval,
1675 optlen);
1da177e4 1676 else
89bddce5
SH
1677 err =
1678 sock->ops->getsockopt(sock, level, optname, optval,
1679 optlen);
6cb153ca
BL
1680out_put:
1681 fput_light(sock->file, fput_needed);
1da177e4
LT
1682 }
1683 return err;
1684}
1685
1da177e4
LT
1686/*
1687 * Shutdown a socket.
1688 */
1689
1690asmlinkage long sys_shutdown(int fd, int how)
1691{
6cb153ca 1692 int err, fput_needed;
1da177e4
LT
1693 struct socket *sock;
1694
89bddce5
SH
1695 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1696 if (sock != NULL) {
1da177e4 1697 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1698 if (!err)
1699 err = sock->ops->shutdown(sock, how);
1700 fput_light(sock->file, fput_needed);
1da177e4
LT
1701 }
1702 return err;
1703}
1704
89bddce5 1705/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1706 * fields which are the same type (int / unsigned) on our platforms.
1707 */
/*
 * NOTE: these macros are not self-contained.  They read a variable named
 * `flags` from the expansion site and, for an argument `msg`, also need a
 * variable `msg_compat` in scope.  The result is the address of `member`
 * in whichever of the two structures MSG_CMSG_COMPAT selects.
 */
1708#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1709#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1710#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1711
1da177e4
LT
1712/*
1713 * BSD sendmsg interface
1714 */
1715
1716asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1717{
89bddce5
SH
1718 struct compat_msghdr __user *msg_compat =
1719 (struct compat_msghdr __user *)msg;
1da177e4
LT
1720 struct socket *sock;
1721 char address[MAX_SOCK_ADDR];
1722 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1723 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1724 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1725 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1726 unsigned char *ctl_buf = ctl;
1727 struct msghdr msg_sys;
1728 int err, ctl_len, iov_size, total_len;
6cb153ca 1729 int fput_needed;
89bddce5 1730
1da177e4
LT
1731 err = -EFAULT;
1732 if (MSG_CMSG_COMPAT & flags) {
1733 if (get_compat_msghdr(&msg_sys, msg_compat))
1734 return -EFAULT;
89bddce5
SH
1735 }
1736 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1737 return -EFAULT;
1738
6cb153ca 1739 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1740 if (!sock)
1da177e4
LT
1741 goto out;
1742
1743 /* do not move before msg_sys is valid */
1744 err = -EMSGSIZE;
1745 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1746 goto out_put;
1747
89bddce5 1748 /* Check whether to allocate the iovec area */
1da177e4
LT
1749 err = -ENOMEM;
1750 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1751 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1752 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1753 if (!iov)
1754 goto out_put;
1755 }
1756
1757 /* This will also move the address data into kernel space */
1758 if (MSG_CMSG_COMPAT & flags) {
1759 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1760 } else
1761 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1762 if (err < 0)
1da177e4
LT
1763 goto out_freeiov;
1764 total_len = err;
1765
1766 err = -ENOBUFS;
1767
1768 if (msg_sys.msg_controllen > INT_MAX)
1769 goto out_freeiov;
89bddce5 1770 ctl_len = msg_sys.msg_controllen;
1da177e4 1771 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1772 err =
1773 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1774 sizeof(ctl));
1da177e4
LT
1775 if (err)
1776 goto out_freeiov;
1777 ctl_buf = msg_sys.msg_control;
8920e8f9 1778 ctl_len = msg_sys.msg_controllen;
1da177e4 1779 } else if (ctl_len) {
89bddce5 1780 if (ctl_len > sizeof(ctl)) {
1da177e4 1781 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1782 if (ctl_buf == NULL)
1da177e4
LT
1783 goto out_freeiov;
1784 }
1785 err = -EFAULT;
1786 /*
1787 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1788 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1789 * checking falls down on this.
1790 */
89bddce5
SH
1791 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1792 ctl_len))
1da177e4
LT
1793 goto out_freectl;
1794 msg_sys.msg_control = ctl_buf;
1795 }
1796 msg_sys.msg_flags = flags;
1797
1798 if (sock->file->f_flags & O_NONBLOCK)
1799 msg_sys.msg_flags |= MSG_DONTWAIT;
1800 err = sock_sendmsg(sock, &msg_sys, total_len);
1801
1802out_freectl:
89bddce5 1803 if (ctl_buf != ctl)
1da177e4
LT
1804 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1805out_freeiov:
1806 if (iov != iovstack)
1807 sock_kfree_s(sock->sk, iov, iov_size);
1808out_put:
6cb153ca 1809 fput_light(sock->file, fput_needed);
89bddce5 1810out:
1da177e4
LT
1811 return err;
1812}
1813
1814/*
1815 * BSD recvmsg interface
1816 */
1817
89bddce5
SH
1818asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1819 unsigned int flags)
1da177e4 1820{
89bddce5
SH
1821 struct compat_msghdr __user *msg_compat =
1822 (struct compat_msghdr __user *)msg;
1da177e4
LT
1823 struct socket *sock;
1824 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1825 struct iovec *iov = iovstack;
1da177e4
LT
1826 struct msghdr msg_sys;
1827 unsigned long cmsg_ptr;
1828 int err, iov_size, total_len, len;
6cb153ca 1829 int fput_needed;
1da177e4
LT
1830
1831 /* kernel mode address */
1832 char addr[MAX_SOCK_ADDR];
1833
1834 /* user mode address pointers */
1835 struct sockaddr __user *uaddr;
1836 int __user *uaddr_len;
89bddce5 1837
1da177e4
LT
1838 if (MSG_CMSG_COMPAT & flags) {
1839 if (get_compat_msghdr(&msg_sys, msg_compat))
1840 return -EFAULT;
89bddce5
SH
1841 }
1842 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1843 return -EFAULT;
1da177e4 1844
6cb153ca 1845 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1846 if (!sock)
1847 goto out;
1848
1849 err = -EMSGSIZE;
1850 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1851 goto out_put;
89bddce5
SH
1852
1853 /* Check whether to allocate the iovec area */
1da177e4
LT
1854 err = -ENOMEM;
1855 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1856 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1857 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1858 if (!iov)
1859 goto out_put;
1860 }
1861
1862 /*
89bddce5
SH
1863 * Save the user-mode address (verify_iovec will change the
1864 * kernel msghdr to use the kernel address space)
1da177e4 1865 */
89bddce5
SH
1866
1867 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1868 uaddr_len = COMPAT_NAMELEN(msg);
1869 if (MSG_CMSG_COMPAT & flags) {
1870 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1871 } else
1872 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1873 if (err < 0)
1874 goto out_freeiov;
89bddce5 1875 total_len = err;
1da177e4
LT
1876
1877 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1878 msg_sys.msg_flags = 0;
1879 if (MSG_CMSG_COMPAT & flags)
1880 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1881
1da177e4
LT
1882 if (sock->file->f_flags & O_NONBLOCK)
1883 flags |= MSG_DONTWAIT;
1884 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1885 if (err < 0)
1886 goto out_freeiov;
1887 len = err;
1888
1889 if (uaddr != NULL) {
89bddce5
SH
1890 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1891 uaddr_len);
1da177e4
LT
1892 if (err < 0)
1893 goto out_freeiov;
1894 }
37f7f421
DM
1895 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1896 COMPAT_FLAGS(msg));
1da177e4
LT
1897 if (err)
1898 goto out_freeiov;
1899 if (MSG_CMSG_COMPAT & flags)
89bddce5 1900 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1901 &msg_compat->msg_controllen);
1902 else
89bddce5 1903 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1904 &msg->msg_controllen);
1905 if (err)
1906 goto out_freeiov;
1907 err = len;
1908
1909out_freeiov:
1910 if (iov != iovstack)
1911 sock_kfree_s(sock->sk, iov, iov_size);
1912out_put:
6cb153ca 1913 fput_light(sock->file, fput_needed);
1da177e4
LT
1914out:
1915 return err;
1916}
1917
1918#ifdef __ARCH_WANT_SYS_SOCKETCALL
1919
1920/* Argument list sizes for sys_socketcall */
1921#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
/*
 * Indexed by the socketcall number: the number of bytes of arguments that
 * sys_socketcall() copies from user space for that call.
 */
1922static const unsigned char nargs[18]={
1923 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1924 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1925 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1926};
1927
1da177e4
LT
1928#undef AL
1929
1930/*
89bddce5 1931 * System call vectors.
1da177e4
LT
1932 *
1933 * Argument checking cleaned up. Saved 20% in size.
1934 * This function doesn't need to set the kernel lock because
89bddce5 1935 * it is set by the callees.
1da177e4
LT
1936 */
1937
1938asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1939{
1940 unsigned long a[6];
89bddce5 1941 unsigned long a0, a1;
1da177e4
LT
1942 int err;
1943
89bddce5 1944 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1945 return -EINVAL;
1946
1947 /* copy_from_user should be SMP safe. */
1948 if (copy_from_user(a, args, nargs[call]))
1949 return -EFAULT;
3ec3b2fb 1950
89bddce5 1951 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1952 if (err)
1953 return err;
1954
89bddce5
SH
1955 a0 = a[0];
1956 a1 = a[1];
1957
1958 switch (call) {
1959 case SYS_SOCKET:
1960 err = sys_socket(a0, a1, a[2]);
1961 break;
1962 case SYS_BIND:
1963 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
1964 break;
1965 case SYS_CONNECT:
1966 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
1967 break;
1968 case SYS_LISTEN:
1969 err = sys_listen(a0, a1);
1970 break;
1971 case SYS_ACCEPT:
1972 err =
1973 sys_accept(a0, (struct sockaddr __user *)a1,
1974 (int __user *)a[2]);
1975 break;
1976 case SYS_GETSOCKNAME:
1977 err =
1978 sys_getsockname(a0, (struct sockaddr __user *)a1,
1979 (int __user *)a[2]);
1980 break;
1981 case SYS_GETPEERNAME:
1982 err =
1983 sys_getpeername(a0, (struct sockaddr __user *)a1,
1984 (int __user *)a[2]);
1985 break;
1986 case SYS_SOCKETPAIR:
1987 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
1988 break;
1989 case SYS_SEND:
1990 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
1991 break;
1992 case SYS_SENDTO:
1993 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
1994 (struct sockaddr __user *)a[4], a[5]);
1995 break;
1996 case SYS_RECV:
1997 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
1998 break;
1999 case SYS_RECVFROM:
2000 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2001 (struct sockaddr __user *)a[4],
2002 (int __user *)a[5]);
2003 break;
2004 case SYS_SHUTDOWN:
2005 err = sys_shutdown(a0, a1);
2006 break;
2007 case SYS_SETSOCKOPT:
2008 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2009 break;
2010 case SYS_GETSOCKOPT:
2011 err =
2012 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2013 (int __user *)a[4]);
2014 break;
2015 case SYS_SENDMSG:
2016 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2017 break;
2018 case SYS_RECVMSG:
2019 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2020 break;
2021 default:
2022 err = -EINVAL;
2023 break;
1da177e4
LT
2024 }
2025 return err;
2026}
2027
89bddce5 2028#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2029
55737fda
SH
2030/**
2031 * sock_register - add a socket protocol handler
2032 * @ops: description of protocol
2033 *
1da177e4
LT
2034 * This function is called by a protocol handler that wants to
2035 * advertise its address family, and have it linked into the
55737fda
SH
2036 * socket interface. The value ops->family corresponds to the
2037 * socket system call protocol family.
1da177e4 2038 */
f0fd27d4 2039int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2040{
2041 int err;
2042
2043 if (ops->family >= NPROTO) {
89bddce5
SH
2044 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2045 NPROTO);
1da177e4
LT
2046 return -ENOBUFS;
2047 }
55737fda
SH
2048
2049 spin_lock(&net_family_lock);
2050 if (net_families[ops->family])
2051 err = -EEXIST;
2052 else {
89bddce5 2053 net_families[ops->family] = ops;
1da177e4
LT
2054 err = 0;
2055 }
55737fda
SH
2056 spin_unlock(&net_family_lock);
2057
89bddce5 2058 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2059 return err;
2060}
2061
55737fda
SH
2062/**
2063 * sock_unregister - remove a protocol handler
2064 * @family: protocol family to remove
2065 *
1da177e4
LT
2066 * This function is called by a protocol handler that wants to
2067 * remove its address family, and have it unlinked from the
55737fda
SH
2068 * new socket creation.
2069 *
2070 * If protocol handler is a module, then it can use module reference
2071 * counts to protect against new references. If protocol handler is not
2072 * a module then it needs to provide its own protection in
2073 * the ops->create routine.
1da177e4 2074 */
f0fd27d4 2075void sock_unregister(int family)
1da177e4 2076{
f0fd27d4 2077 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2078
55737fda 2079 spin_lock(&net_family_lock);
89bddce5 2080 net_families[family] = NULL;
55737fda
SH
2081 spin_unlock(&net_family_lock);
2082
2083 synchronize_rcu();
2084
89bddce5 2085 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2086}
2087
77d76ea3 2088static int __init sock_init(void)
1da177e4
LT
2089{
2090 /*
89bddce5 2091 * Initialize sock SLAB cache.
1da177e4 2092 */
89bddce5 2093
1da177e4
LT
2094 sk_init();
2095
1da177e4 2096 /*
89bddce5 2097 * Initialize skbuff SLAB cache
1da177e4
LT
2098 */
2099 skb_init();
1da177e4
LT
2100
2101 /*
89bddce5 2102 * Initialize the protocols module.
1da177e4
LT
2103 */
2104
2105 init_inodecache();
2106 register_filesystem(&sock_fs_type);
2107 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2108
2109 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2110 */
2111
2112#ifdef CONFIG_NETFILTER
2113 netfilter_init();
2114#endif
cbeb321a
DM
2115
2116 return 0;
1da177e4
LT
2117}
2118
77d76ea3
AK
2119core_initcall(sock_init); /* early initcall */
2120
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Write a "sockets: used N" line to @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif	/* CONFIG_PROC_FS */
1da177e4 2137
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for socket files.  Forwards to the socket's
 * ops->compat_ioctl handler when one is provided; otherwise returns
 * -ENOIOCTLCMD.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2151
ac5a488e
SS
2152int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2153{
2154 return sock->ops->bind(sock, addr, addrlen);
2155}
2156
2157int kernel_listen(struct socket *sock, int backlog)
2158{
2159 return sock->ops->listen(sock, backlog);
2160}
2161
2162int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2163{
2164 struct sock *sk = sock->sk;
2165 int err;
2166
2167 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2168 newsock);
2169 if (err < 0)
2170 goto done;
2171
2172 err = sock->ops->accept(sock, *newsock, flags);
2173 if (err < 0) {
2174 sock_release(*newsock);
2175 goto done;
2176 }
2177
2178 (*newsock)->ops = sock->ops;
2179
2180done:
2181 return err;
2182}
2183
2184int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2185 int flags)
2186{
2187 return sock->ops->connect(sock, addr, addrlen, flags);
2188}
2189
2190int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2191 int *addrlen)
2192{
2193 return sock->ops->getname(sock, addr, addrlen, 0);
2194}
2195
2196int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2197 int *addrlen)
2198{
2199 return sock->ops->getname(sock, addr, addrlen, 1);
2200}
2201
2202int kernel_getsockopt(struct socket *sock, int level, int optname,
2203 char *optval, int *optlen)
2204{
2205 mm_segment_t oldfs = get_fs();
2206 int err;
2207
2208 set_fs(KERNEL_DS);
2209 if (level == SOL_SOCKET)
2210 err = sock_getsockopt(sock, level, optname, optval, optlen);
2211 else
2212 err = sock->ops->getsockopt(sock, level, optname, optval,
2213 optlen);
2214 set_fs(oldfs);
2215 return err;
2216}
2217
2218int kernel_setsockopt(struct socket *sock, int level, int optname,
2219 char *optval, int optlen)
2220{
2221 mm_segment_t oldfs = get_fs();
2222 int err;
2223
2224 set_fs(KERNEL_DS);
2225 if (level == SOL_SOCKET)
2226 err = sock_setsockopt(sock, level, optname, optval, optlen);
2227 else
2228 err = sock->ops->setsockopt(sock, level, optname, optval,
2229 optlen);
2230 set_fs(oldfs);
2231 return err;
2232}
2233
2234int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2235 size_t size, int flags)
2236{
2237 if (sock->ops->sendpage)
2238 return sock->ops->sendpage(sock, page, offset, size, flags);
2239
2240 return sock_no_sendpage(sock, page, offset, size, flags);
2241}
2242
2243int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2244{
2245 mm_segment_t oldfs = get_fs();
2246 int err;
2247
2248 set_fs(KERNEL_DS);
2249 err = sock->ops->ioctl(sock, cmd, arg);
2250 set_fs(oldfs);
2251
2252 return err;
2253}
2254
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message helpers. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* wrappers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);