[NET] XFRM: Fix whitespace errors.
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
120static struct file_operations socket_file_ops = {
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
89bddce5 157#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
158 16 for IP, 16 for IPX,
159 24 for IPv6,
89bddce5 160 about 80 for AX.25
1da177e4
LT
161 must be at least one bigger than
162 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 163 :unix_mkname()).
1da177e4 164 */
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and map them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table. Returns the descriptor, or a negative
 * error from sock_alloc_fd() or sock_attach_fd(); on attach failure the
 * file and the descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca
BL
415}
416
1da177e4
LT
417/**
418 * sockfd_lookup - Go from a file number to its socket slot
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
1da177e4 446
6cb153ca
BL
447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
448{
449 struct file *file;
450 struct socket *sock;
451
3672558c 452 *err = -EBADF;
6cb153ca
BL
453 file = fget_light(fd, fput_needed);
454 if (file) {
455 sock = sock_from_file(file, err);
456 if (sock)
457 return sock;
458 fput_light(file, *fput_needed);
1da177e4 459 }
6cb153ca 460 return NULL;
1da177e4
LT
461}
462
463/**
464 * sock_alloc - allocate a socket
89bddce5 465 *
1da177e4
LT
466 * Allocate a new inode and socket object. The two are bound together
467 * and initialised. The socket is then returned. If we are out of inodes
468 * NULL is returned.
469 */
470
471static struct socket *sock_alloc(void)
472{
89bddce5
SH
473 struct inode *inode;
474 struct socket *sock;
1da177e4
LT
475
476 inode = new_inode(sock_mnt->mnt_sb);
477 if (!inode)
478 return NULL;
479
480 sock = SOCKET_I(inode);
481
89bddce5 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
483 inode->i_uid = current->fsuid;
484 inode->i_gid = current->fsgid;
485
486 get_cpu_var(sockets_in_use)++;
487 put_cpu_var(sockets_in_use);
488 return sock;
489}
490
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
501
4b6f5d20 502const struct file_operations bad_sock_fops = {
1da177e4
LT
503 .owner = THIS_MODULE,
504 .open = sock_no_open,
505};
506
507/**
508 * sock_release - close a socket
509 * @sock: socket to close
510 *
511 * The socket is released from the protocol stack if it has a release
512 * callback, and the inode is then released if the socket is bound to
89bddce5 513 * an inode not a file.
1da177e4 514 */
89bddce5 515
1da177e4
LT
516void sock_release(struct socket *sock)
517{
518 if (sock->ops) {
519 struct module *owner = sock->ops->owner;
520
521 sock->ops->release(sock);
522 sock->ops = NULL;
523 module_put(owner);
524 }
525
526 if (sock->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528
529 get_cpu_var(sockets_in_use)--;
530 put_cpu_var(sockets_in_use);
531 if (!sock->file) {
532 iput(SOCK_INODE(sock));
533 return;
534 }
89bddce5 535 sock->file = NULL;
1da177e4
LT
536}
537
89bddce5 538static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
539 struct msghdr *msg, size_t size)
540{
541 struct sock_iocb *si = kiocb_to_siocb(iocb);
542 int err;
543
544 si->sock = sock;
545 si->scm = NULL;
546 si->msg = msg;
547 si->size = size;
548
549 err = security_socket_sendmsg(sock, msg, size);
550 if (err)
551 return err;
552
553 return sock->ops->sendmsg(iocb, sock, msg, size);
554}
555
556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
557{
558 struct kiocb iocb;
559 struct sock_iocb siocb;
560 int ret;
561
562 init_sync_kiocb(&iocb, NULL);
563 iocb.private = &siocb;
564 ret = __sock_sendmsg(&iocb, sock, msg, size);
565 if (-EIOCBQUEUED == ret)
566 ret = wait_on_sync_kiocb(&iocb);
567 return ret;
568}
569
570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
571 struct kvec *vec, size_t num, size_t size)
572{
573 mm_segment_t oldfs = get_fs();
574 int result;
575
576 set_fs(KERNEL_DS);
577 /*
578 * the following is safe, since for compiler definitions of kvec and
579 * iovec are identical, yielding the same in-core layout and alignment
580 */
89bddce5 581 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
582 msg->msg_iovlen = num;
583 result = sock_sendmsg(sock, msg, size);
584 set_fs(oldfs);
585 return result;
586}
587
89bddce5 588static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
589 struct msghdr *msg, size_t size, int flags)
590{
591 int err;
592 struct sock_iocb *si = kiocb_to_siocb(iocb);
593
594 si->sock = sock;
595 si->scm = NULL;
596 si->msg = msg;
597 si->size = size;
598 si->flags = flags;
599
600 err = security_socket_recvmsg(sock, msg, size, flags);
601 if (err)
602 return err;
603
604 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
605}
606
89bddce5 607int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
608 size_t size, int flags)
609{
610 struct kiocb iocb;
611 struct sock_iocb siocb;
612 int ret;
613
89bddce5 614 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
615 iocb.private = &siocb;
616 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
617 if (-EIOCBQUEUED == ret)
618 ret = wait_on_sync_kiocb(&iocb);
619 return ret;
620}
621
89bddce5
SH
622int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
623 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
624{
625 mm_segment_t oldfs = get_fs();
626 int result;
627
628 set_fs(KERNEL_DS);
629 /*
630 * the following is safe, since for compiler definitions of kvec and
631 * iovec are identical, yielding the same in-core layout and alignment
632 */
89bddce5 633 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
634 result = sock_recvmsg(sock, msg, size, flags);
635 set_fs(oldfs);
636 return result;
637}
638
639static void sock_aio_dtor(struct kiocb *iocb)
640{
641 kfree(iocb->private);
642}
643
ce1d4d3e
CH
644static ssize_t sock_sendpage(struct file *file, struct page *page,
645 int offset, size_t size, loff_t *ppos, int more)
1da177e4 646{
1da177e4
LT
647 struct socket *sock;
648 int flags;
649
ce1d4d3e
CH
650 sock = file->private_data;
651
652 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
653 if (more)
654 flags |= MSG_MORE;
655
656 return sock->ops->sendpage(sock, page, offset, size, flags);
657}
1da177e4 658
ce1d4d3e 659static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 660 struct sock_iocb *siocb)
ce1d4d3e
CH
661{
662 if (!is_sync_kiocb(iocb)) {
663 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
664 if (!siocb)
665 return NULL;
1da177e4
LT
666 iocb->ki_dtor = sock_aio_dtor;
667 }
1da177e4 668
ce1d4d3e 669 siocb->kiocb = iocb;
ce1d4d3e
CH
670 iocb->private = siocb;
671 return siocb;
1da177e4
LT
672}
673
ce1d4d3e 674static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
675 struct file *file, const struct iovec *iov,
676 unsigned long nr_segs)
ce1d4d3e
CH
677{
678 struct socket *sock = file->private_data;
679 size_t size = 0;
680 int i;
1da177e4 681
89bddce5
SH
682 for (i = 0; i < nr_segs; i++)
683 size += iov[i].iov_len;
1da177e4 684
ce1d4d3e
CH
685 msg->msg_name = NULL;
686 msg->msg_namelen = 0;
687 msg->msg_control = NULL;
688 msg->msg_controllen = 0;
89bddce5 689 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
690 msg->msg_iovlen = nr_segs;
691 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
692
693 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
694}
695
027445c3
BP
696static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
697 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
698{
699 struct sock_iocb siocb, *x;
700
1da177e4
LT
701 if (pos != 0)
702 return -ESPIPE;
027445c3
BP
703
704 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
705 return 0;
706
027445c3
BP
707
708 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
709 if (!x)
710 return -ENOMEM;
027445c3 711 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
712}
713
ce1d4d3e 714static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
715 struct file *file, const struct iovec *iov,
716 unsigned long nr_segs)
1da177e4 717{
ce1d4d3e
CH
718 struct socket *sock = file->private_data;
719 size_t size = 0;
720 int i;
1da177e4 721
89bddce5
SH
722 for (i = 0; i < nr_segs; i++)
723 size += iov[i].iov_len;
1da177e4 724
ce1d4d3e
CH
725 msg->msg_name = NULL;
726 msg->msg_namelen = 0;
727 msg->msg_control = NULL;
728 msg->msg_controllen = 0;
89bddce5 729 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
730 msg->msg_iovlen = nr_segs;
731 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
732 if (sock->type == SOCK_SEQPACKET)
733 msg->msg_flags |= MSG_EOR;
1da177e4 734
ce1d4d3e 735 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
736}
737
027445c3
BP
738static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
739 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
740{
741 struct sock_iocb siocb, *x;
1da177e4 742
ce1d4d3e
CH
743 if (pos != 0)
744 return -ESPIPE;
027445c3
BP
745
746 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 747 return 0;
1da177e4 748
027445c3 749 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
750 if (!x)
751 return -ENOMEM;
1da177e4 752
027445c3 753 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
754}
755
1da177e4
LT
756/*
757 * Atomic setting of ioctl hooks to avoid race
758 * with module unload.
759 */
760
4a3e2f71 761static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 762static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 763
89bddce5 764void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 765{
4a3e2f71 766 mutex_lock(&br_ioctl_mutex);
1da177e4 767 br_ioctl_hook = hook;
4a3e2f71 768 mutex_unlock(&br_ioctl_mutex);
1da177e4 769}
89bddce5 770
1da177e4
LT
771EXPORT_SYMBOL(brioctl_set);
772
4a3e2f71 773static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 774static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 775
89bddce5 776void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 777{
4a3e2f71 778 mutex_lock(&vlan_ioctl_mutex);
1da177e4 779 vlan_ioctl_hook = hook;
4a3e2f71 780 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 781}
89bddce5 782
1da177e4
LT
783EXPORT_SYMBOL(vlan_ioctl_set);
784
4a3e2f71 785static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 786static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 787
89bddce5 788void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 789{
4a3e2f71 790 mutex_lock(&dlci_ioctl_mutex);
1da177e4 791 dlci_ioctl_hook = hook;
4a3e2f71 792 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 793}
89bddce5 794
1da177e4
LT
795EXPORT_SYMBOL(dlci_ioctl_set);
796
797/*
798 * With an ioctl, arg may well be a user mode pointer, but we don't know
799 * what to do with it - that's up to the protocol still.
800 */
801
802static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
803{
804 struct socket *sock;
805 void __user *argp = (void __user *)arg;
806 int pid, err;
807
b69aee04 808 sock = file->private_data;
1da177e4
LT
809 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
810 err = dev_ioctl(cmd, argp);
811 } else
d86b5e0e 812#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
813 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
814 err = dev_ioctl(cmd, argp);
815 } else
89bddce5
SH
816#endif /* CONFIG_WIRELESS_EXT */
817 switch (cmd) {
1da177e4
LT
818 case FIOSETOWN:
819 case SIOCSPGRP:
820 err = -EFAULT;
821 if (get_user(pid, (int __user *)argp))
822 break;
823 err = f_setown(sock->file, pid, 1);
824 break;
825 case FIOGETOWN:
826 case SIOCGPGRP:
609d7fa9 827 err = put_user(f_getown(sock->file),
89bddce5 828 (int __user *)argp);
1da177e4
LT
829 break;
830 case SIOCGIFBR:
831 case SIOCSIFBR:
832 case SIOCBRADDBR:
833 case SIOCBRDELBR:
834 err = -ENOPKG;
835 if (!br_ioctl_hook)
836 request_module("bridge");
837
4a3e2f71 838 mutex_lock(&br_ioctl_mutex);
89bddce5 839 if (br_ioctl_hook)
1da177e4 840 err = br_ioctl_hook(cmd, argp);
4a3e2f71 841 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
842 break;
843 case SIOCGIFVLAN:
844 case SIOCSIFVLAN:
845 err = -ENOPKG;
846 if (!vlan_ioctl_hook)
847 request_module("8021q");
848
4a3e2f71 849 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
850 if (vlan_ioctl_hook)
851 err = vlan_ioctl_hook(argp);
4a3e2f71 852 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 853 break;
1da177e4
LT
854 case SIOCADDDLCI:
855 case SIOCDELDLCI:
856 err = -ENOPKG;
857 if (!dlci_ioctl_hook)
858 request_module("dlci");
859
860 if (dlci_ioctl_hook) {
4a3e2f71 861 mutex_lock(&dlci_ioctl_mutex);
1da177e4 862 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 863 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
864 }
865 break;
866 default:
867 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
868
869 /*
870 * If this ioctl is unknown try to hand it down
871 * to the NIC driver.
872 */
873 if (err == -ENOIOCTLCMD)
874 err = dev_ioctl(cmd, argp);
1da177e4 875 break;
89bddce5 876 }
1da177e4
LT
877 return err;
878}
879
880int sock_create_lite(int family, int type, int protocol, struct socket **res)
881{
882 int err;
883 struct socket *sock = NULL;
89bddce5 884
1da177e4
LT
885 err = security_socket_create(family, type, protocol, 1);
886 if (err)
887 goto out;
888
889 sock = sock_alloc();
890 if (!sock) {
891 err = -ENOMEM;
892 goto out;
893 }
894
1da177e4 895 sock->type = type;
7420ed23
VY
896 err = security_socket_post_create(sock, family, type, protocol, 1);
897 if (err)
898 goto out_release;
899
1da177e4
LT
900out:
901 *res = sock;
902 return err;
7420ed23
VY
903out_release:
904 sock_release(sock);
905 sock = NULL;
906 goto out;
1da177e4
LT
907}
908
909/* No kernel lock held - perfect */
89bddce5 910static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
911{
912 struct socket *sock;
913
914 /*
89bddce5 915 * We can't return errors to poll, so it's either yes or no.
1da177e4 916 */
b69aee04 917 sock = file->private_data;
1da177e4
LT
918 return sock->ops->poll(file, sock, wait);
919}
920
89bddce5 921static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 922{
b69aee04 923 struct socket *sock = file->private_data;
1da177e4
LT
924
925 return sock->ops->mmap(file, sock, vma);
926}
927
20380731 928static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
929{
930 /*
89bddce5
SH
931 * It was possible the inode is NULL we were
932 * closing an unfinished socket.
1da177e4
LT
933 */
934
89bddce5 935 if (!inode) {
1da177e4
LT
936 printk(KERN_DEBUG "sock_close: NULL inode\n");
937 return 0;
938 }
939 sock_fasync(-1, filp, 0);
940 sock_release(SOCKET_I(inode));
941 return 0;
942}
943
944/*
945 * Update the socket async list
946 *
947 * Fasync_list locking strategy.
948 *
949 * 1. fasync_list is modified only under process context socket lock
950 * i.e. under semaphore.
951 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
952 * or under socket lock.
953 * 3. fasync_list can be used from softirq context, so that
954 * modification under socket lock have to be enhanced with
955 * write_lock_bh(&sk->sk_callback_lock).
956 * --ANK (990710)
957 */
958
959static int sock_fasync(int fd, struct file *filp, int on)
960{
89bddce5 961 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
962 struct socket *sock;
963 struct sock *sk;
964
89bddce5 965 if (on) {
8b3a7005 966 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 967 if (fna == NULL)
1da177e4
LT
968 return -ENOMEM;
969 }
970
b69aee04 971 sock = filp->private_data;
1da177e4 972
89bddce5
SH
973 sk = sock->sk;
974 if (sk == NULL) {
1da177e4
LT
975 kfree(fna);
976 return -EINVAL;
977 }
978
979 lock_sock(sk);
980
89bddce5 981 prev = &(sock->fasync_list);
1da177e4 982
89bddce5
SH
983 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
984 if (fa->fa_file == filp)
1da177e4
LT
985 break;
986
89bddce5
SH
987 if (on) {
988 if (fa != NULL) {
1da177e4 989 write_lock_bh(&sk->sk_callback_lock);
89bddce5 990 fa->fa_fd = fd;
1da177e4
LT
991 write_unlock_bh(&sk->sk_callback_lock);
992
993 kfree(fna);
994 goto out;
995 }
89bddce5
SH
996 fna->fa_file = filp;
997 fna->fa_fd = fd;
998 fna->magic = FASYNC_MAGIC;
999 fna->fa_next = sock->fasync_list;
1da177e4 1000 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1001 sock->fasync_list = fna;
1da177e4 1002 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1003 } else {
1004 if (fa != NULL) {
1da177e4 1005 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1006 *prev = fa->fa_next;
1da177e4
LT
1007 write_unlock_bh(&sk->sk_callback_lock);
1008 kfree(fa);
1009 }
1010 }
1011
1012out:
1013 release_sock(sock->sk);
1014 return 0;
1015}
1016
1017/* This function may be called only under socket lock or callback_lock */
1018
1019int sock_wake_async(struct socket *sock, int how, int band)
1020{
1021 if (!sock || !sock->fasync_list)
1022 return -1;
89bddce5 1023 switch (how) {
1da177e4 1024 case 1:
89bddce5 1025
1da177e4
LT
1026 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1027 break;
1028 goto call_kill;
1029 case 2:
1030 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1031 break;
1032 /* fall through */
1033 case 0:
89bddce5 1034call_kill:
1da177e4
LT
1035 __kill_fasync(sock->fasync_list, SIGIO, band);
1036 break;
1037 case 3:
1038 __kill_fasync(sock->fasync_list, SIGURG, band);
1039 }
1040 return 0;
1041}
1042
89bddce5
SH
1043static int __sock_create(int family, int type, int protocol,
1044 struct socket **res, int kern)
1da177e4
LT
1045{
1046 int err;
1047 struct socket *sock;
55737fda 1048 const struct net_proto_family *pf;
1da177e4
LT
1049
1050 /*
89bddce5 1051 * Check protocol is in range
1da177e4
LT
1052 */
1053 if (family < 0 || family >= NPROTO)
1054 return -EAFNOSUPPORT;
1055 if (type < 0 || type >= SOCK_MAX)
1056 return -EINVAL;
1057
1058 /* Compatibility.
1059
1060 This uglymoron is moved from INET layer to here to avoid
1061 deadlock in module load.
1062 */
1063 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1064 static int warned;
1da177e4
LT
1065 if (!warned) {
1066 warned = 1;
89bddce5
SH
1067 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1068 current->comm);
1da177e4
LT
1069 }
1070 family = PF_PACKET;
1071 }
1072
1073 err = security_socket_create(family, type, protocol, kern);
1074 if (err)
1075 return err;
89bddce5 1076
55737fda
SH
1077 /*
1078 * Allocate the socket and allow the family to set things up. if
1079 * the protocol is 0, the family is instructed to select an appropriate
1080 * default.
1081 */
1082 sock = sock_alloc();
1083 if (!sock) {
1084 if (net_ratelimit())
1085 printk(KERN_WARNING "socket: no more sockets\n");
1086 return -ENFILE; /* Not exactly a match, but its the
1087 closest posix thing */
1088 }
1089
1090 sock->type = type;
1091
1da177e4 1092#if defined(CONFIG_KMOD)
89bddce5
SH
1093 /* Attempt to load a protocol module if the find failed.
1094 *
1095 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1096 * requested real, full-featured networking support upon configuration.
1097 * Otherwise module support will break!
1098 */
55737fda 1099 if (net_families[family] == NULL)
89bddce5 1100 request_module("net-pf-%d", family);
1da177e4
LT
1101#endif
1102
55737fda
SH
1103 rcu_read_lock();
1104 pf = rcu_dereference(net_families[family]);
1105 err = -EAFNOSUPPORT;
1106 if (!pf)
1107 goto out_release;
1da177e4
LT
1108
1109 /*
1110 * We will call the ->create function, that possibly is in a loadable
1111 * module, so we have to bump that loadable module refcnt first.
1112 */
55737fda 1113 if (!try_module_get(pf->owner))
1da177e4
LT
1114 goto out_release;
1115
55737fda
SH
1116 /* Now protected by module ref count */
1117 rcu_read_unlock();
1118
1119 err = pf->create(sock, protocol);
1120 if (err < 0)
1da177e4 1121 goto out_module_put;
a79af59e 1122
1da177e4
LT
1123 /*
1124 * Now to bump the refcnt of the [loadable] module that owns this
1125 * socket at sock_release time we decrement its refcnt.
1126 */
55737fda
SH
1127 if (!try_module_get(sock->ops->owner))
1128 goto out_module_busy;
1129
1da177e4
LT
1130 /*
1131 * Now that we're done with the ->create function, the [loadable]
1132 * module can have its refcnt decremented
1133 */
55737fda 1134 module_put(pf->owner);
7420ed23
VY
1135 err = security_socket_post_create(sock, family, type, protocol, kern);
1136 if (err)
1137 goto out_release;
55737fda 1138 *res = sock;
1da177e4 1139
55737fda
SH
1140 return 0;
1141
1142out_module_busy:
1143 err = -EAFNOSUPPORT;
1da177e4 1144out_module_put:
55737fda
SH
1145 sock->ops = NULL;
1146 module_put(pf->owner);
1147out_sock_release:
1da177e4 1148 sock_release(sock);
55737fda
SH
1149 return err;
1150
1151out_release:
1152 rcu_read_unlock();
1153 goto out_sock_release;
1da177e4
LT
1154}
1155
/* Create a socket via __sock_create() with the kern flag cleared. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1160
/* Create a socket via __sock_create() with the kern flag set. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1165
1166asmlinkage long sys_socket(int family, int type, int protocol)
1167{
1168 int retval;
1169 struct socket *sock;
1170
1171 retval = sock_create(family, type, protocol, &sock);
1172 if (retval < 0)
1173 goto out;
1174
1175 retval = sock_map_fd(sock);
1176 if (retval < 0)
1177 goto out_release;
1178
1179out:
1180 /* It may be already another descriptor 8) Not kernel problem. */
1181 return retval;
1182
1183out_release:
1184 sock_release(sock);
1185 return retval;
1186}
1187
1188/*
1189 * Create a pair of connected sockets.
1190 */
1191
89bddce5
SH
1192asmlinkage long sys_socketpair(int family, int type, int protocol,
1193 int __user *usockvec)
1da177e4
LT
1194{
1195 struct socket *sock1, *sock2;
1196 int fd1, fd2, err;
1197
1198 /*
1199 * Obtain the first socket and check if the underlying protocol
1200 * supports the socketpair call.
1201 */
1202
1203 err = sock_create(family, type, protocol, &sock1);
1204 if (err < 0)
1205 goto out;
1206
1207 err = sock_create(family, type, protocol, &sock2);
1208 if (err < 0)
1209 goto out_release_1;
1210
1211 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1212 if (err < 0)
1da177e4
LT
1213 goto out_release_both;
1214
1215 fd1 = fd2 = -1;
1216
1217 err = sock_map_fd(sock1);
1218 if (err < 0)
1219 goto out_release_both;
1220 fd1 = err;
1221
1222 err = sock_map_fd(sock2);
1223 if (err < 0)
1224 goto out_close_1;
1225 fd2 = err;
1226
1227 /* fd1 and fd2 may be already another descriptors.
1228 * Not kernel problem.
1229 */
1230
89bddce5 1231 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1232 if (!err)
1233 err = put_user(fd2, &usockvec[1]);
1234 if (!err)
1235 return 0;
1236
1237 sys_close(fd2);
1238 sys_close(fd1);
1239 return err;
1240
1241out_close_1:
89bddce5 1242 sock_release(sock2);
1da177e4
LT
1243 sys_close(fd1);
1244 return err;
1245
1246out_release_both:
89bddce5 1247 sock_release(sock2);
1da177e4 1248out_release_1:
89bddce5 1249 sock_release(sock1);
1da177e4
LT
1250out:
1251 return err;
1252}
1253
1da177e4
LT
1254/*
1255 * Bind a name to a socket. Nothing much to do here since it's
1256 * the protocol's responsibility to handle the local address.
1257 *
1258 * We move the socket address to kernel space before we call
1259 * the protocol layer (having also checked the address is ok).
1260 */
1261
1262asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1263{
1264 struct socket *sock;
1265 char address[MAX_SOCK_ADDR];
6cb153ca 1266 int err, fput_needed;
1da177e4 1267
89bddce5
SH
1268 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1269 if(sock) {
1270 err = move_addr_to_kernel(umyaddr, addrlen, address);
1271 if (err >= 0) {
1272 err = security_socket_bind(sock,
1273 (struct sockaddr *)address,
1274 addrlen);
6cb153ca
BL
1275 if (!err)
1276 err = sock->ops->bind(sock,
89bddce5
SH
1277 (struct sockaddr *)
1278 address, addrlen);
1da177e4 1279 }
6cb153ca 1280 fput_light(sock->file, fput_needed);
89bddce5 1281 }
1da177e4
LT
1282 return err;
1283}
1284
1da177e4
LT
1285/*
1286 * Perform a listen. Basically, we allow the protocol to do anything
1287 * necessary for a listen, and if that works, we mark the socket as
1288 * ready for listening.
1289 */
1290
7a42c217 1291int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1292
1293asmlinkage long sys_listen(int fd, int backlog)
1294{
1295 struct socket *sock;
6cb153ca 1296 int err, fput_needed;
89bddce5
SH
1297
1298 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1299 if (sock) {
1300 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1301 backlog = sysctl_somaxconn;
1302
1303 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1304 if (!err)
1305 err = sock->ops->listen(sock, backlog);
1da177e4 1306
6cb153ca 1307 fput_light(sock->file, fput_needed);
1da177e4
LT
1308 }
1309 return err;
1310}
1311
1da177e4
LT
1312/*
1313 * For accept, we attempt to create a new socket, set up the link
1314 * with the client, wake up the client, then return the new
1315 * connected fd. We collect the address of the connector in kernel
1316 * space and move it to user at the very end. This is unclean because
1317 * we open the socket then return an error.
1318 *
1319 * 1003.1g adds the ability to recvmsg() to query connection pending
1320 * status to recvmsg. We need to add that support in a way thats
1321 * clean when we restucture accept also.
1322 */
1323
89bddce5
SH
1324asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1325 int __user *upeer_addrlen)
1da177e4
LT
1326{
1327 struct socket *sock, *newsock;
39d8c1b6 1328 struct file *newfile;
6cb153ca 1329 int err, len, newfd, fput_needed;
1da177e4
LT
1330 char address[MAX_SOCK_ADDR];
1331
6cb153ca 1332 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1333 if (!sock)
1334 goto out;
1335
1336 err = -ENFILE;
89bddce5 1337 if (!(newsock = sock_alloc()))
1da177e4
LT
1338 goto out_put;
1339
1340 newsock->type = sock->type;
1341 newsock->ops = sock->ops;
1342
1da177e4
LT
1343 /*
1344 * We don't need try_module_get here, as the listening socket (sock)
1345 * has the protocol module (sock->ops->owner) held.
1346 */
1347 __module_get(newsock->ops->owner);
1348
39d8c1b6
DM
1349 newfd = sock_alloc_fd(&newfile);
1350 if (unlikely(newfd < 0)) {
1351 err = newfd;
9a1875e6
DM
1352 sock_release(newsock);
1353 goto out_put;
39d8c1b6
DM
1354 }
1355
1356 err = sock_attach_fd(newsock, newfile);
1357 if (err < 0)
1358 goto out_fd;
1359
a79af59e
FF
1360 err = security_socket_accept(sock, newsock);
1361 if (err)
39d8c1b6 1362 goto out_fd;
a79af59e 1363
1da177e4
LT
1364 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1365 if (err < 0)
39d8c1b6 1366 goto out_fd;
1da177e4
LT
1367
1368 if (upeer_sockaddr) {
89bddce5
SH
1369 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1370 &len, 2) < 0) {
1da177e4 1371 err = -ECONNABORTED;
39d8c1b6 1372 goto out_fd;
1da177e4 1373 }
89bddce5
SH
1374 err = move_addr_to_user(address, len, upeer_sockaddr,
1375 upeer_addrlen);
1da177e4 1376 if (err < 0)
39d8c1b6 1377 goto out_fd;
1da177e4
LT
1378 }
1379
1380 /* File flags are not inherited via accept() unlike another OSes. */
1381
39d8c1b6
DM
1382 fd_install(newfd, newfile);
1383 err = newfd;
1da177e4
LT
1384
1385 security_socket_post_accept(sock, newsock);
1386
1387out_put:
6cb153ca 1388 fput_light(sock->file, fput_needed);
1da177e4
LT
1389out:
1390 return err;
39d8c1b6 1391out_fd:
9606a216 1392 fput(newfile);
39d8c1b6 1393 put_unused_fd(newfd);
1da177e4
LT
1394 goto out_put;
1395}
1396
1da177e4
LT
1397/*
1398 * Attempt to connect to a socket with the server address. The address
1399 * is in user space so we verify it is OK and move it to kernel space.
1400 *
1401 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1402 * break bindings
1403 *
1404 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1405 * other SEQPACKET protocols that take time to connect() as it doesn't
1406 * include the -EINPROGRESS status for such sockets.
1407 */
1408
89bddce5
SH
1409asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1410 int addrlen)
1da177e4
LT
1411{
1412 struct socket *sock;
1413 char address[MAX_SOCK_ADDR];
6cb153ca 1414 int err, fput_needed;
1da177e4 1415
6cb153ca 1416 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1417 if (!sock)
1418 goto out;
1419 err = move_addr_to_kernel(uservaddr, addrlen, address);
1420 if (err < 0)
1421 goto out_put;
1422
89bddce5
SH
1423 err =
1424 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1425 if (err)
1426 goto out_put;
1427
89bddce5 1428 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1429 sock->file->f_flags);
1430out_put:
6cb153ca 1431 fput_light(sock->file, fput_needed);
1da177e4
LT
1432out:
1433 return err;
1434}
1435
1436/*
1437 * Get the local address ('name') of a socket object. Move the obtained
1438 * name to user space.
1439 */
1440
89bddce5
SH
1441asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1442 int __user *usockaddr_len)
1da177e4
LT
1443{
1444 struct socket *sock;
1445 char address[MAX_SOCK_ADDR];
6cb153ca 1446 int len, err, fput_needed;
89bddce5 1447
6cb153ca 1448 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1449 if (!sock)
1450 goto out;
1451
1452 err = security_socket_getsockname(sock);
1453 if (err)
1454 goto out_put;
1455
1456 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1457 if (err)
1458 goto out_put;
1459 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1460
1461out_put:
6cb153ca 1462 fput_light(sock->file, fput_needed);
1da177e4
LT
1463out:
1464 return err;
1465}
1466
1467/*
1468 * Get the remote address ('name') of a socket object. Move the obtained
1469 * name to user space.
1470 */
1471
89bddce5
SH
1472asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1473 int __user *usockaddr_len)
1da177e4
LT
1474{
1475 struct socket *sock;
1476 char address[MAX_SOCK_ADDR];
6cb153ca 1477 int len, err, fput_needed;
1da177e4 1478
89bddce5
SH
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock != NULL) {
1da177e4
LT
1481 err = security_socket_getpeername(sock);
1482 if (err) {
6cb153ca 1483 fput_light(sock->file, fput_needed);
1da177e4
LT
1484 return err;
1485 }
1486
89bddce5
SH
1487 err =
1488 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1489 1);
1da177e4 1490 if (!err)
89bddce5
SH
1491 err = move_addr_to_user(address, len, usockaddr,
1492 usockaddr_len);
6cb153ca 1493 fput_light(sock->file, fput_needed);
1da177e4
LT
1494 }
1495 return err;
1496}
1497
1498/*
1499 * Send a datagram to a given address. We move the address into kernel
1500 * space and check the user space data area is readable before invoking
1501 * the protocol.
1502 */
1503
89bddce5
SH
1504asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1505 unsigned flags, struct sockaddr __user *addr,
1506 int addr_len)
1da177e4
LT
1507{
1508 struct socket *sock;
1509 char address[MAX_SOCK_ADDR];
1510 int err;
1511 struct msghdr msg;
1512 struct iovec iov;
6cb153ca
BL
1513 int fput_needed;
1514 struct file *sock_file;
1515
1516 sock_file = fget_light(fd, &fput_needed);
4387ff75 1517 err = -EBADF;
6cb153ca 1518 if (!sock_file)
4387ff75 1519 goto out;
6cb153ca
BL
1520
1521 sock = sock_from_file(sock_file, &err);
1da177e4 1522 if (!sock)
6cb153ca 1523 goto out_put;
89bddce5
SH
1524 iov.iov_base = buff;
1525 iov.iov_len = len;
1526 msg.msg_name = NULL;
1527 msg.msg_iov = &iov;
1528 msg.msg_iovlen = 1;
1529 msg.msg_control = NULL;
1530 msg.msg_controllen = 0;
1531 msg.msg_namelen = 0;
6cb153ca 1532 if (addr) {
1da177e4
LT
1533 err = move_addr_to_kernel(addr, addr_len, address);
1534 if (err < 0)
1535 goto out_put;
89bddce5
SH
1536 msg.msg_name = address;
1537 msg.msg_namelen = addr_len;
1da177e4
LT
1538 }
1539 if (sock->file->f_flags & O_NONBLOCK)
1540 flags |= MSG_DONTWAIT;
1541 msg.msg_flags = flags;
1542 err = sock_sendmsg(sock, &msg, len);
1543
89bddce5 1544out_put:
6cb153ca 1545 fput_light(sock_file, fput_needed);
4387ff75 1546out:
1da177e4
LT
1547 return err;
1548}
1549
1550/*
89bddce5 1551 * Send a datagram down a socket.
1da177e4
LT
1552 */
1553
89bddce5 1554asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1555{
1556 return sys_sendto(fd, buff, len, flags, NULL, 0);
1557}
1558
1559/*
89bddce5 1560 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1561 * sender. We verify the buffers are writable and if needed move the
1562 * sender address from kernel to user space.
1563 */
1564
89bddce5
SH
1565asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1566 unsigned flags, struct sockaddr __user *addr,
1567 int __user *addr_len)
1da177e4
LT
1568{
1569 struct socket *sock;
1570 struct iovec iov;
1571 struct msghdr msg;
1572 char address[MAX_SOCK_ADDR];
89bddce5 1573 int err, err2;
6cb153ca
BL
1574 struct file *sock_file;
1575 int fput_needed;
1576
1577 sock_file = fget_light(fd, &fput_needed);
4387ff75 1578 err = -EBADF;
6cb153ca 1579 if (!sock_file)
4387ff75 1580 goto out;
1da177e4 1581
6cb153ca 1582 sock = sock_from_file(sock_file, &err);
1da177e4 1583 if (!sock)
4387ff75 1584 goto out_put;
1da177e4 1585
89bddce5
SH
1586 msg.msg_control = NULL;
1587 msg.msg_controllen = 0;
1588 msg.msg_iovlen = 1;
1589 msg.msg_iov = &iov;
1590 iov.iov_len = size;
1591 iov.iov_base = ubuf;
1592 msg.msg_name = address;
1593 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1594 if (sock->file->f_flags & O_NONBLOCK)
1595 flags |= MSG_DONTWAIT;
89bddce5 1596 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1597
89bddce5
SH
1598 if (err >= 0 && addr != NULL) {
1599 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1600 if (err2 < 0)
1601 err = err2;
1da177e4 1602 }
4387ff75 1603out_put:
6cb153ca 1604 fput_light(sock_file, fput_needed);
4387ff75 1605out:
1da177e4
LT
1606 return err;
1607}
1608
1609/*
89bddce5 1610 * Receive a datagram from a socket.
1da177e4
LT
1611 */
1612
89bddce5
SH
1613asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1614 unsigned flags)
1da177e4
LT
1615{
1616 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1617}
1618
1619/*
1620 * Set a socket option. Because we don't know the option lengths we have
1621 * to pass the user mode parameter for the protocols to sort out.
1622 */
1623
89bddce5
SH
1624asmlinkage long sys_setsockopt(int fd, int level, int optname,
1625 char __user *optval, int optlen)
1da177e4 1626{
6cb153ca 1627 int err, fput_needed;
1da177e4
LT
1628 struct socket *sock;
1629
1630 if (optlen < 0)
1631 return -EINVAL;
89bddce5
SH
1632
1633 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1634 if (sock != NULL) {
1635 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1636 if (err)
1637 goto out_put;
1da177e4
LT
1638
1639 if (level == SOL_SOCKET)
89bddce5
SH
1640 err =
1641 sock_setsockopt(sock, level, optname, optval,
1642 optlen);
1da177e4 1643 else
89bddce5
SH
1644 err =
1645 sock->ops->setsockopt(sock, level, optname, optval,
1646 optlen);
6cb153ca
BL
1647out_put:
1648 fput_light(sock->file, fput_needed);
1da177e4
LT
1649 }
1650 return err;
1651}
1652
1653/*
1654 * Get a socket option. Because we don't know the option lengths we have
1655 * to pass a user mode parameter for the protocols to sort out.
1656 */
1657
89bddce5
SH
1658asmlinkage long sys_getsockopt(int fd, int level, int optname,
1659 char __user *optval, int __user *optlen)
1da177e4 1660{
6cb153ca 1661 int err, fput_needed;
1da177e4
LT
1662 struct socket *sock;
1663
89bddce5
SH
1664 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1665 if (sock != NULL) {
6cb153ca
BL
1666 err = security_socket_getsockopt(sock, level, optname);
1667 if (err)
1668 goto out_put;
1da177e4
LT
1669
1670 if (level == SOL_SOCKET)
89bddce5
SH
1671 err =
1672 sock_getsockopt(sock, level, optname, optval,
1673 optlen);
1da177e4 1674 else
89bddce5
SH
1675 err =
1676 sock->ops->getsockopt(sock, level, optname, optval,
1677 optlen);
6cb153ca
BL
1678out_put:
1679 fput_light(sock->file, fput_needed);
1da177e4
LT
1680 }
1681 return err;
1682}
1683
1da177e4
LT
1684/*
1685 * Shutdown a socket.
1686 */
1687
1688asmlinkage long sys_shutdown(int fd, int how)
1689{
6cb153ca 1690 int err, fput_needed;
1da177e4
LT
1691 struct socket *sock;
1692
89bddce5
SH
1693 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1694 if (sock != NULL) {
1da177e4 1695 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1696 if (!err)
1697 err = sock->ops->shutdown(sock, how);
1698 fput_light(sock->file, fput_needed);
1da177e4
LT
1699 }
1700 return err;
1701}
1702
/*
 * Helpers that yield the address of a msghdr member in either the native
 * or the compat layout. They expect an `unsigned flags` variable and a
 * `<msg>_compat` pointer to be in scope at the point of use; the members
 * they are used for have the same type (int / unsigned) on our platforms.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1709
1da177e4
LT
1710/*
1711 * BSD sendmsg interface
1712 */
1713
1714asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1715{
89bddce5
SH
1716 struct compat_msghdr __user *msg_compat =
1717 (struct compat_msghdr __user *)msg;
1da177e4
LT
1718 struct socket *sock;
1719 char address[MAX_SOCK_ADDR];
1720 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1721 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1722 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1723 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1724 unsigned char *ctl_buf = ctl;
1725 struct msghdr msg_sys;
1726 int err, ctl_len, iov_size, total_len;
6cb153ca 1727 int fput_needed;
89bddce5 1728
1da177e4
LT
1729 err = -EFAULT;
1730 if (MSG_CMSG_COMPAT & flags) {
1731 if (get_compat_msghdr(&msg_sys, msg_compat))
1732 return -EFAULT;
89bddce5
SH
1733 }
1734 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1735 return -EFAULT;
1736
6cb153ca 1737 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1738 if (!sock)
1da177e4
LT
1739 goto out;
1740
1741 /* do not move before msg_sys is valid */
1742 err = -EMSGSIZE;
1743 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1744 goto out_put;
1745
89bddce5 1746 /* Check whether to allocate the iovec area */
1da177e4
LT
1747 err = -ENOMEM;
1748 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1749 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1750 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1751 if (!iov)
1752 goto out_put;
1753 }
1754
1755 /* This will also move the address data into kernel space */
1756 if (MSG_CMSG_COMPAT & flags) {
1757 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1758 } else
1759 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1760 if (err < 0)
1da177e4
LT
1761 goto out_freeiov;
1762 total_len = err;
1763
1764 err = -ENOBUFS;
1765
1766 if (msg_sys.msg_controllen > INT_MAX)
1767 goto out_freeiov;
89bddce5 1768 ctl_len = msg_sys.msg_controllen;
1da177e4 1769 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1770 err =
1771 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1772 sizeof(ctl));
1da177e4
LT
1773 if (err)
1774 goto out_freeiov;
1775 ctl_buf = msg_sys.msg_control;
8920e8f9 1776 ctl_len = msg_sys.msg_controllen;
1da177e4 1777 } else if (ctl_len) {
89bddce5 1778 if (ctl_len > sizeof(ctl)) {
1da177e4 1779 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1780 if (ctl_buf == NULL)
1da177e4
LT
1781 goto out_freeiov;
1782 }
1783 err = -EFAULT;
1784 /*
1785 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1786 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1787 * checking falls down on this.
1788 */
89bddce5
SH
1789 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1790 ctl_len))
1da177e4
LT
1791 goto out_freectl;
1792 msg_sys.msg_control = ctl_buf;
1793 }
1794 msg_sys.msg_flags = flags;
1795
1796 if (sock->file->f_flags & O_NONBLOCK)
1797 msg_sys.msg_flags |= MSG_DONTWAIT;
1798 err = sock_sendmsg(sock, &msg_sys, total_len);
1799
1800out_freectl:
89bddce5 1801 if (ctl_buf != ctl)
1da177e4
LT
1802 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1803out_freeiov:
1804 if (iov != iovstack)
1805 sock_kfree_s(sock->sk, iov, iov_size);
1806out_put:
6cb153ca 1807 fput_light(sock->file, fput_needed);
89bddce5 1808out:
1da177e4
LT
1809 return err;
1810}
1811
1812/*
1813 * BSD recvmsg interface
1814 */
1815
89bddce5
SH
1816asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1817 unsigned int flags)
1da177e4 1818{
89bddce5
SH
1819 struct compat_msghdr __user *msg_compat =
1820 (struct compat_msghdr __user *)msg;
1da177e4
LT
1821 struct socket *sock;
1822 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1823 struct iovec *iov = iovstack;
1da177e4
LT
1824 struct msghdr msg_sys;
1825 unsigned long cmsg_ptr;
1826 int err, iov_size, total_len, len;
6cb153ca 1827 int fput_needed;
1da177e4
LT
1828
1829 /* kernel mode address */
1830 char addr[MAX_SOCK_ADDR];
1831
1832 /* user mode address pointers */
1833 struct sockaddr __user *uaddr;
1834 int __user *uaddr_len;
89bddce5 1835
1da177e4
LT
1836 if (MSG_CMSG_COMPAT & flags) {
1837 if (get_compat_msghdr(&msg_sys, msg_compat))
1838 return -EFAULT;
89bddce5
SH
1839 }
1840 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1841 return -EFAULT;
1da177e4 1842
6cb153ca 1843 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1844 if (!sock)
1845 goto out;
1846
1847 err = -EMSGSIZE;
1848 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1849 goto out_put;
89bddce5
SH
1850
1851 /* Check whether to allocate the iovec area */
1da177e4
LT
1852 err = -ENOMEM;
1853 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1854 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1855 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1856 if (!iov)
1857 goto out_put;
1858 }
1859
1860 /*
89bddce5
SH
1861 * Save the user-mode address (verify_iovec will change the
1862 * kernel msghdr to use the kernel address space)
1da177e4 1863 */
89bddce5
SH
1864
1865 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1866 uaddr_len = COMPAT_NAMELEN(msg);
1867 if (MSG_CMSG_COMPAT & flags) {
1868 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1869 } else
1870 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1871 if (err < 0)
1872 goto out_freeiov;
89bddce5 1873 total_len = err;
1da177e4
LT
1874
1875 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1876 msg_sys.msg_flags = 0;
1877 if (MSG_CMSG_COMPAT & flags)
1878 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1879
1da177e4
LT
1880 if (sock->file->f_flags & O_NONBLOCK)
1881 flags |= MSG_DONTWAIT;
1882 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1883 if (err < 0)
1884 goto out_freeiov;
1885 len = err;
1886
1887 if (uaddr != NULL) {
89bddce5
SH
1888 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1889 uaddr_len);
1da177e4
LT
1890 if (err < 0)
1891 goto out_freeiov;
1892 }
37f7f421
DM
1893 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1894 COMPAT_FLAGS(msg));
1da177e4
LT
1895 if (err)
1896 goto out_freeiov;
1897 if (MSG_CMSG_COMPAT & flags)
89bddce5 1898 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1899 &msg_compat->msg_controllen);
1900 else
89bddce5 1901 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1902 &msg->msg_controllen);
1903 if (err)
1904 goto out_freeiov;
1905 err = len;
1906
1907out_freeiov:
1908 if (iov != iovstack)
1909 sock_kfree_s(sock->sk, iov, iov_size);
1910out_put:
6cb153ca 1911 fput_light(sock->file, fput_needed);
1da177e4
LT
1912out:
1913 return err;
1914}
1915
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/* Argument list sizes for sys_socketcall, in bytes, indexed by call number */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexer for the socket system calls: @call selects the
 *	operation (1 .. SYS_RECVMSG) and @args points to its arguments in
 *	user space, stored as an array of unsigned long. The arguments are
 *	copied in, reported to audit_socketcall() and passed on to the
 *	matching sys_*() function.
 *
 *	Returns the callee's result, -EINVAL for an unknown @call, or
 *	-EFAULT if the arguments cannot be read.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2027
55737fda
SH
2028/**
2029 * sock_register - add a socket protocol handler
2030 * @ops: description of protocol
2031 *
1da177e4
LT
2032 * This function is called by a protocol handler that wants to
2033 * advertise its address family, and have it linked into the
55737fda
SH
2034 * socket interface. The value ops->family coresponds to the
2035 * socket system call protocol family.
1da177e4 2036 */
f0fd27d4 2037int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2038{
2039 int err;
2040
2041 if (ops->family >= NPROTO) {
89bddce5
SH
2042 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2043 NPROTO);
1da177e4
LT
2044 return -ENOBUFS;
2045 }
55737fda
SH
2046
2047 spin_lock(&net_family_lock);
2048 if (net_families[ops->family])
2049 err = -EEXIST;
2050 else {
89bddce5 2051 net_families[ops->family] = ops;
1da177e4
LT
2052 err = 0;
2053 }
55737fda
SH
2054 spin_unlock(&net_family_lock);
2055
89bddce5 2056 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2057 return err;
2058}
2059
55737fda
SH
2060/**
2061 * sock_unregister - remove a protocol handler
2062 * @family: protocol family to remove
2063 *
1da177e4
LT
2064 * This function is called by a protocol handler that wants to
2065 * remove its address family, and have it unlinked from the
55737fda
SH
2066 * new socket creation.
2067 *
2068 * If protocol handler is a module, then it can use module reference
2069 * counts to protect against new references. If protocol handler is not
2070 * a module then it needs to provide its own protection in
2071 * the ops->create routine.
1da177e4 2072 */
f0fd27d4 2073void sock_unregister(int family)
1da177e4 2074{
f0fd27d4 2075 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2076
55737fda 2077 spin_lock(&net_family_lock);
89bddce5 2078 net_families[family] = NULL;
55737fda
SH
2079 spin_unlock(&net_family_lock);
2080
2081 synchronize_rcu();
2082
89bddce5 2083 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2084}
2085
77d76ea3 2086static int __init sock_init(void)
1da177e4
LT
2087{
2088 /*
89bddce5 2089 * Initialize sock SLAB cache.
1da177e4 2090 */
89bddce5 2091
1da177e4
LT
2092 sk_init();
2093
1da177e4 2094 /*
89bddce5 2095 * Initialize skbuff SLAB cache
1da177e4
LT
2096 */
2097 skb_init();
1da177e4
LT
2098
2099 /*
89bddce5 2100 * Initialize the protocols module.
1da177e4
LT
2101 */
2102
2103 init_inodecache();
2104 register_filesystem(&sock_fs_type);
2105 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2106
2107 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2108 */
2109
2110#ifdef CONFIG_NETFILTER
2111 netfilter_init();
2112#endif
cbeb321a
DM
2113
2114 return 0;
1da177e4
LT
2115}
2116
77d76ea3
AK
2117core_initcall(sock_init); /* early initcall */
2118
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the number of sockets in use to a seq_file.
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and emits one "sockets: used N" line.  The sum can come out negative,
 * in which case 0 is reported instead.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* Never show a negative count to the reader. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2135
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 *
 * Forwards the request to the socket's compat_ioctl operation when the
 * protocol provides one; otherwise returns -ENOIOCTLCMD.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	/* No protocol-specific compat handler: report "no such ioctl". */
	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2149
ac5a488e
SS
2150int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2151{
2152 return sock->ops->bind(sock, addr, addrlen);
2153}
2154
2155int kernel_listen(struct socket *sock, int backlog)
2156{
2157 return sock->ops->listen(sock, backlog);
2158}
2159
2160int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2161{
2162 struct sock *sk = sock->sk;
2163 int err;
2164
2165 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2166 newsock);
2167 if (err < 0)
2168 goto done;
2169
2170 err = sock->ops->accept(sock, *newsock, flags);
2171 if (err < 0) {
2172 sock_release(*newsock);
2173 goto done;
2174 }
2175
2176 (*newsock)->ops = sock->ops;
2177
2178done:
2179 return err;
2180}
2181
2182int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2183 int flags)
2184{
2185 return sock->ops->connect(sock, addr, addrlen, flags);
2186}
2187
2188int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2189 int *addrlen)
2190{
2191 return sock->ops->getname(sock, addr, addrlen, 0);
2192}
2193
2194int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2195 int *addrlen)
2196{
2197 return sock->ops->getname(sock, addr, addrlen, 1);
2198}
2199
2200int kernel_getsockopt(struct socket *sock, int level, int optname,
2201 char *optval, int *optlen)
2202{
2203 mm_segment_t oldfs = get_fs();
2204 int err;
2205
2206 set_fs(KERNEL_DS);
2207 if (level == SOL_SOCKET)
2208 err = sock_getsockopt(sock, level, optname, optval, optlen);
2209 else
2210 err = sock->ops->getsockopt(sock, level, optname, optval,
2211 optlen);
2212 set_fs(oldfs);
2213 return err;
2214}
2215
2216int kernel_setsockopt(struct socket *sock, int level, int optname,
2217 char *optval, int optlen)
2218{
2219 mm_segment_t oldfs = get_fs();
2220 int err;
2221
2222 set_fs(KERNEL_DS);
2223 if (level == SOL_SOCKET)
2224 err = sock_setsockopt(sock, level, optname, optval, optlen);
2225 else
2226 err = sock->ops->setsockopt(sock, level, optname, optval,
2227 optlen);
2228 set_fs(oldfs);
2229 return err;
2230}
2231
2232int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2233 size_t size, int flags)
2234{
2235 if (sock->ops->sendpage)
2236 return sock->ops->sendpage(sock, page, offset, size, flags);
2237
2238 return sock_no_sendpage(sock, page, offset, size, flags);
2239}
2240
2241int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2242{
2243 mm_segment_t oldfs = get_fs();
2244 int err;
2245
2246 set_fs(KERNEL_DS);
2247 err = sock->ops->ioctl(sock, cmd, arg);
2248 set_fs(oldfs);
2249
2250 return err;
2251}
2252
1da177e4
LT
/* ABI emulation layers need the two address-copy helpers. */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Core socket interface. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* In-kernel socket helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);