[UDP]: Use __skb_pull since we have checked it won't fail with pskb_may_pull
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
/*
 * Largest socket address moved across the user/kernel boundary:
 * 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6 and about
 * 80 for AX.25.  It must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
89bddce5
SH
264 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
265 == SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
266 inode_init_once(&ei->vfs_inode);
267}
89bddce5 268
1da177e4
LT
269static int init_inodecache(void)
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
276 SLAB_MEM_SPREAD),
277 init_once,
278 NULL);
1da177e4
LT
279 if (sock_inode_cachep == NULL)
280 return -ENOMEM;
281 return 0;
282}
283
284static struct super_operations sockfs_ops = {
285 .alloc_inode = sock_alloc_inode,
286 .destroy_inode =sock_destroy_inode,
287 .statfs = simple_statfs,
288};
289
454e2398 290static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
291 int flags, const char *dev_name, void *data,
292 struct vfsmount *mnt)
1da177e4 293{
454e2398
DH
294 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
295 mnt);
1da177e4
LT
296}
297
ba89966c 298static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
299
300static struct file_system_type sock_fs_type = {
301 .name = "sockfs",
302 .get_sb = sockfs_get_sb,
303 .kill_sb = kill_anon_super,
304};
89bddce5 305
1da177e4
LT
306static int sockfs_delete_dentry(struct dentry *dentry)
307{
304e61e6
ED
308 /*
309 * At creation time, we pretended this dentry was hashed
310 * (by clearing DCACHE_UNHASHED bit in d_flags)
311 * At delete time, we restore the truth : not hashed.
312 * (so that dput() can proceed correctly)
313 */
314 dentry->d_flags |= DCACHE_UNHASHED;
315 return 0;
1da177e4
LT
316}
317static struct dentry_operations sockfs_dentry_operations = {
89bddce5 318 .d_delete = sockfs_delete_dentry,
1da177e4
LT
319};
320
321/*
322 * Obtains the first available file descriptor and sets it up for use.
323 *
39d8c1b6
DM
324 * These functions create file structures and maps them to fd space
325 * of the current process. On success it returns file descriptor
1da177e4
LT
326 * and file struct implicitly stored in sock->file.
327 * Note that another thread may close file descriptor before we return
328 * from this function. We use the fact that now we do not refer
329 * to socket after mapping. If one day we will need it, this
330 * function will increment ref. count on file by 1.
331 *
332 * In any case returned fd MAY BE not valid!
333 * This race condition is unavoidable
334 * with shared fd spaces, we cannot solve it inside kernel,
335 * but we take care of internal coherence yet.
336 */
337
39d8c1b6 338static int sock_alloc_fd(struct file **filep)
1da177e4
LT
339{
340 int fd;
1da177e4
LT
341
342 fd = get_unused_fd();
39d8c1b6 343 if (likely(fd >= 0)) {
1da177e4
LT
344 struct file *file = get_empty_filp();
345
39d8c1b6
DM
346 *filep = file;
347 if (unlikely(!file)) {
1da177e4 348 put_unused_fd(fd);
39d8c1b6 349 return -ENFILE;
1da177e4 350 }
39d8c1b6
DM
351 } else
352 *filep = NULL;
353 return fd;
354}
1da177e4 355
39d8c1b6
DM
356static int sock_attach_fd(struct socket *sock, struct file *file)
357{
358 struct qstr this;
359 char name[32];
360
361 this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
362 this.name = name;
304e61e6 363 this.hash = 0;
39d8c1b6 364
3126a42c
JS
365 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
366 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
367 return -ENOMEM;
368
3126a42c 369 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
370 /*
371 * We dont want to push this dentry into global dentry hash table.
372 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
373 * This permits a working /proc/$pid/fd/XXX on sockets
374 */
3126a42c
JS
375 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
376 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
377 file->f_path.mnt = mntget(sock_mnt);
378 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
379
380 sock->file = file;
381 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
382 file->f_mode = FMODE_READ | FMODE_WRITE;
383 file->f_flags = O_RDWR;
384 file->f_pos = 0;
385 file->private_data = sock;
1da177e4 386
39d8c1b6
DM
387 return 0;
388}
389
/*
 * Allocate an fd plus file for @sock, attach them and install the file
 * in the fd table.  Returns the new fd, or a negative errno; on an
 * attach failure both the file and the fd are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
407
6cb153ca
BL
408static struct socket *sock_from_file(struct file *file, int *err)
409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca
BL
415}
416
1da177e4
LT
417/**
418 * sockfd_lookup - Go from a file number to its socket slot
419 * @fd: file handle
420 * @err: pointer to an error code return
421 *
422 * The file handle passed in is locked and the socket it is bound
423 * too is returned. If an error occurs the err pointer is overwritten
424 * with a negative errno code and NULL is returned. The function checks
425 * for both invalid handles and passing a handle which is not a socket.
426 *
427 * On a success the socket object pointer is returned.
428 */
429
430struct socket *sockfd_lookup(int fd, int *err)
431{
432 struct file *file;
1da177e4
LT
433 struct socket *sock;
434
89bddce5
SH
435 file = fget(fd);
436 if (!file) {
1da177e4
LT
437 *err = -EBADF;
438 return NULL;
439 }
89bddce5 440
6cb153ca
BL
441 sock = sock_from_file(file, err);
442 if (!sock)
1da177e4 443 fput(file);
6cb153ca
BL
444 return sock;
445}
1da177e4 446
6cb153ca
BL
447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
448{
449 struct file *file;
450 struct socket *sock;
451
3672558c 452 *err = -EBADF;
6cb153ca
BL
453 file = fget_light(fd, fput_needed);
454 if (file) {
455 sock = sock_from_file(file, err);
456 if (sock)
457 return sock;
458 fput_light(file, *fput_needed);
1da177e4 459 }
6cb153ca 460 return NULL;
1da177e4
LT
461}
462
463/**
464 * sock_alloc - allocate a socket
89bddce5 465 *
1da177e4
LT
466 * Allocate a new inode and socket object. The two are bound together
467 * and initialised. The socket is then returned. If we are out of inodes
468 * NULL is returned.
469 */
470
471static struct socket *sock_alloc(void)
472{
89bddce5
SH
473 struct inode *inode;
474 struct socket *sock;
1da177e4
LT
475
476 inode = new_inode(sock_mnt->mnt_sb);
477 if (!inode)
478 return NULL;
479
480 sock = SOCKET_I(inode);
481
89bddce5 482 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
483 inode->i_uid = current->fsuid;
484 inode->i_gid = current->fsgid;
485
486 get_cpu_var(sockets_in_use)++;
487 put_cpu_var(sockets_in_use);
488 return sock;
489}
490
491/*
492 * In theory you can't get an open on this inode, but /proc provides
493 * a back door. Remember to keep it shut otherwise you'll let the
494 * creepy crawlies in.
495 */
89bddce5 496
1da177e4
LT
497static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
498{
499 return -ENXIO;
500}
501
4b6f5d20 502const struct file_operations bad_sock_fops = {
1da177e4
LT
503 .owner = THIS_MODULE,
504 .open = sock_no_open,
505};
506
507/**
508 * sock_release - close a socket
509 * @sock: socket to close
510 *
511 * The socket is released from the protocol stack if it has a release
512 * callback, and the inode is then released if the socket is bound to
89bddce5 513 * an inode not a file.
1da177e4 514 */
89bddce5 515
1da177e4
LT
516void sock_release(struct socket *sock)
517{
518 if (sock->ops) {
519 struct module *owner = sock->ops->owner;
520
521 sock->ops->release(sock);
522 sock->ops = NULL;
523 module_put(owner);
524 }
525
526 if (sock->fasync_list)
527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
528
529 get_cpu_var(sockets_in_use)--;
530 put_cpu_var(sockets_in_use);
531 if (!sock->file) {
532 iput(SOCK_INODE(sock));
533 return;
534 }
89bddce5 535 sock->file = NULL;
1da177e4
LT
536}
537
89bddce5 538static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
539 struct msghdr *msg, size_t size)
540{
541 struct sock_iocb *si = kiocb_to_siocb(iocb);
542 int err;
543
544 si->sock = sock;
545 si->scm = NULL;
546 si->msg = msg;
547 si->size = size;
548
549 err = security_socket_sendmsg(sock, msg, size);
550 if (err)
551 return err;
552
553 return sock->ops->sendmsg(iocb, sock, msg, size);
554}
555
556int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
557{
558 struct kiocb iocb;
559 struct sock_iocb siocb;
560 int ret;
561
562 init_sync_kiocb(&iocb, NULL);
563 iocb.private = &siocb;
564 ret = __sock_sendmsg(&iocb, sock, msg, size);
565 if (-EIOCBQUEUED == ret)
566 ret = wait_on_sync_kiocb(&iocb);
567 return ret;
568}
569
570int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
571 struct kvec *vec, size_t num, size_t size)
572{
573 mm_segment_t oldfs = get_fs();
574 int result;
575
576 set_fs(KERNEL_DS);
577 /*
578 * the following is safe, since for compiler definitions of kvec and
579 * iovec are identical, yielding the same in-core layout and alignment
580 */
89bddce5 581 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
582 msg->msg_iovlen = num;
583 result = sock_sendmsg(sock, msg, size);
584 set_fs(oldfs);
585 return result;
586}
587
89bddce5 588static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
589 struct msghdr *msg, size_t size, int flags)
590{
591 int err;
592 struct sock_iocb *si = kiocb_to_siocb(iocb);
593
594 si->sock = sock;
595 si->scm = NULL;
596 si->msg = msg;
597 si->size = size;
598 si->flags = flags;
599
600 err = security_socket_recvmsg(sock, msg, size, flags);
601 if (err)
602 return err;
603
604 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
605}
606
89bddce5 607int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
608 size_t size, int flags)
609{
610 struct kiocb iocb;
611 struct sock_iocb siocb;
612 int ret;
613
89bddce5 614 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
615 iocb.private = &siocb;
616 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
617 if (-EIOCBQUEUED == ret)
618 ret = wait_on_sync_kiocb(&iocb);
619 return ret;
620}
621
89bddce5
SH
622int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
623 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
624{
625 mm_segment_t oldfs = get_fs();
626 int result;
627
628 set_fs(KERNEL_DS);
629 /*
630 * the following is safe, since for compiler definitions of kvec and
631 * iovec are identical, yielding the same in-core layout and alignment
632 */
89bddce5 633 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
634 result = sock_recvmsg(sock, msg, size, flags);
635 set_fs(oldfs);
636 return result;
637}
638
639static void sock_aio_dtor(struct kiocb *iocb)
640{
641 kfree(iocb->private);
642}
643
ce1d4d3e
CH
644static ssize_t sock_sendpage(struct file *file, struct page *page,
645 int offset, size_t size, loff_t *ppos, int more)
1da177e4 646{
1da177e4
LT
647 struct socket *sock;
648 int flags;
649
ce1d4d3e
CH
650 sock = file->private_data;
651
652 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
653 if (more)
654 flags |= MSG_MORE;
655
656 return sock->ops->sendpage(sock, page, offset, size, flags);
657}
1da177e4 658
ce1d4d3e 659static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 660 struct sock_iocb *siocb)
ce1d4d3e
CH
661{
662 if (!is_sync_kiocb(iocb)) {
663 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
664 if (!siocb)
665 return NULL;
1da177e4
LT
666 iocb->ki_dtor = sock_aio_dtor;
667 }
1da177e4 668
ce1d4d3e 669 siocb->kiocb = iocb;
ce1d4d3e
CH
670 iocb->private = siocb;
671 return siocb;
1da177e4
LT
672}
673
ce1d4d3e 674static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
675 struct file *file, const struct iovec *iov,
676 unsigned long nr_segs)
ce1d4d3e
CH
677{
678 struct socket *sock = file->private_data;
679 size_t size = 0;
680 int i;
1da177e4 681
89bddce5
SH
682 for (i = 0; i < nr_segs; i++)
683 size += iov[i].iov_len;
1da177e4 684
ce1d4d3e
CH
685 msg->msg_name = NULL;
686 msg->msg_namelen = 0;
687 msg->msg_control = NULL;
688 msg->msg_controllen = 0;
89bddce5 689 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
690 msg->msg_iovlen = nr_segs;
691 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
692
693 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
694}
695
027445c3
BP
696static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
697 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
698{
699 struct sock_iocb siocb, *x;
700
1da177e4
LT
701 if (pos != 0)
702 return -ESPIPE;
027445c3
BP
703
704 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
705 return 0;
706
027445c3
BP
707
708 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
709 if (!x)
710 return -ENOMEM;
027445c3 711 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
712}
713
ce1d4d3e 714static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
715 struct file *file, const struct iovec *iov,
716 unsigned long nr_segs)
1da177e4 717{
ce1d4d3e
CH
718 struct socket *sock = file->private_data;
719 size_t size = 0;
720 int i;
1da177e4 721
89bddce5
SH
722 for (i = 0; i < nr_segs; i++)
723 size += iov[i].iov_len;
1da177e4 724
ce1d4d3e
CH
725 msg->msg_name = NULL;
726 msg->msg_namelen = 0;
727 msg->msg_control = NULL;
728 msg->msg_controllen = 0;
89bddce5 729 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
730 msg->msg_iovlen = nr_segs;
731 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
732 if (sock->type == SOCK_SEQPACKET)
733 msg->msg_flags |= MSG_EOR;
1da177e4 734
ce1d4d3e 735 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
736}
737
027445c3
BP
738static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
739 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
740{
741 struct sock_iocb siocb, *x;
1da177e4 742
ce1d4d3e
CH
743 if (pos != 0)
744 return -ESPIPE;
027445c3
BP
745
746 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 747 return 0;
1da177e4 748
027445c3 749 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
750 if (!x)
751 return -ENOMEM;
1da177e4 752
027445c3 753 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
754}
755
1da177e4
LT
756/*
757 * Atomic setting of ioctl hooks to avoid race
758 * with module unload.
759 */
760
4a3e2f71 761static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 762static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 763
89bddce5 764void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 765{
4a3e2f71 766 mutex_lock(&br_ioctl_mutex);
1da177e4 767 br_ioctl_hook = hook;
4a3e2f71 768 mutex_unlock(&br_ioctl_mutex);
1da177e4 769}
89bddce5 770
1da177e4
LT
771EXPORT_SYMBOL(brioctl_set);
772
4a3e2f71 773static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 774static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 775
89bddce5 776void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 777{
4a3e2f71 778 mutex_lock(&vlan_ioctl_mutex);
1da177e4 779 vlan_ioctl_hook = hook;
4a3e2f71 780 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 781}
89bddce5 782
1da177e4
LT
783EXPORT_SYMBOL(vlan_ioctl_set);
784
4a3e2f71 785static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 786static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 787
89bddce5 788void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 789{
4a3e2f71 790 mutex_lock(&dlci_ioctl_mutex);
1da177e4 791 dlci_ioctl_hook = hook;
4a3e2f71 792 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 793}
89bddce5 794
1da177e4
LT
795EXPORT_SYMBOL(dlci_ioctl_set);
796
797/*
798 * With an ioctl, arg may well be a user mode pointer, but we don't know
799 * what to do with it - that's up to the protocol still.
800 */
801
802static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
803{
804 struct socket *sock;
805 void __user *argp = (void __user *)arg;
806 int pid, err;
807
b69aee04 808 sock = file->private_data;
1da177e4
LT
809 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
810 err = dev_ioctl(cmd, argp);
811 } else
d86b5e0e 812#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
813 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
814 err = dev_ioctl(cmd, argp);
815 } else
89bddce5
SH
816#endif /* CONFIG_WIRELESS_EXT */
817 switch (cmd) {
1da177e4
LT
818 case FIOSETOWN:
819 case SIOCSPGRP:
820 err = -EFAULT;
821 if (get_user(pid, (int __user *)argp))
822 break;
823 err = f_setown(sock->file, pid, 1);
824 break;
825 case FIOGETOWN:
826 case SIOCGPGRP:
609d7fa9 827 err = put_user(f_getown(sock->file),
89bddce5 828 (int __user *)argp);
1da177e4
LT
829 break;
830 case SIOCGIFBR:
831 case SIOCSIFBR:
832 case SIOCBRADDBR:
833 case SIOCBRDELBR:
834 err = -ENOPKG;
835 if (!br_ioctl_hook)
836 request_module("bridge");
837
4a3e2f71 838 mutex_lock(&br_ioctl_mutex);
89bddce5 839 if (br_ioctl_hook)
1da177e4 840 err = br_ioctl_hook(cmd, argp);
4a3e2f71 841 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
842 break;
843 case SIOCGIFVLAN:
844 case SIOCSIFVLAN:
845 err = -ENOPKG;
846 if (!vlan_ioctl_hook)
847 request_module("8021q");
848
4a3e2f71 849 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
850 if (vlan_ioctl_hook)
851 err = vlan_ioctl_hook(argp);
4a3e2f71 852 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 853 break;
1da177e4
LT
854 case SIOCADDDLCI:
855 case SIOCDELDLCI:
856 err = -ENOPKG;
857 if (!dlci_ioctl_hook)
858 request_module("dlci");
859
860 if (dlci_ioctl_hook) {
4a3e2f71 861 mutex_lock(&dlci_ioctl_mutex);
1da177e4 862 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 863 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
864 }
865 break;
866 default:
867 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
868
869 /*
870 * If this ioctl is unknown try to hand it down
871 * to the NIC driver.
872 */
873 if (err == -ENOIOCTLCMD)
874 err = dev_ioctl(cmd, argp);
1da177e4 875 break;
89bddce5 876 }
1da177e4
LT
877 return err;
878}
879
880int sock_create_lite(int family, int type, int protocol, struct socket **res)
881{
882 int err;
883 struct socket *sock = NULL;
89bddce5 884
1da177e4
LT
885 err = security_socket_create(family, type, protocol, 1);
886 if (err)
887 goto out;
888
889 sock = sock_alloc();
890 if (!sock) {
891 err = -ENOMEM;
892 goto out;
893 }
894
1da177e4 895 sock->type = type;
7420ed23
VY
896 err = security_socket_post_create(sock, family, type, protocol, 1);
897 if (err)
898 goto out_release;
899
1da177e4
LT
900out:
901 *res = sock;
902 return err;
7420ed23
VY
903out_release:
904 sock_release(sock);
905 sock = NULL;
906 goto out;
1da177e4
LT
907}
908
909/* No kernel lock held - perfect */
89bddce5 910static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
911{
912 struct socket *sock;
913
914 /*
89bddce5 915 * We can't return errors to poll, so it's either yes or no.
1da177e4 916 */
b69aee04 917 sock = file->private_data;
1da177e4
LT
918 return sock->ops->poll(file, sock, wait);
919}
920
89bddce5 921static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 922{
b69aee04 923 struct socket *sock = file->private_data;
1da177e4
LT
924
925 return sock->ops->mmap(file, sock, vma);
926}
927
20380731 928static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
929{
930 /*
89bddce5
SH
931 * It was possible the inode is NULL we were
932 * closing an unfinished socket.
1da177e4
LT
933 */
934
89bddce5 935 if (!inode) {
1da177e4
LT
936 printk(KERN_DEBUG "sock_close: NULL inode\n");
937 return 0;
938 }
939 sock_fasync(-1, filp, 0);
940 sock_release(SOCKET_I(inode));
941 return 0;
942}
943
944/*
945 * Update the socket async list
946 *
947 * Fasync_list locking strategy.
948 *
949 * 1. fasync_list is modified only under process context socket lock
950 * i.e. under semaphore.
951 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
952 * or under socket lock.
953 * 3. fasync_list can be used from softirq context, so that
954 * modification under socket lock have to be enhanced with
955 * write_lock_bh(&sk->sk_callback_lock).
956 * --ANK (990710)
957 */
958
959static int sock_fasync(int fd, struct file *filp, int on)
960{
89bddce5 961 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
962 struct socket *sock;
963 struct sock *sk;
964
89bddce5 965 if (on) {
8b3a7005 966 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 967 if (fna == NULL)
1da177e4
LT
968 return -ENOMEM;
969 }
970
b69aee04 971 sock = filp->private_data;
1da177e4 972
89bddce5
SH
973 sk = sock->sk;
974 if (sk == NULL) {
1da177e4
LT
975 kfree(fna);
976 return -EINVAL;
977 }
978
979 lock_sock(sk);
980
89bddce5 981 prev = &(sock->fasync_list);
1da177e4 982
89bddce5
SH
983 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
984 if (fa->fa_file == filp)
1da177e4
LT
985 break;
986
89bddce5
SH
987 if (on) {
988 if (fa != NULL) {
1da177e4 989 write_lock_bh(&sk->sk_callback_lock);
89bddce5 990 fa->fa_fd = fd;
1da177e4
LT
991 write_unlock_bh(&sk->sk_callback_lock);
992
993 kfree(fna);
994 goto out;
995 }
89bddce5
SH
996 fna->fa_file = filp;
997 fna->fa_fd = fd;
998 fna->magic = FASYNC_MAGIC;
999 fna->fa_next = sock->fasync_list;
1da177e4 1000 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1001 sock->fasync_list = fna;
1da177e4 1002 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1003 } else {
1004 if (fa != NULL) {
1da177e4 1005 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1006 *prev = fa->fa_next;
1da177e4
LT
1007 write_unlock_bh(&sk->sk_callback_lock);
1008 kfree(fa);
1009 }
1010 }
1011
1012out:
1013 release_sock(sock->sk);
1014 return 0;
1015}
1016
1017/* This function may be called only under socket lock or callback_lock */
1018
1019int sock_wake_async(struct socket *sock, int how, int band)
1020{
1021 if (!sock || !sock->fasync_list)
1022 return -1;
89bddce5 1023 switch (how) {
1da177e4 1024 case 1:
89bddce5 1025
1da177e4
LT
1026 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1027 break;
1028 goto call_kill;
1029 case 2:
1030 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1031 break;
1032 /* fall through */
1033 case 0:
89bddce5 1034call_kill:
1da177e4
LT
1035 __kill_fasync(sock->fasync_list, SIGIO, band);
1036 break;
1037 case 3:
1038 __kill_fasync(sock->fasync_list, SIGURG, band);
1039 }
1040 return 0;
1041}
1042
89bddce5
SH
1043static int __sock_create(int family, int type, int protocol,
1044 struct socket **res, int kern)
1da177e4
LT
1045{
1046 int err;
1047 struct socket *sock;
55737fda 1048 const struct net_proto_family *pf;
1da177e4
LT
1049
1050 /*
89bddce5 1051 * Check protocol is in range
1da177e4
LT
1052 */
1053 if (family < 0 || family >= NPROTO)
1054 return -EAFNOSUPPORT;
1055 if (type < 0 || type >= SOCK_MAX)
1056 return -EINVAL;
1057
1058 /* Compatibility.
1059
1060 This uglymoron is moved from INET layer to here to avoid
1061 deadlock in module load.
1062 */
1063 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1064 static int warned;
1da177e4
LT
1065 if (!warned) {
1066 warned = 1;
89bddce5
SH
1067 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1068 current->comm);
1da177e4
LT
1069 }
1070 family = PF_PACKET;
1071 }
1072
1073 err = security_socket_create(family, type, protocol, kern);
1074 if (err)
1075 return err;
89bddce5 1076
55737fda
SH
1077 /*
1078 * Allocate the socket and allow the family to set things up. if
1079 * the protocol is 0, the family is instructed to select an appropriate
1080 * default.
1081 */
1082 sock = sock_alloc();
1083 if (!sock) {
1084 if (net_ratelimit())
1085 printk(KERN_WARNING "socket: no more sockets\n");
1086 return -ENFILE; /* Not exactly a match, but its the
1087 closest posix thing */
1088 }
1089
1090 sock->type = type;
1091
1da177e4 1092#if defined(CONFIG_KMOD)
89bddce5
SH
1093 /* Attempt to load a protocol module if the find failed.
1094 *
1095 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1096 * requested real, full-featured networking support upon configuration.
1097 * Otherwise module support will break!
1098 */
55737fda 1099 if (net_families[family] == NULL)
89bddce5 1100 request_module("net-pf-%d", family);
1da177e4
LT
1101#endif
1102
55737fda
SH
1103 rcu_read_lock();
1104 pf = rcu_dereference(net_families[family]);
1105 err = -EAFNOSUPPORT;
1106 if (!pf)
1107 goto out_release;
1da177e4
LT
1108
1109 /*
1110 * We will call the ->create function, that possibly is in a loadable
1111 * module, so we have to bump that loadable module refcnt first.
1112 */
55737fda 1113 if (!try_module_get(pf->owner))
1da177e4
LT
1114 goto out_release;
1115
55737fda
SH
1116 /* Now protected by module ref count */
1117 rcu_read_unlock();
1118
1119 err = pf->create(sock, protocol);
1120 if (err < 0)
1da177e4 1121 goto out_module_put;
a79af59e 1122
1da177e4
LT
1123 /*
1124 * Now to bump the refcnt of the [loadable] module that owns this
1125 * socket at sock_release time we decrement its refcnt.
1126 */
55737fda
SH
1127 if (!try_module_get(sock->ops->owner))
1128 goto out_module_busy;
1129
1da177e4
LT
1130 /*
1131 * Now that we're done with the ->create function, the [loadable]
1132 * module can have its refcnt decremented
1133 */
55737fda 1134 module_put(pf->owner);
7420ed23
VY
1135 err = security_socket_post_create(sock, family, type, protocol, kern);
1136 if (err)
1137 goto out_release;
55737fda 1138 *res = sock;
1da177e4 1139
55737fda
SH
1140 return 0;
1141
1142out_module_busy:
1143 err = -EAFNOSUPPORT;
1da177e4 1144out_module_put:
55737fda
SH
1145 sock->ops = NULL;
1146 module_put(pf->owner);
1147out_sock_release:
1da177e4 1148 sock_release(sock);
55737fda
SH
1149 return err;
1150
1151out_release:
1152 rcu_read_unlock();
1153 goto out_sock_release;
1da177e4
LT
1154}
1155
/*
 * Create a socket via __sock_create() with the "kern" argument set to 0.
 * Returns whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1160
/*
 * Create a socket via __sock_create() with the "kern" argument set to 1.
 * Returns whatever __sock_create() returns.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1165
1166asmlinkage long sys_socket(int family, int type, int protocol)
1167{
1168 int retval;
1169 struct socket *sock;
1170
1171 retval = sock_create(family, type, protocol, &sock);
1172 if (retval < 0)
1173 goto out;
1174
1175 retval = sock_map_fd(sock);
1176 if (retval < 0)
1177 goto out_release;
1178
1179out:
1180 /* It may be already another descriptor 8) Not kernel problem. */
1181 return retval;
1182
1183out_release:
1184 sock_release(sock);
1185 return retval;
1186}
1187
1188/*
1189 * Create a pair of connected sockets.
1190 */
1191
89bddce5
SH
1192asmlinkage long sys_socketpair(int family, int type, int protocol,
1193 int __user *usockvec)
1da177e4
LT
1194{
1195 struct socket *sock1, *sock2;
1196 int fd1, fd2, err;
db349509 1197 struct file *newfile1, *newfile2;
1da177e4
LT
1198
1199 /*
1200 * Obtain the first socket and check if the underlying protocol
1201 * supports the socketpair call.
1202 */
1203
1204 err = sock_create(family, type, protocol, &sock1);
1205 if (err < 0)
1206 goto out;
1207
1208 err = sock_create(family, type, protocol, &sock2);
1209 if (err < 0)
1210 goto out_release_1;
1211
1212 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1213 if (err < 0)
1da177e4
LT
1214 goto out_release_both;
1215
db349509
AV
1216 fd1 = sock_alloc_fd(&newfile1);
1217 if (unlikely(fd1 < 0))
1218 goto out_release_both;
1da177e4 1219
db349509
AV
1220 fd2 = sock_alloc_fd(&newfile2);
1221 if (unlikely(fd2 < 0)) {
1222 put_filp(newfile1);
1223 put_unused_fd(fd1);
1da177e4 1224 goto out_release_both;
db349509 1225 }
1da177e4 1226
db349509
AV
1227 err = sock_attach_fd(sock1, newfile1);
1228 if (unlikely(err < 0)) {
1229 goto out_fd2;
1230 }
1231
1232 err = sock_attach_fd(sock2, newfile2);
1233 if (unlikely(err < 0)) {
1234 fput(newfile1);
1235 goto out_fd1;
1236 }
1237
1238 err = audit_fd_pair(fd1, fd2);
1239 if (err < 0) {
1240 fput(newfile1);
1241 fput(newfile2);
1242 goto out_fd;
1243 }
1da177e4 1244
db349509
AV
1245 fd_install(fd1, newfile1);
1246 fd_install(fd2, newfile2);
1da177e4
LT
1247 /* fd1 and fd2 may be already another descriptors.
1248 * Not kernel problem.
1249 */
1250
89bddce5 1251 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1252 if (!err)
1253 err = put_user(fd2, &usockvec[1]);
1254 if (!err)
1255 return 0;
1256
1257 sys_close(fd2);
1258 sys_close(fd1);
1259 return err;
1260
1da177e4 1261out_release_both:
89bddce5 1262 sock_release(sock2);
1da177e4 1263out_release_1:
89bddce5 1264 sock_release(sock1);
1da177e4
LT
1265out:
1266 return err;
db349509
AV
1267
1268out_fd2:
1269 put_filp(newfile1);
1270 sock_release(sock1);
1271out_fd1:
1272 put_filp(newfile2);
1273 sock_release(sock2);
1274out_fd:
1275 put_unused_fd(fd1);
1276 put_unused_fd(fd2);
1277 goto out;
1da177e4
LT
1278}
1279
1da177e4
LT
1280/*
1281 * Bind a name to a socket. Nothing much to do here since it's
1282 * the protocol's responsibility to handle the local address.
1283 *
1284 * We move the socket address to kernel space before we call
1285 * the protocol layer (having also checked the address is ok).
1286 */
1287
1288asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1289{
1290 struct socket *sock;
1291 char address[MAX_SOCK_ADDR];
6cb153ca 1292 int err, fput_needed;
1da177e4 1293
89bddce5 1294 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1295 if (sock) {
89bddce5
SH
1296 err = move_addr_to_kernel(umyaddr, addrlen, address);
1297 if (err >= 0) {
1298 err = security_socket_bind(sock,
1299 (struct sockaddr *)address,
1300 addrlen);
6cb153ca
BL
1301 if (!err)
1302 err = sock->ops->bind(sock,
89bddce5
SH
1303 (struct sockaddr *)
1304 address, addrlen);
1da177e4 1305 }
6cb153ca 1306 fput_light(sock->file, fput_needed);
89bddce5 1307 }
1da177e4
LT
1308 return err;
1309}
1310
1da177e4
LT
1311/*
1312 * Perform a listen. Basically, we allow the protocol to do anything
1313 * necessary for a listen, and if that works, we mark the socket as
1314 * ready for listening.
1315 */
1316
7a42c217 1317int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1318
1319asmlinkage long sys_listen(int fd, int backlog)
1320{
1321 struct socket *sock;
6cb153ca 1322 int err, fput_needed;
89bddce5
SH
1323
1324 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1325 if (sock) {
1326 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1327 backlog = sysctl_somaxconn;
1328
1329 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1330 if (!err)
1331 err = sock->ops->listen(sock, backlog);
1da177e4 1332
6cb153ca 1333 fput_light(sock->file, fput_needed);
1da177e4
LT
1334 }
1335 return err;
1336}
1337
1da177e4
LT
1338/*
1339 * For accept, we attempt to create a new socket, set up the link
1340 * with the client, wake up the client, then return the new
1341 * connected fd. We collect the address of the connector in kernel
1342 * space and move it to user at the very end. This is unclean because
1343 * we open the socket then return an error.
1344 *
1345 * 1003.1g adds the ability to recvmsg() to query connection pending
1346 * status to recvmsg. We need to add that support in a way thats
1347 * clean when we restucture accept also.
1348 */
1349
89bddce5
SH
1350asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1351 int __user *upeer_addrlen)
1da177e4
LT
1352{
1353 struct socket *sock, *newsock;
39d8c1b6 1354 struct file *newfile;
6cb153ca 1355 int err, len, newfd, fput_needed;
1da177e4
LT
1356 char address[MAX_SOCK_ADDR];
1357
6cb153ca 1358 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1359 if (!sock)
1360 goto out;
1361
1362 err = -ENFILE;
89bddce5 1363 if (!(newsock = sock_alloc()))
1da177e4
LT
1364 goto out_put;
1365
1366 newsock->type = sock->type;
1367 newsock->ops = sock->ops;
1368
1da177e4
LT
1369 /*
1370 * We don't need try_module_get here, as the listening socket (sock)
1371 * has the protocol module (sock->ops->owner) held.
1372 */
1373 __module_get(newsock->ops->owner);
1374
39d8c1b6
DM
1375 newfd = sock_alloc_fd(&newfile);
1376 if (unlikely(newfd < 0)) {
1377 err = newfd;
9a1875e6
DM
1378 sock_release(newsock);
1379 goto out_put;
39d8c1b6
DM
1380 }
1381
1382 err = sock_attach_fd(newsock, newfile);
1383 if (err < 0)
79f4f642 1384 goto out_fd_simple;
39d8c1b6 1385
a79af59e
FF
1386 err = security_socket_accept(sock, newsock);
1387 if (err)
39d8c1b6 1388 goto out_fd;
a79af59e 1389
1da177e4
LT
1390 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1391 if (err < 0)
39d8c1b6 1392 goto out_fd;
1da177e4
LT
1393
1394 if (upeer_sockaddr) {
89bddce5
SH
1395 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1396 &len, 2) < 0) {
1da177e4 1397 err = -ECONNABORTED;
39d8c1b6 1398 goto out_fd;
1da177e4 1399 }
89bddce5
SH
1400 err = move_addr_to_user(address, len, upeer_sockaddr,
1401 upeer_addrlen);
1da177e4 1402 if (err < 0)
39d8c1b6 1403 goto out_fd;
1da177e4
LT
1404 }
1405
1406 /* File flags are not inherited via accept() unlike another OSes. */
1407
39d8c1b6
DM
1408 fd_install(newfd, newfile);
1409 err = newfd;
1da177e4
LT
1410
1411 security_socket_post_accept(sock, newsock);
1412
1413out_put:
6cb153ca 1414 fput_light(sock->file, fput_needed);
1da177e4
LT
1415out:
1416 return err;
79f4f642
AD
1417out_fd_simple:
1418 sock_release(newsock);
1419 put_filp(newfile);
1420 put_unused_fd(newfd);
1421 goto out_put;
39d8c1b6 1422out_fd:
9606a216 1423 fput(newfile);
39d8c1b6 1424 put_unused_fd(newfd);
1da177e4
LT
1425 goto out_put;
1426}
1427
1da177e4
LT
1428/*
1429 * Attempt to connect to a socket with the server address. The address
1430 * is in user space so we verify it is OK and move it to kernel space.
1431 *
1432 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1433 * break bindings
1434 *
1435 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1436 * other SEQPACKET protocols that take time to connect() as it doesn't
1437 * include the -EINPROGRESS status for such sockets.
1438 */
1439
89bddce5
SH
1440asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1441 int addrlen)
1da177e4
LT
1442{
1443 struct socket *sock;
1444 char address[MAX_SOCK_ADDR];
6cb153ca 1445 int err, fput_needed;
1da177e4 1446
6cb153ca 1447 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1448 if (!sock)
1449 goto out;
1450 err = move_addr_to_kernel(uservaddr, addrlen, address);
1451 if (err < 0)
1452 goto out_put;
1453
89bddce5
SH
1454 err =
1455 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1456 if (err)
1457 goto out_put;
1458
89bddce5 1459 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1460 sock->file->f_flags);
1461out_put:
6cb153ca 1462 fput_light(sock->file, fput_needed);
1da177e4
LT
1463out:
1464 return err;
1465}
1466
1467/*
1468 * Get the local address ('name') of a socket object. Move the obtained
1469 * name to user space.
1470 */
1471
89bddce5
SH
1472asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1473 int __user *usockaddr_len)
1da177e4
LT
1474{
1475 struct socket *sock;
1476 char address[MAX_SOCK_ADDR];
6cb153ca 1477 int len, err, fput_needed;
89bddce5 1478
6cb153ca 1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1480 if (!sock)
1481 goto out;
1482
1483 err = security_socket_getsockname(sock);
1484 if (err)
1485 goto out_put;
1486
1487 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1488 if (err)
1489 goto out_put;
1490 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1491
1492out_put:
6cb153ca 1493 fput_light(sock->file, fput_needed);
1da177e4
LT
1494out:
1495 return err;
1496}
1497
1498/*
1499 * Get the remote address ('name') of a socket object. Move the obtained
1500 * name to user space.
1501 */
1502
89bddce5
SH
1503asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1504 int __user *usockaddr_len)
1da177e4
LT
1505{
1506 struct socket *sock;
1507 char address[MAX_SOCK_ADDR];
6cb153ca 1508 int len, err, fput_needed;
1da177e4 1509
89bddce5
SH
1510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1511 if (sock != NULL) {
1da177e4
LT
1512 err = security_socket_getpeername(sock);
1513 if (err) {
6cb153ca 1514 fput_light(sock->file, fput_needed);
1da177e4
LT
1515 return err;
1516 }
1517
89bddce5
SH
1518 err =
1519 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1520 1);
1da177e4 1521 if (!err)
89bddce5
SH
1522 err = move_addr_to_user(address, len, usockaddr,
1523 usockaddr_len);
6cb153ca 1524 fput_light(sock->file, fput_needed);
1da177e4
LT
1525 }
1526 return err;
1527}
1528
1529/*
1530 * Send a datagram to a given address. We move the address into kernel
1531 * space and check the user space data area is readable before invoking
1532 * the protocol.
1533 */
1534
89bddce5
SH
1535asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1536 unsigned flags, struct sockaddr __user *addr,
1537 int addr_len)
1da177e4
LT
1538{
1539 struct socket *sock;
1540 char address[MAX_SOCK_ADDR];
1541 int err;
1542 struct msghdr msg;
1543 struct iovec iov;
6cb153ca
BL
1544 int fput_needed;
1545 struct file *sock_file;
1546
1547 sock_file = fget_light(fd, &fput_needed);
4387ff75 1548 err = -EBADF;
6cb153ca 1549 if (!sock_file)
4387ff75 1550 goto out;
6cb153ca
BL
1551
1552 sock = sock_from_file(sock_file, &err);
1da177e4 1553 if (!sock)
6cb153ca 1554 goto out_put;
89bddce5
SH
1555 iov.iov_base = buff;
1556 iov.iov_len = len;
1557 msg.msg_name = NULL;
1558 msg.msg_iov = &iov;
1559 msg.msg_iovlen = 1;
1560 msg.msg_control = NULL;
1561 msg.msg_controllen = 0;
1562 msg.msg_namelen = 0;
6cb153ca 1563 if (addr) {
1da177e4
LT
1564 err = move_addr_to_kernel(addr, addr_len, address);
1565 if (err < 0)
1566 goto out_put;
89bddce5
SH
1567 msg.msg_name = address;
1568 msg.msg_namelen = addr_len;
1da177e4
LT
1569 }
1570 if (sock->file->f_flags & O_NONBLOCK)
1571 flags |= MSG_DONTWAIT;
1572 msg.msg_flags = flags;
1573 err = sock_sendmsg(sock, &msg, len);
1574
89bddce5 1575out_put:
6cb153ca 1576 fput_light(sock_file, fput_needed);
4387ff75 1577out:
1da177e4
LT
1578 return err;
1579}
1580
1581/*
89bddce5 1582 * Send a datagram down a socket.
1da177e4
LT
1583 */
1584
89bddce5 1585asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1586{
1587 return sys_sendto(fd, buff, len, flags, NULL, 0);
1588}
1589
1590/*
89bddce5 1591 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1592 * sender. We verify the buffers are writable and if needed move the
1593 * sender address from kernel to user space.
1594 */
1595
89bddce5
SH
1596asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1597 unsigned flags, struct sockaddr __user *addr,
1598 int __user *addr_len)
1da177e4
LT
1599{
1600 struct socket *sock;
1601 struct iovec iov;
1602 struct msghdr msg;
1603 char address[MAX_SOCK_ADDR];
89bddce5 1604 int err, err2;
6cb153ca
BL
1605 struct file *sock_file;
1606 int fput_needed;
1607
1608 sock_file = fget_light(fd, &fput_needed);
4387ff75 1609 err = -EBADF;
6cb153ca 1610 if (!sock_file)
4387ff75 1611 goto out;
1da177e4 1612
6cb153ca 1613 sock = sock_from_file(sock_file, &err);
1da177e4 1614 if (!sock)
4387ff75 1615 goto out_put;
1da177e4 1616
89bddce5
SH
1617 msg.msg_control = NULL;
1618 msg.msg_controllen = 0;
1619 msg.msg_iovlen = 1;
1620 msg.msg_iov = &iov;
1621 iov.iov_len = size;
1622 iov.iov_base = ubuf;
1623 msg.msg_name = address;
1624 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1625 if (sock->file->f_flags & O_NONBLOCK)
1626 flags |= MSG_DONTWAIT;
89bddce5 1627 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1628
89bddce5
SH
1629 if (err >= 0 && addr != NULL) {
1630 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1631 if (err2 < 0)
1632 err = err2;
1da177e4 1633 }
4387ff75 1634out_put:
6cb153ca 1635 fput_light(sock_file, fput_needed);
4387ff75 1636out:
1da177e4
LT
1637 return err;
1638}
1639
1640/*
89bddce5 1641 * Receive a datagram from a socket.
1da177e4
LT
1642 */
1643
89bddce5
SH
1644asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1645 unsigned flags)
1da177e4
LT
1646{
1647 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1648}
1649
1650/*
1651 * Set a socket option. Because we don't know the option lengths we have
1652 * to pass the user mode parameter for the protocols to sort out.
1653 */
1654
89bddce5
SH
1655asmlinkage long sys_setsockopt(int fd, int level, int optname,
1656 char __user *optval, int optlen)
1da177e4 1657{
6cb153ca 1658 int err, fput_needed;
1da177e4
LT
1659 struct socket *sock;
1660
1661 if (optlen < 0)
1662 return -EINVAL;
89bddce5
SH
1663
1664 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1665 if (sock != NULL) {
1666 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1667 if (err)
1668 goto out_put;
1da177e4
LT
1669
1670 if (level == SOL_SOCKET)
89bddce5
SH
1671 err =
1672 sock_setsockopt(sock, level, optname, optval,
1673 optlen);
1da177e4 1674 else
89bddce5
SH
1675 err =
1676 sock->ops->setsockopt(sock, level, optname, optval,
1677 optlen);
6cb153ca
BL
1678out_put:
1679 fput_light(sock->file, fput_needed);
1da177e4
LT
1680 }
1681 return err;
1682}
1683
1684/*
1685 * Get a socket option. Because we don't know the option lengths we have
1686 * to pass a user mode parameter for the protocols to sort out.
1687 */
1688
89bddce5
SH
1689asmlinkage long sys_getsockopt(int fd, int level, int optname,
1690 char __user *optval, int __user *optlen)
1da177e4 1691{
6cb153ca 1692 int err, fput_needed;
1da177e4
LT
1693 struct socket *sock;
1694
89bddce5
SH
1695 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1696 if (sock != NULL) {
6cb153ca
BL
1697 err = security_socket_getsockopt(sock, level, optname);
1698 if (err)
1699 goto out_put;
1da177e4
LT
1700
1701 if (level == SOL_SOCKET)
89bddce5
SH
1702 err =
1703 sock_getsockopt(sock, level, optname, optval,
1704 optlen);
1da177e4 1705 else
89bddce5
SH
1706 err =
1707 sock->ops->getsockopt(sock, level, optname, optval,
1708 optlen);
6cb153ca
BL
1709out_put:
1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 }
1712 return err;
1713}
1714
1da177e4
LT
1715/*
1716 * Shutdown a socket.
1717 */
1718
1719asmlinkage long sys_shutdown(int fd, int how)
1720{
6cb153ca 1721 int err, fput_needed;
1da177e4
LT
1722 struct socket *sock;
1723
89bddce5
SH
1724 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1725 if (sock != NULL) {
1da177e4 1726 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1727 if (!err)
1728 err = sock->ops->shutdown(sock, how);
1729 fput_light(sock->file, fput_needed);
1da177e4
LT
1730 }
1731 return err;
1732}
1733
/*
 * Helper macros yielding the address of a msghdr field in either the 32-bit
 * (compat) or the native layout.  The fields concerned have the same type
 * (int / unsigned) on our platforms.
 *
 * The macros expect a variable named "flags" and, next to the pointer
 * "<msg>", a pointer named "<msg>_compat" to be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1740
1da177e4
LT
1741/*
1742 * BSD sendmsg interface
1743 */
1744
1745asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1746{
89bddce5
SH
1747 struct compat_msghdr __user *msg_compat =
1748 (struct compat_msghdr __user *)msg;
1da177e4
LT
1749 struct socket *sock;
1750 char address[MAX_SOCK_ADDR];
1751 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1752 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1753 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1754 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1755 unsigned char *ctl_buf = ctl;
1756 struct msghdr msg_sys;
1757 int err, ctl_len, iov_size, total_len;
6cb153ca 1758 int fput_needed;
89bddce5 1759
1da177e4
LT
1760 err = -EFAULT;
1761 if (MSG_CMSG_COMPAT & flags) {
1762 if (get_compat_msghdr(&msg_sys, msg_compat))
1763 return -EFAULT;
89bddce5
SH
1764 }
1765 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1766 return -EFAULT;
1767
6cb153ca 1768 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1769 if (!sock)
1da177e4
LT
1770 goto out;
1771
1772 /* do not move before msg_sys is valid */
1773 err = -EMSGSIZE;
1774 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1775 goto out_put;
1776
89bddce5 1777 /* Check whether to allocate the iovec area */
1da177e4
LT
1778 err = -ENOMEM;
1779 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1780 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1781 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1782 if (!iov)
1783 goto out_put;
1784 }
1785
1786 /* This will also move the address data into kernel space */
1787 if (MSG_CMSG_COMPAT & flags) {
1788 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1789 } else
1790 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1791 if (err < 0)
1da177e4
LT
1792 goto out_freeiov;
1793 total_len = err;
1794
1795 err = -ENOBUFS;
1796
1797 if (msg_sys.msg_controllen > INT_MAX)
1798 goto out_freeiov;
89bddce5 1799 ctl_len = msg_sys.msg_controllen;
1da177e4 1800 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1801 err =
1802 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1803 sizeof(ctl));
1da177e4
LT
1804 if (err)
1805 goto out_freeiov;
1806 ctl_buf = msg_sys.msg_control;
8920e8f9 1807 ctl_len = msg_sys.msg_controllen;
1da177e4 1808 } else if (ctl_len) {
89bddce5 1809 if (ctl_len > sizeof(ctl)) {
1da177e4 1810 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1811 if (ctl_buf == NULL)
1da177e4
LT
1812 goto out_freeiov;
1813 }
1814 err = -EFAULT;
1815 /*
1816 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1817 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1818 * checking falls down on this.
1819 */
89bddce5
SH
1820 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1821 ctl_len))
1da177e4
LT
1822 goto out_freectl;
1823 msg_sys.msg_control = ctl_buf;
1824 }
1825 msg_sys.msg_flags = flags;
1826
1827 if (sock->file->f_flags & O_NONBLOCK)
1828 msg_sys.msg_flags |= MSG_DONTWAIT;
1829 err = sock_sendmsg(sock, &msg_sys, total_len);
1830
1831out_freectl:
89bddce5 1832 if (ctl_buf != ctl)
1da177e4
LT
1833 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1834out_freeiov:
1835 if (iov != iovstack)
1836 sock_kfree_s(sock->sk, iov, iov_size);
1837out_put:
6cb153ca 1838 fput_light(sock->file, fput_needed);
89bddce5 1839out:
1da177e4
LT
1840 return err;
1841}
1842
1843/*
1844 * BSD recvmsg interface
1845 */
1846
89bddce5
SH
1847asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1848 unsigned int flags)
1da177e4 1849{
89bddce5
SH
1850 struct compat_msghdr __user *msg_compat =
1851 (struct compat_msghdr __user *)msg;
1da177e4
LT
1852 struct socket *sock;
1853 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1854 struct iovec *iov = iovstack;
1da177e4
LT
1855 struct msghdr msg_sys;
1856 unsigned long cmsg_ptr;
1857 int err, iov_size, total_len, len;
6cb153ca 1858 int fput_needed;
1da177e4
LT
1859
1860 /* kernel mode address */
1861 char addr[MAX_SOCK_ADDR];
1862
1863 /* user mode address pointers */
1864 struct sockaddr __user *uaddr;
1865 int __user *uaddr_len;
89bddce5 1866
1da177e4
LT
1867 if (MSG_CMSG_COMPAT & flags) {
1868 if (get_compat_msghdr(&msg_sys, msg_compat))
1869 return -EFAULT;
89bddce5
SH
1870 }
1871 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1872 return -EFAULT;
1da177e4 1873
6cb153ca 1874 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1875 if (!sock)
1876 goto out;
1877
1878 err = -EMSGSIZE;
1879 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1880 goto out_put;
89bddce5
SH
1881
1882 /* Check whether to allocate the iovec area */
1da177e4
LT
1883 err = -ENOMEM;
1884 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1885 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1886 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1887 if (!iov)
1888 goto out_put;
1889 }
1890
1891 /*
89bddce5
SH
1892 * Save the user-mode address (verify_iovec will change the
1893 * kernel msghdr to use the kernel address space)
1da177e4 1894 */
89bddce5
SH
1895
1896 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1897 uaddr_len = COMPAT_NAMELEN(msg);
1898 if (MSG_CMSG_COMPAT & flags) {
1899 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1900 } else
1901 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1902 if (err < 0)
1903 goto out_freeiov;
89bddce5 1904 total_len = err;
1da177e4
LT
1905
1906 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1907 msg_sys.msg_flags = 0;
1908 if (MSG_CMSG_COMPAT & flags)
1909 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1910
1da177e4
LT
1911 if (sock->file->f_flags & O_NONBLOCK)
1912 flags |= MSG_DONTWAIT;
1913 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1914 if (err < 0)
1915 goto out_freeiov;
1916 len = err;
1917
1918 if (uaddr != NULL) {
89bddce5
SH
1919 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1920 uaddr_len);
1da177e4
LT
1921 if (err < 0)
1922 goto out_freeiov;
1923 }
37f7f421
DM
1924 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1925 COMPAT_FLAGS(msg));
1da177e4
LT
1926 if (err)
1927 goto out_freeiov;
1928 if (MSG_CMSG_COMPAT & flags)
89bddce5 1929 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1930 &msg_compat->msg_controllen);
1931 else
89bddce5 1932 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1933 &msg->msg_controllen);
1934 if (err)
1935 goto out_freeiov;
1936 err = len;
1937
1938out_freeiov:
1939 if (iov != iovstack)
1940 sock_kfree_s(sock->sk, iov, iov_size);
1941out_put:
6cb153ca 1942 fput_light(sock->file, fput_needed);
1da177e4
LT
1943out:
1944 return err;
1945}
1946
1947#ifdef __ARCH_WANT_SYS_SOCKETCALL
1948
/*
 * Argument list sizes for sys_socketcall: entry [call] is the number of
 * bytes of unsigned-long arguments to copy in for that call number.
 */
#define NARGS_BYTES(count) ((count) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	NARGS_BYTES(0), NARGS_BYTES(3), NARGS_BYTES(3),
	NARGS_BYTES(3), NARGS_BYTES(2), NARGS_BYTES(3),
	NARGS_BYTES(3), NARGS_BYTES(3), NARGS_BYTES(4),
	NARGS_BYTES(4), NARGS_BYTES(4), NARGS_BYTES(6),
	NARGS_BYTES(6), NARGS_BYTES(2), NARGS_BYTES(5),
	NARGS_BYTES(5), NARGS_BYTES(3), NARGS_BYTES(3)
};

#undef NARGS_BYTES
1958
1959/*
89bddce5 1960 * System call vectors.
1da177e4
LT
1961 *
1962 * Argument checking cleaned up. Saved 20% in size.
1963 * This function doesn't need to set the kernel lock because
89bddce5 1964 * it is set by the callees.
1da177e4
LT
1965 */
1966
1967asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1968{
1969 unsigned long a[6];
89bddce5 1970 unsigned long a0, a1;
1da177e4
LT
1971 int err;
1972
89bddce5 1973 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
1974 return -EINVAL;
1975
1976 /* copy_from_user should be SMP safe. */
1977 if (copy_from_user(a, args, nargs[call]))
1978 return -EFAULT;
3ec3b2fb 1979
89bddce5 1980 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
1981 if (err)
1982 return err;
1983
89bddce5
SH
1984 a0 = a[0];
1985 a1 = a[1];
1986
1987 switch (call) {
1988 case SYS_SOCKET:
1989 err = sys_socket(a0, a1, a[2]);
1990 break;
1991 case SYS_BIND:
1992 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
1993 break;
1994 case SYS_CONNECT:
1995 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
1996 break;
1997 case SYS_LISTEN:
1998 err = sys_listen(a0, a1);
1999 break;
2000 case SYS_ACCEPT:
2001 err =
2002 sys_accept(a0, (struct sockaddr __user *)a1,
2003 (int __user *)a[2]);
2004 break;
2005 case SYS_GETSOCKNAME:
2006 err =
2007 sys_getsockname(a0, (struct sockaddr __user *)a1,
2008 (int __user *)a[2]);
2009 break;
2010 case SYS_GETPEERNAME:
2011 err =
2012 sys_getpeername(a0, (struct sockaddr __user *)a1,
2013 (int __user *)a[2]);
2014 break;
2015 case SYS_SOCKETPAIR:
2016 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2017 break;
2018 case SYS_SEND:
2019 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2020 break;
2021 case SYS_SENDTO:
2022 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2023 (struct sockaddr __user *)a[4], a[5]);
2024 break;
2025 case SYS_RECV:
2026 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2027 break;
2028 case SYS_RECVFROM:
2029 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2030 (struct sockaddr __user *)a[4],
2031 (int __user *)a[5]);
2032 break;
2033 case SYS_SHUTDOWN:
2034 err = sys_shutdown(a0, a1);
2035 break;
2036 case SYS_SETSOCKOPT:
2037 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2038 break;
2039 case SYS_GETSOCKOPT:
2040 err =
2041 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2042 (int __user *)a[4]);
2043 break;
2044 case SYS_SENDMSG:
2045 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2046 break;
2047 case SYS_RECVMSG:
2048 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2049 break;
2050 default:
2051 err = -EINVAL;
2052 break;
1da177e4
LT
2053 }
2054 return err;
2055}
2056
89bddce5 2057#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2058
55737fda
SH
2059/**
2060 * sock_register - add a socket protocol handler
2061 * @ops: description of protocol
2062 *
1da177e4
LT
2063 * This function is called by a protocol handler that wants to
2064 * advertise its address family, and have it linked into the
55737fda
SH
2065 * socket interface. The value ops->family coresponds to the
2066 * socket system call protocol family.
1da177e4 2067 */
f0fd27d4 2068int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2069{
2070 int err;
2071
2072 if (ops->family >= NPROTO) {
89bddce5
SH
2073 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2074 NPROTO);
1da177e4
LT
2075 return -ENOBUFS;
2076 }
55737fda
SH
2077
2078 spin_lock(&net_family_lock);
2079 if (net_families[ops->family])
2080 err = -EEXIST;
2081 else {
89bddce5 2082 net_families[ops->family] = ops;
1da177e4
LT
2083 err = 0;
2084 }
55737fda
SH
2085 spin_unlock(&net_family_lock);
2086
89bddce5 2087 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2088 return err;
2089}
2090
55737fda
SH
2091/**
2092 * sock_unregister - remove a protocol handler
2093 * @family: protocol family to remove
2094 *
1da177e4
LT
2095 * This function is called by a protocol handler that wants to
2096 * remove its address family, and have it unlinked from the
55737fda
SH
2097 * new socket creation.
2098 *
2099 * If protocol handler is a module, then it can use module reference
2100 * counts to protect against new references. If protocol handler is not
2101 * a module then it needs to provide its own protection in
2102 * the ops->create routine.
1da177e4 2103 */
f0fd27d4 2104void sock_unregister(int family)
1da177e4 2105{
f0fd27d4 2106 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2107
55737fda 2108 spin_lock(&net_family_lock);
89bddce5 2109 net_families[family] = NULL;
55737fda
SH
2110 spin_unlock(&net_family_lock);
2111
2112 synchronize_rcu();
2113
89bddce5 2114 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2115}
2116
77d76ea3 2117static int __init sock_init(void)
1da177e4
LT
2118{
2119 /*
89bddce5 2120 * Initialize sock SLAB cache.
1da177e4 2121 */
89bddce5 2122
1da177e4
LT
2123 sk_init();
2124
1da177e4 2125 /*
89bddce5 2126 * Initialize skbuff SLAB cache
1da177e4
LT
2127 */
2128 skb_init();
1da177e4
LT
2129
2130 /*
89bddce5 2131 * Initialize the protocols module.
1da177e4
LT
2132 */
2133
2134 init_inodecache();
2135 register_filesystem(&sock_fs_type);
2136 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2137
2138 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2139 */
2140
2141#ifdef CONFIG_NETFILTER
2142 netfilter_init();
2143#endif
cbeb321a
DM
2144
2145 return 0;
1da177e4
LT
2146}
2147
77d76ea3
AK
2148core_initcall(sock_init); /* early initcall */
2149
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the number of sockets in use to a seq_file.
 *
 * Adds up the per-CPU sockets_in_use counters over every possible CPU
 * and emits a single "sockets: used N" line.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2166
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 *
 * Hands the request to the protocol's compat_ioctl handler when one is
 * provided; otherwise returns -ENOIOCTLCMD.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int rc;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	rc = sock->ops->compat_ioctl(sock, cmd, arg);
	return rc;
}
#endif
2180
ac5a488e
SS
2181int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2182{
2183 return sock->ops->bind(sock, addr, addrlen);
2184}
2185
2186int kernel_listen(struct socket *sock, int backlog)
2187{
2188 return sock->ops->listen(sock, backlog);
2189}
2190
2191int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2192{
2193 struct sock *sk = sock->sk;
2194 int err;
2195
2196 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2197 newsock);
2198 if (err < 0)
2199 goto done;
2200
2201 err = sock->ops->accept(sock, *newsock, flags);
2202 if (err < 0) {
2203 sock_release(*newsock);
2204 goto done;
2205 }
2206
2207 (*newsock)->ops = sock->ops;
2208
2209done:
2210 return err;
2211}
2212
2213int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2214 int flags)
ac5a488e
SS
2215{
2216 return sock->ops->connect(sock, addr, addrlen, flags);
2217}
2218
2219int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2220 int *addrlen)
2221{
2222 return sock->ops->getname(sock, addr, addrlen, 0);
2223}
2224
2225int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2226 int *addrlen)
2227{
2228 return sock->ops->getname(sock, addr, addrlen, 1);
2229}
2230
2231int kernel_getsockopt(struct socket *sock, int level, int optname,
2232 char *optval, int *optlen)
2233{
2234 mm_segment_t oldfs = get_fs();
2235 int err;
2236
2237 set_fs(KERNEL_DS);
2238 if (level == SOL_SOCKET)
2239 err = sock_getsockopt(sock, level, optname, optval, optlen);
2240 else
2241 err = sock->ops->getsockopt(sock, level, optname, optval,
2242 optlen);
2243 set_fs(oldfs);
2244 return err;
2245}
2246
2247int kernel_setsockopt(struct socket *sock, int level, int optname,
2248 char *optval, int optlen)
2249{
2250 mm_segment_t oldfs = get_fs();
2251 int err;
2252
2253 set_fs(KERNEL_DS);
2254 if (level == SOL_SOCKET)
2255 err = sock_setsockopt(sock, level, optname, optval, optlen);
2256 else
2257 err = sock->ops->setsockopt(sock, level, optname, optval,
2258 optlen);
2259 set_fs(oldfs);
2260 return err;
2261}
2262
2263int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2264 size_t size, int flags)
2265{
2266 if (sock->ops->sendpage)
2267 return sock->ops->sendpage(sock, page, offset, size, flags);
2268
2269 return sock_no_sendpage(sock, page, offset, size, flags);
2270}
2271
2272int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2273{
2274 mm_segment_t oldfs = get_fs();
2275 int err;
2276
2277 set_fs(KERNEL_DS);
2278 err = sock->ops->ioctl(sock, cmd, arg);
2279 set_fs(oldfs);
2280
2281 return err;
2282}
2283
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Core socket interface */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* In-kernel socket helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);