[IPV4] ipconfig: Implement DHCP Class-identifier
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
115static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
116 struct pipe_inode_info *pipe, size_t len,
117 unsigned int flags);
1da177e4 118
1da177e4
LT
119/*
120 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
121 * in the operation structures but are done directly via the socketcall() multiplexor.
122 */
123
da7071d7 124static const struct file_operations socket_file_ops = {
1da177e4
LT
125 .owner = THIS_MODULE,
126 .llseek = no_llseek,
127 .aio_read = sock_aio_read,
128 .aio_write = sock_aio_write,
129 .poll = sock_poll,
130 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
131#ifdef CONFIG_COMPAT
132 .compat_ioctl = compat_sock_ioctl,
133#endif
1da177e4
LT
134 .mmap = sock_mmap,
135 .open = sock_no_open, /* special open code to disallow open via /proc */
136 .release = sock_close,
137 .fasync = sock_fasync,
5274f052
JA
138 .sendpage = sock_sendpage,
139 .splice_write = generic_splice_sendpage,
9c55e01c 140 .splice_read = sock_splice_read,
1da177e4
LT
141};
142
143/*
144 * The protocol list. Each protocol is registered in here.
145 */
146
1da177e4 147static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 148static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 149
1da177e4
LT
150/*
151 * Statistics counters of the socket lists
152 */
153
154static DEFINE_PER_CPU(int, sockets_in_use) = 0;
155
156/*
89bddce5
SH
157 * Support routines.
158 * Move socket addresses back and forth across the kernel/user
159 * divide and look after the messy bits.
1da177e4
LT
160 */
161
89bddce5 162#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
163 16 for IP, 16 for IPX,
164 24 for IPv6,
89bddce5 165 about 80 for AX.25
1da177e4
LT
166 must be at least one bigger than
167 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 168 :unix_mkname()).
1da177e4 169 */
89bddce5 170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
182int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
183{
89bddce5 184 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5
SH
209
210int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
211 int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
89bddce5
SH
216 err = get_user(len, ulen);
217 if (err)
1da177e4 218 return err;
89bddce5
SH
219 if (len > klen)
220 len = klen;
221 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 222 return -EINVAL;
89bddce5 223 if (len) {
d6fe3945
SG
224 if (audit_sockaddr(klen, kaddr))
225 return -ENOMEM;
89bddce5 226 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
227 return -EFAULT;
228 }
229 /*
89bddce5
SH
230 * "fromlen shall refer to the value before truncation.."
231 * 1003.1g
1da177e4
LT
232 */
233 return __put_user(klen, ulen);
234}
235
236#define SOCKFS_MAGIC 0x534F434B
237
e18b890b 238static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
239
240static struct inode *sock_alloc_inode(struct super_block *sb)
241{
242 struct socket_alloc *ei;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
247 init_waitqueue_head(&ei->socket.wait);
89bddce5 248
1da177e4
LT
249 ei->socket.fasync_list = NULL;
250 ei->socket.state = SS_UNCONNECTED;
251 ei->socket.flags = 0;
252 ei->socket.ops = NULL;
253 ei->socket.sk = NULL;
254 ei->socket.file = NULL;
1da177e4
LT
255
256 return &ei->vfs_inode;
257}
258
259static void sock_destroy_inode(struct inode *inode)
260{
261 kmem_cache_free(sock_inode_cachep,
262 container_of(inode, struct socket_alloc, vfs_inode));
263}
264
4ba9b9d0 265static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 266{
89bddce5 267 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 268
a35afb83 269 inode_init_once(&ei->vfs_inode);
1da177e4 270}
89bddce5 271
1da177e4
LT
272static int init_inodecache(void)
273{
274 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
275 sizeof(struct socket_alloc),
276 0,
277 (SLAB_HWCACHE_ALIGN |
278 SLAB_RECLAIM_ACCOUNT |
279 SLAB_MEM_SPREAD),
20c2df83 280 init_once);
1da177e4
LT
281 if (sock_inode_cachep == NULL)
282 return -ENOMEM;
283 return 0;
284}
285
286static struct super_operations sockfs_ops = {
287 .alloc_inode = sock_alloc_inode,
288 .destroy_inode =sock_destroy_inode,
289 .statfs = simple_statfs,
290};
291
454e2398 292static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
293 int flags, const char *dev_name, void *data,
294 struct vfsmount *mnt)
1da177e4 295{
454e2398
DH
296 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
297 mnt);
1da177e4
LT
298}
299
ba89966c 300static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
301
302static struct file_system_type sock_fs_type = {
303 .name = "sockfs",
304 .get_sb = sockfs_get_sb,
305 .kill_sb = kill_anon_super,
306};
89bddce5 307
1da177e4
LT
308static int sockfs_delete_dentry(struct dentry *dentry)
309{
304e61e6
ED
310 /*
311 * At creation time, we pretended this dentry was hashed
312 * (by clearing DCACHE_UNHASHED bit in d_flags)
313 * At delete time, we restore the truth : not hashed.
314 * (so that dput() can proceed correctly)
315 */
316 dentry->d_flags |= DCACHE_UNHASHED;
317 return 0;
1da177e4 318}
c23fbb6b
ED
319
320/*
321 * sockfs_dname() is called from d_path().
322 */
323static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
324{
325 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
326 dentry->d_inode->i_ino);
327}
328
1da177e4 329static struct dentry_operations sockfs_dentry_operations = {
89bddce5 330 .d_delete = sockfs_delete_dentry,
c23fbb6b 331 .d_dname = sockfs_dname,
1da177e4
LT
332};
333
334/*
335 * Obtains the first available file descriptor and sets it up for use.
336 *
39d8c1b6
DM
337 * These functions create file structures and map them to fd space
338 * of the current process. On success it returns file descriptor
1da177e4
LT
339 * and file struct implicitly stored in sock->file.
340 * Note that another thread may close file descriptor before we return
341 * from this function. We use the fact that now we do not refer
342 * to socket after mapping. If one day we will need it, this
343 * function will increment ref. count on file by 1.
344 *
345 * In any case returned fd MAY BE not valid!
346 * This race condition is unavoidable
347 * with shared fd spaces, we cannot solve it inside kernel,
348 * but we take care of internal coherence yet.
349 */
350
39d8c1b6 351static int sock_alloc_fd(struct file **filep)
1da177e4
LT
352{
353 int fd;
1da177e4
LT
354
355 fd = get_unused_fd();
39d8c1b6 356 if (likely(fd >= 0)) {
1da177e4
LT
357 struct file *file = get_empty_filp();
358
39d8c1b6
DM
359 *filep = file;
360 if (unlikely(!file)) {
1da177e4 361 put_unused_fd(fd);
39d8c1b6 362 return -ENFILE;
1da177e4 363 }
39d8c1b6
DM
364 } else
365 *filep = NULL;
366 return fd;
367}
1da177e4 368
39d8c1b6
DM
369static int sock_attach_fd(struct socket *sock, struct file *file)
370{
ce8d2cdf 371 struct dentry *dentry;
c23fbb6b 372 struct qstr name = { .name = "" };
39d8c1b6 373
ce8d2cdf
DH
374 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!dentry))
39d8c1b6
DM
376 return -ENOMEM;
377
ce8d2cdf 378 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
379 /*
380 * We dont want to push this dentry into global dentry hash table.
381 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
382 * This permits a working /proc/$pid/fd/XXX on sockets
383 */
ce8d2cdf
DH
384 dentry->d_flags &= ~DCACHE_UNHASHED;
385 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
386
387 sock->file = file;
ce8d2cdf
DH
388 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
389 &socket_file_ops);
390 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
391 file->f_flags = O_RDWR;
392 file->f_pos = 0;
393 file->private_data = sock;
1da177e4 394
39d8c1b6
DM
395 return 0;
396}
397
/*
 * Allocate a descriptor and file for @sock and install the file in the
 * descriptor table.  Returns the descriptor, or a negative error code
 * after undoing any partial allocation.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4
LT
425/**
426 * sockfd_lookup - Go from a file number to its socket slot
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * to is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
1da177e4 454
6cb153ca
BL
455static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
456{
457 struct file *file;
458 struct socket *sock;
459
3672558c 460 *err = -EBADF;
6cb153ca
BL
461 file = fget_light(fd, fput_needed);
462 if (file) {
463 sock = sock_from_file(file, err);
464 if (sock)
465 return sock;
466 fput_light(file, *fput_needed);
1da177e4 467 }
6cb153ca 468 return NULL;
1da177e4
LT
469}
470
471/**
472 * sock_alloc - allocate a socket
89bddce5 473 *
1da177e4
LT
474 * Allocate a new inode and socket object. The two are bound together
475 * and initialised. The socket is then returned. If we are out of inodes
476 * NULL is returned.
477 */
478
479static struct socket *sock_alloc(void)
480{
89bddce5
SH
481 struct inode *inode;
482 struct socket *sock;
1da177e4
LT
483
484 inode = new_inode(sock_mnt->mnt_sb);
485 if (!inode)
486 return NULL;
487
488 sock = SOCKET_I(inode);
489
89bddce5 490 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
491 inode->i_uid = current->fsuid;
492 inode->i_gid = current->fsgid;
493
494 get_cpu_var(sockets_in_use)++;
495 put_cpu_var(sockets_in_use);
496 return sock;
497}
498
499/*
500 * In theory you can't get an open on this inode, but /proc provides
501 * a back door. Remember to keep it shut otherwise you'll let the
502 * creepy crawlies in.
503 */
89bddce5 504
1da177e4
LT
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	/* Opening a socket inode is never allowed; both arguments are unused. */
	(void)irrelevant;
	(void)dontcare;

	return -ENXIO;
}
509
4b6f5d20 510const struct file_operations bad_sock_fops = {
1da177e4
LT
511 .owner = THIS_MODULE,
512 .open = sock_no_open,
513};
514
515/**
516 * sock_release - close a socket
517 * @sock: socket to close
518 *
519 * The socket is released from the protocol stack if it has a release
520 * callback, and the inode is then released if the socket is bound to
89bddce5 521 * an inode not a file.
1da177e4 522 */
89bddce5 523
1da177e4
LT
524void sock_release(struct socket *sock)
525{
526 if (sock->ops) {
527 struct module *owner = sock->ops->owner;
528
529 sock->ops->release(sock);
530 sock->ops = NULL;
531 module_put(owner);
532 }
533
534 if (sock->fasync_list)
535 printk(KERN_ERR "sock_release: fasync list not empty!\n");
536
537 get_cpu_var(sockets_in_use)--;
538 put_cpu_var(sockets_in_use);
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
89bddce5 546static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
547 struct msghdr *msg, size_t size)
548{
549 struct sock_iocb *si = kiocb_to_siocb(iocb);
550 int err;
551
552 si->sock = sock;
553 si->scm = NULL;
554 si->msg = msg;
555 si->size = size;
556
557 err = security_socket_sendmsg(sock, msg, size);
558 if (err)
559 return err;
560
561 return sock->ops->sendmsg(iocb, sock, msg, size);
562}
563
564int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
565{
566 struct kiocb iocb;
567 struct sock_iocb siocb;
568 int ret;
569
570 init_sync_kiocb(&iocb, NULL);
571 iocb.private = &siocb;
572 ret = __sock_sendmsg(&iocb, sock, msg, size);
573 if (-EIOCBQUEUED == ret)
574 ret = wait_on_sync_kiocb(&iocb);
575 return ret;
576}
577
578int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
579 struct kvec *vec, size_t num, size_t size)
580{
581 mm_segment_t oldfs = get_fs();
582 int result;
583
584 set_fs(KERNEL_DS);
585 /*
586 * the following is safe, since for compiler definitions of kvec and
587 * iovec are identical, yielding the same in-core layout and alignment
588 */
89bddce5 589 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
590 msg->msg_iovlen = num;
591 result = sock_sendmsg(sock, msg, size);
592 set_fs(oldfs);
593 return result;
594}
595
92f37fd2
ED
596/*
597 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
598 */
599void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
600 struct sk_buff *skb)
601{
602 ktime_t kt = skb->tstamp;
603
604 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
605 struct timeval tv;
606 /* Race occurred between timestamp enabling and packet
607 receiving. Fill in the current time for now. */
608 if (kt.tv64 == 0)
609 kt = ktime_get_real();
610 skb->tstamp = kt;
611 tv = ktime_to_timeval(kt);
612 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
613 } else {
614 struct timespec ts;
615 /* Race occurred between timestamp enabling and packet
616 receiving. Fill in the current time for now. */
617 if (kt.tv64 == 0)
618 kt = ktime_get_real();
619 skb->tstamp = kt;
620 ts = ktime_to_timespec(kt);
621 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
622 }
623}
624
7c81fd8b
ACM
625EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
626
89bddce5 627static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
628 struct msghdr *msg, size_t size, int flags)
629{
630 int err;
631 struct sock_iocb *si = kiocb_to_siocb(iocb);
632
633 si->sock = sock;
634 si->scm = NULL;
635 si->msg = msg;
636 si->size = size;
637 si->flags = flags;
638
639 err = security_socket_recvmsg(sock, msg, size, flags);
640 if (err)
641 return err;
642
643 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
644}
645
89bddce5 646int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
647 size_t size, int flags)
648{
649 struct kiocb iocb;
650 struct sock_iocb siocb;
651 int ret;
652
89bddce5 653 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
654 iocb.private = &siocb;
655 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
656 if (-EIOCBQUEUED == ret)
657 ret = wait_on_sync_kiocb(&iocb);
658 return ret;
659}
660
89bddce5
SH
661int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
662 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
663{
664 mm_segment_t oldfs = get_fs();
665 int result;
666
667 set_fs(KERNEL_DS);
668 /*
669 * the following is safe, since for compiler definitions of kvec and
670 * iovec are identical, yielding the same in-core layout and alignment
671 */
89bddce5 672 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
673 result = sock_recvmsg(sock, msg, size, flags);
674 set_fs(oldfs);
675 return result;
676}
677
678static void sock_aio_dtor(struct kiocb *iocb)
679{
680 kfree(iocb->private);
681}
682
ce1d4d3e
CH
683static ssize_t sock_sendpage(struct file *file, struct page *page,
684 int offset, size_t size, loff_t *ppos, int more)
1da177e4 685{
1da177e4
LT
686 struct socket *sock;
687 int flags;
688
ce1d4d3e
CH
689 sock = file->private_data;
690
691 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
692 if (more)
693 flags |= MSG_MORE;
694
695 return sock->ops->sendpage(sock, page, offset, size, flags);
696}
1da177e4 697
9c55e01c
JA
698static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
699 struct pipe_inode_info *pipe, size_t len,
700 unsigned int flags)
701{
702 struct socket *sock = file->private_data;
703
704 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
705}
706
ce1d4d3e 707static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 708 struct sock_iocb *siocb)
ce1d4d3e
CH
709{
710 if (!is_sync_kiocb(iocb)) {
711 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
712 if (!siocb)
713 return NULL;
1da177e4
LT
714 iocb->ki_dtor = sock_aio_dtor;
715 }
1da177e4 716
ce1d4d3e 717 siocb->kiocb = iocb;
ce1d4d3e
CH
718 iocb->private = siocb;
719 return siocb;
1da177e4
LT
720}
721
ce1d4d3e 722static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
723 struct file *file, const struct iovec *iov,
724 unsigned long nr_segs)
ce1d4d3e
CH
725{
726 struct socket *sock = file->private_data;
727 size_t size = 0;
728 int i;
1da177e4 729
89bddce5
SH
730 for (i = 0; i < nr_segs; i++)
731 size += iov[i].iov_len;
1da177e4 732
ce1d4d3e
CH
733 msg->msg_name = NULL;
734 msg->msg_namelen = 0;
735 msg->msg_control = NULL;
736 msg->msg_controllen = 0;
89bddce5 737 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
738 msg->msg_iovlen = nr_segs;
739 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
740
741 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
742}
743
027445c3
BP
744static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
745 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
746{
747 struct sock_iocb siocb, *x;
748
1da177e4
LT
749 if (pos != 0)
750 return -ESPIPE;
027445c3
BP
751
752 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
753 return 0;
754
027445c3
BP
755
756 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
757 if (!x)
758 return -ENOMEM;
027445c3 759 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
760}
761
ce1d4d3e 762static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
763 struct file *file, const struct iovec *iov,
764 unsigned long nr_segs)
1da177e4 765{
ce1d4d3e
CH
766 struct socket *sock = file->private_data;
767 size_t size = 0;
768 int i;
1da177e4 769
89bddce5
SH
770 for (i = 0; i < nr_segs; i++)
771 size += iov[i].iov_len;
1da177e4 772
ce1d4d3e
CH
773 msg->msg_name = NULL;
774 msg->msg_namelen = 0;
775 msg->msg_control = NULL;
776 msg->msg_controllen = 0;
89bddce5 777 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
778 msg->msg_iovlen = nr_segs;
779 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
780 if (sock->type == SOCK_SEQPACKET)
781 msg->msg_flags |= MSG_EOR;
1da177e4 782
ce1d4d3e 783 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
784}
785
027445c3
BP
786static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
787 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
788{
789 struct sock_iocb siocb, *x;
1da177e4 790
ce1d4d3e
CH
791 if (pos != 0)
792 return -ESPIPE;
027445c3 793
027445c3 794 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
795 if (!x)
796 return -ENOMEM;
1da177e4 797
027445c3 798 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
799}
800
1da177e4
LT
801/*
802 * Atomic setting of ioctl hooks to avoid race
803 * with module unload.
804 */
805
4a3e2f71 806static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 807static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 808
881d966b 809void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 810{
4a3e2f71 811 mutex_lock(&br_ioctl_mutex);
1da177e4 812 br_ioctl_hook = hook;
4a3e2f71 813 mutex_unlock(&br_ioctl_mutex);
1da177e4 814}
89bddce5 815
1da177e4
LT
816EXPORT_SYMBOL(brioctl_set);
817
4a3e2f71 818static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 819static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 820
881d966b 821void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 822{
4a3e2f71 823 mutex_lock(&vlan_ioctl_mutex);
1da177e4 824 vlan_ioctl_hook = hook;
4a3e2f71 825 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 826}
89bddce5 827
1da177e4
LT
828EXPORT_SYMBOL(vlan_ioctl_set);
829
4a3e2f71 830static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 831static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 832
89bddce5 833void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 834{
4a3e2f71 835 mutex_lock(&dlci_ioctl_mutex);
1da177e4 836 dlci_ioctl_hook = hook;
4a3e2f71 837 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 838}
89bddce5 839
1da177e4
LT
840EXPORT_SYMBOL(dlci_ioctl_set);
841
842/*
843 * With an ioctl, arg may well be a user mode pointer, but we don't know
844 * what to do with it - that's up to the protocol still.
845 */
846
847static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
848{
849 struct socket *sock;
881d966b 850 struct sock *sk;
1da177e4
LT
851 void __user *argp = (void __user *)arg;
852 int pid, err;
881d966b 853 struct net *net;
1da177e4 854
b69aee04 855 sock = file->private_data;
881d966b
EB
856 sk = sock->sk;
857 net = sk->sk_net;
1da177e4 858 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 859 err = dev_ioctl(net, cmd, argp);
1da177e4 860 } else
d86b5e0e 861#ifdef CONFIG_WIRELESS_EXT
1da177e4 862 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
89bddce5
SH
865#endif /* CONFIG_WIRELESS_EXT */
866 switch (cmd) {
1da177e4
LT
867 case FIOSETOWN:
868 case SIOCSPGRP:
869 err = -EFAULT;
870 if (get_user(pid, (int __user *)argp))
871 break;
872 err = f_setown(sock->file, pid, 1);
873 break;
874 case FIOGETOWN:
875 case SIOCGPGRP:
609d7fa9 876 err = put_user(f_getown(sock->file),
89bddce5 877 (int __user *)argp);
1da177e4
LT
878 break;
879 case SIOCGIFBR:
880 case SIOCSIFBR:
881 case SIOCBRADDBR:
882 case SIOCBRDELBR:
883 err = -ENOPKG;
884 if (!br_ioctl_hook)
885 request_module("bridge");
886
4a3e2f71 887 mutex_lock(&br_ioctl_mutex);
89bddce5 888 if (br_ioctl_hook)
881d966b 889 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 890 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
891 break;
892 case SIOCGIFVLAN:
893 case SIOCSIFVLAN:
894 err = -ENOPKG;
895 if (!vlan_ioctl_hook)
896 request_module("8021q");
897
4a3e2f71 898 mutex_lock(&vlan_ioctl_mutex);
1da177e4 899 if (vlan_ioctl_hook)
881d966b 900 err = vlan_ioctl_hook(net, argp);
4a3e2f71 901 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 902 break;
1da177e4
LT
903 case SIOCADDDLCI:
904 case SIOCDELDLCI:
905 err = -ENOPKG;
906 if (!dlci_ioctl_hook)
907 request_module("dlci");
908
909 if (dlci_ioctl_hook) {
4a3e2f71 910 mutex_lock(&dlci_ioctl_mutex);
1da177e4 911 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 912 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
913 }
914 break;
915 default:
916 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
917
918 /*
919 * If this ioctl is unknown try to hand it down
920 * to the NIC driver.
921 */
922 if (err == -ENOIOCTLCMD)
881d966b 923 err = dev_ioctl(net, cmd, argp);
1da177e4 924 break;
89bddce5 925 }
1da177e4
LT
926 return err;
927}
928
929int sock_create_lite(int family, int type, int protocol, struct socket **res)
930{
931 int err;
932 struct socket *sock = NULL;
89bddce5 933
1da177e4
LT
934 err = security_socket_create(family, type, protocol, 1);
935 if (err)
936 goto out;
937
938 sock = sock_alloc();
939 if (!sock) {
940 err = -ENOMEM;
941 goto out;
942 }
943
1da177e4 944 sock->type = type;
7420ed23
VY
945 err = security_socket_post_create(sock, family, type, protocol, 1);
946 if (err)
947 goto out_release;
948
1da177e4
LT
949out:
950 *res = sock;
951 return err;
7420ed23
VY
952out_release:
953 sock_release(sock);
954 sock = NULL;
955 goto out;
1da177e4
LT
956}
957
958/* No kernel lock held - perfect */
89bddce5 959static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
960{
961 struct socket *sock;
962
963 /*
89bddce5 964 * We can't return errors to poll, so it's either yes or no.
1da177e4 965 */
b69aee04 966 sock = file->private_data;
1da177e4
LT
967 return sock->ops->poll(file, sock, wait);
968}
969
89bddce5 970static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 971{
b69aee04 972 struct socket *sock = file->private_data;
1da177e4
LT
973
974 return sock->ops->mmap(file, sock, vma);
975}
976
20380731 977static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
978{
979 /*
89bddce5
SH
980 * It was possible the inode is NULL we were
981 * closing an unfinished socket.
1da177e4
LT
982 */
983
89bddce5 984 if (!inode) {
1da177e4
LT
985 printk(KERN_DEBUG "sock_close: NULL inode\n");
986 return 0;
987 }
988 sock_fasync(-1, filp, 0);
989 sock_release(SOCKET_I(inode));
990 return 0;
991}
992
993/*
994 * Update the socket async list
995 *
996 * Fasync_list locking strategy.
997 *
998 * 1. fasync_list is modified only under process context socket lock
999 * i.e. under semaphore.
1000 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1001 * or under socket lock.
1002 * 3. fasync_list can be used from softirq context, so that
1003 * modification under socket lock have to be enhanced with
1004 * write_lock_bh(&sk->sk_callback_lock).
1005 * --ANK (990710)
1006 */
1007
1008static int sock_fasync(int fd, struct file *filp, int on)
1009{
89bddce5 1010 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1011 struct socket *sock;
1012 struct sock *sk;
1013
89bddce5 1014 if (on) {
8b3a7005 1015 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1016 if (fna == NULL)
1da177e4
LT
1017 return -ENOMEM;
1018 }
1019
b69aee04 1020 sock = filp->private_data;
1da177e4 1021
89bddce5
SH
1022 sk = sock->sk;
1023 if (sk == NULL) {
1da177e4
LT
1024 kfree(fna);
1025 return -EINVAL;
1026 }
1027
1028 lock_sock(sk);
1029
89bddce5 1030 prev = &(sock->fasync_list);
1da177e4 1031
89bddce5
SH
1032 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1033 if (fa->fa_file == filp)
1da177e4
LT
1034 break;
1035
89bddce5
SH
1036 if (on) {
1037 if (fa != NULL) {
1da177e4 1038 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1039 fa->fa_fd = fd;
1da177e4
LT
1040 write_unlock_bh(&sk->sk_callback_lock);
1041
1042 kfree(fna);
1043 goto out;
1044 }
89bddce5
SH
1045 fna->fa_file = filp;
1046 fna->fa_fd = fd;
1047 fna->magic = FASYNC_MAGIC;
1048 fna->fa_next = sock->fasync_list;
1da177e4 1049 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1050 sock->fasync_list = fna;
1da177e4 1051 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1052 } else {
1053 if (fa != NULL) {
1da177e4 1054 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1055 *prev = fa->fa_next;
1da177e4
LT
1056 write_unlock_bh(&sk->sk_callback_lock);
1057 kfree(fa);
1058 }
1059 }
1060
1061out:
1062 release_sock(sock->sk);
1063 return 0;
1064}
1065
1066/* This function may be called only under socket lock or callback_lock */
1067
1068int sock_wake_async(struct socket *sock, int how, int band)
1069{
1070 if (!sock || !sock->fasync_list)
1071 return -1;
89bddce5 1072 switch (how) {
1da177e4 1073 case 1:
89bddce5 1074
1da177e4
LT
1075 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1076 break;
1077 goto call_kill;
1078 case 2:
1079 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1080 break;
1081 /* fall through */
1082 case 0:
89bddce5 1083call_kill:
1da177e4
LT
1084 __kill_fasync(sock->fasync_list, SIGIO, band);
1085 break;
1086 case 3:
1087 __kill_fasync(sock->fasync_list, SIGURG, band);
1088 }
1089 return 0;
1090}
1091
1b8d7ae4 1092static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1093 struct socket **res, int kern)
1da177e4
LT
1094{
1095 int err;
1096 struct socket *sock;
55737fda 1097 const struct net_proto_family *pf;
1da177e4
LT
1098
1099 /*
89bddce5 1100 * Check protocol is in range
1da177e4
LT
1101 */
1102 if (family < 0 || family >= NPROTO)
1103 return -EAFNOSUPPORT;
1104 if (type < 0 || type >= SOCK_MAX)
1105 return -EINVAL;
1106
1107 /* Compatibility.
1108
1109 This uglymoron is moved from INET layer to here to avoid
1110 deadlock in module load.
1111 */
1112 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1113 static int warned;
1da177e4
LT
1114 if (!warned) {
1115 warned = 1;
89bddce5
SH
1116 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1117 current->comm);
1da177e4
LT
1118 }
1119 family = PF_PACKET;
1120 }
1121
1122 err = security_socket_create(family, type, protocol, kern);
1123 if (err)
1124 return err;
89bddce5 1125
55737fda
SH
1126 /*
1127 * Allocate the socket and allow the family to set things up. if
1128 * the protocol is 0, the family is instructed to select an appropriate
1129 * default.
1130 */
1131 sock = sock_alloc();
1132 if (!sock) {
1133 if (net_ratelimit())
1134 printk(KERN_WARNING "socket: no more sockets\n");
1135 return -ENFILE; /* Not exactly a match, but its the
1136 closest posix thing */
1137 }
1138
1139 sock->type = type;
1140
1da177e4 1141#if defined(CONFIG_KMOD)
89bddce5
SH
1142 /* Attempt to load a protocol module if the find failed.
1143 *
1144 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1145 * requested real, full-featured networking support upon configuration.
1146 * Otherwise module support will break!
1147 */
55737fda 1148 if (net_families[family] == NULL)
89bddce5 1149 request_module("net-pf-%d", family);
1da177e4
LT
1150#endif
1151
55737fda
SH
1152 rcu_read_lock();
1153 pf = rcu_dereference(net_families[family]);
1154 err = -EAFNOSUPPORT;
1155 if (!pf)
1156 goto out_release;
1da177e4
LT
1157
1158 /*
1159 * We will call the ->create function, that possibly is in a loadable
1160 * module, so we have to bump that loadable module refcnt first.
1161 */
55737fda 1162 if (!try_module_get(pf->owner))
1da177e4
LT
1163 goto out_release;
1164
55737fda
SH
1165 /* Now protected by module ref count */
1166 rcu_read_unlock();
1167
1b8d7ae4 1168 err = pf->create(net, sock, protocol);
55737fda 1169 if (err < 0)
1da177e4 1170 goto out_module_put;
a79af59e 1171
1da177e4
LT
1172 /*
1173 * Now to bump the refcnt of the [loadable] module that owns this
1174 * socket at sock_release time we decrement its refcnt.
1175 */
55737fda
SH
1176 if (!try_module_get(sock->ops->owner))
1177 goto out_module_busy;
1178
1da177e4
LT
1179 /*
1180 * Now that we're done with the ->create function, the [loadable]
1181 * module can have its refcnt decremented
1182 */
55737fda 1183 module_put(pf->owner);
7420ed23
VY
1184 err = security_socket_post_create(sock, family, type, protocol, kern);
1185 if (err)
3b185525 1186 goto out_sock_release;
55737fda 1187 *res = sock;
1da177e4 1188
55737fda
SH
1189 return 0;
1190
1191out_module_busy:
1192 err = -EAFNOSUPPORT;
1da177e4 1193out_module_put:
55737fda
SH
1194 sock->ops = NULL;
1195 module_put(pf->owner);
1196out_sock_release:
1da177e4 1197 sock_release(sock);
55737fda
SH
1198 return err;
1199
1200out_release:
1201 rcu_read_unlock();
1202 goto out_sock_release;
1da177e4
LT
1203}
1204
1205int sock_create(int family, int type, int protocol, struct socket **res)
1206{
1b8d7ae4 1207 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1208}
1209
1210int sock_create_kern(int family, int type, int protocol, struct socket **res)
1211{
1b8d7ae4 1212 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1213}
1214
1215asmlinkage long sys_socket(int family, int type, int protocol)
1216{
1217 int retval;
1218 struct socket *sock;
1219
1220 retval = sock_create(family, type, protocol, &sock);
1221 if (retval < 0)
1222 goto out;
1223
1224 retval = sock_map_fd(sock);
1225 if (retval < 0)
1226 goto out_release;
1227
1228out:
1229 /* It may be already another descriptor 8) Not kernel problem. */
1230 return retval;
1231
1232out_release:
1233 sock_release(sock);
1234 return retval;
1235}
1236
1237/*
1238 * Create a pair of connected sockets.
1239 */
1240
89bddce5
SH
1241asmlinkage long sys_socketpair(int family, int type, int protocol,
1242 int __user *usockvec)
1da177e4
LT
1243{
1244 struct socket *sock1, *sock2;
1245 int fd1, fd2, err;
db349509 1246 struct file *newfile1, *newfile2;
1da177e4
LT
1247
1248 /*
1249 * Obtain the first socket and check if the underlying protocol
1250 * supports the socketpair call.
1251 */
1252
1253 err = sock_create(family, type, protocol, &sock1);
1254 if (err < 0)
1255 goto out;
1256
1257 err = sock_create(family, type, protocol, &sock2);
1258 if (err < 0)
1259 goto out_release_1;
1260
1261 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1262 if (err < 0)
1da177e4
LT
1263 goto out_release_both;
1264
db349509 1265 fd1 = sock_alloc_fd(&newfile1);
bf3c23d1
DM
1266 if (unlikely(fd1 < 0)) {
1267 err = fd1;
db349509 1268 goto out_release_both;
bf3c23d1 1269 }
1da177e4 1270
db349509
AV
1271 fd2 = sock_alloc_fd(&newfile2);
1272 if (unlikely(fd2 < 0)) {
bf3c23d1 1273 err = fd2;
db349509
AV
1274 put_filp(newfile1);
1275 put_unused_fd(fd1);
1da177e4 1276 goto out_release_both;
db349509 1277 }
1da177e4 1278
db349509
AV
1279 err = sock_attach_fd(sock1, newfile1);
1280 if (unlikely(err < 0)) {
1281 goto out_fd2;
1282 }
1283
1284 err = sock_attach_fd(sock2, newfile2);
1285 if (unlikely(err < 0)) {
1286 fput(newfile1);
1287 goto out_fd1;
1288 }
1289
1290 err = audit_fd_pair(fd1, fd2);
1291 if (err < 0) {
1292 fput(newfile1);
1293 fput(newfile2);
1294 goto out_fd;
1295 }
1da177e4 1296
db349509
AV
1297 fd_install(fd1, newfile1);
1298 fd_install(fd2, newfile2);
1da177e4
LT
1299 /* fd1 and fd2 may be already another descriptors.
1300 * Not kernel problem.
1301 */
1302
89bddce5 1303 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1304 if (!err)
1305 err = put_user(fd2, &usockvec[1]);
1306 if (!err)
1307 return 0;
1308
1309 sys_close(fd2);
1310 sys_close(fd1);
1311 return err;
1312
1da177e4 1313out_release_both:
89bddce5 1314 sock_release(sock2);
1da177e4 1315out_release_1:
89bddce5 1316 sock_release(sock1);
1da177e4
LT
1317out:
1318 return err;
db349509
AV
1319
1320out_fd2:
1321 put_filp(newfile1);
1322 sock_release(sock1);
1323out_fd1:
1324 put_filp(newfile2);
1325 sock_release(sock2);
1326out_fd:
1327 put_unused_fd(fd1);
1328 put_unused_fd(fd2);
1329 goto out;
1da177e4
LT
1330}
1331
1da177e4
LT
1332/*
1333 * Bind a name to a socket. Nothing much to do here since it's
1334 * the protocol's responsibility to handle the local address.
1335 *
1336 * We move the socket address to kernel space before we call
1337 * the protocol layer (having also checked the address is ok).
1338 */
1339
1340asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1341{
1342 struct socket *sock;
1343 char address[MAX_SOCK_ADDR];
6cb153ca 1344 int err, fput_needed;
1da177e4 1345
89bddce5 1346 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1347 if (sock) {
89bddce5
SH
1348 err = move_addr_to_kernel(umyaddr, addrlen, address);
1349 if (err >= 0) {
1350 err = security_socket_bind(sock,
1351 (struct sockaddr *)address,
1352 addrlen);
6cb153ca
BL
1353 if (!err)
1354 err = sock->ops->bind(sock,
89bddce5
SH
1355 (struct sockaddr *)
1356 address, addrlen);
1da177e4 1357 }
6cb153ca 1358 fput_light(sock->file, fput_needed);
89bddce5 1359 }
1da177e4
LT
1360 return err;
1361}
1362
1da177e4
LT
1363/*
1364 * Perform a listen. Basically, we allow the protocol to do anything
1365 * necessary for a listen, and if that works, we mark the socket as
1366 * ready for listening.
1367 */
1368
7a42c217 1369int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1370
1371asmlinkage long sys_listen(int fd, int backlog)
1372{
1373 struct socket *sock;
6cb153ca 1374 int err, fput_needed;
89bddce5
SH
1375
1376 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1377 if (sock) {
1378 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1379 backlog = sysctl_somaxconn;
1380
1381 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1382 if (!err)
1383 err = sock->ops->listen(sock, backlog);
1da177e4 1384
6cb153ca 1385 fput_light(sock->file, fput_needed);
1da177e4
LT
1386 }
1387 return err;
1388}
1389
1da177e4
LT
1390/*
1391 * For accept, we attempt to create a new socket, set up the link
1392 * with the client, wake up the client, then return the new
1393 * connected fd. We collect the address of the connector in kernel
1394 * space and move it to user at the very end. This is unclean because
1395 * we open the socket then return an error.
1396 *
1397 * 1003.1g adds the ability to recvmsg() to query connection pending
1398 * status to recvmsg. We need to add that support in a way thats
1399 * clean when we restucture accept also.
1400 */
1401
89bddce5
SH
1402asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1403 int __user *upeer_addrlen)
1da177e4
LT
1404{
1405 struct socket *sock, *newsock;
39d8c1b6 1406 struct file *newfile;
6cb153ca 1407 int err, len, newfd, fput_needed;
1da177e4
LT
1408 char address[MAX_SOCK_ADDR];
1409
6cb153ca 1410 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1411 if (!sock)
1412 goto out;
1413
1414 err = -ENFILE;
89bddce5 1415 if (!(newsock = sock_alloc()))
1da177e4
LT
1416 goto out_put;
1417
1418 newsock->type = sock->type;
1419 newsock->ops = sock->ops;
1420
1da177e4
LT
1421 /*
1422 * We don't need try_module_get here, as the listening socket (sock)
1423 * has the protocol module (sock->ops->owner) held.
1424 */
1425 __module_get(newsock->ops->owner);
1426
39d8c1b6
DM
1427 newfd = sock_alloc_fd(&newfile);
1428 if (unlikely(newfd < 0)) {
1429 err = newfd;
9a1875e6
DM
1430 sock_release(newsock);
1431 goto out_put;
39d8c1b6
DM
1432 }
1433
1434 err = sock_attach_fd(newsock, newfile);
1435 if (err < 0)
79f4f642 1436 goto out_fd_simple;
39d8c1b6 1437
a79af59e
FF
1438 err = security_socket_accept(sock, newsock);
1439 if (err)
39d8c1b6 1440 goto out_fd;
a79af59e 1441
1da177e4
LT
1442 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1443 if (err < 0)
39d8c1b6 1444 goto out_fd;
1da177e4
LT
1445
1446 if (upeer_sockaddr) {
89bddce5
SH
1447 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1448 &len, 2) < 0) {
1da177e4 1449 err = -ECONNABORTED;
39d8c1b6 1450 goto out_fd;
1da177e4 1451 }
89bddce5
SH
1452 err = move_addr_to_user(address, len, upeer_sockaddr,
1453 upeer_addrlen);
1da177e4 1454 if (err < 0)
39d8c1b6 1455 goto out_fd;
1da177e4
LT
1456 }
1457
1458 /* File flags are not inherited via accept() unlike another OSes. */
1459
39d8c1b6
DM
1460 fd_install(newfd, newfile);
1461 err = newfd;
1da177e4
LT
1462
1463 security_socket_post_accept(sock, newsock);
1464
1465out_put:
6cb153ca 1466 fput_light(sock->file, fput_needed);
1da177e4
LT
1467out:
1468 return err;
79f4f642
AD
1469out_fd_simple:
1470 sock_release(newsock);
1471 put_filp(newfile);
1472 put_unused_fd(newfd);
1473 goto out_put;
39d8c1b6 1474out_fd:
9606a216 1475 fput(newfile);
39d8c1b6 1476 put_unused_fd(newfd);
1da177e4
LT
1477 goto out_put;
1478}
1479
1da177e4
LT
1480/*
1481 * Attempt to connect to a socket with the server address. The address
1482 * is in user space so we verify it is OK and move it to kernel space.
1483 *
1484 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1485 * break bindings
1486 *
1487 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1488 * other SEQPACKET protocols that take time to connect() as it doesn't
1489 * include the -EINPROGRESS status for such sockets.
1490 */
1491
89bddce5
SH
1492asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1493 int addrlen)
1da177e4
LT
1494{
1495 struct socket *sock;
1496 char address[MAX_SOCK_ADDR];
6cb153ca 1497 int err, fput_needed;
1da177e4 1498
6cb153ca 1499 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1500 if (!sock)
1501 goto out;
1502 err = move_addr_to_kernel(uservaddr, addrlen, address);
1503 if (err < 0)
1504 goto out_put;
1505
89bddce5
SH
1506 err =
1507 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1508 if (err)
1509 goto out_put;
1510
89bddce5 1511 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1512 sock->file->f_flags);
1513out_put:
6cb153ca 1514 fput_light(sock->file, fput_needed);
1da177e4
LT
1515out:
1516 return err;
1517}
1518
1519/*
1520 * Get the local address ('name') of a socket object. Move the obtained
1521 * name to user space.
1522 */
1523
89bddce5
SH
1524asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1525 int __user *usockaddr_len)
1da177e4
LT
1526{
1527 struct socket *sock;
1528 char address[MAX_SOCK_ADDR];
6cb153ca 1529 int len, err, fput_needed;
89bddce5 1530
6cb153ca 1531 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1532 if (!sock)
1533 goto out;
1534
1535 err = security_socket_getsockname(sock);
1536 if (err)
1537 goto out_put;
1538
1539 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1540 if (err)
1541 goto out_put;
1542 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1543
1544out_put:
6cb153ca 1545 fput_light(sock->file, fput_needed);
1da177e4
LT
1546out:
1547 return err;
1548}
1549
1550/*
1551 * Get the remote address ('name') of a socket object. Move the obtained
1552 * name to user space.
1553 */
1554
89bddce5
SH
1555asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1556 int __user *usockaddr_len)
1da177e4
LT
1557{
1558 struct socket *sock;
1559 char address[MAX_SOCK_ADDR];
6cb153ca 1560 int len, err, fput_needed;
1da177e4 1561
89bddce5
SH
1562 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1563 if (sock != NULL) {
1da177e4
LT
1564 err = security_socket_getpeername(sock);
1565 if (err) {
6cb153ca 1566 fput_light(sock->file, fput_needed);
1da177e4
LT
1567 return err;
1568 }
1569
89bddce5
SH
1570 err =
1571 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1572 1);
1da177e4 1573 if (!err)
89bddce5
SH
1574 err = move_addr_to_user(address, len, usockaddr,
1575 usockaddr_len);
6cb153ca 1576 fput_light(sock->file, fput_needed);
1da177e4
LT
1577 }
1578 return err;
1579}
1580
1581/*
1582 * Send a datagram to a given address. We move the address into kernel
1583 * space and check the user space data area is readable before invoking
1584 * the protocol.
1585 */
1586
89bddce5
SH
1587asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1588 unsigned flags, struct sockaddr __user *addr,
1589 int addr_len)
1da177e4
LT
1590{
1591 struct socket *sock;
1592 char address[MAX_SOCK_ADDR];
1593 int err;
1594 struct msghdr msg;
1595 struct iovec iov;
6cb153ca
BL
1596 int fput_needed;
1597 struct file *sock_file;
1598
1599 sock_file = fget_light(fd, &fput_needed);
4387ff75 1600 err = -EBADF;
6cb153ca 1601 if (!sock_file)
4387ff75 1602 goto out;
6cb153ca
BL
1603
1604 sock = sock_from_file(sock_file, &err);
1da177e4 1605 if (!sock)
6cb153ca 1606 goto out_put;
89bddce5
SH
1607 iov.iov_base = buff;
1608 iov.iov_len = len;
1609 msg.msg_name = NULL;
1610 msg.msg_iov = &iov;
1611 msg.msg_iovlen = 1;
1612 msg.msg_control = NULL;
1613 msg.msg_controllen = 0;
1614 msg.msg_namelen = 0;
6cb153ca 1615 if (addr) {
1da177e4
LT
1616 err = move_addr_to_kernel(addr, addr_len, address);
1617 if (err < 0)
1618 goto out_put;
89bddce5
SH
1619 msg.msg_name = address;
1620 msg.msg_namelen = addr_len;
1da177e4
LT
1621 }
1622 if (sock->file->f_flags & O_NONBLOCK)
1623 flags |= MSG_DONTWAIT;
1624 msg.msg_flags = flags;
1625 err = sock_sendmsg(sock, &msg, len);
1626
89bddce5 1627out_put:
6cb153ca 1628 fput_light(sock_file, fput_needed);
4387ff75 1629out:
1da177e4
LT
1630 return err;
1631}
1632
1633/*
89bddce5 1634 * Send a datagram down a socket.
1da177e4
LT
1635 */
1636
89bddce5 1637asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1638{
1639 return sys_sendto(fd, buff, len, flags, NULL, 0);
1640}
1641
1642/*
89bddce5 1643 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1644 * sender. We verify the buffers are writable and if needed move the
1645 * sender address from kernel to user space.
1646 */
1647
89bddce5
SH
1648asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1649 unsigned flags, struct sockaddr __user *addr,
1650 int __user *addr_len)
1da177e4
LT
1651{
1652 struct socket *sock;
1653 struct iovec iov;
1654 struct msghdr msg;
1655 char address[MAX_SOCK_ADDR];
89bddce5 1656 int err, err2;
6cb153ca
BL
1657 struct file *sock_file;
1658 int fput_needed;
1659
1660 sock_file = fget_light(fd, &fput_needed);
4387ff75 1661 err = -EBADF;
6cb153ca 1662 if (!sock_file)
4387ff75 1663 goto out;
1da177e4 1664
6cb153ca 1665 sock = sock_from_file(sock_file, &err);
1da177e4 1666 if (!sock)
4387ff75 1667 goto out_put;
1da177e4 1668
89bddce5
SH
1669 msg.msg_control = NULL;
1670 msg.msg_controllen = 0;
1671 msg.msg_iovlen = 1;
1672 msg.msg_iov = &iov;
1673 iov.iov_len = size;
1674 iov.iov_base = ubuf;
1675 msg.msg_name = address;
1676 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1677 if (sock->file->f_flags & O_NONBLOCK)
1678 flags |= MSG_DONTWAIT;
89bddce5 1679 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1680
89bddce5
SH
1681 if (err >= 0 && addr != NULL) {
1682 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1683 if (err2 < 0)
1684 err = err2;
1da177e4 1685 }
4387ff75 1686out_put:
6cb153ca 1687 fput_light(sock_file, fput_needed);
4387ff75 1688out:
1da177e4
LT
1689 return err;
1690}
1691
1692/*
89bddce5 1693 * Receive a datagram from a socket.
1da177e4
LT
1694 */
1695
89bddce5
SH
1696asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1697 unsigned flags)
1da177e4
LT
1698{
1699 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1700}
1701
1702/*
1703 * Set a socket option. Because we don't know the option lengths we have
1704 * to pass the user mode parameter for the protocols to sort out.
1705 */
1706
89bddce5
SH
1707asmlinkage long sys_setsockopt(int fd, int level, int optname,
1708 char __user *optval, int optlen)
1da177e4 1709{
6cb153ca 1710 int err, fput_needed;
1da177e4
LT
1711 struct socket *sock;
1712
1713 if (optlen < 0)
1714 return -EINVAL;
89bddce5
SH
1715
1716 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1717 if (sock != NULL) {
1718 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1719 if (err)
1720 goto out_put;
1da177e4
LT
1721
1722 if (level == SOL_SOCKET)
89bddce5
SH
1723 err =
1724 sock_setsockopt(sock, level, optname, optval,
1725 optlen);
1da177e4 1726 else
89bddce5
SH
1727 err =
1728 sock->ops->setsockopt(sock, level, optname, optval,
1729 optlen);
6cb153ca
BL
1730out_put:
1731 fput_light(sock->file, fput_needed);
1da177e4
LT
1732 }
1733 return err;
1734}
1735
1736/*
1737 * Get a socket option. Because we don't know the option lengths we have
1738 * to pass a user mode parameter for the protocols to sort out.
1739 */
1740
89bddce5
SH
1741asmlinkage long sys_getsockopt(int fd, int level, int optname,
1742 char __user *optval, int __user *optlen)
1da177e4 1743{
6cb153ca 1744 int err, fput_needed;
1da177e4
LT
1745 struct socket *sock;
1746
89bddce5
SH
1747 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1748 if (sock != NULL) {
6cb153ca
BL
1749 err = security_socket_getsockopt(sock, level, optname);
1750 if (err)
1751 goto out_put;
1da177e4
LT
1752
1753 if (level == SOL_SOCKET)
89bddce5
SH
1754 err =
1755 sock_getsockopt(sock, level, optname, optval,
1756 optlen);
1da177e4 1757 else
89bddce5
SH
1758 err =
1759 sock->ops->getsockopt(sock, level, optname, optval,
1760 optlen);
6cb153ca
BL
1761out_put:
1762 fput_light(sock->file, fput_needed);
1da177e4
LT
1763 }
1764 return err;
1765}
1766
1da177e4
LT
1767/*
1768 * Shutdown a socket.
1769 */
1770
1771asmlinkage long sys_shutdown(int fd, int how)
1772{
6cb153ca 1773 int err, fput_needed;
1da177e4
LT
1774 struct socket *sock;
1775
89bddce5
SH
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
1da177e4 1778 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1779 if (!err)
1780 err = sock->ops->shutdown(sock, how);
1781 fput_light(sock->file, fput_needed);
1da177e4
LT
1782 }
1783 return err;
1784}
1785
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * Note: these expand to code that reads a variable named `flags' and a
 * pointer named `<msg>_compat', both of which must be in scope at the
 * point of use.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1792
1da177e4
LT
1793/*
1794 * BSD sendmsg interface
1795 */
1796
1797asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1798{
89bddce5
SH
1799 struct compat_msghdr __user *msg_compat =
1800 (struct compat_msghdr __user *)msg;
1da177e4
LT
1801 struct socket *sock;
1802 char address[MAX_SOCK_ADDR];
1803 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1804 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1805 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1806 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1807 unsigned char *ctl_buf = ctl;
1808 struct msghdr msg_sys;
1809 int err, ctl_len, iov_size, total_len;
6cb153ca 1810 int fput_needed;
89bddce5 1811
1da177e4
LT
1812 err = -EFAULT;
1813 if (MSG_CMSG_COMPAT & flags) {
1814 if (get_compat_msghdr(&msg_sys, msg_compat))
1815 return -EFAULT;
89bddce5
SH
1816 }
1817 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1818 return -EFAULT;
1819
6cb153ca 1820 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1821 if (!sock)
1da177e4
LT
1822 goto out;
1823
1824 /* do not move before msg_sys is valid */
1825 err = -EMSGSIZE;
1826 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1827 goto out_put;
1828
89bddce5 1829 /* Check whether to allocate the iovec area */
1da177e4
LT
1830 err = -ENOMEM;
1831 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1832 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1833 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1834 if (!iov)
1835 goto out_put;
1836 }
1837
1838 /* This will also move the address data into kernel space */
1839 if (MSG_CMSG_COMPAT & flags) {
1840 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1841 } else
1842 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1843 if (err < 0)
1da177e4
LT
1844 goto out_freeiov;
1845 total_len = err;
1846
1847 err = -ENOBUFS;
1848
1849 if (msg_sys.msg_controllen > INT_MAX)
1850 goto out_freeiov;
89bddce5 1851 ctl_len = msg_sys.msg_controllen;
1da177e4 1852 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1853 err =
1854 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1855 sizeof(ctl));
1da177e4
LT
1856 if (err)
1857 goto out_freeiov;
1858 ctl_buf = msg_sys.msg_control;
8920e8f9 1859 ctl_len = msg_sys.msg_controllen;
1da177e4 1860 } else if (ctl_len) {
89bddce5 1861 if (ctl_len > sizeof(ctl)) {
1da177e4 1862 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1863 if (ctl_buf == NULL)
1da177e4
LT
1864 goto out_freeiov;
1865 }
1866 err = -EFAULT;
1867 /*
1868 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1869 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1870 * checking falls down on this.
1871 */
89bddce5
SH
1872 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1873 ctl_len))
1da177e4
LT
1874 goto out_freectl;
1875 msg_sys.msg_control = ctl_buf;
1876 }
1877 msg_sys.msg_flags = flags;
1878
1879 if (sock->file->f_flags & O_NONBLOCK)
1880 msg_sys.msg_flags |= MSG_DONTWAIT;
1881 err = sock_sendmsg(sock, &msg_sys, total_len);
1882
1883out_freectl:
89bddce5 1884 if (ctl_buf != ctl)
1da177e4
LT
1885 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1886out_freeiov:
1887 if (iov != iovstack)
1888 sock_kfree_s(sock->sk, iov, iov_size);
1889out_put:
6cb153ca 1890 fput_light(sock->file, fput_needed);
89bddce5 1891out:
1da177e4
LT
1892 return err;
1893}
1894
1895/*
1896 * BSD recvmsg interface
1897 */
1898
89bddce5
SH
1899asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1900 unsigned int flags)
1da177e4 1901{
89bddce5
SH
1902 struct compat_msghdr __user *msg_compat =
1903 (struct compat_msghdr __user *)msg;
1da177e4
LT
1904 struct socket *sock;
1905 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1906 struct iovec *iov = iovstack;
1da177e4
LT
1907 struct msghdr msg_sys;
1908 unsigned long cmsg_ptr;
1909 int err, iov_size, total_len, len;
6cb153ca 1910 int fput_needed;
1da177e4
LT
1911
1912 /* kernel mode address */
1913 char addr[MAX_SOCK_ADDR];
1914
1915 /* user mode address pointers */
1916 struct sockaddr __user *uaddr;
1917 int __user *uaddr_len;
89bddce5 1918
1da177e4
LT
1919 if (MSG_CMSG_COMPAT & flags) {
1920 if (get_compat_msghdr(&msg_sys, msg_compat))
1921 return -EFAULT;
89bddce5
SH
1922 }
1923 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1924 return -EFAULT;
1da177e4 1925
6cb153ca 1926 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1927 if (!sock)
1928 goto out;
1929
1930 err = -EMSGSIZE;
1931 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1932 goto out_put;
89bddce5
SH
1933
1934 /* Check whether to allocate the iovec area */
1da177e4
LT
1935 err = -ENOMEM;
1936 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1937 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1938 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1939 if (!iov)
1940 goto out_put;
1941 }
1942
1943 /*
89bddce5
SH
1944 * Save the user-mode address (verify_iovec will change the
1945 * kernel msghdr to use the kernel address space)
1da177e4 1946 */
89bddce5 1947
cfcabdcc 1948 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1949 uaddr_len = COMPAT_NAMELEN(msg);
1950 if (MSG_CMSG_COMPAT & flags) {
1951 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1952 } else
1953 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1954 if (err < 0)
1955 goto out_freeiov;
89bddce5 1956 total_len = err;
1da177e4
LT
1957
1958 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1959 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1960
1da177e4
LT
1961 if (sock->file->f_flags & O_NONBLOCK)
1962 flags |= MSG_DONTWAIT;
1963 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1964 if (err < 0)
1965 goto out_freeiov;
1966 len = err;
1967
1968 if (uaddr != NULL) {
89bddce5
SH
1969 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1970 uaddr_len);
1da177e4
LT
1971 if (err < 0)
1972 goto out_freeiov;
1973 }
37f7f421
DM
1974 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1975 COMPAT_FLAGS(msg));
1da177e4
LT
1976 if (err)
1977 goto out_freeiov;
1978 if (MSG_CMSG_COMPAT & flags)
89bddce5 1979 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1980 &msg_compat->msg_controllen);
1981 else
89bddce5 1982 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1983 &msg->msg_controllen);
1984 if (err)
1985 goto out_freeiov;
1986 err = len;
1987
1988out_freeiov:
1989 if (iov != iovstack)
1990 sock_kfree_s(sock->sk, iov, iov_size);
1991out_put:
6cb153ca 1992 fput_light(sock->file, fput_needed);
1da177e4
LT
1993out:
1994 return err;
1995}
1996
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of arguments to copy from user space for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call selects the operation (1 .. SYS_RECVMSG); @args points to the
 *	user-space array holding that operation's arguments.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2108
55737fda
SH
2109/**
2110 * sock_register - add a socket protocol handler
2111 * @ops: description of protocol
2112 *
1da177e4
LT
2113 * This function is called by a protocol handler that wants to
2114 * advertise its address family, and have it linked into the
55737fda
SH
2115 * socket interface. The value ops->family coresponds to the
2116 * socket system call protocol family.
1da177e4 2117 */
f0fd27d4 2118int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2119{
2120 int err;
2121
2122 if (ops->family >= NPROTO) {
89bddce5
SH
2123 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2124 NPROTO);
1da177e4
LT
2125 return -ENOBUFS;
2126 }
55737fda
SH
2127
2128 spin_lock(&net_family_lock);
2129 if (net_families[ops->family])
2130 err = -EEXIST;
2131 else {
89bddce5 2132 net_families[ops->family] = ops;
1da177e4
LT
2133 err = 0;
2134 }
55737fda
SH
2135 spin_unlock(&net_family_lock);
2136
89bddce5 2137 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2138 return err;
2139}
2140
55737fda
SH
2141/**
2142 * sock_unregister - remove a protocol handler
2143 * @family: protocol family to remove
2144 *
1da177e4
LT
2145 * This function is called by a protocol handler that wants to
2146 * remove its address family, and have it unlinked from the
55737fda
SH
2147 * new socket creation.
2148 *
2149 * If protocol handler is a module, then it can use module reference
2150 * counts to protect against new references. If protocol handler is not
2151 * a module then it needs to provide its own protection in
2152 * the ops->create routine.
1da177e4 2153 */
f0fd27d4 2154void sock_unregister(int family)
1da177e4 2155{
f0fd27d4 2156 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2157
55737fda 2158 spin_lock(&net_family_lock);
89bddce5 2159 net_families[family] = NULL;
55737fda
SH
2160 spin_unlock(&net_family_lock);
2161
2162 synchronize_rcu();
2163
89bddce5 2164 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2165}
2166
77d76ea3 2167static int __init sock_init(void)
1da177e4
LT
2168{
2169 /*
89bddce5 2170 * Initialize sock SLAB cache.
1da177e4 2171 */
89bddce5 2172
1da177e4
LT
2173 sk_init();
2174
1da177e4 2175 /*
89bddce5 2176 * Initialize skbuff SLAB cache
1da177e4
LT
2177 */
2178 skb_init();
1da177e4
LT
2179
2180 /*
89bddce5 2181 * Initialize the protocols module.
1da177e4
LT
2182 */
2183
2184 init_inodecache();
2185 register_filesystem(&sock_fs_type);
2186 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2187
2188 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2189 */
2190
2191#ifdef CONFIG_NETFILTER
2192 netfilter_init();
2193#endif
cbeb321a
DM
2194
2195 return 0;
1da177e4
LT
2196}
2197
77d76ea3
AK
2198core_initcall(sock_init); /* early initcall */
2199
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - write the "sockets: used N" line into @seq.
 *
 * N is the sum of the per-CPU sockets_in_use counters over every possible
 * CPU; a negative sum is reported as 0.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* It can be negative, by the way. 8) */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2216
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's protocol.
 *
 * The socket is taken from file->private_data. Returns -ENOIOCTLCMD when
 * the protocol supplies no compat_ioctl op, otherwise whatever that op
 * returns.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2230
ac5a488e
SS
2231int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2232{
2233 return sock->ops->bind(sock, addr, addrlen);
2234}
2235
2236int kernel_listen(struct socket *sock, int backlog)
2237{
2238 return sock->ops->listen(sock, backlog);
2239}
2240
2241int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2242{
2243 struct sock *sk = sock->sk;
2244 int err;
2245
2246 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2247 newsock);
2248 if (err < 0)
2249 goto done;
2250
2251 err = sock->ops->accept(sock, *newsock, flags);
2252 if (err < 0) {
2253 sock_release(*newsock);
fa8705b0 2254 *newsock = NULL;
ac5a488e
SS
2255 goto done;
2256 }
2257
2258 (*newsock)->ops = sock->ops;
2259
2260done:
2261 return err;
2262}
2263
2264int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2265 int flags)
ac5a488e
SS
2266{
2267 return sock->ops->connect(sock, addr, addrlen, flags);
2268}
2269
2270int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2271 int *addrlen)
2272{
2273 return sock->ops->getname(sock, addr, addrlen, 0);
2274}
2275
2276int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2277 int *addrlen)
2278{
2279 return sock->ops->getname(sock, addr, addrlen, 1);
2280}
2281
2282int kernel_getsockopt(struct socket *sock, int level, int optname,
2283 char *optval, int *optlen)
2284{
2285 mm_segment_t oldfs = get_fs();
2286 int err;
2287
2288 set_fs(KERNEL_DS);
2289 if (level == SOL_SOCKET)
2290 err = sock_getsockopt(sock, level, optname, optval, optlen);
2291 else
2292 err = sock->ops->getsockopt(sock, level, optname, optval,
2293 optlen);
2294 set_fs(oldfs);
2295 return err;
2296}
2297
2298int kernel_setsockopt(struct socket *sock, int level, int optname,
2299 char *optval, int optlen)
2300{
2301 mm_segment_t oldfs = get_fs();
2302 int err;
2303
2304 set_fs(KERNEL_DS);
2305 if (level == SOL_SOCKET)
2306 err = sock_setsockopt(sock, level, optname, optval, optlen);
2307 else
2308 err = sock->ops->setsockopt(sock, level, optname, optval,
2309 optlen);
2310 set_fs(oldfs);
2311 return err;
2312}
2313
2314int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2315 size_t size, int flags)
2316{
2317 if (sock->ops->sendpage)
2318 return sock->ops->sendpage(sock, page, offset, size, flags);
2319
2320 return sock_no_sendpage(sock, page, offset, size, flags);
2321}
2322
2323int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2324{
2325 mm_segment_t oldfs = get_fs();
2326 int err;
2327
2328 set_fs(KERNEL_DS);
2329 err = sock->ops->ioctl(sock, cmd, arg);
2330 set_fs(oldfs);
2331
2332 return err;
2333}
2334
91cf45f0
TM
2335int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2336{
2337 return sock->ops->shutdown(sock, how);
2338}
2339
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* sock_* and sockfd_* entry points */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* wrappers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);