Merge master.kernel.org:/pub/scm/linux/kernel/git/bart/ide-2.6
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
/*
 * Upper bound, in bytes, on a socket address moved across the
 * kernel/user divide: 108 for Unix domain, 16 for IP, 16 for IPX,
 * 24 for IPv6, about 80 for AX.25.  It must be at least one bigger
 * than the AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
a35afb83 264 inode_init_once(&ei->vfs_inode);
1da177e4 265}
89bddce5 266
1da177e4
LT
267static int init_inodecache(void)
268{
269 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
270 sizeof(struct socket_alloc),
271 0,
272 (SLAB_HWCACHE_ALIGN |
273 SLAB_RECLAIM_ACCOUNT |
274 SLAB_MEM_SPREAD),
275 init_once,
276 NULL);
1da177e4
LT
277 if (sock_inode_cachep == NULL)
278 return -ENOMEM;
279 return 0;
280}
281
282static struct super_operations sockfs_ops = {
283 .alloc_inode = sock_alloc_inode,
284 .destroy_inode =sock_destroy_inode,
285 .statfs = simple_statfs,
286};
287
454e2398 288static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
289 int flags, const char *dev_name, void *data,
290 struct vfsmount *mnt)
1da177e4 291{
454e2398
DH
292 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
293 mnt);
1da177e4
LT
294}
295
ba89966c 296static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
297
298static struct file_system_type sock_fs_type = {
299 .name = "sockfs",
300 .get_sb = sockfs_get_sb,
301 .kill_sb = kill_anon_super,
302};
89bddce5 303
1da177e4
LT
304static int sockfs_delete_dentry(struct dentry *dentry)
305{
304e61e6
ED
306 /*
307 * At creation time, we pretended this dentry was hashed
308 * (by clearing DCACHE_UNHASHED bit in d_flags)
309 * At delete time, we restore the truth : not hashed.
310 * (so that dput() can proceed correctly)
311 */
312 dentry->d_flags |= DCACHE_UNHASHED;
313 return 0;
1da177e4 314}
c23fbb6b
ED
315
316/*
317 * sockfs_dname() is called from d_path().
318 */
319static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320{
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino);
323}
324
1da177e4 325static struct dentry_operations sockfs_dentry_operations = {
89bddce5 326 .d_delete = sockfs_delete_dentry,
c23fbb6b 327 .d_dname = sockfs_dname,
1da177e4
LT
328};
329
330/*
331 * Obtains the first available file descriptor and sets it up for use.
332 *
39d8c1b6
DM
333 * These functions create file structures and maps them to fd space
334 * of the current process. On success it returns file descriptor
1da177e4
LT
335 * and file struct implicitly stored in sock->file.
336 * Note that another thread may close file descriptor before we return
337 * from this function. We use the fact that now we do not refer
338 * to socket after mapping. If one day we will need it, this
339 * function will increment ref. count on file by 1.
340 *
341 * In any case returned fd MAY BE not valid!
342 * This race condition is unavoidable
343 * with shared fd spaces, we cannot solve it inside kernel,
344 * but we take care of internal coherence yet.
345 */
346
39d8c1b6 347static int sock_alloc_fd(struct file **filep)
1da177e4
LT
348{
349 int fd;
1da177e4
LT
350
351 fd = get_unused_fd();
39d8c1b6 352 if (likely(fd >= 0)) {
1da177e4
LT
353 struct file *file = get_empty_filp();
354
39d8c1b6
DM
355 *filep = file;
356 if (unlikely(!file)) {
1da177e4 357 put_unused_fd(fd);
39d8c1b6 358 return -ENFILE;
1da177e4 359 }
39d8c1b6
DM
360 } else
361 *filep = NULL;
362 return fd;
363}
1da177e4 364
39d8c1b6
DM
365static int sock_attach_fd(struct socket *sock, struct file *file)
366{
c23fbb6b 367 struct qstr name = { .name = "" };
39d8c1b6 368
c23fbb6b 369 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 370 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
371 return -ENOMEM;
372
3126a42c 373 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
374 /*
375 * We dont want to push this dentry into global dentry hash table.
376 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
377 * This permits a working /proc/$pid/fd/XXX on sockets
378 */
3126a42c
JS
379 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
380 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
381 file->f_path.mnt = mntget(sock_mnt);
382 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
383
384 sock->file = file;
385 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
386 file->f_mode = FMODE_READ | FMODE_WRITE;
387 file->f_flags = O_RDWR;
388 file->f_pos = 0;
389 file->private_data = sock;
1da177e4 390
39d8c1b6
DM
391 return 0;
392}
393
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table.  Returns the new descriptor, or a
 * negative error from sock_alloc_fd() or sock_attach_fd(); on an attach
 * failure the file and descriptor are released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int fd;
	int err;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
411
6cb153ca
BL
412static struct socket *sock_from_file(struct file *file, int *err)
413{
6cb153ca
BL
414 if (file->f_op == &socket_file_ops)
415 return file->private_data; /* set in sock_map_fd */
416
23bb80d2
ED
417 *err = -ENOTSOCK;
418 return NULL;
6cb153ca
BL
419}
420
1da177e4
LT
421/**
422 * sockfd_lookup - Go from a file number to its socket slot
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
467/**
468 * sock_alloc - allocate a socket
89bddce5 469 *
1da177e4
LT
470 * Allocate a new inode and socket object. The two are bound together
471 * and initialised. The socket is then returned. If we are out of inodes
472 * NULL is returned.
473 */
474
475static struct socket *sock_alloc(void)
476{
89bddce5
SH
477 struct inode *inode;
478 struct socket *sock;
1da177e4
LT
479
480 inode = new_inode(sock_mnt->mnt_sb);
481 if (!inode)
482 return NULL;
483
484 sock = SOCKET_I(inode);
485
89bddce5 486 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
487 inode->i_uid = current->fsuid;
488 inode->i_gid = current->fsgid;
489
490 get_cpu_var(sockets_in_use)++;
491 put_cpu_var(sockets_in_use);
492 return sock;
493}
494
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; the open is always refused with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
505
4b6f5d20 506const struct file_operations bad_sock_fops = {
1da177e4
LT
507 .owner = THIS_MODULE,
508 .open = sock_no_open,
509};
510
511/**
512 * sock_release - close a socket
513 * @sock: socket to close
514 *
515 * The socket is released from the protocol stack if it has a release
516 * callback, and the inode is then released if the socket is bound to
89bddce5 517 * an inode not a file.
1da177e4 518 */
89bddce5 519
1da177e4
LT
520void sock_release(struct socket *sock)
521{
522 if (sock->ops) {
523 struct module *owner = sock->ops->owner;
524
525 sock->ops->release(sock);
526 sock->ops = NULL;
527 module_put(owner);
528 }
529
530 if (sock->fasync_list)
531 printk(KERN_ERR "sock_release: fasync list not empty!\n");
532
533 get_cpu_var(sockets_in_use)--;
534 put_cpu_var(sockets_in_use);
535 if (!sock->file) {
536 iput(SOCK_INODE(sock));
537 return;
538 }
89bddce5 539 sock->file = NULL;
1da177e4
LT
540}
541
89bddce5 542static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
543 struct msghdr *msg, size_t size)
544{
545 struct sock_iocb *si = kiocb_to_siocb(iocb);
546 int err;
547
548 si->sock = sock;
549 si->scm = NULL;
550 si->msg = msg;
551 si->size = size;
552
553 err = security_socket_sendmsg(sock, msg, size);
554 if (err)
555 return err;
556
557 return sock->ops->sendmsg(iocb, sock, msg, size);
558}
559
560int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
561{
562 struct kiocb iocb;
563 struct sock_iocb siocb;
564 int ret;
565
566 init_sync_kiocb(&iocb, NULL);
567 iocb.private = &siocb;
568 ret = __sock_sendmsg(&iocb, sock, msg, size);
569 if (-EIOCBQUEUED == ret)
570 ret = wait_on_sync_kiocb(&iocb);
571 return ret;
572}
573
574int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
575 struct kvec *vec, size_t num, size_t size)
576{
577 mm_segment_t oldfs = get_fs();
578 int result;
579
580 set_fs(KERNEL_DS);
581 /*
582 * the following is safe, since for compiler definitions of kvec and
583 * iovec are identical, yielding the same in-core layout and alignment
584 */
89bddce5 585 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
586 msg->msg_iovlen = num;
587 result = sock_sendmsg(sock, msg, size);
588 set_fs(oldfs);
589 return result;
590}
591
92f37fd2
ED
592/*
593 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
594 */
595void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
596 struct sk_buff *skb)
597{
598 ktime_t kt = skb->tstamp;
599
600 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
601 struct timeval tv;
602 /* Race occurred between timestamp enabling and packet
603 receiving. Fill in the current time for now. */
604 if (kt.tv64 == 0)
605 kt = ktime_get_real();
606 skb->tstamp = kt;
607 tv = ktime_to_timeval(kt);
608 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
609 } else {
610 struct timespec ts;
611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */
613 if (kt.tv64 == 0)
614 kt = ktime_get_real();
615 skb->tstamp = kt;
616 ts = ktime_to_timespec(kt);
617 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
618 }
619}
620
7c81fd8b
ACM
621EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
622
89bddce5 623static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
624 struct msghdr *msg, size_t size, int flags)
625{
626 int err;
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633 si->flags = flags;
634
635 err = security_socket_recvmsg(sock, msg, size, flags);
636 if (err)
637 return err;
638
639 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
640}
641
89bddce5 642int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
643 size_t size, int flags)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
89bddce5 649 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
650 iocb.private = &siocb;
651 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
656
89bddce5
SH
657int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
658 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
659{
660 mm_segment_t oldfs = get_fs();
661 int result;
662
663 set_fs(KERNEL_DS);
664 /*
665 * the following is safe, since for compiler definitions of kvec and
666 * iovec are identical, yielding the same in-core layout and alignment
667 */
89bddce5 668 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
669 result = sock_recvmsg(sock, msg, size, flags);
670 set_fs(oldfs);
671 return result;
672}
673
674static void sock_aio_dtor(struct kiocb *iocb)
675{
676 kfree(iocb->private);
677}
678
ce1d4d3e
CH
679static ssize_t sock_sendpage(struct file *file, struct page *page,
680 int offset, size_t size, loff_t *ppos, int more)
1da177e4 681{
1da177e4
LT
682 struct socket *sock;
683 int flags;
684
ce1d4d3e
CH
685 sock = file->private_data;
686
687 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
688 if (more)
689 flags |= MSG_MORE;
690
691 return sock->ops->sendpage(sock, page, offset, size, flags);
692}
1da177e4 693
ce1d4d3e 694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 695 struct sock_iocb *siocb)
ce1d4d3e
CH
696{
697 if (!is_sync_kiocb(iocb)) {
698 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
699 if (!siocb)
700 return NULL;
1da177e4
LT
701 iocb->ki_dtor = sock_aio_dtor;
702 }
1da177e4 703
ce1d4d3e 704 siocb->kiocb = iocb;
ce1d4d3e
CH
705 iocb->private = siocb;
706 return siocb;
1da177e4
LT
707}
708
ce1d4d3e 709static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
710 struct file *file, const struct iovec *iov,
711 unsigned long nr_segs)
ce1d4d3e
CH
712{
713 struct socket *sock = file->private_data;
714 size_t size = 0;
715 int i;
1da177e4 716
89bddce5
SH
717 for (i = 0; i < nr_segs; i++)
718 size += iov[i].iov_len;
1da177e4 719
ce1d4d3e
CH
720 msg->msg_name = NULL;
721 msg->msg_namelen = 0;
722 msg->msg_control = NULL;
723 msg->msg_controllen = 0;
89bddce5 724 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
725 msg->msg_iovlen = nr_segs;
726 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
727
728 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
729}
730
027445c3
BP
731static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
732 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
733{
734 struct sock_iocb siocb, *x;
735
1da177e4
LT
736 if (pos != 0)
737 return -ESPIPE;
027445c3
BP
738
739 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
740 return 0;
741
027445c3
BP
742
743 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
744 if (!x)
745 return -ENOMEM;
027445c3 746 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
747}
748
ce1d4d3e 749static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
750 struct file *file, const struct iovec *iov,
751 unsigned long nr_segs)
1da177e4 752{
ce1d4d3e
CH
753 struct socket *sock = file->private_data;
754 size_t size = 0;
755 int i;
1da177e4 756
89bddce5
SH
757 for (i = 0; i < nr_segs; i++)
758 size += iov[i].iov_len;
1da177e4 759
ce1d4d3e
CH
760 msg->msg_name = NULL;
761 msg->msg_namelen = 0;
762 msg->msg_control = NULL;
763 msg->msg_controllen = 0;
89bddce5 764 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
765 msg->msg_iovlen = nr_segs;
766 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
767 if (sock->type == SOCK_SEQPACKET)
768 msg->msg_flags |= MSG_EOR;
1da177e4 769
ce1d4d3e 770 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
771}
772
027445c3
BP
773static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
774 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
775{
776 struct sock_iocb siocb, *x;
1da177e4 777
ce1d4d3e
CH
778 if (pos != 0)
779 return -ESPIPE;
027445c3
BP
780
781 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 782 return 0;
1da177e4 783
027445c3 784 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
785 if (!x)
786 return -ENOMEM;
1da177e4 787
027445c3 788 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
789}
790
1da177e4
LT
791/*
792 * Atomic setting of ioctl hooks to avoid race
793 * with module unload.
794 */
795
4a3e2f71 796static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 797static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 798
89bddce5 799void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 800{
4a3e2f71 801 mutex_lock(&br_ioctl_mutex);
1da177e4 802 br_ioctl_hook = hook;
4a3e2f71 803 mutex_unlock(&br_ioctl_mutex);
1da177e4 804}
89bddce5 805
1da177e4
LT
806EXPORT_SYMBOL(brioctl_set);
807
4a3e2f71 808static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 809static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 810
89bddce5 811void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 812{
4a3e2f71 813 mutex_lock(&vlan_ioctl_mutex);
1da177e4 814 vlan_ioctl_hook = hook;
4a3e2f71 815 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 816}
89bddce5 817
1da177e4
LT
818EXPORT_SYMBOL(vlan_ioctl_set);
819
4a3e2f71 820static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 821static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 822
89bddce5 823void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 824{
4a3e2f71 825 mutex_lock(&dlci_ioctl_mutex);
1da177e4 826 dlci_ioctl_hook = hook;
4a3e2f71 827 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 828}
89bddce5 829
1da177e4
LT
830EXPORT_SYMBOL(dlci_ioctl_set);
831
832/*
833 * With an ioctl, arg may well be a user mode pointer, but we don't know
834 * what to do with it - that's up to the protocol still.
835 */
836
837static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
838{
839 struct socket *sock;
840 void __user *argp = (void __user *)arg;
841 int pid, err;
842
b69aee04 843 sock = file->private_data;
1da177e4
LT
844 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
845 err = dev_ioctl(cmd, argp);
846 } else
d86b5e0e 847#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
848 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
849 err = dev_ioctl(cmd, argp);
850 } else
89bddce5
SH
851#endif /* CONFIG_WIRELESS_EXT */
852 switch (cmd) {
1da177e4
LT
853 case FIOSETOWN:
854 case SIOCSPGRP:
855 err = -EFAULT;
856 if (get_user(pid, (int __user *)argp))
857 break;
858 err = f_setown(sock->file, pid, 1);
859 break;
860 case FIOGETOWN:
861 case SIOCGPGRP:
609d7fa9 862 err = put_user(f_getown(sock->file),
89bddce5 863 (int __user *)argp);
1da177e4
LT
864 break;
865 case SIOCGIFBR:
866 case SIOCSIFBR:
867 case SIOCBRADDBR:
868 case SIOCBRDELBR:
869 err = -ENOPKG;
870 if (!br_ioctl_hook)
871 request_module("bridge");
872
4a3e2f71 873 mutex_lock(&br_ioctl_mutex);
89bddce5 874 if (br_ioctl_hook)
1da177e4 875 err = br_ioctl_hook(cmd, argp);
4a3e2f71 876 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
877 break;
878 case SIOCGIFVLAN:
879 case SIOCSIFVLAN:
880 err = -ENOPKG;
881 if (!vlan_ioctl_hook)
882 request_module("8021q");
883
4a3e2f71 884 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
885 if (vlan_ioctl_hook)
886 err = vlan_ioctl_hook(argp);
4a3e2f71 887 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 888 break;
1da177e4
LT
889 case SIOCADDDLCI:
890 case SIOCDELDLCI:
891 err = -ENOPKG;
892 if (!dlci_ioctl_hook)
893 request_module("dlci");
894
895 if (dlci_ioctl_hook) {
4a3e2f71 896 mutex_lock(&dlci_ioctl_mutex);
1da177e4 897 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 898 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
899 }
900 break;
901 default:
902 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
903
904 /*
905 * If this ioctl is unknown try to hand it down
906 * to the NIC driver.
907 */
908 if (err == -ENOIOCTLCMD)
909 err = dev_ioctl(cmd, argp);
1da177e4 910 break;
89bddce5 911 }
1da177e4
LT
912 return err;
913}
914
915int sock_create_lite(int family, int type, int protocol, struct socket **res)
916{
917 int err;
918 struct socket *sock = NULL;
89bddce5 919
1da177e4
LT
920 err = security_socket_create(family, type, protocol, 1);
921 if (err)
922 goto out;
923
924 sock = sock_alloc();
925 if (!sock) {
926 err = -ENOMEM;
927 goto out;
928 }
929
1da177e4 930 sock->type = type;
7420ed23
VY
931 err = security_socket_post_create(sock, family, type, protocol, 1);
932 if (err)
933 goto out_release;
934
1da177e4
LT
935out:
936 *res = sock;
937 return err;
7420ed23
VY
938out_release:
939 sock_release(sock);
940 sock = NULL;
941 goto out;
1da177e4
LT
942}
943
944/* No kernel lock held - perfect */
89bddce5 945static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
946{
947 struct socket *sock;
948
949 /*
89bddce5 950 * We can't return errors to poll, so it's either yes or no.
1da177e4 951 */
b69aee04 952 sock = file->private_data;
1da177e4
LT
953 return sock->ops->poll(file, sock, wait);
954}
955
89bddce5 956static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 957{
b69aee04 958 struct socket *sock = file->private_data;
1da177e4
LT
959
960 return sock->ops->mmap(file, sock, vma);
961}
962
20380731 963static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
964{
965 /*
89bddce5
SH
966 * It was possible the inode is NULL we were
967 * closing an unfinished socket.
1da177e4
LT
968 */
969
89bddce5 970 if (!inode) {
1da177e4
LT
971 printk(KERN_DEBUG "sock_close: NULL inode\n");
972 return 0;
973 }
974 sock_fasync(-1, filp, 0);
975 sock_release(SOCKET_I(inode));
976 return 0;
977}
978
979/*
980 * Update the socket async list
981 *
982 * Fasync_list locking strategy.
983 *
984 * 1. fasync_list is modified only under process context socket lock
985 * i.e. under semaphore.
986 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
987 * or under socket lock.
988 * 3. fasync_list can be used from softirq context, so that
989 * modification under socket lock have to be enhanced with
990 * write_lock_bh(&sk->sk_callback_lock).
991 * --ANK (990710)
992 */
993
994static int sock_fasync(int fd, struct file *filp, int on)
995{
89bddce5 996 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
997 struct socket *sock;
998 struct sock *sk;
999
89bddce5 1000 if (on) {
8b3a7005 1001 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1002 if (fna == NULL)
1da177e4
LT
1003 return -ENOMEM;
1004 }
1005
b69aee04 1006 sock = filp->private_data;
1da177e4 1007
89bddce5
SH
1008 sk = sock->sk;
1009 if (sk == NULL) {
1da177e4
LT
1010 kfree(fna);
1011 return -EINVAL;
1012 }
1013
1014 lock_sock(sk);
1015
89bddce5 1016 prev = &(sock->fasync_list);
1da177e4 1017
89bddce5
SH
1018 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1019 if (fa->fa_file == filp)
1da177e4
LT
1020 break;
1021
89bddce5
SH
1022 if (on) {
1023 if (fa != NULL) {
1da177e4 1024 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1025 fa->fa_fd = fd;
1da177e4
LT
1026 write_unlock_bh(&sk->sk_callback_lock);
1027
1028 kfree(fna);
1029 goto out;
1030 }
89bddce5
SH
1031 fna->fa_file = filp;
1032 fna->fa_fd = fd;
1033 fna->magic = FASYNC_MAGIC;
1034 fna->fa_next = sock->fasync_list;
1da177e4 1035 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1036 sock->fasync_list = fna;
1da177e4 1037 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1038 } else {
1039 if (fa != NULL) {
1da177e4 1040 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1041 *prev = fa->fa_next;
1da177e4
LT
1042 write_unlock_bh(&sk->sk_callback_lock);
1043 kfree(fa);
1044 }
1045 }
1046
1047out:
1048 release_sock(sock->sk);
1049 return 0;
1050}
1051
1052/* This function may be called only under socket lock or callback_lock */
1053
1054int sock_wake_async(struct socket *sock, int how, int band)
1055{
1056 if (!sock || !sock->fasync_list)
1057 return -1;
89bddce5 1058 switch (how) {
1da177e4 1059 case 1:
89bddce5 1060
1da177e4
LT
1061 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1062 break;
1063 goto call_kill;
1064 case 2:
1065 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1066 break;
1067 /* fall through */
1068 case 0:
89bddce5 1069call_kill:
1da177e4
LT
1070 __kill_fasync(sock->fasync_list, SIGIO, band);
1071 break;
1072 case 3:
1073 __kill_fasync(sock->fasync_list, SIGURG, band);
1074 }
1075 return 0;
1076}
1077
89bddce5
SH
1078static int __sock_create(int family, int type, int protocol,
1079 struct socket **res, int kern)
1da177e4
LT
1080{
1081 int err;
1082 struct socket *sock;
55737fda 1083 const struct net_proto_family *pf;
1da177e4
LT
1084
1085 /*
89bddce5 1086 * Check protocol is in range
1da177e4
LT
1087 */
1088 if (family < 0 || family >= NPROTO)
1089 return -EAFNOSUPPORT;
1090 if (type < 0 || type >= SOCK_MAX)
1091 return -EINVAL;
1092
1093 /* Compatibility.
1094
1095 This uglymoron is moved from INET layer to here to avoid
1096 deadlock in module load.
1097 */
1098 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1099 static int warned;
1da177e4
LT
1100 if (!warned) {
1101 warned = 1;
89bddce5
SH
1102 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1103 current->comm);
1da177e4
LT
1104 }
1105 family = PF_PACKET;
1106 }
1107
1108 err = security_socket_create(family, type, protocol, kern);
1109 if (err)
1110 return err;
89bddce5 1111
55737fda
SH
1112 /*
1113 * Allocate the socket and allow the family to set things up. if
1114 * the protocol is 0, the family is instructed to select an appropriate
1115 * default.
1116 */
1117 sock = sock_alloc();
1118 if (!sock) {
1119 if (net_ratelimit())
1120 printk(KERN_WARNING "socket: no more sockets\n");
1121 return -ENFILE; /* Not exactly a match, but its the
1122 closest posix thing */
1123 }
1124
1125 sock->type = type;
1126
1da177e4 1127#if defined(CONFIG_KMOD)
89bddce5
SH
1128 /* Attempt to load a protocol module if the find failed.
1129 *
1130 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1131 * requested real, full-featured networking support upon configuration.
1132 * Otherwise module support will break!
1133 */
55737fda 1134 if (net_families[family] == NULL)
89bddce5 1135 request_module("net-pf-%d", family);
1da177e4
LT
1136#endif
1137
55737fda
SH
1138 rcu_read_lock();
1139 pf = rcu_dereference(net_families[family]);
1140 err = -EAFNOSUPPORT;
1141 if (!pf)
1142 goto out_release;
1da177e4
LT
1143
1144 /*
1145 * We will call the ->create function, that possibly is in a loadable
1146 * module, so we have to bump that loadable module refcnt first.
1147 */
55737fda 1148 if (!try_module_get(pf->owner))
1da177e4
LT
1149 goto out_release;
1150
55737fda
SH
1151 /* Now protected by module ref count */
1152 rcu_read_unlock();
1153
1154 err = pf->create(sock, protocol);
1155 if (err < 0)
1da177e4 1156 goto out_module_put;
a79af59e 1157
1da177e4
LT
1158 /*
1159 * Now to bump the refcnt of the [loadable] module that owns this
1160 * socket at sock_release time we decrement its refcnt.
1161 */
55737fda
SH
1162 if (!try_module_get(sock->ops->owner))
1163 goto out_module_busy;
1164
1da177e4
LT
1165 /*
1166 * Now that we're done with the ->create function, the [loadable]
1167 * module can have its refcnt decremented
1168 */
55737fda 1169 module_put(pf->owner);
7420ed23
VY
1170 err = security_socket_post_create(sock, family, type, protocol, kern);
1171 if (err)
1172 goto out_release;
55737fda 1173 *res = sock;
1da177e4 1174
55737fda
SH
1175 return 0;
1176
1177out_module_busy:
1178 err = -EAFNOSUPPORT;
1da177e4 1179out_module_put:
55737fda
SH
1180 sock->ops = NULL;
1181 module_put(pf->owner);
1182out_sock_release:
1da177e4 1183 sock_release(sock);
55737fda
SH
1184 return err;
1185
1186out_release:
1187 rcu_read_unlock();
1188 goto out_sock_release;
1da177e4
LT
1189}
1190
/*
 * Create a socket via __sock_create(), passing 0 as the 'kern' argument.
 * Returns whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1195
/*
 * Create a socket via __sock_create(), passing 1 as the 'kern' argument.
 * Returns whatever __sock_create() returns.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1200
1201asmlinkage long sys_socket(int family, int type, int protocol)
1202{
1203 int retval;
1204 struct socket *sock;
1205
1206 retval = sock_create(family, type, protocol, &sock);
1207 if (retval < 0)
1208 goto out;
1209
1210 retval = sock_map_fd(sock);
1211 if (retval < 0)
1212 goto out_release;
1213
1214out:
1215 /* It may be already another descriptor 8) Not kernel problem. */
1216 return retval;
1217
1218out_release:
1219 sock_release(sock);
1220 return retval;
1221}
1222
1223/*
1224 * Create a pair of connected sockets.
1225 */
1226
89bddce5
SH
1227asmlinkage long sys_socketpair(int family, int type, int protocol,
1228 int __user *usockvec)
1da177e4
LT
1229{
1230 struct socket *sock1, *sock2;
1231 int fd1, fd2, err;
db349509 1232 struct file *newfile1, *newfile2;
1da177e4
LT
1233
1234 /*
1235 * Obtain the first socket and check if the underlying protocol
1236 * supports the socketpair call.
1237 */
1238
1239 err = sock_create(family, type, protocol, &sock1);
1240 if (err < 0)
1241 goto out;
1242
1243 err = sock_create(family, type, protocol, &sock2);
1244 if (err < 0)
1245 goto out_release_1;
1246
1247 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1248 if (err < 0)
1da177e4
LT
1249 goto out_release_both;
1250
db349509
AV
1251 fd1 = sock_alloc_fd(&newfile1);
1252 if (unlikely(fd1 < 0))
1253 goto out_release_both;
1da177e4 1254
db349509
AV
1255 fd2 = sock_alloc_fd(&newfile2);
1256 if (unlikely(fd2 < 0)) {
1257 put_filp(newfile1);
1258 put_unused_fd(fd1);
1da177e4 1259 goto out_release_both;
db349509 1260 }
1da177e4 1261
db349509
AV
1262 err = sock_attach_fd(sock1, newfile1);
1263 if (unlikely(err < 0)) {
1264 goto out_fd2;
1265 }
1266
1267 err = sock_attach_fd(sock2, newfile2);
1268 if (unlikely(err < 0)) {
1269 fput(newfile1);
1270 goto out_fd1;
1271 }
1272
1273 err = audit_fd_pair(fd1, fd2);
1274 if (err < 0) {
1275 fput(newfile1);
1276 fput(newfile2);
1277 goto out_fd;
1278 }
1da177e4 1279
db349509
AV
1280 fd_install(fd1, newfile1);
1281 fd_install(fd2, newfile2);
1da177e4
LT
1282 /* fd1 and fd2 may be already another descriptors.
1283 * Not kernel problem.
1284 */
1285
89bddce5 1286 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1287 if (!err)
1288 err = put_user(fd2, &usockvec[1]);
1289 if (!err)
1290 return 0;
1291
1292 sys_close(fd2);
1293 sys_close(fd1);
1294 return err;
1295
1da177e4 1296out_release_both:
89bddce5 1297 sock_release(sock2);
1da177e4 1298out_release_1:
89bddce5 1299 sock_release(sock1);
1da177e4
LT
1300out:
1301 return err;
db349509
AV
1302
1303out_fd2:
1304 put_filp(newfile1);
1305 sock_release(sock1);
1306out_fd1:
1307 put_filp(newfile2);
1308 sock_release(sock2);
1309out_fd:
1310 put_unused_fd(fd1);
1311 put_unused_fd(fd2);
1312 goto out;
1da177e4
LT
1313}
1314
1da177e4
LT
1315/*
1316 * Bind a name to a socket. Nothing much to do here since it's
1317 * the protocol's responsibility to handle the local address.
1318 *
1319 * We move the socket address to kernel space before we call
1320 * the protocol layer (having also checked the address is ok).
1321 */
1322
1323asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1324{
1325 struct socket *sock;
1326 char address[MAX_SOCK_ADDR];
6cb153ca 1327 int err, fput_needed;
1da177e4 1328
89bddce5 1329 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1330 if (sock) {
89bddce5
SH
1331 err = move_addr_to_kernel(umyaddr, addrlen, address);
1332 if (err >= 0) {
1333 err = security_socket_bind(sock,
1334 (struct sockaddr *)address,
1335 addrlen);
6cb153ca
BL
1336 if (!err)
1337 err = sock->ops->bind(sock,
89bddce5
SH
1338 (struct sockaddr *)
1339 address, addrlen);
1da177e4 1340 }
6cb153ca 1341 fput_light(sock->file, fput_needed);
89bddce5 1342 }
1da177e4
LT
1343 return err;
1344}
1345
1da177e4
LT
1346/*
1347 * Perform a listen. Basically, we allow the protocol to do anything
1348 * necessary for a listen, and if that works, we mark the socket as
1349 * ready for listening.
1350 */
1351
7a42c217 1352int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1353
1354asmlinkage long sys_listen(int fd, int backlog)
1355{
1356 struct socket *sock;
6cb153ca 1357 int err, fput_needed;
89bddce5
SH
1358
1359 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1360 if (sock) {
1361 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1362 backlog = sysctl_somaxconn;
1363
1364 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1365 if (!err)
1366 err = sock->ops->listen(sock, backlog);
1da177e4 1367
6cb153ca 1368 fput_light(sock->file, fput_needed);
1da177e4
LT
1369 }
1370 return err;
1371}
1372
1da177e4
LT
1373/*
1374 * For accept, we attempt to create a new socket, set up the link
1375 * with the client, wake up the client, then return the new
1376 * connected fd. We collect the address of the connector in kernel
1377 * space and move it to user at the very end. This is unclean because
1378 * we open the socket then return an error.
1379 *
1380 * 1003.1g adds the ability to recvmsg() to query connection pending
1381 * status to recvmsg. We need to add that support in a way thats
1382 * clean when we restucture accept also.
1383 */
1384
89bddce5
SH
1385asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1386 int __user *upeer_addrlen)
1da177e4
LT
1387{
1388 struct socket *sock, *newsock;
39d8c1b6 1389 struct file *newfile;
6cb153ca 1390 int err, len, newfd, fput_needed;
1da177e4
LT
1391 char address[MAX_SOCK_ADDR];
1392
6cb153ca 1393 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1394 if (!sock)
1395 goto out;
1396
1397 err = -ENFILE;
89bddce5 1398 if (!(newsock = sock_alloc()))
1da177e4
LT
1399 goto out_put;
1400
1401 newsock->type = sock->type;
1402 newsock->ops = sock->ops;
1403
1da177e4
LT
1404 /*
1405 * We don't need try_module_get here, as the listening socket (sock)
1406 * has the protocol module (sock->ops->owner) held.
1407 */
1408 __module_get(newsock->ops->owner);
1409
39d8c1b6
DM
1410 newfd = sock_alloc_fd(&newfile);
1411 if (unlikely(newfd < 0)) {
1412 err = newfd;
9a1875e6
DM
1413 sock_release(newsock);
1414 goto out_put;
39d8c1b6
DM
1415 }
1416
1417 err = sock_attach_fd(newsock, newfile);
1418 if (err < 0)
79f4f642 1419 goto out_fd_simple;
39d8c1b6 1420
a79af59e
FF
1421 err = security_socket_accept(sock, newsock);
1422 if (err)
39d8c1b6 1423 goto out_fd;
a79af59e 1424
1da177e4
LT
1425 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1426 if (err < 0)
39d8c1b6 1427 goto out_fd;
1da177e4
LT
1428
1429 if (upeer_sockaddr) {
89bddce5
SH
1430 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1431 &len, 2) < 0) {
1da177e4 1432 err = -ECONNABORTED;
39d8c1b6 1433 goto out_fd;
1da177e4 1434 }
89bddce5
SH
1435 err = move_addr_to_user(address, len, upeer_sockaddr,
1436 upeer_addrlen);
1da177e4 1437 if (err < 0)
39d8c1b6 1438 goto out_fd;
1da177e4
LT
1439 }
1440
1441 /* File flags are not inherited via accept() unlike another OSes. */
1442
39d8c1b6
DM
1443 fd_install(newfd, newfile);
1444 err = newfd;
1da177e4
LT
1445
1446 security_socket_post_accept(sock, newsock);
1447
1448out_put:
6cb153ca 1449 fput_light(sock->file, fput_needed);
1da177e4
LT
1450out:
1451 return err;
79f4f642
AD
1452out_fd_simple:
1453 sock_release(newsock);
1454 put_filp(newfile);
1455 put_unused_fd(newfd);
1456 goto out_put;
39d8c1b6 1457out_fd:
9606a216 1458 fput(newfile);
39d8c1b6 1459 put_unused_fd(newfd);
1da177e4
LT
1460 goto out_put;
1461}
1462
1da177e4
LT
1463/*
1464 * Attempt to connect to a socket with the server address. The address
1465 * is in user space so we verify it is OK and move it to kernel space.
1466 *
1467 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1468 * break bindings
1469 *
1470 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1471 * other SEQPACKET protocols that take time to connect() as it doesn't
1472 * include the -EINPROGRESS status for such sockets.
1473 */
1474
89bddce5
SH
1475asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1476 int addrlen)
1da177e4
LT
1477{
1478 struct socket *sock;
1479 char address[MAX_SOCK_ADDR];
6cb153ca 1480 int err, fput_needed;
1da177e4 1481
6cb153ca 1482 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1483 if (!sock)
1484 goto out;
1485 err = move_addr_to_kernel(uservaddr, addrlen, address);
1486 if (err < 0)
1487 goto out_put;
1488
89bddce5
SH
1489 err =
1490 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1491 if (err)
1492 goto out_put;
1493
89bddce5 1494 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1495 sock->file->f_flags);
1496out_put:
6cb153ca 1497 fput_light(sock->file, fput_needed);
1da177e4
LT
1498out:
1499 return err;
1500}
1501
1502/*
1503 * Get the local address ('name') of a socket object. Move the obtained
1504 * name to user space.
1505 */
1506
89bddce5
SH
1507asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1508 int __user *usockaddr_len)
1da177e4
LT
1509{
1510 struct socket *sock;
1511 char address[MAX_SOCK_ADDR];
6cb153ca 1512 int len, err, fput_needed;
89bddce5 1513
6cb153ca 1514 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1515 if (!sock)
1516 goto out;
1517
1518 err = security_socket_getsockname(sock);
1519 if (err)
1520 goto out_put;
1521
1522 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1523 if (err)
1524 goto out_put;
1525 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1526
1527out_put:
6cb153ca 1528 fput_light(sock->file, fput_needed);
1da177e4
LT
1529out:
1530 return err;
1531}
1532
1533/*
1534 * Get the remote address ('name') of a socket object. Move the obtained
1535 * name to user space.
1536 */
1537
89bddce5
SH
1538asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1539 int __user *usockaddr_len)
1da177e4
LT
1540{
1541 struct socket *sock;
1542 char address[MAX_SOCK_ADDR];
6cb153ca 1543 int len, err, fput_needed;
1da177e4 1544
89bddce5
SH
1545 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1546 if (sock != NULL) {
1da177e4
LT
1547 err = security_socket_getpeername(sock);
1548 if (err) {
6cb153ca 1549 fput_light(sock->file, fput_needed);
1da177e4
LT
1550 return err;
1551 }
1552
89bddce5
SH
1553 err =
1554 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1555 1);
1da177e4 1556 if (!err)
89bddce5
SH
1557 err = move_addr_to_user(address, len, usockaddr,
1558 usockaddr_len);
6cb153ca 1559 fput_light(sock->file, fput_needed);
1da177e4
LT
1560 }
1561 return err;
1562}
1563
1564/*
1565 * Send a datagram to a given address. We move the address into kernel
1566 * space and check the user space data area is readable before invoking
1567 * the protocol.
1568 */
1569
89bddce5
SH
1570asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1571 unsigned flags, struct sockaddr __user *addr,
1572 int addr_len)
1da177e4
LT
1573{
1574 struct socket *sock;
1575 char address[MAX_SOCK_ADDR];
1576 int err;
1577 struct msghdr msg;
1578 struct iovec iov;
6cb153ca
BL
1579 int fput_needed;
1580 struct file *sock_file;
1581
1582 sock_file = fget_light(fd, &fput_needed);
4387ff75 1583 err = -EBADF;
6cb153ca 1584 if (!sock_file)
4387ff75 1585 goto out;
6cb153ca
BL
1586
1587 sock = sock_from_file(sock_file, &err);
1da177e4 1588 if (!sock)
6cb153ca 1589 goto out_put;
89bddce5
SH
1590 iov.iov_base = buff;
1591 iov.iov_len = len;
1592 msg.msg_name = NULL;
1593 msg.msg_iov = &iov;
1594 msg.msg_iovlen = 1;
1595 msg.msg_control = NULL;
1596 msg.msg_controllen = 0;
1597 msg.msg_namelen = 0;
6cb153ca 1598 if (addr) {
1da177e4
LT
1599 err = move_addr_to_kernel(addr, addr_len, address);
1600 if (err < 0)
1601 goto out_put;
89bddce5
SH
1602 msg.msg_name = address;
1603 msg.msg_namelen = addr_len;
1da177e4
LT
1604 }
1605 if (sock->file->f_flags & O_NONBLOCK)
1606 flags |= MSG_DONTWAIT;
1607 msg.msg_flags = flags;
1608 err = sock_sendmsg(sock, &msg, len);
1609
89bddce5 1610out_put:
6cb153ca 1611 fput_light(sock_file, fput_needed);
4387ff75 1612out:
1da177e4
LT
1613 return err;
1614}
1615
1616/*
89bddce5 1617 * Send a datagram down a socket.
1da177e4
LT
1618 */
1619
89bddce5 1620asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1621{
1622 return sys_sendto(fd, buff, len, flags, NULL, 0);
1623}
1624
1625/*
89bddce5 1626 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1627 * sender. We verify the buffers are writable and if needed move the
1628 * sender address from kernel to user space.
1629 */
1630
89bddce5
SH
1631asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1632 unsigned flags, struct sockaddr __user *addr,
1633 int __user *addr_len)
1da177e4
LT
1634{
1635 struct socket *sock;
1636 struct iovec iov;
1637 struct msghdr msg;
1638 char address[MAX_SOCK_ADDR];
89bddce5 1639 int err, err2;
6cb153ca
BL
1640 struct file *sock_file;
1641 int fput_needed;
1642
1643 sock_file = fget_light(fd, &fput_needed);
4387ff75 1644 err = -EBADF;
6cb153ca 1645 if (!sock_file)
4387ff75 1646 goto out;
1da177e4 1647
6cb153ca 1648 sock = sock_from_file(sock_file, &err);
1da177e4 1649 if (!sock)
4387ff75 1650 goto out_put;
1da177e4 1651
89bddce5
SH
1652 msg.msg_control = NULL;
1653 msg.msg_controllen = 0;
1654 msg.msg_iovlen = 1;
1655 msg.msg_iov = &iov;
1656 iov.iov_len = size;
1657 iov.iov_base = ubuf;
1658 msg.msg_name = address;
1659 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1660 if (sock->file->f_flags & O_NONBLOCK)
1661 flags |= MSG_DONTWAIT;
89bddce5 1662 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1663
89bddce5
SH
1664 if (err >= 0 && addr != NULL) {
1665 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1666 if (err2 < 0)
1667 err = err2;
1da177e4 1668 }
4387ff75 1669out_put:
6cb153ca 1670 fput_light(sock_file, fput_needed);
4387ff75 1671out:
1da177e4
LT
1672 return err;
1673}
1674
1675/*
89bddce5 1676 * Receive a datagram from a socket.
1da177e4
LT
1677 */
1678
89bddce5
SH
1679asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1680 unsigned flags)
1da177e4
LT
1681{
1682 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1683}
1684
1685/*
1686 * Set a socket option. Because we don't know the option lengths we have
1687 * to pass the user mode parameter for the protocols to sort out.
1688 */
1689
89bddce5
SH
1690asmlinkage long sys_setsockopt(int fd, int level, int optname,
1691 char __user *optval, int optlen)
1da177e4 1692{
6cb153ca 1693 int err, fput_needed;
1da177e4
LT
1694 struct socket *sock;
1695
1696 if (optlen < 0)
1697 return -EINVAL;
89bddce5
SH
1698
1699 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1700 if (sock != NULL) {
1701 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1702 if (err)
1703 goto out_put;
1da177e4
LT
1704
1705 if (level == SOL_SOCKET)
89bddce5
SH
1706 err =
1707 sock_setsockopt(sock, level, optname, optval,
1708 optlen);
1da177e4 1709 else
89bddce5
SH
1710 err =
1711 sock->ops->setsockopt(sock, level, optname, optval,
1712 optlen);
6cb153ca
BL
1713out_put:
1714 fput_light(sock->file, fput_needed);
1da177e4
LT
1715 }
1716 return err;
1717}
1718
1719/*
1720 * Get a socket option. Because we don't know the option lengths we have
1721 * to pass a user mode parameter for the protocols to sort out.
1722 */
1723
89bddce5
SH
1724asmlinkage long sys_getsockopt(int fd, int level, int optname,
1725 char __user *optval, int __user *optlen)
1da177e4 1726{
6cb153ca 1727 int err, fput_needed;
1da177e4
LT
1728 struct socket *sock;
1729
89bddce5
SH
1730 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1731 if (sock != NULL) {
6cb153ca
BL
1732 err = security_socket_getsockopt(sock, level, optname);
1733 if (err)
1734 goto out_put;
1da177e4
LT
1735
1736 if (level == SOL_SOCKET)
89bddce5
SH
1737 err =
1738 sock_getsockopt(sock, level, optname, optval,
1739 optlen);
1da177e4 1740 else
89bddce5
SH
1741 err =
1742 sock->ops->getsockopt(sock, level, optname, optval,
1743 optlen);
6cb153ca
BL
1744out_put:
1745 fput_light(sock->file, fput_needed);
1da177e4
LT
1746 }
1747 return err;
1748}
1749
1da177e4
LT
1750/*
1751 * Shutdown a socket.
1752 */
1753
1754asmlinkage long sys_shutdown(int fd, int how)
1755{
6cb153ca 1756 int err, fput_needed;
1da177e4
LT
1757 struct socket *sock;
1758
89bddce5
SH
1759 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1760 if (sock != NULL) {
1da177e4 1761 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1762 if (!err)
1763 err = sock->ops->shutdown(sock, how);
1764 fput_light(sock->file, fput_needed);
1da177e4
LT
1765 }
1766 return err;
1767}
1768
89bddce5 1769/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1770 * fields which are the same type (int / unsigned) on our platforms.
1771 */
1772#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1773#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1774#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1775
1da177e4
LT
1776/*
1777 * BSD sendmsg interface
1778 */
1779
1780asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1781{
89bddce5
SH
1782 struct compat_msghdr __user *msg_compat =
1783 (struct compat_msghdr __user *)msg;
1da177e4
LT
1784 struct socket *sock;
1785 char address[MAX_SOCK_ADDR];
1786 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1787 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1788 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1789 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1790 unsigned char *ctl_buf = ctl;
1791 struct msghdr msg_sys;
1792 int err, ctl_len, iov_size, total_len;
6cb153ca 1793 int fput_needed;
89bddce5 1794
1da177e4
LT
1795 err = -EFAULT;
1796 if (MSG_CMSG_COMPAT & flags) {
1797 if (get_compat_msghdr(&msg_sys, msg_compat))
1798 return -EFAULT;
89bddce5
SH
1799 }
1800 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1801 return -EFAULT;
1802
6cb153ca 1803 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1804 if (!sock)
1da177e4
LT
1805 goto out;
1806
1807 /* do not move before msg_sys is valid */
1808 err = -EMSGSIZE;
1809 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1810 goto out_put;
1811
89bddce5 1812 /* Check whether to allocate the iovec area */
1da177e4
LT
1813 err = -ENOMEM;
1814 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1815 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1816 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1817 if (!iov)
1818 goto out_put;
1819 }
1820
1821 /* This will also move the address data into kernel space */
1822 if (MSG_CMSG_COMPAT & flags) {
1823 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1824 } else
1825 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1826 if (err < 0)
1da177e4
LT
1827 goto out_freeiov;
1828 total_len = err;
1829
1830 err = -ENOBUFS;
1831
1832 if (msg_sys.msg_controllen > INT_MAX)
1833 goto out_freeiov;
89bddce5 1834 ctl_len = msg_sys.msg_controllen;
1da177e4 1835 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1836 err =
1837 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1838 sizeof(ctl));
1da177e4
LT
1839 if (err)
1840 goto out_freeiov;
1841 ctl_buf = msg_sys.msg_control;
8920e8f9 1842 ctl_len = msg_sys.msg_controllen;
1da177e4 1843 } else if (ctl_len) {
89bddce5 1844 if (ctl_len > sizeof(ctl)) {
1da177e4 1845 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1846 if (ctl_buf == NULL)
1da177e4
LT
1847 goto out_freeiov;
1848 }
1849 err = -EFAULT;
1850 /*
1851 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1852 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1853 * checking falls down on this.
1854 */
89bddce5
SH
1855 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1856 ctl_len))
1da177e4
LT
1857 goto out_freectl;
1858 msg_sys.msg_control = ctl_buf;
1859 }
1860 msg_sys.msg_flags = flags;
1861
1862 if (sock->file->f_flags & O_NONBLOCK)
1863 msg_sys.msg_flags |= MSG_DONTWAIT;
1864 err = sock_sendmsg(sock, &msg_sys, total_len);
1865
1866out_freectl:
89bddce5 1867 if (ctl_buf != ctl)
1da177e4
LT
1868 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1869out_freeiov:
1870 if (iov != iovstack)
1871 sock_kfree_s(sock->sk, iov, iov_size);
1872out_put:
6cb153ca 1873 fput_light(sock->file, fput_needed);
89bddce5 1874out:
1da177e4
LT
1875 return err;
1876}
1877
1878/*
1879 * BSD recvmsg interface
1880 */
1881
89bddce5
SH
1882asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1883 unsigned int flags)
1da177e4 1884{
89bddce5
SH
1885 struct compat_msghdr __user *msg_compat =
1886 (struct compat_msghdr __user *)msg;
1da177e4
LT
1887 struct socket *sock;
1888 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1889 struct iovec *iov = iovstack;
1da177e4
LT
1890 struct msghdr msg_sys;
1891 unsigned long cmsg_ptr;
1892 int err, iov_size, total_len, len;
6cb153ca 1893 int fput_needed;
1da177e4
LT
1894
1895 /* kernel mode address */
1896 char addr[MAX_SOCK_ADDR];
1897
1898 /* user mode address pointers */
1899 struct sockaddr __user *uaddr;
1900 int __user *uaddr_len;
89bddce5 1901
1da177e4
LT
1902 if (MSG_CMSG_COMPAT & flags) {
1903 if (get_compat_msghdr(&msg_sys, msg_compat))
1904 return -EFAULT;
89bddce5
SH
1905 }
1906 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1907 return -EFAULT;
1da177e4 1908
6cb153ca 1909 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1910 if (!sock)
1911 goto out;
1912
1913 err = -EMSGSIZE;
1914 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1915 goto out_put;
89bddce5
SH
1916
1917 /* Check whether to allocate the iovec area */
1da177e4
LT
1918 err = -ENOMEM;
1919 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1920 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1921 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1922 if (!iov)
1923 goto out_put;
1924 }
1925
1926 /*
89bddce5
SH
1927 * Save the user-mode address (verify_iovec will change the
1928 * kernel msghdr to use the kernel address space)
1da177e4 1929 */
89bddce5
SH
1930
1931 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1932 uaddr_len = COMPAT_NAMELEN(msg);
1933 if (MSG_CMSG_COMPAT & flags) {
1934 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1935 } else
1936 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1937 if (err < 0)
1938 goto out_freeiov;
89bddce5 1939 total_len = err;
1da177e4
LT
1940
1941 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1942 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1943
1da177e4
LT
1944 if (sock->file->f_flags & O_NONBLOCK)
1945 flags |= MSG_DONTWAIT;
1946 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1947 if (err < 0)
1948 goto out_freeiov;
1949 len = err;
1950
1951 if (uaddr != NULL) {
89bddce5
SH
1952 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1953 uaddr_len);
1da177e4
LT
1954 if (err < 0)
1955 goto out_freeiov;
1956 }
37f7f421
DM
1957 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1958 COMPAT_FLAGS(msg));
1da177e4
LT
1959 if (err)
1960 goto out_freeiov;
1961 if (MSG_CMSG_COMPAT & flags)
89bddce5 1962 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1963 &msg_compat->msg_controllen);
1964 else
89bddce5 1965 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1966 &msg->msg_controllen);
1967 if (err)
1968 goto out_freeiov;
1969 err = len;
1970
1971out_freeiov:
1972 if (iov != iovstack)
1973 sock_kfree_s(sock->sk, iov, iov_size);
1974out_put:
6cb153ca 1975 fput_light(sock->file, fput_needed);
1da177e4
LT
1976out:
1977 return err;
1978}
1979
1980#ifdef __ARCH_WANT_SYS_SOCKETCALL
1981
1982/* Argument list sizes for sys_socketcall */
1983#define AL(x) ((x) * sizeof(unsigned long))
89bddce5
SH
1984static const unsigned char nargs[18]={
1985 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1986 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1987 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
1988};
1989
1da177e4
LT
1990#undef AL
1991
1992/*
89bddce5 1993 * System call vectors.
1da177e4
LT
1994 *
1995 * Argument checking cleaned up. Saved 20% in size.
1996 * This function doesn't need to set the kernel lock because
89bddce5 1997 * it is set by the callees.
1da177e4
LT
1998 */
1999
2000asmlinkage long sys_socketcall(int call, unsigned long __user *args)
2001{
2002 unsigned long a[6];
89bddce5 2003 unsigned long a0, a1;
1da177e4
LT
2004 int err;
2005
89bddce5 2006 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2007 return -EINVAL;
2008
2009 /* copy_from_user should be SMP safe. */
2010 if (copy_from_user(a, args, nargs[call]))
2011 return -EFAULT;
3ec3b2fb 2012
89bddce5 2013 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2014 if (err)
2015 return err;
2016
89bddce5
SH
2017 a0 = a[0];
2018 a1 = a[1];
2019
2020 switch (call) {
2021 case SYS_SOCKET:
2022 err = sys_socket(a0, a1, a[2]);
2023 break;
2024 case SYS_BIND:
2025 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2026 break;
2027 case SYS_CONNECT:
2028 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2029 break;
2030 case SYS_LISTEN:
2031 err = sys_listen(a0, a1);
2032 break;
2033 case SYS_ACCEPT:
2034 err =
2035 sys_accept(a0, (struct sockaddr __user *)a1,
2036 (int __user *)a[2]);
2037 break;
2038 case SYS_GETSOCKNAME:
2039 err =
2040 sys_getsockname(a0, (struct sockaddr __user *)a1,
2041 (int __user *)a[2]);
2042 break;
2043 case SYS_GETPEERNAME:
2044 err =
2045 sys_getpeername(a0, (struct sockaddr __user *)a1,
2046 (int __user *)a[2]);
2047 break;
2048 case SYS_SOCKETPAIR:
2049 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2050 break;
2051 case SYS_SEND:
2052 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2053 break;
2054 case SYS_SENDTO:
2055 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2056 (struct sockaddr __user *)a[4], a[5]);
2057 break;
2058 case SYS_RECV:
2059 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2060 break;
2061 case SYS_RECVFROM:
2062 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2063 (struct sockaddr __user *)a[4],
2064 (int __user *)a[5]);
2065 break;
2066 case SYS_SHUTDOWN:
2067 err = sys_shutdown(a0, a1);
2068 break;
2069 case SYS_SETSOCKOPT:
2070 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2071 break;
2072 case SYS_GETSOCKOPT:
2073 err =
2074 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2075 (int __user *)a[4]);
2076 break;
2077 case SYS_SENDMSG:
2078 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2079 break;
2080 case SYS_RECVMSG:
2081 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2082 break;
2083 default:
2084 err = -EINVAL;
2085 break;
1da177e4
LT
2086 }
2087 return err;
2088}
2089
89bddce5 2090#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2091
55737fda
SH
2092/**
2093 * sock_register - add a socket protocol handler
2094 * @ops: description of protocol
2095 *
1da177e4
LT
2096 * This function is called by a protocol handler that wants to
2097 * advertise its address family, and have it linked into the
55737fda
SH
2098 * socket interface. The value ops->family coresponds to the
2099 * socket system call protocol family.
1da177e4 2100 */
f0fd27d4 2101int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2102{
2103 int err;
2104
2105 if (ops->family >= NPROTO) {
89bddce5
SH
2106 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2107 NPROTO);
1da177e4
LT
2108 return -ENOBUFS;
2109 }
55737fda
SH
2110
2111 spin_lock(&net_family_lock);
2112 if (net_families[ops->family])
2113 err = -EEXIST;
2114 else {
89bddce5 2115 net_families[ops->family] = ops;
1da177e4
LT
2116 err = 0;
2117 }
55737fda
SH
2118 spin_unlock(&net_family_lock);
2119
89bddce5 2120 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2121 return err;
2122}
2123
55737fda
SH
2124/**
2125 * sock_unregister - remove a protocol handler
2126 * @family: protocol family to remove
2127 *
1da177e4
LT
2128 * This function is called by a protocol handler that wants to
2129 * remove its address family, and have it unlinked from the
55737fda
SH
2130 * new socket creation.
2131 *
2132 * If protocol handler is a module, then it can use module reference
2133 * counts to protect against new references. If protocol handler is not
2134 * a module then it needs to provide its own protection in
2135 * the ops->create routine.
1da177e4 2136 */
f0fd27d4 2137void sock_unregister(int family)
1da177e4 2138{
f0fd27d4 2139 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2140
55737fda 2141 spin_lock(&net_family_lock);
89bddce5 2142 net_families[family] = NULL;
55737fda
SH
2143 spin_unlock(&net_family_lock);
2144
2145 synchronize_rcu();
2146
89bddce5 2147 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2148}
2149
77d76ea3 2150static int __init sock_init(void)
1da177e4
LT
2151{
2152 /*
89bddce5 2153 * Initialize sock SLAB cache.
1da177e4 2154 */
89bddce5 2155
1da177e4
LT
2156 sk_init();
2157
1da177e4 2158 /*
89bddce5 2159 * Initialize skbuff SLAB cache
1da177e4
LT
2160 */
2161 skb_init();
1da177e4
LT
2162
2163 /*
89bddce5 2164 * Initialize the protocols module.
1da177e4
LT
2165 */
2166
2167 init_inodecache();
2168 register_filesystem(&sock_fs_type);
2169 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2170
2171 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2172 */
2173
2174#ifdef CONFIG_NETFILTER
2175 netfilter_init();
2176#endif
cbeb321a
DM
2177
2178 return 0;
1da177e4
LT
2179}
2180
77d76ea3
AK
2181core_initcall(sock_init); /* early initcall */
2182
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 *	socket_seq_show - print the number of sockets in use.
 *	@seq: seq_file to print into
 *
 *	Adds up the per-CPU sockets_in_use counters over every possible
 *	CPU and prints the total as "sockets: used N".
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2199
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 *	compat_sock_ioctl - forward an ioctl to the socket's compat handler.
 *
 *	The socket is taken from file->private_data. Returns -ENOIOCTLCMD
 *	when the socket's ops provide no compat_ioctl method, otherwise
 *	whatever that method returns.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2213
ac5a488e
SS
2214int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2215{
2216 return sock->ops->bind(sock, addr, addrlen);
2217}
2218
2219int kernel_listen(struct socket *sock, int backlog)
2220{
2221 return sock->ops->listen(sock, backlog);
2222}
2223
2224int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2225{
2226 struct sock *sk = sock->sk;
2227 int err;
2228
2229 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2230 newsock);
2231 if (err < 0)
2232 goto done;
2233
2234 err = sock->ops->accept(sock, *newsock, flags);
2235 if (err < 0) {
2236 sock_release(*newsock);
2237 goto done;
2238 }
2239
2240 (*newsock)->ops = sock->ops;
2241
2242done:
2243 return err;
2244}
2245
2246int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2247 int flags)
ac5a488e
SS
2248{
2249 return sock->ops->connect(sock, addr, addrlen, flags);
2250}
2251
2252int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2253 int *addrlen)
2254{
2255 return sock->ops->getname(sock, addr, addrlen, 0);
2256}
2257
2258int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2259 int *addrlen)
2260{
2261 return sock->ops->getname(sock, addr, addrlen, 1);
2262}
2263
2264int kernel_getsockopt(struct socket *sock, int level, int optname,
2265 char *optval, int *optlen)
2266{
2267 mm_segment_t oldfs = get_fs();
2268 int err;
2269
2270 set_fs(KERNEL_DS);
2271 if (level == SOL_SOCKET)
2272 err = sock_getsockopt(sock, level, optname, optval, optlen);
2273 else
2274 err = sock->ops->getsockopt(sock, level, optname, optval,
2275 optlen);
2276 set_fs(oldfs);
2277 return err;
2278}
2279
2280int kernel_setsockopt(struct socket *sock, int level, int optname,
2281 char *optval, int optlen)
2282{
2283 mm_segment_t oldfs = get_fs();
2284 int err;
2285
2286 set_fs(KERNEL_DS);
2287 if (level == SOL_SOCKET)
2288 err = sock_setsockopt(sock, level, optname, optval, optlen);
2289 else
2290 err = sock->ops->setsockopt(sock, level, optname, optval,
2291 optlen);
2292 set_fs(oldfs);
2293 return err;
2294}
2295
2296int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2297 size_t size, int flags)
2298{
2299 if (sock->ops->sendpage)
2300 return sock->ops->sendpage(sock, page, offset, size, flags);
2301
2302 return sock_no_sendpage(sock, page, offset, size, flags);
2303}
2304
2305int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2306{
2307 mm_segment_t oldfs = get_fs();
2308 int err;
2309
2310 set_fs(KERNEL_DS);
2311 err = sock->ops->ioctl(sock, cmd, arg);
2312 set_fs(oldfs);
2313
2314 return err;
2315}
2316
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message helpers. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* socket operations. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);