[NET]: Make /proc/net per network namespace
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
151/*
89bddce5
SH
152 * Support routines.
153 * Move socket addresses back and forth across the kernel/user
154 * divide and look after the messy bits.
1da177e4
LT
155 */
156
89bddce5 157#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
158 16 for IP, 16 for IPX,
159 24 for IPv6,
89bddce5 160 about 80 for AX.25
1da177e4
LT
161 must be at least one bigger than
162 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 163 :unix_mkname()).
1da177e4 164 */
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
a35afb83 264 inode_init_once(&ei->vfs_inode);
1da177e4 265}
89bddce5 266
1da177e4
LT
267static int init_inodecache(void)
268{
269 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
270 sizeof(struct socket_alloc),
271 0,
272 (SLAB_HWCACHE_ALIGN |
273 SLAB_RECLAIM_ACCOUNT |
274 SLAB_MEM_SPREAD),
20c2df83 275 init_once);
1da177e4
LT
276 if (sock_inode_cachep == NULL)
277 return -ENOMEM;
278 return 0;
279}
280
281static struct super_operations sockfs_ops = {
282 .alloc_inode = sock_alloc_inode,
283 .destroy_inode =sock_destroy_inode,
284 .statfs = simple_statfs,
285};
286
454e2398 287static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
288 int flags, const char *dev_name, void *data,
289 struct vfsmount *mnt)
1da177e4 290{
454e2398
DH
291 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
292 mnt);
1da177e4
LT
293}
294
ba89966c 295static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
296
297static struct file_system_type sock_fs_type = {
298 .name = "sockfs",
299 .get_sb = sockfs_get_sb,
300 .kill_sb = kill_anon_super,
301};
89bddce5 302
1da177e4
LT
303static int sockfs_delete_dentry(struct dentry *dentry)
304{
304e61e6
ED
305 /*
306 * At creation time, we pretended this dentry was hashed
307 * (by clearing DCACHE_UNHASHED bit in d_flags)
308 * At delete time, we restore the truth : not hashed.
309 * (so that dput() can proceed correctly)
310 */
311 dentry->d_flags |= DCACHE_UNHASHED;
312 return 0;
1da177e4 313}
c23fbb6b
ED
314
315/*
316 * sockfs_dname() is called from d_path().
317 */
318static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
319{
320 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
321 dentry->d_inode->i_ino);
322}
323
1da177e4 324static struct dentry_operations sockfs_dentry_operations = {
89bddce5 325 .d_delete = sockfs_delete_dentry,
c23fbb6b 326 .d_dname = sockfs_dname,
1da177e4
LT
327};
328
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE not valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
345
39d8c1b6 346static int sock_alloc_fd(struct file **filep)
1da177e4
LT
347{
348 int fd;
1da177e4
LT
349
350 fd = get_unused_fd();
39d8c1b6 351 if (likely(fd >= 0)) {
1da177e4
LT
352 struct file *file = get_empty_filp();
353
39d8c1b6
DM
354 *filep = file;
355 if (unlikely(!file)) {
1da177e4 356 put_unused_fd(fd);
39d8c1b6 357 return -ENFILE;
1da177e4 358 }
39d8c1b6
DM
359 } else
360 *filep = NULL;
361 return fd;
362}
1da177e4 363
39d8c1b6
DM
364static int sock_attach_fd(struct socket *sock, struct file *file)
365{
c23fbb6b 366 struct qstr name = { .name = "" };
39d8c1b6 367
c23fbb6b 368 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 369 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
370 return -ENOMEM;
371
3126a42c 372 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
373 /*
374 * We dont want to push this dentry into global dentry hash table.
375 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
376 * This permits a working /proc/$pid/fd/XXX on sockets
377 */
3126a42c
JS
378 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
379 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
380 file->f_path.mnt = mntget(sock_mnt);
381 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
382
383 sock->file = file;
384 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
385 file->f_mode = FMODE_READ | FMODE_WRITE;
386 file->f_flags = O_RDWR;
387 file->f_pos = 0;
388 file->private_data = sock;
1da177e4 389
39d8c1b6
DM
390 return 0;
391}
392
/*
 *	Allocate a descriptor plus file for @sock, attach them and publish
 *	the descriptor in the current process. Returns the descriptor on
 *	success or a negative errno; on failure nothing stays allocated.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		/* undo both halves of sock_alloc_fd() */
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
410
6cb153ca
BL
411static struct socket *sock_from_file(struct file *file, int *err)
412{
6cb153ca
BL
413 if (file->f_op == &socket_file_ops)
414 return file->private_data; /* set in sock_map_fd */
415
23bb80d2
ED
416 *err = -ENOTSOCK;
417 return NULL;
6cb153ca
BL
418}
419
1da177e4
LT
420/**
421 * sockfd_lookup - Go from a file number to its socket slot
422 * @fd: file handle
423 * @err: pointer to an error code return
424 *
425 * The file handle passed in is locked and the socket it is bound
426 * too is returned. If an error occurs the err pointer is overwritten
427 * with a negative errno code and NULL is returned. The function checks
428 * for both invalid handles and passing a handle which is not a socket.
429 *
430 * On a success the socket object pointer is returned.
431 */
432
433struct socket *sockfd_lookup(int fd, int *err)
434{
435 struct file *file;
1da177e4
LT
436 struct socket *sock;
437
89bddce5
SH
438 file = fget(fd);
439 if (!file) {
1da177e4
LT
440 *err = -EBADF;
441 return NULL;
442 }
89bddce5 443
6cb153ca
BL
444 sock = sock_from_file(file, err);
445 if (!sock)
1da177e4 446 fput(file);
6cb153ca
BL
447 return sock;
448}
1da177e4 449
6cb153ca
BL
450static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
451{
452 struct file *file;
453 struct socket *sock;
454
3672558c 455 *err = -EBADF;
6cb153ca
BL
456 file = fget_light(fd, fput_needed);
457 if (file) {
458 sock = sock_from_file(file, err);
459 if (sock)
460 return sock;
461 fput_light(file, *fput_needed);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
466/**
467 * sock_alloc - allocate a socket
89bddce5 468 *
1da177e4
LT
469 * Allocate a new inode and socket object. The two are bound together
470 * and initialised. The socket is then returned. If we are out of inodes
471 * NULL is returned.
472 */
473
474static struct socket *sock_alloc(void)
475{
89bddce5
SH
476 struct inode *inode;
477 struct socket *sock;
1da177e4
LT
478
479 inode = new_inode(sock_mnt->mnt_sb);
480 if (!inode)
481 return NULL;
482
483 sock = SOCKET_I(inode);
484
89bddce5 485 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
486 inode->i_uid = current->fsuid;
487 inode->i_gid = current->fsgid;
488
489 get_cpu_var(sockets_in_use)++;
490 put_cpu_var(sockets_in_use);
491 return sock;
492}
493
494/*
495 * In theory you can't get an open on this inode, but /proc provides
496 * a back door. Remember to keep it shut otherwise you'll let the
497 * creepy crawlies in.
498 */
89bddce5 499
1da177e4
LT
500static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
501{
502 return -ENXIO;
503}
504
4b6f5d20 505const struct file_operations bad_sock_fops = {
1da177e4
LT
506 .owner = THIS_MODULE,
507 .open = sock_no_open,
508};
509
510/**
511 * sock_release - close a socket
512 * @sock: socket to close
513 *
514 * The socket is released from the protocol stack if it has a release
515 * callback, and the inode is then released if the socket is bound to
89bddce5 516 * an inode not a file.
1da177e4 517 */
89bddce5 518
1da177e4
LT
519void sock_release(struct socket *sock)
520{
521 if (sock->ops) {
522 struct module *owner = sock->ops->owner;
523
524 sock->ops->release(sock);
525 sock->ops = NULL;
526 module_put(owner);
527 }
528
529 if (sock->fasync_list)
530 printk(KERN_ERR "sock_release: fasync list not empty!\n");
531
532 get_cpu_var(sockets_in_use)--;
533 put_cpu_var(sockets_in_use);
534 if (!sock->file) {
535 iput(SOCK_INODE(sock));
536 return;
537 }
89bddce5 538 sock->file = NULL;
1da177e4
LT
539}
540
89bddce5 541static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
542 struct msghdr *msg, size_t size)
543{
544 struct sock_iocb *si = kiocb_to_siocb(iocb);
545 int err;
546
547 si->sock = sock;
548 si->scm = NULL;
549 si->msg = msg;
550 si->size = size;
551
552 err = security_socket_sendmsg(sock, msg, size);
553 if (err)
554 return err;
555
556 return sock->ops->sendmsg(iocb, sock, msg, size);
557}
558
559int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
560{
561 struct kiocb iocb;
562 struct sock_iocb siocb;
563 int ret;
564
565 init_sync_kiocb(&iocb, NULL);
566 iocb.private = &siocb;
567 ret = __sock_sendmsg(&iocb, sock, msg, size);
568 if (-EIOCBQUEUED == ret)
569 ret = wait_on_sync_kiocb(&iocb);
570 return ret;
571}
572
573int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
574 struct kvec *vec, size_t num, size_t size)
575{
576 mm_segment_t oldfs = get_fs();
577 int result;
578
579 set_fs(KERNEL_DS);
580 /*
581 * the following is safe, since for compiler definitions of kvec and
582 * iovec are identical, yielding the same in-core layout and alignment
583 */
89bddce5 584 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
585 msg->msg_iovlen = num;
586 result = sock_sendmsg(sock, msg, size);
587 set_fs(oldfs);
588 return result;
589}
590
92f37fd2
ED
591/*
592 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
593 */
594void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
595 struct sk_buff *skb)
596{
597 ktime_t kt = skb->tstamp;
598
599 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
600 struct timeval tv;
601 /* Race occurred between timestamp enabling and packet
602 receiving. Fill in the current time for now. */
603 if (kt.tv64 == 0)
604 kt = ktime_get_real();
605 skb->tstamp = kt;
606 tv = ktime_to_timeval(kt);
607 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
608 } else {
609 struct timespec ts;
610 /* Race occurred between timestamp enabling and packet
611 receiving. Fill in the current time for now. */
612 if (kt.tv64 == 0)
613 kt = ktime_get_real();
614 skb->tstamp = kt;
615 ts = ktime_to_timespec(kt);
616 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
617 }
618}
619
7c81fd8b
ACM
620EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
621
89bddce5 622static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
623 struct msghdr *msg, size_t size, int flags)
624{
625 int err;
626 struct sock_iocb *si = kiocb_to_siocb(iocb);
627
628 si->sock = sock;
629 si->scm = NULL;
630 si->msg = msg;
631 si->size = size;
632 si->flags = flags;
633
634 err = security_socket_recvmsg(sock, msg, size, flags);
635 if (err)
636 return err;
637
638 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
639}
640
89bddce5 641int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
642 size_t size, int flags)
643{
644 struct kiocb iocb;
645 struct sock_iocb siocb;
646 int ret;
647
89bddce5 648 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
649 iocb.private = &siocb;
650 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
651 if (-EIOCBQUEUED == ret)
652 ret = wait_on_sync_kiocb(&iocb);
653 return ret;
654}
655
89bddce5
SH
656int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
657 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
658{
659 mm_segment_t oldfs = get_fs();
660 int result;
661
662 set_fs(KERNEL_DS);
663 /*
664 * the following is safe, since for compiler definitions of kvec and
665 * iovec are identical, yielding the same in-core layout and alignment
666 */
89bddce5 667 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
668 result = sock_recvmsg(sock, msg, size, flags);
669 set_fs(oldfs);
670 return result;
671}
672
673static void sock_aio_dtor(struct kiocb *iocb)
674{
675 kfree(iocb->private);
676}
677
ce1d4d3e
CH
678static ssize_t sock_sendpage(struct file *file, struct page *page,
679 int offset, size_t size, loff_t *ppos, int more)
1da177e4 680{
1da177e4
LT
681 struct socket *sock;
682 int flags;
683
ce1d4d3e
CH
684 sock = file->private_data;
685
686 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
687 if (more)
688 flags |= MSG_MORE;
689
690 return sock->ops->sendpage(sock, page, offset, size, flags);
691}
1da177e4 692
ce1d4d3e 693static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 694 struct sock_iocb *siocb)
ce1d4d3e
CH
695{
696 if (!is_sync_kiocb(iocb)) {
697 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
698 if (!siocb)
699 return NULL;
1da177e4
LT
700 iocb->ki_dtor = sock_aio_dtor;
701 }
1da177e4 702
ce1d4d3e 703 siocb->kiocb = iocb;
ce1d4d3e
CH
704 iocb->private = siocb;
705 return siocb;
1da177e4
LT
706}
707
ce1d4d3e 708static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
709 struct file *file, const struct iovec *iov,
710 unsigned long nr_segs)
ce1d4d3e
CH
711{
712 struct socket *sock = file->private_data;
713 size_t size = 0;
714 int i;
1da177e4 715
89bddce5
SH
716 for (i = 0; i < nr_segs; i++)
717 size += iov[i].iov_len;
1da177e4 718
ce1d4d3e
CH
719 msg->msg_name = NULL;
720 msg->msg_namelen = 0;
721 msg->msg_control = NULL;
722 msg->msg_controllen = 0;
89bddce5 723 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
724 msg->msg_iovlen = nr_segs;
725 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
726
727 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
728}
729
027445c3
BP
730static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
731 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
732{
733 struct sock_iocb siocb, *x;
734
1da177e4
LT
735 if (pos != 0)
736 return -ESPIPE;
027445c3
BP
737
738 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
739 return 0;
740
027445c3
BP
741
742 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
743 if (!x)
744 return -ENOMEM;
027445c3 745 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
746}
747
ce1d4d3e 748static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
749 struct file *file, const struct iovec *iov,
750 unsigned long nr_segs)
1da177e4 751{
ce1d4d3e
CH
752 struct socket *sock = file->private_data;
753 size_t size = 0;
754 int i;
1da177e4 755
89bddce5
SH
756 for (i = 0; i < nr_segs; i++)
757 size += iov[i].iov_len;
1da177e4 758
ce1d4d3e
CH
759 msg->msg_name = NULL;
760 msg->msg_namelen = 0;
761 msg->msg_control = NULL;
762 msg->msg_controllen = 0;
89bddce5 763 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
764 msg->msg_iovlen = nr_segs;
765 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
766 if (sock->type == SOCK_SEQPACKET)
767 msg->msg_flags |= MSG_EOR;
1da177e4 768
ce1d4d3e 769 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
770}
771
027445c3
BP
772static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
773 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
774{
775 struct sock_iocb siocb, *x;
1da177e4 776
ce1d4d3e
CH
777 if (pos != 0)
778 return -ESPIPE;
027445c3 779
027445c3 780 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
781 if (!x)
782 return -ENOMEM;
1da177e4 783
027445c3 784 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
785}
786
1da177e4
LT
787/*
788 * Atomic setting of ioctl hooks to avoid race
789 * with module unload.
790 */
791
4a3e2f71 792static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 793static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 794
89bddce5 795void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 796{
4a3e2f71 797 mutex_lock(&br_ioctl_mutex);
1da177e4 798 br_ioctl_hook = hook;
4a3e2f71 799 mutex_unlock(&br_ioctl_mutex);
1da177e4 800}
89bddce5 801
1da177e4
LT
802EXPORT_SYMBOL(brioctl_set);
803
4a3e2f71 804static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 805static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 806
89bddce5 807void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 808{
4a3e2f71 809 mutex_lock(&vlan_ioctl_mutex);
1da177e4 810 vlan_ioctl_hook = hook;
4a3e2f71 811 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 812}
89bddce5 813
1da177e4
LT
814EXPORT_SYMBOL(vlan_ioctl_set);
815
4a3e2f71 816static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 817static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 818
89bddce5 819void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 820{
4a3e2f71 821 mutex_lock(&dlci_ioctl_mutex);
1da177e4 822 dlci_ioctl_hook = hook;
4a3e2f71 823 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 824}
89bddce5 825
1da177e4
LT
826EXPORT_SYMBOL(dlci_ioctl_set);
827
828/*
829 * With an ioctl, arg may well be a user mode pointer, but we don't know
830 * what to do with it - that's up to the protocol still.
831 */
832
833static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
834{
835 struct socket *sock;
836 void __user *argp = (void __user *)arg;
837 int pid, err;
838
b69aee04 839 sock = file->private_data;
1da177e4
LT
840 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
841 err = dev_ioctl(cmd, argp);
842 } else
d86b5e0e 843#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
844 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
845 err = dev_ioctl(cmd, argp);
846 } else
89bddce5
SH
847#endif /* CONFIG_WIRELESS_EXT */
848 switch (cmd) {
1da177e4
LT
849 case FIOSETOWN:
850 case SIOCSPGRP:
851 err = -EFAULT;
852 if (get_user(pid, (int __user *)argp))
853 break;
854 err = f_setown(sock->file, pid, 1);
855 break;
856 case FIOGETOWN:
857 case SIOCGPGRP:
609d7fa9 858 err = put_user(f_getown(sock->file),
89bddce5 859 (int __user *)argp);
1da177e4
LT
860 break;
861 case SIOCGIFBR:
862 case SIOCSIFBR:
863 case SIOCBRADDBR:
864 case SIOCBRDELBR:
865 err = -ENOPKG;
866 if (!br_ioctl_hook)
867 request_module("bridge");
868
4a3e2f71 869 mutex_lock(&br_ioctl_mutex);
89bddce5 870 if (br_ioctl_hook)
1da177e4 871 err = br_ioctl_hook(cmd, argp);
4a3e2f71 872 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
873 break;
874 case SIOCGIFVLAN:
875 case SIOCSIFVLAN:
876 err = -ENOPKG;
877 if (!vlan_ioctl_hook)
878 request_module("8021q");
879
4a3e2f71 880 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
881 if (vlan_ioctl_hook)
882 err = vlan_ioctl_hook(argp);
4a3e2f71 883 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 884 break;
1da177e4
LT
885 case SIOCADDDLCI:
886 case SIOCDELDLCI:
887 err = -ENOPKG;
888 if (!dlci_ioctl_hook)
889 request_module("dlci");
890
891 if (dlci_ioctl_hook) {
4a3e2f71 892 mutex_lock(&dlci_ioctl_mutex);
1da177e4 893 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 894 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
895 }
896 break;
897 default:
898 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
899
900 /*
901 * If this ioctl is unknown try to hand it down
902 * to the NIC driver.
903 */
904 if (err == -ENOIOCTLCMD)
905 err = dev_ioctl(cmd, argp);
1da177e4 906 break;
89bddce5 907 }
1da177e4
LT
908 return err;
909}
910
911int sock_create_lite(int family, int type, int protocol, struct socket **res)
912{
913 int err;
914 struct socket *sock = NULL;
89bddce5 915
1da177e4
LT
916 err = security_socket_create(family, type, protocol, 1);
917 if (err)
918 goto out;
919
920 sock = sock_alloc();
921 if (!sock) {
922 err = -ENOMEM;
923 goto out;
924 }
925
1da177e4 926 sock->type = type;
7420ed23
VY
927 err = security_socket_post_create(sock, family, type, protocol, 1);
928 if (err)
929 goto out_release;
930
1da177e4
LT
931out:
932 *res = sock;
933 return err;
7420ed23
VY
934out_release:
935 sock_release(sock);
936 sock = NULL;
937 goto out;
1da177e4
LT
938}
939
940/* No kernel lock held - perfect */
89bddce5 941static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
942{
943 struct socket *sock;
944
945 /*
89bddce5 946 * We can't return errors to poll, so it's either yes or no.
1da177e4 947 */
b69aee04 948 sock = file->private_data;
1da177e4
LT
949 return sock->ops->poll(file, sock, wait);
950}
951
89bddce5 952static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 953{
b69aee04 954 struct socket *sock = file->private_data;
1da177e4
LT
955
956 return sock->ops->mmap(file, sock, vma);
957}
958
20380731 959static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
960{
961 /*
89bddce5
SH
962 * It was possible the inode is NULL we were
963 * closing an unfinished socket.
1da177e4
LT
964 */
965
89bddce5 966 if (!inode) {
1da177e4
LT
967 printk(KERN_DEBUG "sock_close: NULL inode\n");
968 return 0;
969 }
970 sock_fasync(-1, filp, 0);
971 sock_release(SOCKET_I(inode));
972 return 0;
973}
974
975/*
976 * Update the socket async list
977 *
978 * Fasync_list locking strategy.
979 *
980 * 1. fasync_list is modified only under process context socket lock
981 * i.e. under semaphore.
982 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
983 * or under socket lock.
984 * 3. fasync_list can be used from softirq context, so that
985 * modification under socket lock have to be enhanced with
986 * write_lock_bh(&sk->sk_callback_lock).
987 * --ANK (990710)
988 */
989
990static int sock_fasync(int fd, struct file *filp, int on)
991{
89bddce5 992 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
993 struct socket *sock;
994 struct sock *sk;
995
89bddce5 996 if (on) {
8b3a7005 997 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 998 if (fna == NULL)
1da177e4
LT
999 return -ENOMEM;
1000 }
1001
b69aee04 1002 sock = filp->private_data;
1da177e4 1003
89bddce5
SH
1004 sk = sock->sk;
1005 if (sk == NULL) {
1da177e4
LT
1006 kfree(fna);
1007 return -EINVAL;
1008 }
1009
1010 lock_sock(sk);
1011
89bddce5 1012 prev = &(sock->fasync_list);
1da177e4 1013
89bddce5
SH
1014 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1015 if (fa->fa_file == filp)
1da177e4
LT
1016 break;
1017
89bddce5
SH
1018 if (on) {
1019 if (fa != NULL) {
1da177e4 1020 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1021 fa->fa_fd = fd;
1da177e4
LT
1022 write_unlock_bh(&sk->sk_callback_lock);
1023
1024 kfree(fna);
1025 goto out;
1026 }
89bddce5
SH
1027 fna->fa_file = filp;
1028 fna->fa_fd = fd;
1029 fna->magic = FASYNC_MAGIC;
1030 fna->fa_next = sock->fasync_list;
1da177e4 1031 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1032 sock->fasync_list = fna;
1da177e4 1033 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1034 } else {
1035 if (fa != NULL) {
1da177e4 1036 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1037 *prev = fa->fa_next;
1da177e4
LT
1038 write_unlock_bh(&sk->sk_callback_lock);
1039 kfree(fa);
1040 }
1041 }
1042
1043out:
1044 release_sock(sock->sk);
1045 return 0;
1046}
1047
1048/* This function may be called only under socket lock or callback_lock */
1049
1050int sock_wake_async(struct socket *sock, int how, int band)
1051{
1052 if (!sock || !sock->fasync_list)
1053 return -1;
89bddce5 1054 switch (how) {
1da177e4 1055 case 1:
89bddce5 1056
1da177e4
LT
1057 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1058 break;
1059 goto call_kill;
1060 case 2:
1061 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1062 break;
1063 /* fall through */
1064 case 0:
89bddce5 1065call_kill:
1da177e4
LT
1066 __kill_fasync(sock->fasync_list, SIGIO, band);
1067 break;
1068 case 3:
1069 __kill_fasync(sock->fasync_list, SIGURG, band);
1070 }
1071 return 0;
1072}
1073
89bddce5
SH
1074static int __sock_create(int family, int type, int protocol,
1075 struct socket **res, int kern)
1da177e4
LT
1076{
1077 int err;
1078 struct socket *sock;
55737fda 1079 const struct net_proto_family *pf;
1da177e4
LT
1080
1081 /*
89bddce5 1082 * Check protocol is in range
1da177e4
LT
1083 */
1084 if (family < 0 || family >= NPROTO)
1085 return -EAFNOSUPPORT;
1086 if (type < 0 || type >= SOCK_MAX)
1087 return -EINVAL;
1088
1089 /* Compatibility.
1090
1091 This uglymoron is moved from INET layer to here to avoid
1092 deadlock in module load.
1093 */
1094 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1095 static int warned;
1da177e4
LT
1096 if (!warned) {
1097 warned = 1;
89bddce5
SH
1098 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1099 current->comm);
1da177e4
LT
1100 }
1101 family = PF_PACKET;
1102 }
1103
1104 err = security_socket_create(family, type, protocol, kern);
1105 if (err)
1106 return err;
89bddce5 1107
55737fda
SH
1108 /*
1109 * Allocate the socket and allow the family to set things up. if
1110 * the protocol is 0, the family is instructed to select an appropriate
1111 * default.
1112 */
1113 sock = sock_alloc();
1114 if (!sock) {
1115 if (net_ratelimit())
1116 printk(KERN_WARNING "socket: no more sockets\n");
1117 return -ENFILE; /* Not exactly a match, but its the
1118 closest posix thing */
1119 }
1120
1121 sock->type = type;
1122
1da177e4 1123#if defined(CONFIG_KMOD)
89bddce5
SH
1124 /* Attempt to load a protocol module if the find failed.
1125 *
1126 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1127 * requested real, full-featured networking support upon configuration.
1128 * Otherwise module support will break!
1129 */
55737fda 1130 if (net_families[family] == NULL)
89bddce5 1131 request_module("net-pf-%d", family);
1da177e4
LT
1132#endif
1133
55737fda
SH
1134 rcu_read_lock();
1135 pf = rcu_dereference(net_families[family]);
1136 err = -EAFNOSUPPORT;
1137 if (!pf)
1138 goto out_release;
1da177e4
LT
1139
1140 /*
1141 * We will call the ->create function, that possibly is in a loadable
1142 * module, so we have to bump that loadable module refcnt first.
1143 */
55737fda 1144 if (!try_module_get(pf->owner))
1da177e4
LT
1145 goto out_release;
1146
55737fda
SH
1147 /* Now protected by module ref count */
1148 rcu_read_unlock();
1149
1150 err = pf->create(sock, protocol);
1151 if (err < 0)
1da177e4 1152 goto out_module_put;
a79af59e 1153
1da177e4
LT
1154 /*
1155 * Now to bump the refcnt of the [loadable] module that owns this
1156 * socket at sock_release time we decrement its refcnt.
1157 */
55737fda
SH
1158 if (!try_module_get(sock->ops->owner))
1159 goto out_module_busy;
1160
1da177e4
LT
1161 /*
1162 * Now that we're done with the ->create function, the [loadable]
1163 * module can have its refcnt decremented
1164 */
55737fda 1165 module_put(pf->owner);
7420ed23
VY
1166 err = security_socket_post_create(sock, family, type, protocol, kern);
1167 if (err)
3b185525 1168 goto out_sock_release;
55737fda 1169 *res = sock;
1da177e4 1170
55737fda
SH
1171 return 0;
1172
1173out_module_busy:
1174 err = -EAFNOSUPPORT;
1da177e4 1175out_module_put:
55737fda
SH
1176 sock->ops = NULL;
1177 module_put(pf->owner);
1178out_sock_release:
1da177e4 1179 sock_release(sock);
55737fda
SH
1180 return err;
1181
1182out_release:
1183 rcu_read_unlock();
1184 goto out_sock_release;
1da177e4
LT
1185}
1186
/* Create a socket via __sock_create() with the 'kern' argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1191
/* Create a socket via __sock_create() with the 'kern' argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1196
1197asmlinkage long sys_socket(int family, int type, int protocol)
1198{
1199 int retval;
1200 struct socket *sock;
1201
1202 retval = sock_create(family, type, protocol, &sock);
1203 if (retval < 0)
1204 goto out;
1205
1206 retval = sock_map_fd(sock);
1207 if (retval < 0)
1208 goto out_release;
1209
1210out:
1211 /* It may be already another descriptor 8) Not kernel problem. */
1212 return retval;
1213
1214out_release:
1215 sock_release(sock);
1216 return retval;
1217}
1218
1219/*
1220 * Create a pair of connected sockets.
1221 */
1222
89bddce5
SH
1223asmlinkage long sys_socketpair(int family, int type, int protocol,
1224 int __user *usockvec)
1da177e4
LT
1225{
1226 struct socket *sock1, *sock2;
1227 int fd1, fd2, err;
db349509 1228 struct file *newfile1, *newfile2;
1da177e4
LT
1229
1230 /*
1231 * Obtain the first socket and check if the underlying protocol
1232 * supports the socketpair call.
1233 */
1234
1235 err = sock_create(family, type, protocol, &sock1);
1236 if (err < 0)
1237 goto out;
1238
1239 err = sock_create(family, type, protocol, &sock2);
1240 if (err < 0)
1241 goto out_release_1;
1242
1243 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1244 if (err < 0)
1da177e4
LT
1245 goto out_release_both;
1246
db349509
AV
1247 fd1 = sock_alloc_fd(&newfile1);
1248 if (unlikely(fd1 < 0))
1249 goto out_release_both;
1da177e4 1250
db349509
AV
1251 fd2 = sock_alloc_fd(&newfile2);
1252 if (unlikely(fd2 < 0)) {
1253 put_filp(newfile1);
1254 put_unused_fd(fd1);
1da177e4 1255 goto out_release_both;
db349509 1256 }
1da177e4 1257
db349509
AV
1258 err = sock_attach_fd(sock1, newfile1);
1259 if (unlikely(err < 0)) {
1260 goto out_fd2;
1261 }
1262
1263 err = sock_attach_fd(sock2, newfile2);
1264 if (unlikely(err < 0)) {
1265 fput(newfile1);
1266 goto out_fd1;
1267 }
1268
1269 err = audit_fd_pair(fd1, fd2);
1270 if (err < 0) {
1271 fput(newfile1);
1272 fput(newfile2);
1273 goto out_fd;
1274 }
1da177e4 1275
db349509
AV
1276 fd_install(fd1, newfile1);
1277 fd_install(fd2, newfile2);
1da177e4
LT
1278 /* fd1 and fd2 may be already another descriptors.
1279 * Not kernel problem.
1280 */
1281
89bddce5 1282 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1283 if (!err)
1284 err = put_user(fd2, &usockvec[1]);
1285 if (!err)
1286 return 0;
1287
1288 sys_close(fd2);
1289 sys_close(fd1);
1290 return err;
1291
1da177e4 1292out_release_both:
89bddce5 1293 sock_release(sock2);
1da177e4 1294out_release_1:
89bddce5 1295 sock_release(sock1);
1da177e4
LT
1296out:
1297 return err;
db349509
AV
1298
1299out_fd2:
1300 put_filp(newfile1);
1301 sock_release(sock1);
1302out_fd1:
1303 put_filp(newfile2);
1304 sock_release(sock2);
1305out_fd:
1306 put_unused_fd(fd1);
1307 put_unused_fd(fd2);
1308 goto out;
1da177e4
LT
1309}
1310
1da177e4
LT
1311/*
1312 * Bind a name to a socket. Nothing much to do here since it's
1313 * the protocol's responsibility to handle the local address.
1314 *
1315 * We move the socket address to kernel space before we call
1316 * the protocol layer (having also checked the address is ok).
1317 */
1318
1319asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1320{
1321 struct socket *sock;
1322 char address[MAX_SOCK_ADDR];
6cb153ca 1323 int err, fput_needed;
1da177e4 1324
89bddce5 1325 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1326 if (sock) {
89bddce5
SH
1327 err = move_addr_to_kernel(umyaddr, addrlen, address);
1328 if (err >= 0) {
1329 err = security_socket_bind(sock,
1330 (struct sockaddr *)address,
1331 addrlen);
6cb153ca
BL
1332 if (!err)
1333 err = sock->ops->bind(sock,
89bddce5
SH
1334 (struct sockaddr *)
1335 address, addrlen);
1da177e4 1336 }
6cb153ca 1337 fput_light(sock->file, fput_needed);
89bddce5 1338 }
1da177e4
LT
1339 return err;
1340}
1341
1da177e4
LT
1342/*
1343 * Perform a listen. Basically, we allow the protocol to do anything
1344 * necessary for a listen, and if that works, we mark the socket as
1345 * ready for listening.
1346 */
1347
7a42c217 1348int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1349
1350asmlinkage long sys_listen(int fd, int backlog)
1351{
1352 struct socket *sock;
6cb153ca 1353 int err, fput_needed;
89bddce5
SH
1354
1355 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1356 if (sock) {
1357 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1358 backlog = sysctl_somaxconn;
1359
1360 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1361 if (!err)
1362 err = sock->ops->listen(sock, backlog);
1da177e4 1363
6cb153ca 1364 fput_light(sock->file, fput_needed);
1da177e4
LT
1365 }
1366 return err;
1367}
1368
1da177e4
LT
1369/*
1370 * For accept, we attempt to create a new socket, set up the link
1371 * with the client, wake up the client, then return the new
1372 * connected fd. We collect the address of the connector in kernel
1373 * space and move it to user at the very end. This is unclean because
1374 * we open the socket then return an error.
1375 *
1376 * 1003.1g adds the ability to recvmsg() to query connection pending
1377 * status to recvmsg. We need to add that support in a way that's
1378 * clean when we restructure accept also.
1379 */
1380
89bddce5
SH
1381asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1382 int __user *upeer_addrlen)
1da177e4
LT
1383{
1384 struct socket *sock, *newsock;
39d8c1b6 1385 struct file *newfile;
6cb153ca 1386 int err, len, newfd, fput_needed;
1da177e4
LT
1387 char address[MAX_SOCK_ADDR];
1388
6cb153ca 1389 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1390 if (!sock)
1391 goto out;
1392
1393 err = -ENFILE;
89bddce5 1394 if (!(newsock = sock_alloc()))
1da177e4
LT
1395 goto out_put;
1396
1397 newsock->type = sock->type;
1398 newsock->ops = sock->ops;
1399
1da177e4
LT
1400 /*
1401 * We don't need try_module_get here, as the listening socket (sock)
1402 * has the protocol module (sock->ops->owner) held.
1403 */
1404 __module_get(newsock->ops->owner);
1405
39d8c1b6
DM
1406 newfd = sock_alloc_fd(&newfile);
1407 if (unlikely(newfd < 0)) {
1408 err = newfd;
9a1875e6
DM
1409 sock_release(newsock);
1410 goto out_put;
39d8c1b6
DM
1411 }
1412
1413 err = sock_attach_fd(newsock, newfile);
1414 if (err < 0)
79f4f642 1415 goto out_fd_simple;
39d8c1b6 1416
a79af59e
FF
1417 err = security_socket_accept(sock, newsock);
1418 if (err)
39d8c1b6 1419 goto out_fd;
a79af59e 1420
1da177e4
LT
1421 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1422 if (err < 0)
39d8c1b6 1423 goto out_fd;
1da177e4
LT
1424
1425 if (upeer_sockaddr) {
89bddce5
SH
1426 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1427 &len, 2) < 0) {
1da177e4 1428 err = -ECONNABORTED;
39d8c1b6 1429 goto out_fd;
1da177e4 1430 }
89bddce5
SH
1431 err = move_addr_to_user(address, len, upeer_sockaddr,
1432 upeer_addrlen);
1da177e4 1433 if (err < 0)
39d8c1b6 1434 goto out_fd;
1da177e4
LT
1435 }
1436
1437 /* File flags are not inherited via accept() unlike another OSes. */
1438
39d8c1b6
DM
1439 fd_install(newfd, newfile);
1440 err = newfd;
1da177e4
LT
1441
1442 security_socket_post_accept(sock, newsock);
1443
1444out_put:
6cb153ca 1445 fput_light(sock->file, fput_needed);
1da177e4
LT
1446out:
1447 return err;
79f4f642
AD
1448out_fd_simple:
1449 sock_release(newsock);
1450 put_filp(newfile);
1451 put_unused_fd(newfd);
1452 goto out_put;
39d8c1b6 1453out_fd:
9606a216 1454 fput(newfile);
39d8c1b6 1455 put_unused_fd(newfd);
1da177e4
LT
1456 goto out_put;
1457}
1458
1da177e4
LT
1459/*
1460 * Attempt to connect to a socket with the server address. The address
1461 * is in user space so we verify it is OK and move it to kernel space.
1462 *
1463 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1464 * break bindings
1465 *
1466 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1467 * other SEQPACKET protocols that take time to connect() as it doesn't
1468 * include the -EINPROGRESS status for such sockets.
1469 */
1470
89bddce5
SH
1471asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1472 int addrlen)
1da177e4
LT
1473{
1474 struct socket *sock;
1475 char address[MAX_SOCK_ADDR];
6cb153ca 1476 int err, fput_needed;
1da177e4 1477
6cb153ca 1478 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1479 if (!sock)
1480 goto out;
1481 err = move_addr_to_kernel(uservaddr, addrlen, address);
1482 if (err < 0)
1483 goto out_put;
1484
89bddce5
SH
1485 err =
1486 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1487 if (err)
1488 goto out_put;
1489
89bddce5 1490 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1491 sock->file->f_flags);
1492out_put:
6cb153ca 1493 fput_light(sock->file, fput_needed);
1da177e4
LT
1494out:
1495 return err;
1496}
1497
1498/*
1499 * Get the local address ('name') of a socket object. Move the obtained
1500 * name to user space.
1501 */
1502
89bddce5
SH
1503asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1504 int __user *usockaddr_len)
1da177e4
LT
1505{
1506 struct socket *sock;
1507 char address[MAX_SOCK_ADDR];
6cb153ca 1508 int len, err, fput_needed;
89bddce5 1509
6cb153ca 1510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1511 if (!sock)
1512 goto out;
1513
1514 err = security_socket_getsockname(sock);
1515 if (err)
1516 goto out_put;
1517
1518 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1519 if (err)
1520 goto out_put;
1521 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1522
1523out_put:
6cb153ca 1524 fput_light(sock->file, fput_needed);
1da177e4
LT
1525out:
1526 return err;
1527}
1528
1529/*
1530 * Get the remote address ('name') of a socket object. Move the obtained
1531 * name to user space.
1532 */
1533
89bddce5
SH
1534asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1535 int __user *usockaddr_len)
1da177e4
LT
1536{
1537 struct socket *sock;
1538 char address[MAX_SOCK_ADDR];
6cb153ca 1539 int len, err, fput_needed;
1da177e4 1540
89bddce5
SH
1541 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1542 if (sock != NULL) {
1da177e4
LT
1543 err = security_socket_getpeername(sock);
1544 if (err) {
6cb153ca 1545 fput_light(sock->file, fput_needed);
1da177e4
LT
1546 return err;
1547 }
1548
89bddce5
SH
1549 err =
1550 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1551 1);
1da177e4 1552 if (!err)
89bddce5
SH
1553 err = move_addr_to_user(address, len, usockaddr,
1554 usockaddr_len);
6cb153ca 1555 fput_light(sock->file, fput_needed);
1da177e4
LT
1556 }
1557 return err;
1558}
1559
1560/*
1561 * Send a datagram to a given address. We move the address into kernel
1562 * space and check the user space data area is readable before invoking
1563 * the protocol.
1564 */
1565
89bddce5
SH
1566asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1567 unsigned flags, struct sockaddr __user *addr,
1568 int addr_len)
1da177e4
LT
1569{
1570 struct socket *sock;
1571 char address[MAX_SOCK_ADDR];
1572 int err;
1573 struct msghdr msg;
1574 struct iovec iov;
6cb153ca
BL
1575 int fput_needed;
1576 struct file *sock_file;
1577
1578 sock_file = fget_light(fd, &fput_needed);
4387ff75 1579 err = -EBADF;
6cb153ca 1580 if (!sock_file)
4387ff75 1581 goto out;
6cb153ca
BL
1582
1583 sock = sock_from_file(sock_file, &err);
1da177e4 1584 if (!sock)
6cb153ca 1585 goto out_put;
89bddce5
SH
1586 iov.iov_base = buff;
1587 iov.iov_len = len;
1588 msg.msg_name = NULL;
1589 msg.msg_iov = &iov;
1590 msg.msg_iovlen = 1;
1591 msg.msg_control = NULL;
1592 msg.msg_controllen = 0;
1593 msg.msg_namelen = 0;
6cb153ca 1594 if (addr) {
1da177e4
LT
1595 err = move_addr_to_kernel(addr, addr_len, address);
1596 if (err < 0)
1597 goto out_put;
89bddce5
SH
1598 msg.msg_name = address;
1599 msg.msg_namelen = addr_len;
1da177e4
LT
1600 }
1601 if (sock->file->f_flags & O_NONBLOCK)
1602 flags |= MSG_DONTWAIT;
1603 msg.msg_flags = flags;
1604 err = sock_sendmsg(sock, &msg, len);
1605
89bddce5 1606out_put:
6cb153ca 1607 fput_light(sock_file, fput_needed);
4387ff75 1608out:
1da177e4
LT
1609 return err;
1610}
1611
1612/*
89bddce5 1613 * Send a datagram down a socket.
1da177e4
LT
1614 */
1615
89bddce5 1616asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1617{
1618 return sys_sendto(fd, buff, len, flags, NULL, 0);
1619}
1620
1621/*
89bddce5 1622 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1623 * sender. We verify the buffers are writable and if needed move the
1624 * sender address from kernel to user space.
1625 */
1626
89bddce5
SH
1627asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1628 unsigned flags, struct sockaddr __user *addr,
1629 int __user *addr_len)
1da177e4
LT
1630{
1631 struct socket *sock;
1632 struct iovec iov;
1633 struct msghdr msg;
1634 char address[MAX_SOCK_ADDR];
89bddce5 1635 int err, err2;
6cb153ca
BL
1636 struct file *sock_file;
1637 int fput_needed;
1638
1639 sock_file = fget_light(fd, &fput_needed);
4387ff75 1640 err = -EBADF;
6cb153ca 1641 if (!sock_file)
4387ff75 1642 goto out;
1da177e4 1643
6cb153ca 1644 sock = sock_from_file(sock_file, &err);
1da177e4 1645 if (!sock)
4387ff75 1646 goto out_put;
1da177e4 1647
89bddce5
SH
1648 msg.msg_control = NULL;
1649 msg.msg_controllen = 0;
1650 msg.msg_iovlen = 1;
1651 msg.msg_iov = &iov;
1652 iov.iov_len = size;
1653 iov.iov_base = ubuf;
1654 msg.msg_name = address;
1655 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1656 if (sock->file->f_flags & O_NONBLOCK)
1657 flags |= MSG_DONTWAIT;
89bddce5 1658 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1659
89bddce5
SH
1660 if (err >= 0 && addr != NULL) {
1661 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1662 if (err2 < 0)
1663 err = err2;
1da177e4 1664 }
4387ff75 1665out_put:
6cb153ca 1666 fput_light(sock_file, fput_needed);
4387ff75 1667out:
1da177e4
LT
1668 return err;
1669}
1670
1671/*
89bddce5 1672 * Receive a datagram from a socket.
1da177e4
LT
1673 */
1674
89bddce5
SH
1675asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1676 unsigned flags)
1da177e4
LT
1677{
1678 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1679}
1680
1681/*
1682 * Set a socket option. Because we don't know the option lengths we have
1683 * to pass the user mode parameter for the protocols to sort out.
1684 */
1685
89bddce5
SH
1686asmlinkage long sys_setsockopt(int fd, int level, int optname,
1687 char __user *optval, int optlen)
1da177e4 1688{
6cb153ca 1689 int err, fput_needed;
1da177e4
LT
1690 struct socket *sock;
1691
1692 if (optlen < 0)
1693 return -EINVAL;
89bddce5
SH
1694
1695 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1696 if (sock != NULL) {
1697 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1698 if (err)
1699 goto out_put;
1da177e4
LT
1700
1701 if (level == SOL_SOCKET)
89bddce5
SH
1702 err =
1703 sock_setsockopt(sock, level, optname, optval,
1704 optlen);
1da177e4 1705 else
89bddce5
SH
1706 err =
1707 sock->ops->setsockopt(sock, level, optname, optval,
1708 optlen);
6cb153ca
BL
1709out_put:
1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 }
1712 return err;
1713}
1714
1715/*
1716 * Get a socket option. Because we don't know the option lengths we have
1717 * to pass a user mode parameter for the protocols to sort out.
1718 */
1719
89bddce5
SH
1720asmlinkage long sys_getsockopt(int fd, int level, int optname,
1721 char __user *optval, int __user *optlen)
1da177e4 1722{
6cb153ca 1723 int err, fput_needed;
1da177e4
LT
1724 struct socket *sock;
1725
89bddce5
SH
1726 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1727 if (sock != NULL) {
6cb153ca
BL
1728 err = security_socket_getsockopt(sock, level, optname);
1729 if (err)
1730 goto out_put;
1da177e4
LT
1731
1732 if (level == SOL_SOCKET)
89bddce5
SH
1733 err =
1734 sock_getsockopt(sock, level, optname, optval,
1735 optlen);
1da177e4 1736 else
89bddce5
SH
1737 err =
1738 sock->ops->getsockopt(sock, level, optname, optval,
1739 optlen);
6cb153ca
BL
1740out_put:
1741 fput_light(sock->file, fput_needed);
1da177e4
LT
1742 }
1743 return err;
1744}
1745
1da177e4
LT
1746/*
1747 * Shutdown a socket.
1748 */
1749
1750asmlinkage long sys_shutdown(int fd, int how)
1751{
6cb153ca 1752 int err, fput_needed;
1da177e4
LT
1753 struct socket *sock;
1754
89bddce5
SH
1755 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1756 if (sock != NULL) {
1da177e4 1757 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1758 if (!err)
1759 err = sock->ops->shutdown(sock, how);
1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
89bddce5 1765/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1766 * fields which are the same type (int / unsigned) on our platforms.
1767 */
/*
 * COMPAT_MSG() expands to the address of @member in either <msg>_compat
 * or <msg>, chosen by the MSG_CMSG_COMPAT bit of a variable named 'flags'
 * that must be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member) \
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1771
1da177e4
LT
1772/*
1773 * BSD sendmsg interface
1774 */
1775
1776asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1777{
89bddce5
SH
1778 struct compat_msghdr __user *msg_compat =
1779 (struct compat_msghdr __user *)msg;
1da177e4
LT
1780 struct socket *sock;
1781 char address[MAX_SOCK_ADDR];
1782 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1783 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1784 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1785 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1786 unsigned char *ctl_buf = ctl;
1787 struct msghdr msg_sys;
1788 int err, ctl_len, iov_size, total_len;
6cb153ca 1789 int fput_needed;
89bddce5 1790
1da177e4
LT
1791 err = -EFAULT;
1792 if (MSG_CMSG_COMPAT & flags) {
1793 if (get_compat_msghdr(&msg_sys, msg_compat))
1794 return -EFAULT;
89bddce5
SH
1795 }
1796 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1797 return -EFAULT;
1798
6cb153ca 1799 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1800 if (!sock)
1da177e4
LT
1801 goto out;
1802
1803 /* do not move before msg_sys is valid */
1804 err = -EMSGSIZE;
1805 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1806 goto out_put;
1807
89bddce5 1808 /* Check whether to allocate the iovec area */
1da177e4
LT
1809 err = -ENOMEM;
1810 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1811 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1812 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1813 if (!iov)
1814 goto out_put;
1815 }
1816
1817 /* This will also move the address data into kernel space */
1818 if (MSG_CMSG_COMPAT & flags) {
1819 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1820 } else
1821 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1822 if (err < 0)
1da177e4
LT
1823 goto out_freeiov;
1824 total_len = err;
1825
1826 err = -ENOBUFS;
1827
1828 if (msg_sys.msg_controllen > INT_MAX)
1829 goto out_freeiov;
89bddce5 1830 ctl_len = msg_sys.msg_controllen;
1da177e4 1831 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1832 err =
1833 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1834 sizeof(ctl));
1da177e4
LT
1835 if (err)
1836 goto out_freeiov;
1837 ctl_buf = msg_sys.msg_control;
8920e8f9 1838 ctl_len = msg_sys.msg_controllen;
1da177e4 1839 } else if (ctl_len) {
89bddce5 1840 if (ctl_len > sizeof(ctl)) {
1da177e4 1841 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1842 if (ctl_buf == NULL)
1da177e4
LT
1843 goto out_freeiov;
1844 }
1845 err = -EFAULT;
1846 /*
1847 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1848 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1849 * checking falls down on this.
1850 */
89bddce5
SH
1851 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1852 ctl_len))
1da177e4
LT
1853 goto out_freectl;
1854 msg_sys.msg_control = ctl_buf;
1855 }
1856 msg_sys.msg_flags = flags;
1857
1858 if (sock->file->f_flags & O_NONBLOCK)
1859 msg_sys.msg_flags |= MSG_DONTWAIT;
1860 err = sock_sendmsg(sock, &msg_sys, total_len);
1861
1862out_freectl:
89bddce5 1863 if (ctl_buf != ctl)
1da177e4
LT
1864 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1865out_freeiov:
1866 if (iov != iovstack)
1867 sock_kfree_s(sock->sk, iov, iov_size);
1868out_put:
6cb153ca 1869 fput_light(sock->file, fput_needed);
89bddce5 1870out:
1da177e4
LT
1871 return err;
1872}
1873
1874/*
1875 * BSD recvmsg interface
1876 */
1877
89bddce5
SH
1878asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1879 unsigned int flags)
1da177e4 1880{
89bddce5
SH
1881 struct compat_msghdr __user *msg_compat =
1882 (struct compat_msghdr __user *)msg;
1da177e4
LT
1883 struct socket *sock;
1884 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1885 struct iovec *iov = iovstack;
1da177e4
LT
1886 struct msghdr msg_sys;
1887 unsigned long cmsg_ptr;
1888 int err, iov_size, total_len, len;
6cb153ca 1889 int fput_needed;
1da177e4
LT
1890
1891 /* kernel mode address */
1892 char addr[MAX_SOCK_ADDR];
1893
1894 /* user mode address pointers */
1895 struct sockaddr __user *uaddr;
1896 int __user *uaddr_len;
89bddce5 1897
1da177e4
LT
1898 if (MSG_CMSG_COMPAT & flags) {
1899 if (get_compat_msghdr(&msg_sys, msg_compat))
1900 return -EFAULT;
89bddce5
SH
1901 }
1902 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1903 return -EFAULT;
1da177e4 1904
6cb153ca 1905 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1906 if (!sock)
1907 goto out;
1908
1909 err = -EMSGSIZE;
1910 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1911 goto out_put;
89bddce5
SH
1912
1913 /* Check whether to allocate the iovec area */
1da177e4
LT
1914 err = -ENOMEM;
1915 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1916 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1917 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1918 if (!iov)
1919 goto out_put;
1920 }
1921
1922 /*
89bddce5
SH
1923 * Save the user-mode address (verify_iovec will change the
1924 * kernel msghdr to use the kernel address space)
1da177e4 1925 */
89bddce5
SH
1926
1927 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1928 uaddr_len = COMPAT_NAMELEN(msg);
1929 if (MSG_CMSG_COMPAT & flags) {
1930 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1931 } else
1932 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1933 if (err < 0)
1934 goto out_freeiov;
89bddce5 1935 total_len = err;
1da177e4
LT
1936
1937 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1938 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1939
1da177e4
LT
1940 if (sock->file->f_flags & O_NONBLOCK)
1941 flags |= MSG_DONTWAIT;
1942 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1943 if (err < 0)
1944 goto out_freeiov;
1945 len = err;
1946
1947 if (uaddr != NULL) {
89bddce5
SH
1948 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1949 uaddr_len);
1da177e4
LT
1950 if (err < 0)
1951 goto out_freeiov;
1952 }
37f7f421
DM
1953 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1954 COMPAT_FLAGS(msg));
1da177e4
LT
1955 if (err)
1956 goto out_freeiov;
1957 if (MSG_CMSG_COMPAT & flags)
89bddce5 1958 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1959 &msg_compat->msg_controllen);
1960 else
89bddce5 1961 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1962 &msg->msg_controllen);
1963 if (err)
1964 goto out_freeiov;
1965 err = len;
1966
1967out_freeiov:
1968 if (iov != iovstack)
1969 sock_kfree_s(sock->sk, iov, iov_size);
1970out_put:
6cb153ca 1971 fput_light(sock->file, fput_needed);
1da177e4
LT
1972out:
1973 return err;
1974}
1975
1976#ifdef __ARCH_WANT_SYS_SOCKETCALL
1977
1978/* Argument list sizes for sys_socketcall */
/*
 * nargs[call] is the number of bytes of arguments that sys_socketcall()
 * copies from user space for that call number; AL(n) converts a count of
 * unsigned longs into bytes.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3),
	AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4),
	AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5),
	AL(5), AL(3), AL(3)
};

#undef AL
1987
1988/*
89bddce5 1989 * System call vectors.
1da177e4
LT
1990 *
1991 * Argument checking cleaned up. Saved 20% in size.
1992 * This function doesn't need to set the kernel lock because
89bddce5 1993 * it is set by the callees.
1da177e4
LT
1994 */
1995
1996asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1997{
1998 unsigned long a[6];
89bddce5 1999 unsigned long a0, a1;
1da177e4
LT
2000 int err;
2001
89bddce5 2002 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2003 return -EINVAL;
2004
2005 /* copy_from_user should be SMP safe. */
2006 if (copy_from_user(a, args, nargs[call]))
2007 return -EFAULT;
3ec3b2fb 2008
89bddce5 2009 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2010 if (err)
2011 return err;
2012
89bddce5
SH
2013 a0 = a[0];
2014 a1 = a[1];
2015
2016 switch (call) {
2017 case SYS_SOCKET:
2018 err = sys_socket(a0, a1, a[2]);
2019 break;
2020 case SYS_BIND:
2021 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2022 break;
2023 case SYS_CONNECT:
2024 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2025 break;
2026 case SYS_LISTEN:
2027 err = sys_listen(a0, a1);
2028 break;
2029 case SYS_ACCEPT:
2030 err =
2031 sys_accept(a0, (struct sockaddr __user *)a1,
2032 (int __user *)a[2]);
2033 break;
2034 case SYS_GETSOCKNAME:
2035 err =
2036 sys_getsockname(a0, (struct sockaddr __user *)a1,
2037 (int __user *)a[2]);
2038 break;
2039 case SYS_GETPEERNAME:
2040 err =
2041 sys_getpeername(a0, (struct sockaddr __user *)a1,
2042 (int __user *)a[2]);
2043 break;
2044 case SYS_SOCKETPAIR:
2045 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2046 break;
2047 case SYS_SEND:
2048 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2049 break;
2050 case SYS_SENDTO:
2051 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2052 (struct sockaddr __user *)a[4], a[5]);
2053 break;
2054 case SYS_RECV:
2055 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2056 break;
2057 case SYS_RECVFROM:
2058 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2059 (struct sockaddr __user *)a[4],
2060 (int __user *)a[5]);
2061 break;
2062 case SYS_SHUTDOWN:
2063 err = sys_shutdown(a0, a1);
2064 break;
2065 case SYS_SETSOCKOPT:
2066 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2067 break;
2068 case SYS_GETSOCKOPT:
2069 err =
2070 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2071 (int __user *)a[4]);
2072 break;
2073 case SYS_SENDMSG:
2074 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2075 break;
2076 case SYS_RECVMSG:
2077 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2078 break;
2079 default:
2080 err = -EINVAL;
2081 break;
1da177e4
LT
2082 }
2083 return err;
2084}
2085
89bddce5 2086#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2087
55737fda
SH
2088/**
2089 * sock_register - add a socket protocol handler
2090 * @ops: description of protocol
2091 *
1da177e4
LT
2092 * This function is called by a protocol handler that wants to
2093 * advertise its address family, and have it linked into the
55737fda
SH
2094 * socket interface. The value ops->family corresponds to the
2095 * socket system call protocol family.
1da177e4 2096 */
f0fd27d4 2097int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2098{
2099 int err;
2100
2101 if (ops->family >= NPROTO) {
89bddce5
SH
2102 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2103 NPROTO);
1da177e4
LT
2104 return -ENOBUFS;
2105 }
55737fda
SH
2106
2107 spin_lock(&net_family_lock);
2108 if (net_families[ops->family])
2109 err = -EEXIST;
2110 else {
89bddce5 2111 net_families[ops->family] = ops;
1da177e4
LT
2112 err = 0;
2113 }
55737fda
SH
2114 spin_unlock(&net_family_lock);
2115
89bddce5 2116 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2117 return err;
2118}
2119
55737fda
SH
2120/**
2121 * sock_unregister - remove a protocol handler
2122 * @family: protocol family to remove
2123 *
1da177e4
LT
2124 * This function is called by a protocol handler that wants to
2125 * remove its address family, and have it unlinked from the
55737fda
SH
2126 * new socket creation.
2127 *
2128 * If protocol handler is a module, then it can use module reference
2129 * counts to protect against new references. If protocol handler is not
2130 * a module then it needs to provide its own protection in
2131 * the ops->create routine.
1da177e4 2132 */
f0fd27d4 2133void sock_unregister(int family)
1da177e4 2134{
f0fd27d4 2135 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2136
55737fda 2137 spin_lock(&net_family_lock);
89bddce5 2138 net_families[family] = NULL;
55737fda
SH
2139 spin_unlock(&net_family_lock);
2140
2141 synchronize_rcu();
2142
89bddce5 2143 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2144}
2145
77d76ea3 2146static int __init sock_init(void)
1da177e4
LT
2147{
2148 /*
89bddce5 2149 * Initialize sock SLAB cache.
1da177e4 2150 */
89bddce5 2151
1da177e4
LT
2152 sk_init();
2153
1da177e4 2154 /*
89bddce5 2155 * Initialize skbuff SLAB cache
1da177e4
LT
2156 */
2157 skb_init();
1da177e4
LT
2158
2159 /*
89bddce5 2160 * Initialize the protocols module.
1da177e4
LT
2161 */
2162
2163 init_inodecache();
2164 register_filesystem(&sock_fs_type);
2165 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2166
2167 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2168 */
2169
2170#ifdef CONFIG_NETFILTER
2171 netfilter_init();
2172#endif
cbeb321a
DM
2173
2174 return 0;
1da177e4
LT
2175}
2176
77d76ea3
AK
2177core_initcall(sock_init); /* early initcall */
2178
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - write the "sockets: used N" line to @seq.
 *
 * N is the sum of the per-CPU sockets_in_use counters over every
 * possible CPU.  The sum can come out negative, in which case zero
 * is printed instead.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		total += per_cpu(sockets_in_use, cpu);
	}

	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif	/* CONFIG_PROC_FS */
1da177e4 2195
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * compat_sock_ioctl - forward a compat ioctl to the socket's protocol ops.
 *
 * The socket is taken from file->private_data.  Returns -ENOIOCTLCMD when
 * the protocol provides no compat_ioctl handler, otherwise whatever the
 * handler returns.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2209
ac5a488e
SS
2210int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2211{
2212 return sock->ops->bind(sock, addr, addrlen);
2213}
2214
2215int kernel_listen(struct socket *sock, int backlog)
2216{
2217 return sock->ops->listen(sock, backlog);
2218}
2219
2220int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2221{
2222 struct sock *sk = sock->sk;
2223 int err;
2224
2225 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2226 newsock);
2227 if (err < 0)
2228 goto done;
2229
2230 err = sock->ops->accept(sock, *newsock, flags);
2231 if (err < 0) {
2232 sock_release(*newsock);
2233 goto done;
2234 }
2235
2236 (*newsock)->ops = sock->ops;
2237
2238done:
2239 return err;
2240}
2241
2242int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2243 int flags)
ac5a488e
SS
2244{
2245 return sock->ops->connect(sock, addr, addrlen, flags);
2246}
2247
2248int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2249 int *addrlen)
2250{
2251 return sock->ops->getname(sock, addr, addrlen, 0);
2252}
2253
2254int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2255 int *addrlen)
2256{
2257 return sock->ops->getname(sock, addr, addrlen, 1);
2258}
2259
2260int kernel_getsockopt(struct socket *sock, int level, int optname,
2261 char *optval, int *optlen)
2262{
2263 mm_segment_t oldfs = get_fs();
2264 int err;
2265
2266 set_fs(KERNEL_DS);
2267 if (level == SOL_SOCKET)
2268 err = sock_getsockopt(sock, level, optname, optval, optlen);
2269 else
2270 err = sock->ops->getsockopt(sock, level, optname, optval,
2271 optlen);
2272 set_fs(oldfs);
2273 return err;
2274}
2275
2276int kernel_setsockopt(struct socket *sock, int level, int optname,
2277 char *optval, int optlen)
2278{
2279 mm_segment_t oldfs = get_fs();
2280 int err;
2281
2282 set_fs(KERNEL_DS);
2283 if (level == SOL_SOCKET)
2284 err = sock_setsockopt(sock, level, optname, optval, optlen);
2285 else
2286 err = sock->ops->setsockopt(sock, level, optname, optval,
2287 optlen);
2288 set_fs(oldfs);
2289 return err;
2290}
2291
2292int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2293 size_t size, int flags)
2294{
2295 if (sock->ops->sendpage)
2296 return sock->ops->sendpage(sock, page, offset, size, flags);
2297
2298 return sock_no_sendpage(sock, page, offset, size, flags);
2299}
2300
2301int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2302{
2303 mm_segment_t oldfs = get_fs();
2304 int err;
2305
2306 set_fs(KERNEL_DS);
2307 err = sock->ops->ioctl(sock, cmd, arg);
2308 set_fs(oldfs);
2309
2310 return err;
2311}
2312
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and basic I/O entry points. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2338EXPORT_SYMBOL(kernel_sock_ioctl);