flag parameters: socket and socketpair
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
1da177e4
LT
94
95#include <net/sock.h>
96#include <linux/netfilter.h>
97
98static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
99static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
101static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
89bddce5 103static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
104
105static int sock_close(struct inode *inode, struct file *file);
106static unsigned int sock_poll(struct file *file,
107 struct poll_table_struct *wait);
89bddce5 108static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
109#ifdef CONFIG_COMPAT
110static long compat_sock_ioctl(struct file *file,
89bddce5 111 unsigned int cmd, unsigned long arg);
89bbfc95 112#endif
1da177e4 113static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
114static ssize_t sock_sendpage(struct file *file, struct page *page,
115 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
116static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
117 struct pipe_inode_info *pipe, size_t len,
118 unsigned int flags);
1da177e4 119
1da177e4
LT
120/*
121 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
122 * in the operation structures but are done directly via the socketcall() multiplexor.
123 */
124
da7071d7 125static const struct file_operations socket_file_ops = {
1da177e4
LT
126 .owner = THIS_MODULE,
127 .llseek = no_llseek,
128 .aio_read = sock_aio_read,
129 .aio_write = sock_aio_write,
130 .poll = sock_poll,
131 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133 .compat_ioctl = compat_sock_ioctl,
134#endif
1da177e4
LT
135 .mmap = sock_mmap,
136 .open = sock_no_open, /* special open code to disallow open via /proc */
137 .release = sock_close,
138 .fasync = sock_fasync,
5274f052
JA
139 .sendpage = sock_sendpage,
140 .splice_write = generic_splice_sendpage,
9c55e01c 141 .splice_read = sock_splice_read,
1da177e4
LT
142};
143
144/*
145 * The protocol list. Each protocol is registered in here.
146 */
147
1da177e4 148static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 149static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 150
1da177e4
LT
151/*
152 * Statistics counters of the socket lists
153 */
154
155static DEFINE_PER_CPU(int, sockets_in_use) = 0;
156
157/*
89bddce5
SH
158 * Support routines.
159 * Move socket addresses back and forth across the kernel/user
160 * divide and look after the messy bits.
1da177e4
LT
161 */
162
89bddce5 163#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
164 16 for IP, 16 for IPX,
165 24 for IPv6,
89bddce5 166 about 80 for AX.25
1da177e4
LT
167 must be at least one bigger than
168 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 169 :unix_mkname()).
1da177e4 170 */
89bddce5 171
1da177e4
LT
172/**
173 * move_addr_to_kernel - copy a socket address into kernel space
174 * @uaddr: Address in user space
175 * @kaddr: Address in kernel space
176 * @ulen: Length in user space
177 *
178 * The address is copied into kernel space. If the provided address is
179 * too long an error code of -EINVAL is returned. If the copy gives
180 * invalid addresses -EFAULT is returned. On a success 0 is returned.
181 */
182
230b1839 183int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 184{
230b1839 185 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 186 return -EINVAL;
89bddce5 187 if (ulen == 0)
1da177e4 188 return 0;
89bddce5 189 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 190 return -EFAULT;
3ec3b2fb 191 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
192}
193
194/**
195 * move_addr_to_user - copy an address to user space
196 * @kaddr: kernel space address
197 * @klen: length of address in kernel
198 * @uaddr: user space address
199 * @ulen: pointer to user length field
200 *
201 * The value pointed to by ulen on entry is the buffer length available.
202 * This is overwritten with the buffer space used. -EINVAL is returned
203 * if an overlong buffer is specified or a negative buffer size. -EFAULT
204 * is returned if either the buffer or the length field are not
205 * accessible.
206 * After copying the data up to the limit the user specifies, the true
207 * length of the data is written over the length limit the user
208 * specified. Zero is returned for a success.
209 */
89bddce5 210
230b1839 211int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 212 int __user *ulen)
1da177e4
LT
213{
214 int err;
215 int len;
216
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
230b1839 222 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
237#define SOCKFS_MAGIC 0x534F434B
238
e18b890b 239static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
248 init_waitqueue_head(&ei->socket.wait);
89bddce5 249
1da177e4
LT
250 ei->socket.fasync_list = NULL;
251 ei->socket.state = SS_UNCONNECTED;
252 ei->socket.flags = 0;
253 ei->socket.ops = NULL;
254 ei->socket.sk = NULL;
255 ei->socket.file = NULL;
1da177e4
LT
256
257 return &ei->vfs_inode;
258}
259
260static void sock_destroy_inode(struct inode *inode)
261{
262 kmem_cache_free(sock_inode_cachep,
263 container_of(inode, struct socket_alloc, vfs_inode));
264}
265
4ba9b9d0 266static void init_once(struct kmem_cache *cachep, void *foo)
1da177e4 267{
89bddce5 268 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 269
a35afb83 270 inode_init_once(&ei->vfs_inode);
1da177e4 271}
89bddce5 272
1da177e4
LT
273static int init_inodecache(void)
274{
275 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
276 sizeof(struct socket_alloc),
277 0,
278 (SLAB_HWCACHE_ALIGN |
279 SLAB_RECLAIM_ACCOUNT |
280 SLAB_MEM_SPREAD),
20c2df83 281 init_once);
1da177e4
LT
282 if (sock_inode_cachep == NULL)
283 return -ENOMEM;
284 return 0;
285}
286
287static struct super_operations sockfs_ops = {
288 .alloc_inode = sock_alloc_inode,
289 .destroy_inode =sock_destroy_inode,
290 .statfs = simple_statfs,
291};
292
454e2398 293static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
294 int flags, const char *dev_name, void *data,
295 struct vfsmount *mnt)
1da177e4 296{
454e2398
DH
297 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
298 mnt);
1da177e4
LT
299}
300
ba89966c 301static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
302
303static struct file_system_type sock_fs_type = {
304 .name = "sockfs",
305 .get_sb = sockfs_get_sb,
306 .kill_sb = kill_anon_super,
307};
89bddce5 308
1da177e4
LT
309static int sockfs_delete_dentry(struct dentry *dentry)
310{
304e61e6
ED
311 /*
312 * At creation time, we pretended this dentry was hashed
313 * (by clearing DCACHE_UNHASHED bit in d_flags)
314 * At delete time, we restore the truth : not hashed.
315 * (so that dput() can proceed correctly)
316 */
317 dentry->d_flags |= DCACHE_UNHASHED;
318 return 0;
1da177e4 319}
c23fbb6b
ED
320
321/*
322 * sockfs_dname() is called from d_path().
323 */
324static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
325{
326 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
327 dentry->d_inode->i_ino);
328}
329
1da177e4 330static struct dentry_operations sockfs_dentry_operations = {
89bddce5 331 .d_delete = sockfs_delete_dentry,
c23fbb6b 332 .d_dname = sockfs_dname,
1da177e4
LT
333};
334
335/*
336 * Obtains the first available file descriptor and sets it up for use.
337 *
39d8c1b6
DM
338 * These functions create file structures and maps them to fd space
339 * of the current process. On success it returns file descriptor
1da177e4
LT
340 * and file struct implicitly stored in sock->file.
341 * Note that another thread may close file descriptor before we return
342 * from this function. We use the fact that now we do not refer
343 * to socket after mapping. If one day we will need it, this
344 * function will increment ref. count on file by 1.
345 *
346 * In any case returned fd MAY BE not valid!
347 * This race condition is unavoidable
348 * with shared fd spaces, we cannot solve it inside kernel,
349 * but we take care of internal coherence yet.
350 */
351
a677a039 352static int sock_alloc_fd(struct file **filep, int flags)
1da177e4
LT
353{
354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
39d8c1b6 357 if (likely(fd >= 0)) {
1da177e4
LT
358 struct file *file = get_empty_filp();
359
39d8c1b6
DM
360 *filep = file;
361 if (unlikely(!file)) {
1da177e4 362 put_unused_fd(fd);
39d8c1b6 363 return -ENFILE;
1da177e4 364 }
39d8c1b6
DM
365 } else
366 *filep = NULL;
367 return fd;
368}
1da177e4 369
39d8c1b6
DM
370static int sock_attach_fd(struct socket *sock, struct file *file)
371{
ce8d2cdf 372 struct dentry *dentry;
c23fbb6b 373 struct qstr name = { .name = "" };
39d8c1b6 374
ce8d2cdf
DH
375 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
376 if (unlikely(!dentry))
39d8c1b6
DM
377 return -ENOMEM;
378
ce8d2cdf 379 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
380 /*
381 * We dont want to push this dentry into global dentry hash table.
382 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
383 * This permits a working /proc/$pid/fd/XXX on sockets
384 */
ce8d2cdf
DH
385 dentry->d_flags &= ~DCACHE_UNHASHED;
386 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
387
388 sock->file = file;
ce8d2cdf
DH
389 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
390 &socket_file_ops);
391 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6
DM
392 file->f_flags = O_RDWR;
393 file->f_pos = 0;
394 file->private_data = sock;
1da177e4 395
39d8c1b6
DM
396 return 0;
397}
398
/*
 * Allocate a descriptor plus file for @sock, attach them and install the
 * file in the descriptor table.
 *
 * Returns the new descriptor, or a negative error. If attaching fails the
 * file and the descriptor are both released before the error is returned.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
416
6cb153ca
BL
417static struct socket *sock_from_file(struct file *file, int *err)
418{
6cb153ca
BL
419 if (file->f_op == &socket_file_ops)
420 return file->private_data; /* set in sock_map_fd */
421
23bb80d2
ED
422 *err = -ENOTSOCK;
423 return NULL;
6cb153ca
BL
424}
425
1da177e4
LT
426/**
427 * sockfd_lookup - Go from a file number to its socket slot
428 * @fd: file handle
429 * @err: pointer to an error code return
430 *
431 * The file handle passed in is locked and the socket it is bound
432 * too is returned. If an error occurs the err pointer is overwritten
433 * with a negative errno code and NULL is returned. The function checks
434 * for both invalid handles and passing a handle which is not a socket.
435 *
436 * On a success the socket object pointer is returned.
437 */
438
439struct socket *sockfd_lookup(int fd, int *err)
440{
441 struct file *file;
1da177e4
LT
442 struct socket *sock;
443
89bddce5
SH
444 file = fget(fd);
445 if (!file) {
1da177e4
LT
446 *err = -EBADF;
447 return NULL;
448 }
89bddce5 449
6cb153ca
BL
450 sock = sock_from_file(file, err);
451 if (!sock)
1da177e4 452 fput(file);
6cb153ca
BL
453 return sock;
454}
1da177e4 455
6cb153ca
BL
456static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
457{
458 struct file *file;
459 struct socket *sock;
460
3672558c 461 *err = -EBADF;
6cb153ca
BL
462 file = fget_light(fd, fput_needed);
463 if (file) {
464 sock = sock_from_file(file, err);
465 if (sock)
466 return sock;
467 fput_light(file, *fput_needed);
1da177e4 468 }
6cb153ca 469 return NULL;
1da177e4
LT
470}
471
472/**
473 * sock_alloc - allocate a socket
89bddce5 474 *
1da177e4
LT
475 * Allocate a new inode and socket object. The two are bound together
476 * and initialised. The socket is then returned. If we are out of inodes
477 * NULL is returned.
478 */
479
480static struct socket *sock_alloc(void)
481{
89bddce5
SH
482 struct inode *inode;
483 struct socket *sock;
1da177e4
LT
484
485 inode = new_inode(sock_mnt->mnt_sb);
486 if (!inode)
487 return NULL;
488
489 sock = SOCKET_I(inode);
490
89bddce5 491 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
492 inode->i_uid = current->fsuid;
493 inode->i_gid = current->fsgid;
494
495 get_cpu_var(sockets_in_use)++;
496 put_cpu_var(sockets_in_use);
497 return sock;
498}
499
500/*
501 * In theory you can't get an open on this inode, but /proc provides
502 * a back door. Remember to keep it shut otherwise you'll let the
503 * creepy crawlies in.
504 */
89bddce5 505
1da177e4
LT
506static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
507{
508 return -ENXIO;
509}
510
4b6f5d20 511const struct file_operations bad_sock_fops = {
1da177e4
LT
512 .owner = THIS_MODULE,
513 .open = sock_no_open,
514};
515
516/**
517 * sock_release - close a socket
518 * @sock: socket to close
519 *
520 * The socket is released from the protocol stack if it has a release
521 * callback, and the inode is then released if the socket is bound to
89bddce5 522 * an inode not a file.
1da177e4 523 */
89bddce5 524
1da177e4
LT
525void sock_release(struct socket *sock)
526{
527 if (sock->ops) {
528 struct module *owner = sock->ops->owner;
529
530 sock->ops->release(sock);
531 sock->ops = NULL;
532 module_put(owner);
533 }
534
535 if (sock->fasync_list)
536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
537
538 get_cpu_var(sockets_in_use)--;
539 put_cpu_var(sockets_in_use);
540 if (!sock->file) {
541 iput(SOCK_INODE(sock));
542 return;
543 }
89bddce5 544 sock->file = NULL;
1da177e4
LT
545}
546
89bddce5 547static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
548 struct msghdr *msg, size_t size)
549{
550 struct sock_iocb *si = kiocb_to_siocb(iocb);
551 int err;
552
553 si->sock = sock;
554 si->scm = NULL;
555 si->msg = msg;
556 si->size = size;
557
558 err = security_socket_sendmsg(sock, msg, size);
559 if (err)
560 return err;
561
562 return sock->ops->sendmsg(iocb, sock, msg, size);
563}
564
565int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
566{
567 struct kiocb iocb;
568 struct sock_iocb siocb;
569 int ret;
570
571 init_sync_kiocb(&iocb, NULL);
572 iocb.private = &siocb;
573 ret = __sock_sendmsg(&iocb, sock, msg, size);
574 if (-EIOCBQUEUED == ret)
575 ret = wait_on_sync_kiocb(&iocb);
576 return ret;
577}
578
579int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
580 struct kvec *vec, size_t num, size_t size)
581{
582 mm_segment_t oldfs = get_fs();
583 int result;
584
585 set_fs(KERNEL_DS);
586 /*
587 * the following is safe, since for compiler definitions of kvec and
588 * iovec are identical, yielding the same in-core layout and alignment
589 */
89bddce5 590 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
591 msg->msg_iovlen = num;
592 result = sock_sendmsg(sock, msg, size);
593 set_fs(oldfs);
594 return result;
595}
596
92f37fd2
ED
597/*
598 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
599 */
600void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
601 struct sk_buff *skb)
602{
603 ktime_t kt = skb->tstamp;
604
605 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
606 struct timeval tv;
607 /* Race occurred between timestamp enabling and packet
608 receiving. Fill in the current time for now. */
609 if (kt.tv64 == 0)
610 kt = ktime_get_real();
611 skb->tstamp = kt;
612 tv = ktime_to_timeval(kt);
613 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
614 } else {
615 struct timespec ts;
616 /* Race occurred between timestamp enabling and packet
617 receiving. Fill in the current time for now. */
618 if (kt.tv64 == 0)
619 kt = ktime_get_real();
620 skb->tstamp = kt;
621 ts = ktime_to_timespec(kt);
622 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
623 }
624}
625
7c81fd8b
ACM
626EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
627
89bddce5 628static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
629 struct msghdr *msg, size_t size, int flags)
630{
631 int err;
632 struct sock_iocb *si = kiocb_to_siocb(iocb);
633
634 si->sock = sock;
635 si->scm = NULL;
636 si->msg = msg;
637 si->size = size;
638 si->flags = flags;
639
640 err = security_socket_recvmsg(sock, msg, size, flags);
641 if (err)
642 return err;
643
644 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
645}
646
89bddce5 647int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
648 size_t size, int flags)
649{
650 struct kiocb iocb;
651 struct sock_iocb siocb;
652 int ret;
653
89bddce5 654 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
655 iocb.private = &siocb;
656 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
657 if (-EIOCBQUEUED == ret)
658 ret = wait_on_sync_kiocb(&iocb);
659 return ret;
660}
661
89bddce5
SH
662int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
663 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
664{
665 mm_segment_t oldfs = get_fs();
666 int result;
667
668 set_fs(KERNEL_DS);
669 /*
670 * the following is safe, since for compiler definitions of kvec and
671 * iovec are identical, yielding the same in-core layout and alignment
672 */
89bddce5 673 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
674 result = sock_recvmsg(sock, msg, size, flags);
675 set_fs(oldfs);
676 return result;
677}
678
679static void sock_aio_dtor(struct kiocb *iocb)
680{
681 kfree(iocb->private);
682}
683
ce1d4d3e
CH
684static ssize_t sock_sendpage(struct file *file, struct page *page,
685 int offset, size_t size, loff_t *ppos, int more)
1da177e4 686{
1da177e4
LT
687 struct socket *sock;
688 int flags;
689
ce1d4d3e
CH
690 sock = file->private_data;
691
692 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
693 if (more)
694 flags |= MSG_MORE;
695
696 return sock->ops->sendpage(sock, page, offset, size, flags);
697}
1da177e4 698
9c55e01c
JA
699static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
700 struct pipe_inode_info *pipe, size_t len,
701 unsigned int flags)
702{
703 struct socket *sock = file->private_data;
704
997b37da
RDC
705 if (unlikely(!sock->ops->splice_read))
706 return -EINVAL;
707
9c55e01c
JA
708 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
709}
710
ce1d4d3e 711static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 712 struct sock_iocb *siocb)
ce1d4d3e
CH
713{
714 if (!is_sync_kiocb(iocb)) {
715 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
716 if (!siocb)
717 return NULL;
1da177e4
LT
718 iocb->ki_dtor = sock_aio_dtor;
719 }
1da177e4 720
ce1d4d3e 721 siocb->kiocb = iocb;
ce1d4d3e
CH
722 iocb->private = siocb;
723 return siocb;
1da177e4
LT
724}
725
ce1d4d3e 726static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
727 struct file *file, const struct iovec *iov,
728 unsigned long nr_segs)
ce1d4d3e
CH
729{
730 struct socket *sock = file->private_data;
731 size_t size = 0;
732 int i;
1da177e4 733
89bddce5
SH
734 for (i = 0; i < nr_segs; i++)
735 size += iov[i].iov_len;
1da177e4 736
ce1d4d3e
CH
737 msg->msg_name = NULL;
738 msg->msg_namelen = 0;
739 msg->msg_control = NULL;
740 msg->msg_controllen = 0;
89bddce5 741 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
742 msg->msg_iovlen = nr_segs;
743 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
744
745 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
746}
747
027445c3
BP
748static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
749 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
750{
751 struct sock_iocb siocb, *x;
752
1da177e4
LT
753 if (pos != 0)
754 return -ESPIPE;
027445c3
BP
755
756 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
757 return 0;
758
027445c3
BP
759
760 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
761 if (!x)
762 return -ENOMEM;
027445c3 763 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
764}
765
ce1d4d3e 766static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
767 struct file *file, const struct iovec *iov,
768 unsigned long nr_segs)
1da177e4 769{
ce1d4d3e
CH
770 struct socket *sock = file->private_data;
771 size_t size = 0;
772 int i;
1da177e4 773
89bddce5
SH
774 for (i = 0; i < nr_segs; i++)
775 size += iov[i].iov_len;
1da177e4 776
ce1d4d3e
CH
777 msg->msg_name = NULL;
778 msg->msg_namelen = 0;
779 msg->msg_control = NULL;
780 msg->msg_controllen = 0;
89bddce5 781 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
782 msg->msg_iovlen = nr_segs;
783 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
784 if (sock->type == SOCK_SEQPACKET)
785 msg->msg_flags |= MSG_EOR;
1da177e4 786
ce1d4d3e 787 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
788}
789
027445c3
BP
790static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
791 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
792{
793 struct sock_iocb siocb, *x;
1da177e4 794
ce1d4d3e
CH
795 if (pos != 0)
796 return -ESPIPE;
027445c3 797
027445c3 798 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
799 if (!x)
800 return -ENOMEM;
1da177e4 801
027445c3 802 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
803}
804
1da177e4
LT
805/*
806 * Atomic setting of ioctl hooks to avoid race
807 * with module unload.
808 */
809
4a3e2f71 810static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 811static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 812
881d966b 813void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 814{
4a3e2f71 815 mutex_lock(&br_ioctl_mutex);
1da177e4 816 br_ioctl_hook = hook;
4a3e2f71 817 mutex_unlock(&br_ioctl_mutex);
1da177e4 818}
89bddce5 819
1da177e4
LT
820EXPORT_SYMBOL(brioctl_set);
821
4a3e2f71 822static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 823static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 824
881d966b 825void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 826{
4a3e2f71 827 mutex_lock(&vlan_ioctl_mutex);
1da177e4 828 vlan_ioctl_hook = hook;
4a3e2f71 829 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 830}
89bddce5 831
1da177e4
LT
832EXPORT_SYMBOL(vlan_ioctl_set);
833
4a3e2f71 834static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 835static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 836
89bddce5 837void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 838{
4a3e2f71 839 mutex_lock(&dlci_ioctl_mutex);
1da177e4 840 dlci_ioctl_hook = hook;
4a3e2f71 841 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 842}
89bddce5 843
1da177e4
LT
844EXPORT_SYMBOL(dlci_ioctl_set);
845
846/*
847 * With an ioctl, arg may well be a user mode pointer, but we don't know
848 * what to do with it - that's up to the protocol still.
849 */
850
851static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
852{
853 struct socket *sock;
881d966b 854 struct sock *sk;
1da177e4
LT
855 void __user *argp = (void __user *)arg;
856 int pid, err;
881d966b 857 struct net *net;
1da177e4 858
b69aee04 859 sock = file->private_data;
881d966b 860 sk = sock->sk;
3b1e0a65 861 net = sock_net(sk);
1da177e4 862 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 863 err = dev_ioctl(net, cmd, argp);
1da177e4 864 } else
d86b5e0e 865#ifdef CONFIG_WIRELESS_EXT
1da177e4 866 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 867 err = dev_ioctl(net, cmd, argp);
1da177e4 868 } else
89bddce5
SH
869#endif /* CONFIG_WIRELESS_EXT */
870 switch (cmd) {
1da177e4
LT
871 case FIOSETOWN:
872 case SIOCSPGRP:
873 err = -EFAULT;
874 if (get_user(pid, (int __user *)argp))
875 break;
876 err = f_setown(sock->file, pid, 1);
877 break;
878 case FIOGETOWN:
879 case SIOCGPGRP:
609d7fa9 880 err = put_user(f_getown(sock->file),
89bddce5 881 (int __user *)argp);
1da177e4
LT
882 break;
883 case SIOCGIFBR:
884 case SIOCSIFBR:
885 case SIOCBRADDBR:
886 case SIOCBRDELBR:
887 err = -ENOPKG;
888 if (!br_ioctl_hook)
889 request_module("bridge");
890
4a3e2f71 891 mutex_lock(&br_ioctl_mutex);
89bddce5 892 if (br_ioctl_hook)
881d966b 893 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 894 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
895 break;
896 case SIOCGIFVLAN:
897 case SIOCSIFVLAN:
898 err = -ENOPKG;
899 if (!vlan_ioctl_hook)
900 request_module("8021q");
901
4a3e2f71 902 mutex_lock(&vlan_ioctl_mutex);
1da177e4 903 if (vlan_ioctl_hook)
881d966b 904 err = vlan_ioctl_hook(net, argp);
4a3e2f71 905 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 906 break;
1da177e4
LT
907 case SIOCADDDLCI:
908 case SIOCDELDLCI:
909 err = -ENOPKG;
910 if (!dlci_ioctl_hook)
911 request_module("dlci");
912
7512cbf6
PE
913 mutex_lock(&dlci_ioctl_mutex);
914 if (dlci_ioctl_hook)
1da177e4 915 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 916 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
917 break;
918 default:
919 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
920
921 /*
922 * If this ioctl is unknown try to hand it down
923 * to the NIC driver.
924 */
925 if (err == -ENOIOCTLCMD)
881d966b 926 err = dev_ioctl(net, cmd, argp);
1da177e4 927 break;
89bddce5 928 }
1da177e4
LT
929 return err;
930}
931
932int sock_create_lite(int family, int type, int protocol, struct socket **res)
933{
934 int err;
935 struct socket *sock = NULL;
89bddce5 936
1da177e4
LT
937 err = security_socket_create(family, type, protocol, 1);
938 if (err)
939 goto out;
940
941 sock = sock_alloc();
942 if (!sock) {
943 err = -ENOMEM;
944 goto out;
945 }
946
1da177e4 947 sock->type = type;
7420ed23
VY
948 err = security_socket_post_create(sock, family, type, protocol, 1);
949 if (err)
950 goto out_release;
951
1da177e4
LT
952out:
953 *res = sock;
954 return err;
7420ed23
VY
955out_release:
956 sock_release(sock);
957 sock = NULL;
958 goto out;
1da177e4
LT
959}
960
961/* No kernel lock held - perfect */
89bddce5 962static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
963{
964 struct socket *sock;
965
966 /*
89bddce5 967 * We can't return errors to poll, so it's either yes or no.
1da177e4 968 */
b69aee04 969 sock = file->private_data;
1da177e4
LT
970 return sock->ops->poll(file, sock, wait);
971}
972
89bddce5 973static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 974{
b69aee04 975 struct socket *sock = file->private_data;
1da177e4
LT
976
977 return sock->ops->mmap(file, sock, vma);
978}
979
20380731 980static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
981{
982 /*
89bddce5
SH
983 * It was possible the inode is NULL we were
984 * closing an unfinished socket.
1da177e4
LT
985 */
986
89bddce5 987 if (!inode) {
1da177e4
LT
988 printk(KERN_DEBUG "sock_close: NULL inode\n");
989 return 0;
990 }
991 sock_fasync(-1, filp, 0);
992 sock_release(SOCKET_I(inode));
993 return 0;
994}
995
996/*
997 * Update the socket async list
998 *
999 * Fasync_list locking strategy.
1000 *
1001 * 1. fasync_list is modified only under process context socket lock
1002 * i.e. under semaphore.
1003 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1004 * or under socket lock.
1005 * 3. fasync_list can be used from softirq context, so that
1006 * modification under socket lock have to be enhanced with
1007 * write_lock_bh(&sk->sk_callback_lock).
1008 * --ANK (990710)
1009 */
1010
1011static int sock_fasync(int fd, struct file *filp, int on)
1012{
89bddce5 1013 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1014 struct socket *sock;
1015 struct sock *sk;
1016
89bddce5 1017 if (on) {
8b3a7005 1018 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1019 if (fna == NULL)
1da177e4
LT
1020 return -ENOMEM;
1021 }
1022
b69aee04 1023 sock = filp->private_data;
1da177e4 1024
89bddce5
SH
1025 sk = sock->sk;
1026 if (sk == NULL) {
1da177e4
LT
1027 kfree(fna);
1028 return -EINVAL;
1029 }
1030
1031 lock_sock(sk);
1032
89bddce5 1033 prev = &(sock->fasync_list);
1da177e4 1034
89bddce5
SH
1035 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1036 if (fa->fa_file == filp)
1da177e4
LT
1037 break;
1038
89bddce5
SH
1039 if (on) {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 fa->fa_fd = fd;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044
1045 kfree(fna);
1046 goto out;
1047 }
89bddce5
SH
1048 fna->fa_file = filp;
1049 fna->fa_fd = fd;
1050 fna->magic = FASYNC_MAGIC;
1051 fna->fa_next = sock->fasync_list;
1da177e4 1052 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1053 sock->fasync_list = fna;
1da177e4 1054 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1055 } else {
1056 if (fa != NULL) {
1da177e4 1057 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1058 *prev = fa->fa_next;
1da177e4
LT
1059 write_unlock_bh(&sk->sk_callback_lock);
1060 kfree(fa);
1061 }
1062 }
1063
1064out:
1065 release_sock(sock->sk);
1066 return 0;
1067}
1068
1069/* This function may be called only under socket lock or callback_lock */
1070
1071int sock_wake_async(struct socket *sock, int how, int band)
1072{
1073 if (!sock || !sock->fasync_list)
1074 return -1;
89bddce5 1075 switch (how) {
8d8ad9d7 1076 case SOCK_WAKE_WAITD:
1da177e4
LT
1077 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1078 break;
1079 goto call_kill;
8d8ad9d7 1080 case SOCK_WAKE_SPACE:
1da177e4
LT
1081 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1082 break;
1083 /* fall through */
8d8ad9d7 1084 case SOCK_WAKE_IO:
89bddce5 1085call_kill:
1da177e4
LT
1086 __kill_fasync(sock->fasync_list, SIGIO, band);
1087 break;
8d8ad9d7 1088 case SOCK_WAKE_URG:
1da177e4
LT
1089 __kill_fasync(sock->fasync_list, SIGURG, band);
1090 }
1091 return 0;
1092}
1093
1b8d7ae4 1094static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1095 struct socket **res, int kern)
1da177e4
LT
1096{
1097 int err;
1098 struct socket *sock;
55737fda 1099 const struct net_proto_family *pf;
1da177e4
LT
1100
1101 /*
89bddce5 1102 * Check protocol is in range
1da177e4
LT
1103 */
1104 if (family < 0 || family >= NPROTO)
1105 return -EAFNOSUPPORT;
1106 if (type < 0 || type >= SOCK_MAX)
1107 return -EINVAL;
1108
1109 /* Compatibility.
1110
1111 This uglymoron is moved from INET layer to here to avoid
1112 deadlock in module load.
1113 */
1114 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1115 static int warned;
1da177e4
LT
1116 if (!warned) {
1117 warned = 1;
89bddce5
SH
1118 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 }
1121 family = PF_PACKET;
1122 }
1123
1124 err = security_socket_create(family, type, protocol, kern);
1125 if (err)
1126 return err;
89bddce5 1127
55737fda
SH
1128 /*
1129 * Allocate the socket and allow the family to set things up. if
1130 * the protocol is 0, the family is instructed to select an appropriate
1131 * default.
1132 */
1133 sock = sock_alloc();
1134 if (!sock) {
1135 if (net_ratelimit())
1136 printk(KERN_WARNING "socket: no more sockets\n");
1137 return -ENFILE; /* Not exactly a match, but its the
1138 closest posix thing */
1139 }
1140
1141 sock->type = type;
1142
1da177e4 1143#if defined(CONFIG_KMOD)
89bddce5
SH
1144 /* Attempt to load a protocol module if the find failed.
1145 *
1146 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1147 * requested real, full-featured networking support upon configuration.
1148 * Otherwise module support will break!
1149 */
55737fda 1150 if (net_families[family] == NULL)
89bddce5 1151 request_module("net-pf-%d", family);
1da177e4
LT
1152#endif
1153
55737fda
SH
1154 rcu_read_lock();
1155 pf = rcu_dereference(net_families[family]);
1156 err = -EAFNOSUPPORT;
1157 if (!pf)
1158 goto out_release;
1da177e4
LT
1159
1160 /*
1161 * We will call the ->create function, that possibly is in a loadable
1162 * module, so we have to bump that loadable module refcnt first.
1163 */
55737fda 1164 if (!try_module_get(pf->owner))
1da177e4
LT
1165 goto out_release;
1166
55737fda
SH
1167 /* Now protected by module ref count */
1168 rcu_read_unlock();
1169
1b8d7ae4 1170 err = pf->create(net, sock, protocol);
55737fda 1171 if (err < 0)
1da177e4 1172 goto out_module_put;
a79af59e 1173
1da177e4
LT
1174 /*
1175 * Now to bump the refcnt of the [loadable] module that owns this
1176 * socket at sock_release time we decrement its refcnt.
1177 */
55737fda
SH
1178 if (!try_module_get(sock->ops->owner))
1179 goto out_module_busy;
1180
1da177e4
LT
1181 /*
1182 * Now that we're done with the ->create function, the [loadable]
1183 * module can have its refcnt decremented
1184 */
55737fda 1185 module_put(pf->owner);
7420ed23
VY
1186 err = security_socket_post_create(sock, family, type, protocol, kern);
1187 if (err)
3b185525 1188 goto out_sock_release;
55737fda 1189 *res = sock;
1da177e4 1190
55737fda
SH
1191 return 0;
1192
1193out_module_busy:
1194 err = -EAFNOSUPPORT;
1da177e4 1195out_module_put:
55737fda
SH
1196 sock->ops = NULL;
1197 module_put(pf->owner);
1198out_sock_release:
1da177e4 1199 sock_release(sock);
55737fda
SH
1200 return err;
1201
1202out_release:
1203 rcu_read_unlock();
1204 goto out_sock_release;
1da177e4
LT
1205}
1206
1207int sock_create(int family, int type, int protocol, struct socket **res)
1208{
1b8d7ae4 1209 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1210}
1211
1212int sock_create_kern(int family, int type, int protocol, struct socket **res)
1213{
1b8d7ae4 1214 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1215}
1216
1217asmlinkage long sys_socket(int family, int type, int protocol)
1218{
1219 int retval;
1220 struct socket *sock;
a677a039
UD
1221 int flags;
1222
1223 flags = type & ~SOCK_TYPE_MASK;
1224 if (flags & ~SOCK_CLOEXEC)
1225 return -EINVAL;
1226 type &= SOCK_TYPE_MASK;
1da177e4
LT
1227
1228 retval = sock_create(family, type, protocol, &sock);
1229 if (retval < 0)
1230 goto out;
1231
a677a039 1232 retval = sock_map_fd(sock, flags & O_CLOEXEC);
1da177e4
LT
1233 if (retval < 0)
1234 goto out_release;
1235
1236out:
1237 /* It may be already another descriptor 8) Not kernel problem. */
1238 return retval;
1239
1240out_release:
1241 sock_release(sock);
1242 return retval;
1243}
1244
1245/*
1246 * Create a pair of connected sockets.
1247 */
1248
89bddce5
SH
1249asmlinkage long sys_socketpair(int family, int type, int protocol,
1250 int __user *usockvec)
1da177e4
LT
1251{
1252 struct socket *sock1, *sock2;
1253 int fd1, fd2, err;
db349509 1254 struct file *newfile1, *newfile2;
a677a039
UD
1255 int flags;
1256
1257 flags = type & ~SOCK_TYPE_MASK;
1258 if (flags & ~SOCK_CLOEXEC)
1259 return -EINVAL;
1260 type &= SOCK_TYPE_MASK;
1da177e4
LT
1261
1262 /*
1263 * Obtain the first socket and check if the underlying protocol
1264 * supports the socketpair call.
1265 */
1266
1267 err = sock_create(family, type, protocol, &sock1);
1268 if (err < 0)
1269 goto out;
1270
1271 err = sock_create(family, type, protocol, &sock2);
1272 if (err < 0)
1273 goto out_release_1;
1274
1275 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1276 if (err < 0)
1da177e4
LT
1277 goto out_release_both;
1278
a677a039 1279 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
bf3c23d1
DM
1280 if (unlikely(fd1 < 0)) {
1281 err = fd1;
db349509 1282 goto out_release_both;
bf3c23d1 1283 }
1da177e4 1284
a677a039 1285 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
db349509 1286 if (unlikely(fd2 < 0)) {
bf3c23d1 1287 err = fd2;
db349509
AV
1288 put_filp(newfile1);
1289 put_unused_fd(fd1);
1da177e4 1290 goto out_release_both;
db349509 1291 }
1da177e4 1292
db349509
AV
1293 err = sock_attach_fd(sock1, newfile1);
1294 if (unlikely(err < 0)) {
1295 goto out_fd2;
1296 }
1297
1298 err = sock_attach_fd(sock2, newfile2);
1299 if (unlikely(err < 0)) {
1300 fput(newfile1);
1301 goto out_fd1;
1302 }
1303
1304 err = audit_fd_pair(fd1, fd2);
1305 if (err < 0) {
1306 fput(newfile1);
1307 fput(newfile2);
1308 goto out_fd;
1309 }
1da177e4 1310
db349509
AV
1311 fd_install(fd1, newfile1);
1312 fd_install(fd2, newfile2);
1da177e4
LT
1313 /* fd1 and fd2 may be already another descriptors.
1314 * Not kernel problem.
1315 */
1316
89bddce5 1317 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1318 if (!err)
1319 err = put_user(fd2, &usockvec[1]);
1320 if (!err)
1321 return 0;
1322
1323 sys_close(fd2);
1324 sys_close(fd1);
1325 return err;
1326
1da177e4 1327out_release_both:
89bddce5 1328 sock_release(sock2);
1da177e4 1329out_release_1:
89bddce5 1330 sock_release(sock1);
1da177e4
LT
1331out:
1332 return err;
db349509
AV
1333
1334out_fd2:
1335 put_filp(newfile1);
1336 sock_release(sock1);
1337out_fd1:
1338 put_filp(newfile2);
1339 sock_release(sock2);
1340out_fd:
1341 put_unused_fd(fd1);
1342 put_unused_fd(fd2);
1343 goto out;
1da177e4
LT
1344}
1345
1da177e4
LT
1346/*
1347 * Bind a name to a socket. Nothing much to do here since it's
1348 * the protocol's responsibility to handle the local address.
1349 *
1350 * We move the socket address to kernel space before we call
1351 * the protocol layer (having also checked the address is ok).
1352 */
1353
1354asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1355{
1356 struct socket *sock;
230b1839 1357 struct sockaddr_storage address;
6cb153ca 1358 int err, fput_needed;
1da177e4 1359
89bddce5 1360 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1361 if (sock) {
230b1839 1362 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1363 if (err >= 0) {
1364 err = security_socket_bind(sock,
230b1839 1365 (struct sockaddr *)&address,
89bddce5 1366 addrlen);
6cb153ca
BL
1367 if (!err)
1368 err = sock->ops->bind(sock,
89bddce5 1369 (struct sockaddr *)
230b1839 1370 &address, addrlen);
1da177e4 1371 }
6cb153ca 1372 fput_light(sock->file, fput_needed);
89bddce5 1373 }
1da177e4
LT
1374 return err;
1375}
1376
1da177e4
LT
1377/*
1378 * Perform a listen. Basically, we allow the protocol to do anything
1379 * necessary for a listen, and if that works, we mark the socket as
1380 * ready for listening.
1381 */
1382
1da177e4
LT
1383asmlinkage long sys_listen(int fd, int backlog)
1384{
1385 struct socket *sock;
6cb153ca 1386 int err, fput_needed;
b8e1f9b5 1387 int somaxconn;
89bddce5
SH
1388
1389 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1390 if (sock) {
8efa6e93 1391 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1392 if ((unsigned)backlog > somaxconn)
1393 backlog = somaxconn;
1da177e4
LT
1394
1395 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1396 if (!err)
1397 err = sock->ops->listen(sock, backlog);
1da177e4 1398
6cb153ca 1399 fput_light(sock->file, fput_needed);
1da177e4
LT
1400 }
1401 return err;
1402}
1403
1da177e4
LT
1404/*
1405 * For accept, we attempt to create a new socket, set up the link
1406 * with the client, wake up the client, then return the new
1407 * connected fd. We collect the address of the connector in kernel
1408 * space and move it to user at the very end. This is unclean because
1409 * we open the socket then return an error.
1410 *
1411 * 1003.1g adds the ability to recvmsg() to query connection pending
1412 * status to recvmsg. We need to add that support in a way thats
1413 * clean when we restucture accept also.
1414 */
1415
89bddce5
SH
1416asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1417 int __user *upeer_addrlen)
1da177e4
LT
1418{
1419 struct socket *sock, *newsock;
39d8c1b6 1420 struct file *newfile;
6cb153ca 1421 int err, len, newfd, fput_needed;
230b1839 1422 struct sockaddr_storage address;
1da177e4 1423
6cb153ca 1424 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1425 if (!sock)
1426 goto out;
1427
1428 err = -ENFILE;
89bddce5 1429 if (!(newsock = sock_alloc()))
1da177e4
LT
1430 goto out_put;
1431
1432 newsock->type = sock->type;
1433 newsock->ops = sock->ops;
1434
1da177e4
LT
1435 /*
1436 * We don't need try_module_get here, as the listening socket (sock)
1437 * has the protocol module (sock->ops->owner) held.
1438 */
1439 __module_get(newsock->ops->owner);
1440
a677a039 1441 newfd = sock_alloc_fd(&newfile, 0);
39d8c1b6
DM
1442 if (unlikely(newfd < 0)) {
1443 err = newfd;
9a1875e6
DM
1444 sock_release(newsock);
1445 goto out_put;
39d8c1b6
DM
1446 }
1447
1448 err = sock_attach_fd(newsock, newfile);
1449 if (err < 0)
79f4f642 1450 goto out_fd_simple;
39d8c1b6 1451
a79af59e
FF
1452 err = security_socket_accept(sock, newsock);
1453 if (err)
39d8c1b6 1454 goto out_fd;
a79af59e 1455
1da177e4
LT
1456 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1457 if (err < 0)
39d8c1b6 1458 goto out_fd;
1da177e4
LT
1459
1460 if (upeer_sockaddr) {
230b1839 1461 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1462 &len, 2) < 0) {
1da177e4 1463 err = -ECONNABORTED;
39d8c1b6 1464 goto out_fd;
1da177e4 1465 }
230b1839
YH
1466 err = move_addr_to_user((struct sockaddr *)&address,
1467 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1468 if (err < 0)
39d8c1b6 1469 goto out_fd;
1da177e4
LT
1470 }
1471
1472 /* File flags are not inherited via accept() unlike another OSes. */
1473
39d8c1b6
DM
1474 fd_install(newfd, newfile);
1475 err = newfd;
1da177e4
LT
1476
1477 security_socket_post_accept(sock, newsock);
1478
1479out_put:
6cb153ca 1480 fput_light(sock->file, fput_needed);
1da177e4
LT
1481out:
1482 return err;
79f4f642
AD
1483out_fd_simple:
1484 sock_release(newsock);
1485 put_filp(newfile);
1486 put_unused_fd(newfd);
1487 goto out_put;
39d8c1b6 1488out_fd:
9606a216 1489 fput(newfile);
39d8c1b6 1490 put_unused_fd(newfd);
1da177e4
LT
1491 goto out_put;
1492}
1493
1da177e4
LT
1494/*
1495 * Attempt to connect to a socket with the server address. The address
1496 * is in user space so we verify it is OK and move it to kernel space.
1497 *
1498 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1499 * break bindings
1500 *
1501 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1502 * other SEQPACKET protocols that take time to connect() as it doesn't
1503 * include the -EINPROGRESS status for such sockets.
1504 */
1505
89bddce5
SH
1506asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1507 int addrlen)
1da177e4
LT
1508{
1509 struct socket *sock;
230b1839 1510 struct sockaddr_storage address;
6cb153ca 1511 int err, fput_needed;
1da177e4 1512
6cb153ca 1513 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1514 if (!sock)
1515 goto out;
230b1839 1516 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1517 if (err < 0)
1518 goto out_put;
1519
89bddce5 1520 err =
230b1839 1521 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1522 if (err)
1523 goto out_put;
1524
230b1839 1525 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1526 sock->file->f_flags);
1527out_put:
6cb153ca 1528 fput_light(sock->file, fput_needed);
1da177e4
LT
1529out:
1530 return err;
1531}
1532
1533/*
1534 * Get the local address ('name') of a socket object. Move the obtained
1535 * name to user space.
1536 */
1537
89bddce5
SH
1538asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1539 int __user *usockaddr_len)
1da177e4
LT
1540{
1541 struct socket *sock;
230b1839 1542 struct sockaddr_storage address;
6cb153ca 1543 int len, err, fput_needed;
89bddce5 1544
6cb153ca 1545 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1546 if (!sock)
1547 goto out;
1548
1549 err = security_socket_getsockname(sock);
1550 if (err)
1551 goto out_put;
1552
230b1839 1553 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1554 if (err)
1555 goto out_put;
230b1839 1556 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1557
1558out_put:
6cb153ca 1559 fput_light(sock->file, fput_needed);
1da177e4
LT
1560out:
1561 return err;
1562}
1563
1564/*
1565 * Get the remote address ('name') of a socket object. Move the obtained
1566 * name to user space.
1567 */
1568
89bddce5
SH
1569asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1570 int __user *usockaddr_len)
1da177e4
LT
1571{
1572 struct socket *sock;
230b1839 1573 struct sockaddr_storage address;
6cb153ca 1574 int len, err, fput_needed;
1da177e4 1575
89bddce5
SH
1576 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1577 if (sock != NULL) {
1da177e4
LT
1578 err = security_socket_getpeername(sock);
1579 if (err) {
6cb153ca 1580 fput_light(sock->file, fput_needed);
1da177e4
LT
1581 return err;
1582 }
1583
89bddce5 1584 err =
230b1839 1585 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1586 1);
1da177e4 1587 if (!err)
230b1839 1588 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1589 usockaddr_len);
6cb153ca 1590 fput_light(sock->file, fput_needed);
1da177e4
LT
1591 }
1592 return err;
1593}
1594
1595/*
1596 * Send a datagram to a given address. We move the address into kernel
1597 * space and check the user space data area is readable before invoking
1598 * the protocol.
1599 */
1600
89bddce5
SH
1601asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1602 unsigned flags, struct sockaddr __user *addr,
1603 int addr_len)
1da177e4
LT
1604{
1605 struct socket *sock;
230b1839 1606 struct sockaddr_storage address;
1da177e4
LT
1607 int err;
1608 struct msghdr msg;
1609 struct iovec iov;
6cb153ca 1610 int fput_needed;
6cb153ca 1611
de0fa95c
PE
1612 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1613 if (!sock)
4387ff75 1614 goto out;
6cb153ca 1615
89bddce5
SH
1616 iov.iov_base = buff;
1617 iov.iov_len = len;
1618 msg.msg_name = NULL;
1619 msg.msg_iov = &iov;
1620 msg.msg_iovlen = 1;
1621 msg.msg_control = NULL;
1622 msg.msg_controllen = 0;
1623 msg.msg_namelen = 0;
6cb153ca 1624 if (addr) {
230b1839 1625 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1626 if (err < 0)
1627 goto out_put;
230b1839 1628 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1629 msg.msg_namelen = addr_len;
1da177e4
LT
1630 }
1631 if (sock->file->f_flags & O_NONBLOCK)
1632 flags |= MSG_DONTWAIT;
1633 msg.msg_flags = flags;
1634 err = sock_sendmsg(sock, &msg, len);
1635
89bddce5 1636out_put:
de0fa95c 1637 fput_light(sock->file, fput_needed);
4387ff75 1638out:
1da177e4
LT
1639 return err;
1640}
1641
1642/*
89bddce5 1643 * Send a datagram down a socket.
1da177e4
LT
1644 */
1645
89bddce5 1646asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1647{
1648 return sys_sendto(fd, buff, len, flags, NULL, 0);
1649}
1650
1651/*
89bddce5 1652 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1653 * sender. We verify the buffers are writable and if needed move the
1654 * sender address from kernel to user space.
1655 */
1656
89bddce5
SH
1657asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1658 unsigned flags, struct sockaddr __user *addr,
1659 int __user *addr_len)
1da177e4
LT
1660{
1661 struct socket *sock;
1662 struct iovec iov;
1663 struct msghdr msg;
230b1839 1664 struct sockaddr_storage address;
89bddce5 1665 int err, err2;
6cb153ca
BL
1666 int fput_needed;
1667
de0fa95c 1668 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1669 if (!sock)
de0fa95c 1670 goto out;
1da177e4 1671
89bddce5
SH
1672 msg.msg_control = NULL;
1673 msg.msg_controllen = 0;
1674 msg.msg_iovlen = 1;
1675 msg.msg_iov = &iov;
1676 iov.iov_len = size;
1677 iov.iov_base = ubuf;
230b1839
YH
1678 msg.msg_name = (struct sockaddr *)&address;
1679 msg.msg_namelen = sizeof(address);
1da177e4
LT
1680 if (sock->file->f_flags & O_NONBLOCK)
1681 flags |= MSG_DONTWAIT;
89bddce5 1682 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1683
89bddce5 1684 if (err >= 0 && addr != NULL) {
230b1839
YH
1685 err2 = move_addr_to_user((struct sockaddr *)&address,
1686 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1687 if (err2 < 0)
1688 err = err2;
1da177e4 1689 }
de0fa95c
PE
1690
1691 fput_light(sock->file, fput_needed);
4387ff75 1692out:
1da177e4
LT
1693 return err;
1694}
1695
1696/*
89bddce5 1697 * Receive a datagram from a socket.
1da177e4
LT
1698 */
1699
89bddce5
SH
1700asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1701 unsigned flags)
1da177e4
LT
1702{
1703 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1704}
1705
1706/*
1707 * Set a socket option. Because we don't know the option lengths we have
1708 * to pass the user mode parameter for the protocols to sort out.
1709 */
1710
89bddce5
SH
1711asmlinkage long sys_setsockopt(int fd, int level, int optname,
1712 char __user *optval, int optlen)
1da177e4 1713{
6cb153ca 1714 int err, fput_needed;
1da177e4
LT
1715 struct socket *sock;
1716
1717 if (optlen < 0)
1718 return -EINVAL;
89bddce5
SH
1719
1720 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1721 if (sock != NULL) {
1722 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1723 if (err)
1724 goto out_put;
1da177e4
LT
1725
1726 if (level == SOL_SOCKET)
89bddce5
SH
1727 err =
1728 sock_setsockopt(sock, level, optname, optval,
1729 optlen);
1da177e4 1730 else
89bddce5
SH
1731 err =
1732 sock->ops->setsockopt(sock, level, optname, optval,
1733 optlen);
6cb153ca
BL
1734out_put:
1735 fput_light(sock->file, fput_needed);
1da177e4
LT
1736 }
1737 return err;
1738}
1739
1740/*
1741 * Get a socket option. Because we don't know the option lengths we have
1742 * to pass a user mode parameter for the protocols to sort out.
1743 */
1744
89bddce5
SH
1745asmlinkage long sys_getsockopt(int fd, int level, int optname,
1746 char __user *optval, int __user *optlen)
1da177e4 1747{
6cb153ca 1748 int err, fput_needed;
1da177e4
LT
1749 struct socket *sock;
1750
89bddce5
SH
1751 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1752 if (sock != NULL) {
6cb153ca
BL
1753 err = security_socket_getsockopt(sock, level, optname);
1754 if (err)
1755 goto out_put;
1da177e4
LT
1756
1757 if (level == SOL_SOCKET)
89bddce5
SH
1758 err =
1759 sock_getsockopt(sock, level, optname, optval,
1760 optlen);
1da177e4 1761 else
89bddce5
SH
1762 err =
1763 sock->ops->getsockopt(sock, level, optname, optval,
1764 optlen);
6cb153ca
BL
1765out_put:
1766 fput_light(sock->file, fput_needed);
1da177e4
LT
1767 }
1768 return err;
1769}
1770
1da177e4
LT
1771/*
1772 * Shutdown a socket.
1773 */
1774
1775asmlinkage long sys_shutdown(int fd, int how)
1776{
6cb153ca 1777 int err, fput_needed;
1da177e4
LT
1778 struct socket *sock;
1779
89bddce5
SH
1780 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1781 if (sock != NULL) {
1da177e4 1782 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1783 if (!err)
1784 err = sock->ops->shutdown(sock, how);
1785 fput_light(sock->file, fput_needed);
1da177e4
LT
1786 }
1787 return err;
1788}
1789
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and rely on two names being visible
 * there: `flags` (tested for MSG_CMSG_COMPAT) and `<msg>_compat`, the
 * compat view of the same user pointer.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1796
1da177e4
LT
1797/*
1798 * BSD sendmsg interface
1799 */
1800
1801asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1802{
89bddce5
SH
1803 struct compat_msghdr __user *msg_compat =
1804 (struct compat_msghdr __user *)msg;
1da177e4 1805 struct socket *sock;
230b1839 1806 struct sockaddr_storage address;
1da177e4 1807 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1808 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1809 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1810 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1811 unsigned char *ctl_buf = ctl;
1812 struct msghdr msg_sys;
1813 int err, ctl_len, iov_size, total_len;
6cb153ca 1814 int fput_needed;
89bddce5 1815
1da177e4
LT
1816 err = -EFAULT;
1817 if (MSG_CMSG_COMPAT & flags) {
1818 if (get_compat_msghdr(&msg_sys, msg_compat))
1819 return -EFAULT;
89bddce5
SH
1820 }
1821 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1822 return -EFAULT;
1823
6cb153ca 1824 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1825 if (!sock)
1da177e4
LT
1826 goto out;
1827
1828 /* do not move before msg_sys is valid */
1829 err = -EMSGSIZE;
1830 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1831 goto out_put;
1832
89bddce5 1833 /* Check whether to allocate the iovec area */
1da177e4
LT
1834 err = -ENOMEM;
1835 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1836 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1837 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1838 if (!iov)
1839 goto out_put;
1840 }
1841
1842 /* This will also move the address data into kernel space */
1843 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1844 err = verify_compat_iovec(&msg_sys, iov,
1845 (struct sockaddr *)&address,
1846 VERIFY_READ);
1da177e4 1847 } else
230b1839
YH
1848 err = verify_iovec(&msg_sys, iov,
1849 (struct sockaddr *)&address,
1850 VERIFY_READ);
89bddce5 1851 if (err < 0)
1da177e4
LT
1852 goto out_freeiov;
1853 total_len = err;
1854
1855 err = -ENOBUFS;
1856
1857 if (msg_sys.msg_controllen > INT_MAX)
1858 goto out_freeiov;
89bddce5 1859 ctl_len = msg_sys.msg_controllen;
1da177e4 1860 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1861 err =
1862 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1863 sizeof(ctl));
1da177e4
LT
1864 if (err)
1865 goto out_freeiov;
1866 ctl_buf = msg_sys.msg_control;
8920e8f9 1867 ctl_len = msg_sys.msg_controllen;
1da177e4 1868 } else if (ctl_len) {
89bddce5 1869 if (ctl_len > sizeof(ctl)) {
1da177e4 1870 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1871 if (ctl_buf == NULL)
1da177e4
LT
1872 goto out_freeiov;
1873 }
1874 err = -EFAULT;
1875 /*
1876 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1877 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1878 * checking falls down on this.
1879 */
89bddce5
SH
1880 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1881 ctl_len))
1da177e4
LT
1882 goto out_freectl;
1883 msg_sys.msg_control = ctl_buf;
1884 }
1885 msg_sys.msg_flags = flags;
1886
1887 if (sock->file->f_flags & O_NONBLOCK)
1888 msg_sys.msg_flags |= MSG_DONTWAIT;
1889 err = sock_sendmsg(sock, &msg_sys, total_len);
1890
1891out_freectl:
89bddce5 1892 if (ctl_buf != ctl)
1da177e4
LT
1893 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1894out_freeiov:
1895 if (iov != iovstack)
1896 sock_kfree_s(sock->sk, iov, iov_size);
1897out_put:
6cb153ca 1898 fput_light(sock->file, fput_needed);
89bddce5 1899out:
1da177e4
LT
1900 return err;
1901}
1902
1903/*
1904 * BSD recvmsg interface
1905 */
1906
89bddce5
SH
1907asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1908 unsigned int flags)
1da177e4 1909{
89bddce5
SH
1910 struct compat_msghdr __user *msg_compat =
1911 (struct compat_msghdr __user *)msg;
1da177e4
LT
1912 struct socket *sock;
1913 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1914 struct iovec *iov = iovstack;
1da177e4
LT
1915 struct msghdr msg_sys;
1916 unsigned long cmsg_ptr;
1917 int err, iov_size, total_len, len;
6cb153ca 1918 int fput_needed;
1da177e4
LT
1919
1920 /* kernel mode address */
230b1839 1921 struct sockaddr_storage addr;
1da177e4
LT
1922
1923 /* user mode address pointers */
1924 struct sockaddr __user *uaddr;
1925 int __user *uaddr_len;
89bddce5 1926
1da177e4
LT
1927 if (MSG_CMSG_COMPAT & flags) {
1928 if (get_compat_msghdr(&msg_sys, msg_compat))
1929 return -EFAULT;
89bddce5
SH
1930 }
1931 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1932 return -EFAULT;
1da177e4 1933
6cb153ca 1934 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1935 if (!sock)
1936 goto out;
1937
1938 err = -EMSGSIZE;
1939 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1940 goto out_put;
89bddce5
SH
1941
1942 /* Check whether to allocate the iovec area */
1da177e4
LT
1943 err = -ENOMEM;
1944 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1945 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1946 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1947 if (!iov)
1948 goto out_put;
1949 }
1950
1951 /*
89bddce5
SH
1952 * Save the user-mode address (verify_iovec will change the
1953 * kernel msghdr to use the kernel address space)
1da177e4 1954 */
89bddce5 1955
cfcabdcc 1956 uaddr = (__force void __user *)msg_sys.msg_name;
1da177e4
LT
1957 uaddr_len = COMPAT_NAMELEN(msg);
1958 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1959 err = verify_compat_iovec(&msg_sys, iov,
1960 (struct sockaddr *)&addr,
1961 VERIFY_WRITE);
1da177e4 1962 } else
230b1839
YH
1963 err = verify_iovec(&msg_sys, iov,
1964 (struct sockaddr *)&addr,
1965 VERIFY_WRITE);
1da177e4
LT
1966 if (err < 0)
1967 goto out_freeiov;
89bddce5 1968 total_len = err;
1da177e4
LT
1969
1970 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1971 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1972
1da177e4
LT
1973 if (sock->file->f_flags & O_NONBLOCK)
1974 flags |= MSG_DONTWAIT;
1975 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1976 if (err < 0)
1977 goto out_freeiov;
1978 len = err;
1979
1980 if (uaddr != NULL) {
230b1839
YH
1981 err = move_addr_to_user((struct sockaddr *)&addr,
1982 msg_sys.msg_namelen, uaddr,
89bddce5 1983 uaddr_len);
1da177e4
LT
1984 if (err < 0)
1985 goto out_freeiov;
1986 }
37f7f421
DM
1987 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1988 COMPAT_FLAGS(msg));
1da177e4
LT
1989 if (err)
1990 goto out_freeiov;
1991 if (MSG_CMSG_COMPAT & flags)
89bddce5 1992 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1993 &msg_compat->msg_controllen);
1994 else
89bddce5 1995 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1996 &msg->msg_controllen);
1997 if (err)
1998 goto out_freeiov;
1999 err = len;
2000
2001out_freeiov:
2002 if (iov != iovstack)
2003 sock_kfree_s(sock->sk, iov, iov_size);
2004out_put:
6cb153ca 2005 fput_light(sock->file, fput_needed);
1da177e4
LT
2006out:
2007 return err;
2008}
2009
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number.  Entry 0 is unused (call numbers start at 1).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18]={
	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call selects the socket operation (1 .. SYS_RECVMSG) and @args
 *	points to that operation's arguments, packed as unsigned longs.
 *	Returns the selected call's result, -EINVAL for an unknown @call,
 *	-EFAULT if the arguments cannot be read, or the audit hook's error.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	unsigned long a0, a1;
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	a0 = a[0];
	a1 = a[1];

	switch (call) {
	case SYS_SOCKET:
		return sys_socket(a0, a1, a[2]);
	case SYS_BIND:
		return sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
	case SYS_CONNECT:
		return sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
	case SYS_LISTEN:
		return sys_listen(a0, a1);
	case SYS_ACCEPT:
		return sys_accept(a0, (struct sockaddr __user *)a1,
				  (int __user *)a[2]);
	case SYS_GETSOCKNAME:
		return sys_getsockname(a0, (struct sockaddr __user *)a1,
				       (int __user *)a[2]);
	case SYS_GETPEERNAME:
		return sys_getpeername(a0, (struct sockaddr __user *)a1,
				       (int __user *)a[2]);
	case SYS_SOCKETPAIR:
		return sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
	case SYS_SEND:
		return sys_send(a0, (void __user *)a1, a[2], a[3]);
	case SYS_SENDTO:
		return sys_sendto(a0, (void __user *)a1, a[2], a[3],
				  (struct sockaddr __user *)a[4], a[5]);
	case SYS_RECV:
		return sys_recv(a0, (void __user *)a1, a[2], a[3]);
	case SYS_RECVFROM:
		return sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				    (struct sockaddr __user *)a[4],
				    (int __user *)a[5]);
	case SYS_SHUTDOWN:
		return sys_shutdown(a0, a1);
	case SYS_SETSOCKOPT:
		return sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
	case SYS_GETSOCKOPT:
		return sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
				      (int __user *)a[4]);
	case SYS_SENDMSG:
		return sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
	case SYS_RECVMSG:
		return sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
	default:
		return -EINVAL;
	}
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2121
55737fda
SH
2122/**
2123 * sock_register - add a socket protocol handler
2124 * @ops: description of protocol
2125 *
1da177e4
LT
2126 * This function is called by a protocol handler that wants to
2127 * advertise its address family, and have it linked into the
55737fda
SH
2128 * socket interface. The value ops->family coresponds to the
2129 * socket system call protocol family.
1da177e4 2130 */
f0fd27d4 2131int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2132{
2133 int err;
2134
2135 if (ops->family >= NPROTO) {
89bddce5
SH
2136 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2137 NPROTO);
1da177e4
LT
2138 return -ENOBUFS;
2139 }
55737fda
SH
2140
2141 spin_lock(&net_family_lock);
2142 if (net_families[ops->family])
2143 err = -EEXIST;
2144 else {
89bddce5 2145 net_families[ops->family] = ops;
1da177e4
LT
2146 err = 0;
2147 }
55737fda
SH
2148 spin_unlock(&net_family_lock);
2149
89bddce5 2150 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2151 return err;
2152}
2153
55737fda
SH
2154/**
2155 * sock_unregister - remove a protocol handler
2156 * @family: protocol family to remove
2157 *
1da177e4
LT
2158 * This function is called by a protocol handler that wants to
2159 * remove its address family, and have it unlinked from the
55737fda
SH
2160 * new socket creation.
2161 *
2162 * If protocol handler is a module, then it can use module reference
2163 * counts to protect against new references. If protocol handler is not
2164 * a module then it needs to provide its own protection in
2165 * the ops->create routine.
1da177e4 2166 */
f0fd27d4 2167void sock_unregister(int family)
1da177e4 2168{
f0fd27d4 2169 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2170
55737fda 2171 spin_lock(&net_family_lock);
89bddce5 2172 net_families[family] = NULL;
55737fda
SH
2173 spin_unlock(&net_family_lock);
2174
2175 synchronize_rcu();
2176
89bddce5 2177 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2178}
2179
77d76ea3 2180static int __init sock_init(void)
1da177e4
LT
2181{
2182 /*
89bddce5 2183 * Initialize sock SLAB cache.
1da177e4 2184 */
89bddce5 2185
1da177e4
LT
2186 sk_init();
2187
1da177e4 2188 /*
89bddce5 2189 * Initialize skbuff SLAB cache
1da177e4
LT
2190 */
2191 skb_init();
1da177e4
LT
2192
2193 /*
89bddce5 2194 * Initialize the protocols module.
1da177e4
LT
2195 */
2196
2197 init_inodecache();
2198 register_filesystem(&sock_fs_type);
2199 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2200
2201 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2202 */
2203
2204#ifdef CONFIG_NETFILTER
2205 netfilter_init();
2206#endif
cbeb321a
DM
2207
2208 return 0;
1da177e4
LT
2209}
2210
77d76ea3
AK
2211core_initcall(sock_init); /* early initcall */
2212
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 *	Write the "sockets: used N" line to @seq, where N is the sum of the
 *	per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2229
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 *	Compat ioctl entry point for sockets.
 *
 *	The socket's own ops->compat_ioctl handler, when present, gets the
 *	first try.  If the result is still -ENOIOCTLCMD and @cmd lies in
 *	the SIOCIWFIRST..SIOCIWLAST range, the request is passed on to
 *	compat_wext_handle_ioctl().  Otherwise the result so far
 *	(possibly -ENOIOCTLCMD) is returned.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	int rc = -ENOIOCTLCMD;

	if (sock->ops->compat_ioctl)
		rc = sock->ops->compat_ioctl(sock, cmd, arg);

	/* A handler that recognised the command has the final word. */
	if (rc != -ENOIOCTLCMD)
		return rc;

	/* Only the SIOCIW* range is retried through the wext handler. */
	if (cmd < SIOCIWFIRST || cmd > SIOCIWLAST)
		return rc;

	return compat_wext_handle_ioctl(net, cmd, arg);
}
#endif
2252
ac5a488e
SS
2253int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2254{
2255 return sock->ops->bind(sock, addr, addrlen);
2256}
2257
2258int kernel_listen(struct socket *sock, int backlog)
2259{
2260 return sock->ops->listen(sock, backlog);
2261}
2262
2263int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2264{
2265 struct sock *sk = sock->sk;
2266 int err;
2267
2268 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2269 newsock);
2270 if (err < 0)
2271 goto done;
2272
2273 err = sock->ops->accept(sock, *newsock, flags);
2274 if (err < 0) {
2275 sock_release(*newsock);
fa8705b0 2276 *newsock = NULL;
ac5a488e
SS
2277 goto done;
2278 }
2279
2280 (*newsock)->ops = sock->ops;
2281
2282done:
2283 return err;
2284}
2285
2286int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2287 int flags)
ac5a488e
SS
2288{
2289 return sock->ops->connect(sock, addr, addrlen, flags);
2290}
2291
2292int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2293 int *addrlen)
2294{
2295 return sock->ops->getname(sock, addr, addrlen, 0);
2296}
2297
2298int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2299 int *addrlen)
2300{
2301 return sock->ops->getname(sock, addr, addrlen, 1);
2302}
2303
2304int kernel_getsockopt(struct socket *sock, int level, int optname,
2305 char *optval, int *optlen)
2306{
2307 mm_segment_t oldfs = get_fs();
2308 int err;
2309
2310 set_fs(KERNEL_DS);
2311 if (level == SOL_SOCKET)
2312 err = sock_getsockopt(sock, level, optname, optval, optlen);
2313 else
2314 err = sock->ops->getsockopt(sock, level, optname, optval,
2315 optlen);
2316 set_fs(oldfs);
2317 return err;
2318}
2319
2320int kernel_setsockopt(struct socket *sock, int level, int optname,
2321 char *optval, int optlen)
2322{
2323 mm_segment_t oldfs = get_fs();
2324 int err;
2325
2326 set_fs(KERNEL_DS);
2327 if (level == SOL_SOCKET)
2328 err = sock_setsockopt(sock, level, optname, optval, optlen);
2329 else
2330 err = sock->ops->setsockopt(sock, level, optname, optval,
2331 optlen);
2332 set_fs(oldfs);
2333 return err;
2334}
2335
2336int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2337 size_t size, int flags)
2338{
2339 if (sock->ops->sendpage)
2340 return sock->ops->sendpage(sock, page, offset, size, flags);
2341
2342 return sock_no_sendpage(sock, page, offset, size, flags);
2343}
2344
2345int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2346{
2347 mm_segment_t oldfs = get_fs();
2348 int err;
2349
2350 set_fs(KERNEL_DS);
2351 err = sock->ops->ioctl(sock, cmd, arg);
2352 set_fs(oldfs);
2353
2354 return err;
2355}
2356
91cf45f0
TM
2357int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2358{
2359 return sock->ops->shutdown(sock, how);
2360}
2361
1da177e4
LT
/* Socket-layer entry points exported to modules. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers for in-kernel socket users. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);