[NET]: Support multiple network namespaces with netlink
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
115
1da177e4
LT
116/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
118 * in the operation structures but are done directly via the socketcall() multiplexor.
119 */
120
da7071d7 121static const struct file_operations socket_file_ops = {
1da177e4
LT
122 .owner = THIS_MODULE,
123 .llseek = no_llseek,
124 .aio_read = sock_aio_read,
125 .aio_write = sock_aio_write,
126 .poll = sock_poll,
127 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
128#ifdef CONFIG_COMPAT
129 .compat_ioctl = compat_sock_ioctl,
130#endif
1da177e4
LT
131 .mmap = sock_mmap,
132 .open = sock_no_open, /* special open code to disallow open via /proc */
133 .release = sock_close,
134 .fasync = sock_fasync,
5274f052
JA
135 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage,
1da177e4
LT
137};
138
139/*
140 * The protocol list. Each protocol is registered in here.
141 */
142
1da177e4 143static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 144static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 145
1da177e4
LT
146/*
147 * Statistics counters of the socket lists
148 */
149
150static DEFINE_PER_CPU(int, sockets_in_use) = 0;
151
152/*
89bddce5
SH
153 * Support routines.
154 * Move socket addresses back and forth across the kernel/user
155 * divide and look after the messy bits.
1da177e4
LT
156 */
157
/*
 * Upper bound on the size of a socket address moved across the
 * kernel/user boundary.  Known sizes: 108 for Unix domain, 16 for IP,
 * 16 for IPX, 24 for IPv6, about 80 for AX.25.  The value must be at
 * least one bigger than the AF_UNIX size (see
 * net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
178int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
179{
89bddce5 180 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5
SH
205
206int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
207 int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
217 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
232#define SOCKFS_MAGIC 0x534F434B
233
e18b890b 234static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
243 init_waitqueue_head(&ei->socket.wait);
89bddce5 244
1da177e4
LT
245 ei->socket.fasync_list = NULL;
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
255static void sock_destroy_inode(struct inode *inode)
256{
257 kmem_cache_free(sock_inode_cachep,
258 container_of(inode, struct socket_alloc, vfs_inode));
259}
260
e18b890b 261static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 262{
89bddce5 263 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 264
a35afb83 265 inode_init_once(&ei->vfs_inode);
1da177e4 266}
89bddce5 267
1da177e4
LT
268static int init_inodecache(void)
269{
270 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
271 sizeof(struct socket_alloc),
272 0,
273 (SLAB_HWCACHE_ALIGN |
274 SLAB_RECLAIM_ACCOUNT |
275 SLAB_MEM_SPREAD),
20c2df83 276 init_once);
1da177e4
LT
277 if (sock_inode_cachep == NULL)
278 return -ENOMEM;
279 return 0;
280}
281
282static struct super_operations sockfs_ops = {
283 .alloc_inode = sock_alloc_inode,
284 .destroy_inode =sock_destroy_inode,
285 .statfs = simple_statfs,
286};
287
454e2398 288static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
289 int flags, const char *dev_name, void *data,
290 struct vfsmount *mnt)
1da177e4 291{
454e2398
DH
292 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
293 mnt);
1da177e4
LT
294}
295
ba89966c 296static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
297
298static struct file_system_type sock_fs_type = {
299 .name = "sockfs",
300 .get_sb = sockfs_get_sb,
301 .kill_sb = kill_anon_super,
302};
89bddce5 303
1da177e4
LT
304static int sockfs_delete_dentry(struct dentry *dentry)
305{
304e61e6
ED
306 /*
307 * At creation time, we pretended this dentry was hashed
308 * (by clearing DCACHE_UNHASHED bit in d_flags)
309 * At delete time, we restore the truth : not hashed.
310 * (so that dput() can proceed correctly)
311 */
312 dentry->d_flags |= DCACHE_UNHASHED;
313 return 0;
1da177e4 314}
c23fbb6b
ED
315
316/*
317 * sockfs_dname() is called from d_path().
318 */
319static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320{
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino);
323}
324
1da177e4 325static struct dentry_operations sockfs_dentry_operations = {
89bddce5 326 .d_delete = sockfs_delete_dentry,
c23fbb6b 327 .d_dname = sockfs_dname,
1da177e4
LT
328};
329
330/*
331 * Obtains the first available file descriptor and sets it up for use.
332 *
39d8c1b6
DM
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
1da177e4
LT
335 * and file struct implicitly stored in sock->file.
336 * Note that another thread may close file descriptor before we return
337 * from this function. We use the fact that now we do not refer
338 * to socket after mapping. If one day we will need it, this
339 * function will increment ref. count on file by 1.
340 *
341 * In any case returned fd MAY BE not valid!
342 * This race condition is unavoidable
343 * with shared fd spaces, we cannot solve it inside kernel,
344 * but we take care of internal coherence yet.
345 */
346
39d8c1b6 347static int sock_alloc_fd(struct file **filep)
1da177e4
LT
348{
349 int fd;
1da177e4
LT
350
351 fd = get_unused_fd();
39d8c1b6 352 if (likely(fd >= 0)) {
1da177e4
LT
353 struct file *file = get_empty_filp();
354
39d8c1b6
DM
355 *filep = file;
356 if (unlikely(!file)) {
1da177e4 357 put_unused_fd(fd);
39d8c1b6 358 return -ENFILE;
1da177e4 359 }
39d8c1b6
DM
360 } else
361 *filep = NULL;
362 return fd;
363}
1da177e4 364
39d8c1b6
DM
365static int sock_attach_fd(struct socket *sock, struct file *file)
366{
c23fbb6b 367 struct qstr name = { .name = "" };
39d8c1b6 368
c23fbb6b 369 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 370 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
371 return -ENOMEM;
372
3126a42c 373 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
374 /*
375 * We dont want to push this dentry into global dentry hash table.
376 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
377 * This permits a working /proc/$pid/fd/XXX on sockets
378 */
3126a42c
JS
379 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
380 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
381 file->f_path.mnt = mntget(sock_mnt);
382 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
383
384 sock->file = file;
385 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
386 file->f_mode = FMODE_READ | FMODE_WRITE;
387 file->f_flags = O_RDWR;
388 file->f_pos = 0;
389 file->private_data = sock;
1da177e4 390
39d8c1b6
DM
391 return 0;
392}
393
/*
 * Give @sock a file descriptor in the current process.
 *
 * Returns the new descriptor, or a negative errno from sock_alloc_fd()
 * or sock_attach_fd().  When attaching fails, both the file and the
 * descriptor are released before returning.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
411
6cb153ca
BL
412static struct socket *sock_from_file(struct file *file, int *err)
413{
6cb153ca
BL
414 if (file->f_op == &socket_file_ops)
415 return file->private_data; /* set in sock_map_fd */
416
23bb80d2
ED
417 *err = -ENOTSOCK;
418 return NULL;
6cb153ca
BL
419}
420
1da177e4
LT
421/**
422 * sockfd_lookup - Go from a file number to its socket slot
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
467/**
468 * sock_alloc - allocate a socket
89bddce5 469 *
1da177e4
LT
470 * Allocate a new inode and socket object. The two are bound together
471 * and initialised. The socket is then returned. If we are out of inodes
472 * NULL is returned.
473 */
474
475static struct socket *sock_alloc(void)
476{
89bddce5
SH
477 struct inode *inode;
478 struct socket *sock;
1da177e4
LT
479
480 inode = new_inode(sock_mnt->mnt_sb);
481 if (!inode)
482 return NULL;
483
484 sock = SOCKET_I(inode);
485
89bddce5 486 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
487 inode->i_uid = current->fsuid;
488 inode->i_gid = current->fsgid;
489
490 get_cpu_var(sockets_in_use)++;
491 put_cpu_var(sockets_in_use);
492 return sock;
493}
494
495/*
496 * In theory you can't get an open on this inode, but /proc provides
497 * a back door. Remember to keep it shut otherwise you'll let the
498 * creepy crawlies in.
499 */
89bddce5 500
1da177e4
LT
501static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
502{
503 return -ENXIO;
504}
505
4b6f5d20 506const struct file_operations bad_sock_fops = {
1da177e4
LT
507 .owner = THIS_MODULE,
508 .open = sock_no_open,
509};
510
511/**
512 * sock_release - close a socket
513 * @sock: socket to close
514 *
515 * The socket is released from the protocol stack if it has a release
516 * callback, and the inode is then released if the socket is bound to
89bddce5 517 * an inode not a file.
1da177e4 518 */
89bddce5 519
1da177e4
LT
520void sock_release(struct socket *sock)
521{
522 if (sock->ops) {
523 struct module *owner = sock->ops->owner;
524
525 sock->ops->release(sock);
526 sock->ops = NULL;
527 module_put(owner);
528 }
529
530 if (sock->fasync_list)
531 printk(KERN_ERR "sock_release: fasync list not empty!\n");
532
533 get_cpu_var(sockets_in_use)--;
534 put_cpu_var(sockets_in_use);
535 if (!sock->file) {
536 iput(SOCK_INODE(sock));
537 return;
538 }
89bddce5 539 sock->file = NULL;
1da177e4
LT
540}
541
89bddce5 542static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
543 struct msghdr *msg, size_t size)
544{
545 struct sock_iocb *si = kiocb_to_siocb(iocb);
546 int err;
547
548 si->sock = sock;
549 si->scm = NULL;
550 si->msg = msg;
551 si->size = size;
552
553 err = security_socket_sendmsg(sock, msg, size);
554 if (err)
555 return err;
556
557 return sock->ops->sendmsg(iocb, sock, msg, size);
558}
559
560int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
561{
562 struct kiocb iocb;
563 struct sock_iocb siocb;
564 int ret;
565
566 init_sync_kiocb(&iocb, NULL);
567 iocb.private = &siocb;
568 ret = __sock_sendmsg(&iocb, sock, msg, size);
569 if (-EIOCBQUEUED == ret)
570 ret = wait_on_sync_kiocb(&iocb);
571 return ret;
572}
573
574int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
575 struct kvec *vec, size_t num, size_t size)
576{
577 mm_segment_t oldfs = get_fs();
578 int result;
579
580 set_fs(KERNEL_DS);
581 /*
582 * the following is safe, since for compiler definitions of kvec and
583 * iovec are identical, yielding the same in-core layout and alignment
584 */
89bddce5 585 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
586 msg->msg_iovlen = num;
587 result = sock_sendmsg(sock, msg, size);
588 set_fs(oldfs);
589 return result;
590}
591
92f37fd2
ED
592/*
593 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
594 */
595void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
596 struct sk_buff *skb)
597{
598 ktime_t kt = skb->tstamp;
599
600 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
601 struct timeval tv;
602 /* Race occurred between timestamp enabling and packet
603 receiving. Fill in the current time for now. */
604 if (kt.tv64 == 0)
605 kt = ktime_get_real();
606 skb->tstamp = kt;
607 tv = ktime_to_timeval(kt);
608 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
609 } else {
610 struct timespec ts;
611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */
613 if (kt.tv64 == 0)
614 kt = ktime_get_real();
615 skb->tstamp = kt;
616 ts = ktime_to_timespec(kt);
617 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
618 }
619}
620
7c81fd8b
ACM
621EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
622
89bddce5 623static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
624 struct msghdr *msg, size_t size, int flags)
625{
626 int err;
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633 si->flags = flags;
634
635 err = security_socket_recvmsg(sock, msg, size, flags);
636 if (err)
637 return err;
638
639 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
640}
641
89bddce5 642int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
643 size_t size, int flags)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
89bddce5 649 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
650 iocb.private = &siocb;
651 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
656
89bddce5
SH
657int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
658 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
659{
660 mm_segment_t oldfs = get_fs();
661 int result;
662
663 set_fs(KERNEL_DS);
664 /*
665 * the following is safe, since for compiler definitions of kvec and
666 * iovec are identical, yielding the same in-core layout and alignment
667 */
89bddce5 668 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
669 result = sock_recvmsg(sock, msg, size, flags);
670 set_fs(oldfs);
671 return result;
672}
673
674static void sock_aio_dtor(struct kiocb *iocb)
675{
676 kfree(iocb->private);
677}
678
ce1d4d3e
CH
679static ssize_t sock_sendpage(struct file *file, struct page *page,
680 int offset, size_t size, loff_t *ppos, int more)
1da177e4 681{
1da177e4
LT
682 struct socket *sock;
683 int flags;
684
ce1d4d3e
CH
685 sock = file->private_data;
686
687 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
688 if (more)
689 flags |= MSG_MORE;
690
691 return sock->ops->sendpage(sock, page, offset, size, flags);
692}
1da177e4 693
ce1d4d3e 694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 695 struct sock_iocb *siocb)
ce1d4d3e
CH
696{
697 if (!is_sync_kiocb(iocb)) {
698 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
699 if (!siocb)
700 return NULL;
1da177e4
LT
701 iocb->ki_dtor = sock_aio_dtor;
702 }
1da177e4 703
ce1d4d3e 704 siocb->kiocb = iocb;
ce1d4d3e
CH
705 iocb->private = siocb;
706 return siocb;
1da177e4
LT
707}
708
ce1d4d3e 709static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
710 struct file *file, const struct iovec *iov,
711 unsigned long nr_segs)
ce1d4d3e
CH
712{
713 struct socket *sock = file->private_data;
714 size_t size = 0;
715 int i;
1da177e4 716
89bddce5
SH
717 for (i = 0; i < nr_segs; i++)
718 size += iov[i].iov_len;
1da177e4 719
ce1d4d3e
CH
720 msg->msg_name = NULL;
721 msg->msg_namelen = 0;
722 msg->msg_control = NULL;
723 msg->msg_controllen = 0;
89bddce5 724 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
725 msg->msg_iovlen = nr_segs;
726 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
727
728 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
729}
730
027445c3
BP
731static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
732 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
733{
734 struct sock_iocb siocb, *x;
735
1da177e4
LT
736 if (pos != 0)
737 return -ESPIPE;
027445c3
BP
738
739 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
740 return 0;
741
027445c3
BP
742
743 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
744 if (!x)
745 return -ENOMEM;
027445c3 746 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
747}
748
ce1d4d3e 749static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
750 struct file *file, const struct iovec *iov,
751 unsigned long nr_segs)
1da177e4 752{
ce1d4d3e
CH
753 struct socket *sock = file->private_data;
754 size_t size = 0;
755 int i;
1da177e4 756
89bddce5
SH
757 for (i = 0; i < nr_segs; i++)
758 size += iov[i].iov_len;
1da177e4 759
ce1d4d3e
CH
760 msg->msg_name = NULL;
761 msg->msg_namelen = 0;
762 msg->msg_control = NULL;
763 msg->msg_controllen = 0;
89bddce5 764 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
765 msg->msg_iovlen = nr_segs;
766 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
767 if (sock->type == SOCK_SEQPACKET)
768 msg->msg_flags |= MSG_EOR;
1da177e4 769
ce1d4d3e 770 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
771}
772
027445c3
BP
773static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
774 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
775{
776 struct sock_iocb siocb, *x;
1da177e4 777
ce1d4d3e
CH
778 if (pos != 0)
779 return -ESPIPE;
027445c3 780
027445c3 781 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
782 if (!x)
783 return -ENOMEM;
1da177e4 784
027445c3 785 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
786}
787
1da177e4
LT
788/*
789 * Atomic setting of ioctl hooks to avoid race
790 * with module unload.
791 */
792
4a3e2f71 793static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 794static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 795
89bddce5 796void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 797{
4a3e2f71 798 mutex_lock(&br_ioctl_mutex);
1da177e4 799 br_ioctl_hook = hook;
4a3e2f71 800 mutex_unlock(&br_ioctl_mutex);
1da177e4 801}
89bddce5 802
1da177e4
LT
803EXPORT_SYMBOL(brioctl_set);
804
4a3e2f71 805static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 806static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 807
89bddce5 808void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 809{
4a3e2f71 810 mutex_lock(&vlan_ioctl_mutex);
1da177e4 811 vlan_ioctl_hook = hook;
4a3e2f71 812 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 813}
89bddce5 814
1da177e4
LT
815EXPORT_SYMBOL(vlan_ioctl_set);
816
4a3e2f71 817static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 818static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 819
89bddce5 820void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 821{
4a3e2f71 822 mutex_lock(&dlci_ioctl_mutex);
1da177e4 823 dlci_ioctl_hook = hook;
4a3e2f71 824 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 825}
89bddce5 826
1da177e4
LT
827EXPORT_SYMBOL(dlci_ioctl_set);
828
829/*
830 * With an ioctl, arg may well be a user mode pointer, but we don't know
831 * what to do with it - that's up to the protocol still.
832 */
833
834static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
835{
836 struct socket *sock;
837 void __user *argp = (void __user *)arg;
838 int pid, err;
839
b69aee04 840 sock = file->private_data;
1da177e4
LT
841 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
842 err = dev_ioctl(cmd, argp);
843 } else
d86b5e0e 844#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
845 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
846 err = dev_ioctl(cmd, argp);
847 } else
89bddce5
SH
848#endif /* CONFIG_WIRELESS_EXT */
849 switch (cmd) {
1da177e4
LT
850 case FIOSETOWN:
851 case SIOCSPGRP:
852 err = -EFAULT;
853 if (get_user(pid, (int __user *)argp))
854 break;
855 err = f_setown(sock->file, pid, 1);
856 break;
857 case FIOGETOWN:
858 case SIOCGPGRP:
609d7fa9 859 err = put_user(f_getown(sock->file),
89bddce5 860 (int __user *)argp);
1da177e4
LT
861 break;
862 case SIOCGIFBR:
863 case SIOCSIFBR:
864 case SIOCBRADDBR:
865 case SIOCBRDELBR:
866 err = -ENOPKG;
867 if (!br_ioctl_hook)
868 request_module("bridge");
869
4a3e2f71 870 mutex_lock(&br_ioctl_mutex);
89bddce5 871 if (br_ioctl_hook)
1da177e4 872 err = br_ioctl_hook(cmd, argp);
4a3e2f71 873 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
874 break;
875 case SIOCGIFVLAN:
876 case SIOCSIFVLAN:
877 err = -ENOPKG;
878 if (!vlan_ioctl_hook)
879 request_module("8021q");
880
4a3e2f71 881 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
882 if (vlan_ioctl_hook)
883 err = vlan_ioctl_hook(argp);
4a3e2f71 884 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 885 break;
1da177e4
LT
886 case SIOCADDDLCI:
887 case SIOCDELDLCI:
888 err = -ENOPKG;
889 if (!dlci_ioctl_hook)
890 request_module("dlci");
891
892 if (dlci_ioctl_hook) {
4a3e2f71 893 mutex_lock(&dlci_ioctl_mutex);
1da177e4 894 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 895 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
896 }
897 break;
898 default:
899 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
900
901 /*
902 * If this ioctl is unknown try to hand it down
903 * to the NIC driver.
904 */
905 if (err == -ENOIOCTLCMD)
906 err = dev_ioctl(cmd, argp);
1da177e4 907 break;
89bddce5 908 }
1da177e4
LT
909 return err;
910}
911
912int sock_create_lite(int family, int type, int protocol, struct socket **res)
913{
914 int err;
915 struct socket *sock = NULL;
89bddce5 916
1da177e4
LT
917 err = security_socket_create(family, type, protocol, 1);
918 if (err)
919 goto out;
920
921 sock = sock_alloc();
922 if (!sock) {
923 err = -ENOMEM;
924 goto out;
925 }
926
1da177e4 927 sock->type = type;
7420ed23
VY
928 err = security_socket_post_create(sock, family, type, protocol, 1);
929 if (err)
930 goto out_release;
931
1da177e4
LT
932out:
933 *res = sock;
934 return err;
7420ed23
VY
935out_release:
936 sock_release(sock);
937 sock = NULL;
938 goto out;
1da177e4
LT
939}
940
941/* No kernel lock held - perfect */
89bddce5 942static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
943{
944 struct socket *sock;
945
946 /*
89bddce5 947 * We can't return errors to poll, so it's either yes or no.
1da177e4 948 */
b69aee04 949 sock = file->private_data;
1da177e4
LT
950 return sock->ops->poll(file, sock, wait);
951}
952
89bddce5 953static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 954{
b69aee04 955 struct socket *sock = file->private_data;
1da177e4
LT
956
957 return sock->ops->mmap(file, sock, vma);
958}
959
20380731 960static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
961{
962 /*
89bddce5
SH
963 * It was possible the inode is NULL we were
964 * closing an unfinished socket.
1da177e4
LT
965 */
966
89bddce5 967 if (!inode) {
1da177e4
LT
968 printk(KERN_DEBUG "sock_close: NULL inode\n");
969 return 0;
970 }
971 sock_fasync(-1, filp, 0);
972 sock_release(SOCKET_I(inode));
973 return 0;
974}
975
976/*
977 * Update the socket async list
978 *
979 * Fasync_list locking strategy.
980 *
981 * 1. fasync_list is modified only under process context socket lock
982 * i.e. under semaphore.
983 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
984 * or under socket lock.
985 * 3. fasync_list can be used from softirq context, so that
986 * modification under socket lock have to be enhanced with
987 * write_lock_bh(&sk->sk_callback_lock).
988 * --ANK (990710)
989 */
990
991static int sock_fasync(int fd, struct file *filp, int on)
992{
89bddce5 993 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
994 struct socket *sock;
995 struct sock *sk;
996
89bddce5 997 if (on) {
8b3a7005 998 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 999 if (fna == NULL)
1da177e4
LT
1000 return -ENOMEM;
1001 }
1002
b69aee04 1003 sock = filp->private_data;
1da177e4 1004
89bddce5
SH
1005 sk = sock->sk;
1006 if (sk == NULL) {
1da177e4
LT
1007 kfree(fna);
1008 return -EINVAL;
1009 }
1010
1011 lock_sock(sk);
1012
89bddce5 1013 prev = &(sock->fasync_list);
1da177e4 1014
89bddce5
SH
1015 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1016 if (fa->fa_file == filp)
1da177e4
LT
1017 break;
1018
89bddce5
SH
1019 if (on) {
1020 if (fa != NULL) {
1da177e4 1021 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1022 fa->fa_fd = fd;
1da177e4
LT
1023 write_unlock_bh(&sk->sk_callback_lock);
1024
1025 kfree(fna);
1026 goto out;
1027 }
89bddce5
SH
1028 fna->fa_file = filp;
1029 fna->fa_fd = fd;
1030 fna->magic = FASYNC_MAGIC;
1031 fna->fa_next = sock->fasync_list;
1da177e4 1032 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1033 sock->fasync_list = fna;
1da177e4 1034 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1035 } else {
1036 if (fa != NULL) {
1da177e4 1037 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1038 *prev = fa->fa_next;
1da177e4
LT
1039 write_unlock_bh(&sk->sk_callback_lock);
1040 kfree(fa);
1041 }
1042 }
1043
1044out:
1045 release_sock(sock->sk);
1046 return 0;
1047}
1048
1049/* This function may be called only under socket lock or callback_lock */
1050
1051int sock_wake_async(struct socket *sock, int how, int band)
1052{
1053 if (!sock || !sock->fasync_list)
1054 return -1;
89bddce5 1055 switch (how) {
1da177e4 1056 case 1:
89bddce5 1057
1da177e4
LT
1058 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1059 break;
1060 goto call_kill;
1061 case 2:
1062 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1063 break;
1064 /* fall through */
1065 case 0:
89bddce5 1066call_kill:
1da177e4
LT
1067 __kill_fasync(sock->fasync_list, SIGIO, band);
1068 break;
1069 case 3:
1070 __kill_fasync(sock->fasync_list, SIGURG, band);
1071 }
1072 return 0;
1073}
1074
1b8d7ae4 1075static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1076 struct socket **res, int kern)
1da177e4
LT
1077{
1078 int err;
1079 struct socket *sock;
55737fda 1080 const struct net_proto_family *pf;
1da177e4
LT
1081
1082 /*
89bddce5 1083 * Check protocol is in range
1da177e4
LT
1084 */
1085 if (family < 0 || family >= NPROTO)
1086 return -EAFNOSUPPORT;
1087 if (type < 0 || type >= SOCK_MAX)
1088 return -EINVAL;
1089
1090 /* Compatibility.
1091
1092 This uglymoron is moved from INET layer to here to avoid
1093 deadlock in module load.
1094 */
1095 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1096 static int warned;
1da177e4
LT
1097 if (!warned) {
1098 warned = 1;
89bddce5
SH
1099 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1100 current->comm);
1da177e4
LT
1101 }
1102 family = PF_PACKET;
1103 }
1104
1105 err = security_socket_create(family, type, protocol, kern);
1106 if (err)
1107 return err;
89bddce5 1108
55737fda
SH
1109 /*
1110 * Allocate the socket and allow the family to set things up. if
1111 * the protocol is 0, the family is instructed to select an appropriate
1112 * default.
1113 */
1114 sock = sock_alloc();
1115 if (!sock) {
1116 if (net_ratelimit())
1117 printk(KERN_WARNING "socket: no more sockets\n");
1118 return -ENFILE; /* Not exactly a match, but its the
1119 closest posix thing */
1120 }
1121
1122 sock->type = type;
1123
1da177e4 1124#if defined(CONFIG_KMOD)
89bddce5
SH
1125 /* Attempt to load a protocol module if the find failed.
1126 *
1127 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1128 * requested real, full-featured networking support upon configuration.
1129 * Otherwise module support will break!
1130 */
55737fda 1131 if (net_families[family] == NULL)
89bddce5 1132 request_module("net-pf-%d", family);
1da177e4
LT
1133#endif
1134
55737fda
SH
1135 rcu_read_lock();
1136 pf = rcu_dereference(net_families[family]);
1137 err = -EAFNOSUPPORT;
1138 if (!pf)
1139 goto out_release;
1da177e4
LT
1140
1141 /*
1142 * We will call the ->create function, that possibly is in a loadable
1143 * module, so we have to bump that loadable module refcnt first.
1144 */
55737fda 1145 if (!try_module_get(pf->owner))
1da177e4
LT
1146 goto out_release;
1147
55737fda
SH
1148 /* Now protected by module ref count */
1149 rcu_read_unlock();
1150
1b8d7ae4 1151 err = pf->create(net, sock, protocol);
55737fda 1152 if (err < 0)
1da177e4 1153 goto out_module_put;
a79af59e 1154
1da177e4
LT
1155 /*
1156 * Now to bump the refcnt of the [loadable] module that owns this
1157 * socket at sock_release time we decrement its refcnt.
1158 */
55737fda
SH
1159 if (!try_module_get(sock->ops->owner))
1160 goto out_module_busy;
1161
1da177e4
LT
1162 /*
1163 * Now that we're done with the ->create function, the [loadable]
1164 * module can have its refcnt decremented
1165 */
55737fda 1166 module_put(pf->owner);
7420ed23
VY
1167 err = security_socket_post_create(sock, family, type, protocol, kern);
1168 if (err)
3b185525 1169 goto out_sock_release;
55737fda 1170 *res = sock;
1da177e4 1171
55737fda
SH
1172 return 0;
1173
1174out_module_busy:
1175 err = -EAFNOSUPPORT;
1da177e4 1176out_module_put:
55737fda
SH
1177 sock->ops = NULL;
1178 module_put(pf->owner);
1179out_sock_release:
1da177e4 1180 sock_release(sock);
55737fda
SH
1181 return err;
1182
1183out_release:
1184 rcu_read_unlock();
1185 goto out_sock_release;
1da177e4
LT
1186}
1187
1188int sock_create(int family, int type, int protocol, struct socket **res)
1189{
1b8d7ae4 1190 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1191}
1192
1193int sock_create_kern(int family, int type, int protocol, struct socket **res)
1194{
1b8d7ae4 1195 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1196}
1197
1198asmlinkage long sys_socket(int family, int type, int protocol)
1199{
1200 int retval;
1201 struct socket *sock;
1202
1203 retval = sock_create(family, type, protocol, &sock);
1204 if (retval < 0)
1205 goto out;
1206
1207 retval = sock_map_fd(sock);
1208 if (retval < 0)
1209 goto out_release;
1210
1211out:
1212 /* It may be already another descriptor 8) Not kernel problem. */
1213 return retval;
1214
1215out_release:
1216 sock_release(sock);
1217 return retval;
1218}
1219
1220/*
1221 * Create a pair of connected sockets.
1222 */
1223
89bddce5
SH
1224asmlinkage long sys_socketpair(int family, int type, int protocol,
1225 int __user *usockvec)
1da177e4
LT
1226{
1227 struct socket *sock1, *sock2;
1228 int fd1, fd2, err;
db349509 1229 struct file *newfile1, *newfile2;
1da177e4
LT
1230
1231 /*
1232 * Obtain the first socket and check if the underlying protocol
1233 * supports the socketpair call.
1234 */
1235
1236 err = sock_create(family, type, protocol, &sock1);
1237 if (err < 0)
1238 goto out;
1239
1240 err = sock_create(family, type, protocol, &sock2);
1241 if (err < 0)
1242 goto out_release_1;
1243
1244 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1245 if (err < 0)
1da177e4
LT
1246 goto out_release_both;
1247
db349509
AV
1248 fd1 = sock_alloc_fd(&newfile1);
1249 if (unlikely(fd1 < 0))
1250 goto out_release_both;
1da177e4 1251
db349509
AV
1252 fd2 = sock_alloc_fd(&newfile2);
1253 if (unlikely(fd2 < 0)) {
1254 put_filp(newfile1);
1255 put_unused_fd(fd1);
1da177e4 1256 goto out_release_both;
db349509 1257 }
1da177e4 1258
db349509
AV
1259 err = sock_attach_fd(sock1, newfile1);
1260 if (unlikely(err < 0)) {
1261 goto out_fd2;
1262 }
1263
1264 err = sock_attach_fd(sock2, newfile2);
1265 if (unlikely(err < 0)) {
1266 fput(newfile1);
1267 goto out_fd1;
1268 }
1269
1270 err = audit_fd_pair(fd1, fd2);
1271 if (err < 0) {
1272 fput(newfile1);
1273 fput(newfile2);
1274 goto out_fd;
1275 }
1da177e4 1276
db349509
AV
1277 fd_install(fd1, newfile1);
1278 fd_install(fd2, newfile2);
1da177e4
LT
1279 /* fd1 and fd2 may be already another descriptors.
1280 * Not kernel problem.
1281 */
1282
89bddce5 1283 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1284 if (!err)
1285 err = put_user(fd2, &usockvec[1]);
1286 if (!err)
1287 return 0;
1288
1289 sys_close(fd2);
1290 sys_close(fd1);
1291 return err;
1292
1da177e4 1293out_release_both:
89bddce5 1294 sock_release(sock2);
1da177e4 1295out_release_1:
89bddce5 1296 sock_release(sock1);
1da177e4
LT
1297out:
1298 return err;
db349509
AV
1299
1300out_fd2:
1301 put_filp(newfile1);
1302 sock_release(sock1);
1303out_fd1:
1304 put_filp(newfile2);
1305 sock_release(sock2);
1306out_fd:
1307 put_unused_fd(fd1);
1308 put_unused_fd(fd2);
1309 goto out;
1da177e4
LT
1310}
1311
1da177e4
LT
1312/*
1313 * Bind a name to a socket. Nothing much to do here since it's
1314 * the protocol's responsibility to handle the local address.
1315 *
1316 * We move the socket address to kernel space before we call
1317 * the protocol layer (having also checked the address is ok).
1318 */
1319
1320asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1321{
1322 struct socket *sock;
1323 char address[MAX_SOCK_ADDR];
6cb153ca 1324 int err, fput_needed;
1da177e4 1325
89bddce5 1326 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1327 if (sock) {
89bddce5
SH
1328 err = move_addr_to_kernel(umyaddr, addrlen, address);
1329 if (err >= 0) {
1330 err = security_socket_bind(sock,
1331 (struct sockaddr *)address,
1332 addrlen);
6cb153ca
BL
1333 if (!err)
1334 err = sock->ops->bind(sock,
89bddce5
SH
1335 (struct sockaddr *)
1336 address, addrlen);
1da177e4 1337 }
6cb153ca 1338 fput_light(sock->file, fput_needed);
89bddce5 1339 }
1da177e4
LT
1340 return err;
1341}
1342
1da177e4
LT
1343/*
1344 * Perform a listen. Basically, we allow the protocol to do anything
1345 * necessary for a listen, and if that works, we mark the socket as
1346 * ready for listening.
1347 */
1348
7a42c217 1349int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1350
1351asmlinkage long sys_listen(int fd, int backlog)
1352{
1353 struct socket *sock;
6cb153ca 1354 int err, fput_needed;
89bddce5
SH
1355
1356 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1357 if (sock) {
1358 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1359 backlog = sysctl_somaxconn;
1360
1361 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1362 if (!err)
1363 err = sock->ops->listen(sock, backlog);
1da177e4 1364
6cb153ca 1365 fput_light(sock->file, fput_needed);
1da177e4
LT
1366 }
1367 return err;
1368}
1369
1da177e4
LT
1370/*
1371 * For accept, we attempt to create a new socket, set up the link
1372 * with the client, wake up the client, then return the new
1373 * connected fd. We collect the address of the connector in kernel
1374 * space and move it to user at the very end. This is unclean because
1375 * we open the socket then return an error.
1376 *
1377 * 1003.1g adds the ability to recvmsg() to query connection pending
1378 * status to recvmsg. We need to add that support in a way thats
1379 * clean when we restucture accept also.
1380 */
1381
89bddce5
SH
1382asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1383 int __user *upeer_addrlen)
1da177e4
LT
1384{
1385 struct socket *sock, *newsock;
39d8c1b6 1386 struct file *newfile;
6cb153ca 1387 int err, len, newfd, fput_needed;
1da177e4
LT
1388 char address[MAX_SOCK_ADDR];
1389
6cb153ca 1390 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1391 if (!sock)
1392 goto out;
1393
1394 err = -ENFILE;
89bddce5 1395 if (!(newsock = sock_alloc()))
1da177e4
LT
1396 goto out_put;
1397
1398 newsock->type = sock->type;
1399 newsock->ops = sock->ops;
1400
1da177e4
LT
1401 /*
1402 * We don't need try_module_get here, as the listening socket (sock)
1403 * has the protocol module (sock->ops->owner) held.
1404 */
1405 __module_get(newsock->ops->owner);
1406
39d8c1b6
DM
1407 newfd = sock_alloc_fd(&newfile);
1408 if (unlikely(newfd < 0)) {
1409 err = newfd;
9a1875e6
DM
1410 sock_release(newsock);
1411 goto out_put;
39d8c1b6
DM
1412 }
1413
1414 err = sock_attach_fd(newsock, newfile);
1415 if (err < 0)
79f4f642 1416 goto out_fd_simple;
39d8c1b6 1417
a79af59e
FF
1418 err = security_socket_accept(sock, newsock);
1419 if (err)
39d8c1b6 1420 goto out_fd;
a79af59e 1421
1da177e4
LT
1422 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1423 if (err < 0)
39d8c1b6 1424 goto out_fd;
1da177e4
LT
1425
1426 if (upeer_sockaddr) {
89bddce5
SH
1427 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1428 &len, 2) < 0) {
1da177e4 1429 err = -ECONNABORTED;
39d8c1b6 1430 goto out_fd;
1da177e4 1431 }
89bddce5
SH
1432 err = move_addr_to_user(address, len, upeer_sockaddr,
1433 upeer_addrlen);
1da177e4 1434 if (err < 0)
39d8c1b6 1435 goto out_fd;
1da177e4
LT
1436 }
1437
1438 /* File flags are not inherited via accept() unlike another OSes. */
1439
39d8c1b6
DM
1440 fd_install(newfd, newfile);
1441 err = newfd;
1da177e4
LT
1442
1443 security_socket_post_accept(sock, newsock);
1444
1445out_put:
6cb153ca 1446 fput_light(sock->file, fput_needed);
1da177e4
LT
1447out:
1448 return err;
79f4f642
AD
1449out_fd_simple:
1450 sock_release(newsock);
1451 put_filp(newfile);
1452 put_unused_fd(newfd);
1453 goto out_put;
39d8c1b6 1454out_fd:
9606a216 1455 fput(newfile);
39d8c1b6 1456 put_unused_fd(newfd);
1da177e4
LT
1457 goto out_put;
1458}
1459
1da177e4
LT
1460/*
1461 * Attempt to connect to a socket with the server address. The address
1462 * is in user space so we verify it is OK and move it to kernel space.
1463 *
1464 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1465 * break bindings
1466 *
1467 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1468 * other SEQPACKET protocols that take time to connect() as it doesn't
1469 * include the -EINPROGRESS status for such sockets.
1470 */
1471
89bddce5
SH
1472asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1473 int addrlen)
1da177e4
LT
1474{
1475 struct socket *sock;
1476 char address[MAX_SOCK_ADDR];
6cb153ca 1477 int err, fput_needed;
1da177e4 1478
6cb153ca 1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1480 if (!sock)
1481 goto out;
1482 err = move_addr_to_kernel(uservaddr, addrlen, address);
1483 if (err < 0)
1484 goto out_put;
1485
89bddce5
SH
1486 err =
1487 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1488 if (err)
1489 goto out_put;
1490
89bddce5 1491 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1492 sock->file->f_flags);
1493out_put:
6cb153ca 1494 fput_light(sock->file, fput_needed);
1da177e4
LT
1495out:
1496 return err;
1497}
1498
1499/*
1500 * Get the local address ('name') of a socket object. Move the obtained
1501 * name to user space.
1502 */
1503
89bddce5
SH
1504asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1505 int __user *usockaddr_len)
1da177e4
LT
1506{
1507 struct socket *sock;
1508 char address[MAX_SOCK_ADDR];
6cb153ca 1509 int len, err, fput_needed;
89bddce5 1510
6cb153ca 1511 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1512 if (!sock)
1513 goto out;
1514
1515 err = security_socket_getsockname(sock);
1516 if (err)
1517 goto out_put;
1518
1519 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1520 if (err)
1521 goto out_put;
1522 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1523
1524out_put:
6cb153ca 1525 fput_light(sock->file, fput_needed);
1da177e4
LT
1526out:
1527 return err;
1528}
1529
1530/*
1531 * Get the remote address ('name') of a socket object. Move the obtained
1532 * name to user space.
1533 */
1534
89bddce5
SH
1535asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1536 int __user *usockaddr_len)
1da177e4
LT
1537{
1538 struct socket *sock;
1539 char address[MAX_SOCK_ADDR];
6cb153ca 1540 int len, err, fput_needed;
1da177e4 1541
89bddce5
SH
1542 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1543 if (sock != NULL) {
1da177e4
LT
1544 err = security_socket_getpeername(sock);
1545 if (err) {
6cb153ca 1546 fput_light(sock->file, fput_needed);
1da177e4
LT
1547 return err;
1548 }
1549
89bddce5
SH
1550 err =
1551 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1552 1);
1da177e4 1553 if (!err)
89bddce5
SH
1554 err = move_addr_to_user(address, len, usockaddr,
1555 usockaddr_len);
6cb153ca 1556 fput_light(sock->file, fput_needed);
1da177e4
LT
1557 }
1558 return err;
1559}
1560
1561/*
1562 * Send a datagram to a given address. We move the address into kernel
1563 * space and check the user space data area is readable before invoking
1564 * the protocol.
1565 */
1566
89bddce5
SH
1567asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1568 unsigned flags, struct sockaddr __user *addr,
1569 int addr_len)
1da177e4
LT
1570{
1571 struct socket *sock;
1572 char address[MAX_SOCK_ADDR];
1573 int err;
1574 struct msghdr msg;
1575 struct iovec iov;
6cb153ca
BL
1576 int fput_needed;
1577 struct file *sock_file;
1578
1579 sock_file = fget_light(fd, &fput_needed);
4387ff75 1580 err = -EBADF;
6cb153ca 1581 if (!sock_file)
4387ff75 1582 goto out;
6cb153ca
BL
1583
1584 sock = sock_from_file(sock_file, &err);
1da177e4 1585 if (!sock)
6cb153ca 1586 goto out_put;
89bddce5
SH
1587 iov.iov_base = buff;
1588 iov.iov_len = len;
1589 msg.msg_name = NULL;
1590 msg.msg_iov = &iov;
1591 msg.msg_iovlen = 1;
1592 msg.msg_control = NULL;
1593 msg.msg_controllen = 0;
1594 msg.msg_namelen = 0;
6cb153ca 1595 if (addr) {
1da177e4
LT
1596 err = move_addr_to_kernel(addr, addr_len, address);
1597 if (err < 0)
1598 goto out_put;
89bddce5
SH
1599 msg.msg_name = address;
1600 msg.msg_namelen = addr_len;
1da177e4
LT
1601 }
1602 if (sock->file->f_flags & O_NONBLOCK)
1603 flags |= MSG_DONTWAIT;
1604 msg.msg_flags = flags;
1605 err = sock_sendmsg(sock, &msg, len);
1606
89bddce5 1607out_put:
6cb153ca 1608 fput_light(sock_file, fput_needed);
4387ff75 1609out:
1da177e4
LT
1610 return err;
1611}
1612
1613/*
89bddce5 1614 * Send a datagram down a socket.
1da177e4
LT
1615 */
1616
89bddce5 1617asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1618{
1619 return sys_sendto(fd, buff, len, flags, NULL, 0);
1620}
1621
1622/*
89bddce5 1623 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1624 * sender. We verify the buffers are writable and if needed move the
1625 * sender address from kernel to user space.
1626 */
1627
89bddce5
SH
1628asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1629 unsigned flags, struct sockaddr __user *addr,
1630 int __user *addr_len)
1da177e4
LT
1631{
1632 struct socket *sock;
1633 struct iovec iov;
1634 struct msghdr msg;
1635 char address[MAX_SOCK_ADDR];
89bddce5 1636 int err, err2;
6cb153ca
BL
1637 struct file *sock_file;
1638 int fput_needed;
1639
1640 sock_file = fget_light(fd, &fput_needed);
4387ff75 1641 err = -EBADF;
6cb153ca 1642 if (!sock_file)
4387ff75 1643 goto out;
1da177e4 1644
6cb153ca 1645 sock = sock_from_file(sock_file, &err);
1da177e4 1646 if (!sock)
4387ff75 1647 goto out_put;
1da177e4 1648
89bddce5
SH
1649 msg.msg_control = NULL;
1650 msg.msg_controllen = 0;
1651 msg.msg_iovlen = 1;
1652 msg.msg_iov = &iov;
1653 iov.iov_len = size;
1654 iov.iov_base = ubuf;
1655 msg.msg_name = address;
1656 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1657 if (sock->file->f_flags & O_NONBLOCK)
1658 flags |= MSG_DONTWAIT;
89bddce5 1659 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1660
89bddce5
SH
1661 if (err >= 0 && addr != NULL) {
1662 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1663 if (err2 < 0)
1664 err = err2;
1da177e4 1665 }
4387ff75 1666out_put:
6cb153ca 1667 fput_light(sock_file, fput_needed);
4387ff75 1668out:
1da177e4
LT
1669 return err;
1670}
1671
1672/*
89bddce5 1673 * Receive a datagram from a socket.
1da177e4
LT
1674 */
1675
89bddce5
SH
1676asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1677 unsigned flags)
1da177e4
LT
1678{
1679 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1680}
1681
1682/*
1683 * Set a socket option. Because we don't know the option lengths we have
1684 * to pass the user mode parameter for the protocols to sort out.
1685 */
1686
89bddce5
SH
1687asmlinkage long sys_setsockopt(int fd, int level, int optname,
1688 char __user *optval, int optlen)
1da177e4 1689{
6cb153ca 1690 int err, fput_needed;
1da177e4
LT
1691 struct socket *sock;
1692
1693 if (optlen < 0)
1694 return -EINVAL;
89bddce5
SH
1695
1696 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1697 if (sock != NULL) {
1698 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1699 if (err)
1700 goto out_put;
1da177e4
LT
1701
1702 if (level == SOL_SOCKET)
89bddce5
SH
1703 err =
1704 sock_setsockopt(sock, level, optname, optval,
1705 optlen);
1da177e4 1706 else
89bddce5
SH
1707 err =
1708 sock->ops->setsockopt(sock, level, optname, optval,
1709 optlen);
6cb153ca
BL
1710out_put:
1711 fput_light(sock->file, fput_needed);
1da177e4
LT
1712 }
1713 return err;
1714}
1715
1716/*
1717 * Get a socket option. Because we don't know the option lengths we have
1718 * to pass a user mode parameter for the protocols to sort out.
1719 */
1720
89bddce5
SH
1721asmlinkage long sys_getsockopt(int fd, int level, int optname,
1722 char __user *optval, int __user *optlen)
1da177e4 1723{
6cb153ca 1724 int err, fput_needed;
1da177e4
LT
1725 struct socket *sock;
1726
89bddce5
SH
1727 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1728 if (sock != NULL) {
6cb153ca
BL
1729 err = security_socket_getsockopt(sock, level, optname);
1730 if (err)
1731 goto out_put;
1da177e4
LT
1732
1733 if (level == SOL_SOCKET)
89bddce5
SH
1734 err =
1735 sock_getsockopt(sock, level, optname, optval,
1736 optlen);
1da177e4 1737 else
89bddce5
SH
1738 err =
1739 sock->ops->getsockopt(sock, level, optname, optval,
1740 optlen);
6cb153ca
BL
1741out_put:
1742 fput_light(sock->file, fput_needed);
1da177e4
LT
1743 }
1744 return err;
1745}
1746
1da177e4
LT
1747/*
1748 * Shutdown a socket.
1749 */
1750
1751asmlinkage long sys_shutdown(int fd, int how)
1752{
6cb153ca 1753 int err, fput_needed;
1da177e4
LT
1754 struct socket *sock;
1755
89bddce5
SH
1756 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1757 if (sock != NULL) {
1da177e4 1758 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1759 if (!err)
1760 err = sock->ops->shutdown(sock, how);
1761 fput_light(sock->file, fput_needed);
1da177e4
LT
1762 }
1763 return err;
1764}
1765
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * Note: these expand to expressions that reference a variable named
 * `flags` and a pointer named `<msg>_compat` at the point of use.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1772
1da177e4
LT
1773/*
1774 * BSD sendmsg interface
1775 */
1776
1777asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1778{
89bddce5
SH
1779 struct compat_msghdr __user *msg_compat =
1780 (struct compat_msghdr __user *)msg;
1da177e4
LT
1781 struct socket *sock;
1782 char address[MAX_SOCK_ADDR];
1783 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1784 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1785 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1786 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1787 unsigned char *ctl_buf = ctl;
1788 struct msghdr msg_sys;
1789 int err, ctl_len, iov_size, total_len;
6cb153ca 1790 int fput_needed;
89bddce5 1791
1da177e4
LT
1792 err = -EFAULT;
1793 if (MSG_CMSG_COMPAT & flags) {
1794 if (get_compat_msghdr(&msg_sys, msg_compat))
1795 return -EFAULT;
89bddce5
SH
1796 }
1797 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1798 return -EFAULT;
1799
6cb153ca 1800 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1801 if (!sock)
1da177e4
LT
1802 goto out;
1803
1804 /* do not move before msg_sys is valid */
1805 err = -EMSGSIZE;
1806 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1807 goto out_put;
1808
89bddce5 1809 /* Check whether to allocate the iovec area */
1da177e4
LT
1810 err = -ENOMEM;
1811 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1812 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1813 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1814 if (!iov)
1815 goto out_put;
1816 }
1817
1818 /* This will also move the address data into kernel space */
1819 if (MSG_CMSG_COMPAT & flags) {
1820 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1821 } else
1822 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1823 if (err < 0)
1da177e4
LT
1824 goto out_freeiov;
1825 total_len = err;
1826
1827 err = -ENOBUFS;
1828
1829 if (msg_sys.msg_controllen > INT_MAX)
1830 goto out_freeiov;
89bddce5 1831 ctl_len = msg_sys.msg_controllen;
1da177e4 1832 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1833 err =
1834 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1835 sizeof(ctl));
1da177e4
LT
1836 if (err)
1837 goto out_freeiov;
1838 ctl_buf = msg_sys.msg_control;
8920e8f9 1839 ctl_len = msg_sys.msg_controllen;
1da177e4 1840 } else if (ctl_len) {
89bddce5 1841 if (ctl_len > sizeof(ctl)) {
1da177e4 1842 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1843 if (ctl_buf == NULL)
1da177e4
LT
1844 goto out_freeiov;
1845 }
1846 err = -EFAULT;
1847 /*
1848 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1849 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1850 * checking falls down on this.
1851 */
89bddce5
SH
1852 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1853 ctl_len))
1da177e4
LT
1854 goto out_freectl;
1855 msg_sys.msg_control = ctl_buf;
1856 }
1857 msg_sys.msg_flags = flags;
1858
1859 if (sock->file->f_flags & O_NONBLOCK)
1860 msg_sys.msg_flags |= MSG_DONTWAIT;
1861 err = sock_sendmsg(sock, &msg_sys, total_len);
1862
1863out_freectl:
89bddce5 1864 if (ctl_buf != ctl)
1da177e4
LT
1865 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1866out_freeiov:
1867 if (iov != iovstack)
1868 sock_kfree_s(sock->sk, iov, iov_size);
1869out_put:
6cb153ca 1870 fput_light(sock->file, fput_needed);
89bddce5 1871out:
1da177e4
LT
1872 return err;
1873}
1874
1875/*
1876 * BSD recvmsg interface
1877 */
1878
89bddce5
SH
1879asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1880 unsigned int flags)
1da177e4 1881{
89bddce5
SH
1882 struct compat_msghdr __user *msg_compat =
1883 (struct compat_msghdr __user *)msg;
1da177e4
LT
1884 struct socket *sock;
1885 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1886 struct iovec *iov = iovstack;
1da177e4
LT
1887 struct msghdr msg_sys;
1888 unsigned long cmsg_ptr;
1889 int err, iov_size, total_len, len;
6cb153ca 1890 int fput_needed;
1da177e4
LT
1891
1892 /* kernel mode address */
1893 char addr[MAX_SOCK_ADDR];
1894
1895 /* user mode address pointers */
1896 struct sockaddr __user *uaddr;
1897 int __user *uaddr_len;
89bddce5 1898
1da177e4
LT
1899 if (MSG_CMSG_COMPAT & flags) {
1900 if (get_compat_msghdr(&msg_sys, msg_compat))
1901 return -EFAULT;
89bddce5
SH
1902 }
1903 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1904 return -EFAULT;
1da177e4 1905
6cb153ca 1906 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1907 if (!sock)
1908 goto out;
1909
1910 err = -EMSGSIZE;
1911 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1912 goto out_put;
89bddce5
SH
1913
1914 /* Check whether to allocate the iovec area */
1da177e4
LT
1915 err = -ENOMEM;
1916 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1917 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1918 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1919 if (!iov)
1920 goto out_put;
1921 }
1922
1923 /*
89bddce5
SH
1924 * Save the user-mode address (verify_iovec will change the
1925 * kernel msghdr to use the kernel address space)
1da177e4 1926 */
89bddce5
SH
1927
1928 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1929 uaddr_len = COMPAT_NAMELEN(msg);
1930 if (MSG_CMSG_COMPAT & flags) {
1931 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1932 } else
1933 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1934 if (err < 0)
1935 goto out_freeiov;
89bddce5 1936 total_len = err;
1da177e4
LT
1937
1938 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1939 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1940
1da177e4
LT
1941 if (sock->file->f_flags & O_NONBLOCK)
1942 flags |= MSG_DONTWAIT;
1943 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1944 if (err < 0)
1945 goto out_freeiov;
1946 len = err;
1947
1948 if (uaddr != NULL) {
89bddce5
SH
1949 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1950 uaddr_len);
1da177e4
LT
1951 if (err < 0)
1952 goto out_freeiov;
1953 }
37f7f421
DM
1954 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1955 COMPAT_FLAGS(msg));
1da177e4
LT
1956 if (err)
1957 goto out_freeiov;
1958 if (MSG_CMSG_COMPAT & flags)
89bddce5 1959 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1960 &msg_compat->msg_controllen);
1961 else
89bddce5 1962 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1963 &msg->msg_controllen);
1964 if (err)
1965 goto out_freeiov;
1966 err = len;
1967
1968out_freeiov:
1969 if (iov != iovstack)
1970 sock_kfree_s(sock->sk, iov, iov_size);
1971out_put:
6cb153ca 1972 fput_light(sock->file, fput_needed);
1da177e4
LT
1973out:
1974 return err;
1975}
1976
1977#ifdef __ARCH_WANT_SYS_SOCKETCALL
1978
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	/*  0 ..  5 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	/*  6 .. 11 */ AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	/* 12 .. 17 */ AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL
1988
1989/*
89bddce5 1990 * System call vectors.
1da177e4
LT
1991 *
1992 * Argument checking cleaned up. Saved 20% in size.
1993 * This function doesn't need to set the kernel lock because
89bddce5 1994 * it is set by the callees.
1da177e4
LT
1995 */
1996
1997asmlinkage long sys_socketcall(int call, unsigned long __user *args)
1998{
1999 unsigned long a[6];
89bddce5 2000 unsigned long a0, a1;
1da177e4
LT
2001 int err;
2002
89bddce5 2003 if (call < 1 || call > SYS_RECVMSG)
1da177e4
LT
2004 return -EINVAL;
2005
2006 /* copy_from_user should be SMP safe. */
2007 if (copy_from_user(a, args, nargs[call]))
2008 return -EFAULT;
3ec3b2fb 2009
89bddce5 2010 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb
DW
2011 if (err)
2012 return err;
2013
89bddce5
SH
2014 a0 = a[0];
2015 a1 = a[1];
2016
2017 switch (call) {
2018 case SYS_SOCKET:
2019 err = sys_socket(a0, a1, a[2]);
2020 break;
2021 case SYS_BIND:
2022 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2023 break;
2024 case SYS_CONNECT:
2025 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2026 break;
2027 case SYS_LISTEN:
2028 err = sys_listen(a0, a1);
2029 break;
2030 case SYS_ACCEPT:
2031 err =
2032 sys_accept(a0, (struct sockaddr __user *)a1,
2033 (int __user *)a[2]);
2034 break;
2035 case SYS_GETSOCKNAME:
2036 err =
2037 sys_getsockname(a0, (struct sockaddr __user *)a1,
2038 (int __user *)a[2]);
2039 break;
2040 case SYS_GETPEERNAME:
2041 err =
2042 sys_getpeername(a0, (struct sockaddr __user *)a1,
2043 (int __user *)a[2]);
2044 break;
2045 case SYS_SOCKETPAIR:
2046 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2047 break;
2048 case SYS_SEND:
2049 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2050 break;
2051 case SYS_SENDTO:
2052 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2053 (struct sockaddr __user *)a[4], a[5]);
2054 break;
2055 case SYS_RECV:
2056 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2057 break;
2058 case SYS_RECVFROM:
2059 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2060 (struct sockaddr __user *)a[4],
2061 (int __user *)a[5]);
2062 break;
2063 case SYS_SHUTDOWN:
2064 err = sys_shutdown(a0, a1);
2065 break;
2066 case SYS_SETSOCKOPT:
2067 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2068 break;
2069 case SYS_GETSOCKOPT:
2070 err =
2071 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2072 (int __user *)a[4]);
2073 break;
2074 case SYS_SENDMSG:
2075 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2076 break;
2077 case SYS_RECVMSG:
2078 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2079 break;
2080 default:
2081 err = -EINVAL;
2082 break;
1da177e4
LT
2083 }
2084 return err;
2085}
2086
89bddce5 2087#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2088
55737fda
SH
2089/**
2090 * sock_register - add a socket protocol handler
2091 * @ops: description of protocol
2092 *
1da177e4
LT
2093 * This function is called by a protocol handler that wants to
2094 * advertise its address family, and have it linked into the
55737fda
SH
2095 * socket interface. The value ops->family coresponds to the
2096 * socket system call protocol family.
1da177e4 2097 */
f0fd27d4 2098int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2099{
2100 int err;
2101
2102 if (ops->family >= NPROTO) {
89bddce5
SH
2103 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2104 NPROTO);
1da177e4
LT
2105 return -ENOBUFS;
2106 }
55737fda
SH
2107
2108 spin_lock(&net_family_lock);
2109 if (net_families[ops->family])
2110 err = -EEXIST;
2111 else {
89bddce5 2112 net_families[ops->family] = ops;
1da177e4
LT
2113 err = 0;
2114 }
55737fda
SH
2115 spin_unlock(&net_family_lock);
2116
89bddce5 2117 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2118 return err;
2119}
2120
55737fda
SH
2121/**
2122 * sock_unregister - remove a protocol handler
2123 * @family: protocol family to remove
2124 *
1da177e4
LT
2125 * This function is called by a protocol handler that wants to
2126 * remove its address family, and have it unlinked from the
55737fda
SH
2127 * new socket creation.
2128 *
2129 * If protocol handler is a module, then it can use module reference
2130 * counts to protect against new references. If protocol handler is not
2131 * a module then it needs to provide its own protection in
2132 * the ops->create routine.
1da177e4 2133 */
f0fd27d4 2134void sock_unregister(int family)
1da177e4 2135{
f0fd27d4 2136 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2137
55737fda 2138 spin_lock(&net_family_lock);
89bddce5 2139 net_families[family] = NULL;
55737fda
SH
2140 spin_unlock(&net_family_lock);
2141
2142 synchronize_rcu();
2143
89bddce5 2144 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2145}
2146
77d76ea3 2147static int __init sock_init(void)
1da177e4
LT
2148{
2149 /*
89bddce5 2150 * Initialize sock SLAB cache.
1da177e4 2151 */
89bddce5 2152
1da177e4
LT
2153 sk_init();
2154
1da177e4 2155 /*
89bddce5 2156 * Initialize skbuff SLAB cache
1da177e4
LT
2157 */
2158 skb_init();
1da177e4
LT
2159
2160 /*
89bddce5 2161 * Initialize the protocols module.
1da177e4
LT
2162 */
2163
2164 init_inodecache();
2165 register_filesystem(&sock_fs_type);
2166 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2167
2168 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2169 */
2170
2171#ifdef CONFIG_NETFILTER
2172 netfilter_init();
2173#endif
cbeb321a
DM
2174
2175 return 0;
1da177e4
LT
2176}
2177
77d76ea3
AK
2178core_initcall(sock_init); /* early initcall */
2179
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Write the "sockets: used N" line to @seq, where N is the sum of the
 * per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative, by the way; report 0 then. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif /* CONFIG_PROC_FS */
1da177e4 2196
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point for socket files: forwards to the socket's
 * ops->compat_ioctl handler when one is provided, and returns
 * -ENOIOCTLCMD otherwise.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	return sock->ops->compat_ioctl(sock, cmd, arg);
}
#endif
2210
ac5a488e
SS
2211int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2212{
2213 return sock->ops->bind(sock, addr, addrlen);
2214}
2215
2216int kernel_listen(struct socket *sock, int backlog)
2217{
2218 return sock->ops->listen(sock, backlog);
2219}
2220
2221int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2222{
2223 struct sock *sk = sock->sk;
2224 int err;
2225
2226 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2227 newsock);
2228 if (err < 0)
2229 goto done;
2230
2231 err = sock->ops->accept(sock, *newsock, flags);
2232 if (err < 0) {
2233 sock_release(*newsock);
2234 goto done;
2235 }
2236
2237 (*newsock)->ops = sock->ops;
2238
2239done:
2240 return err;
2241}
2242
2243int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2244 int flags)
ac5a488e
SS
2245{
2246 return sock->ops->connect(sock, addr, addrlen, flags);
2247}
2248
2249int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2250 int *addrlen)
2251{
2252 return sock->ops->getname(sock, addr, addrlen, 0);
2253}
2254
2255int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2256 int *addrlen)
2257{
2258 return sock->ops->getname(sock, addr, addrlen, 1);
2259}
2260
2261int kernel_getsockopt(struct socket *sock, int level, int optname,
2262 char *optval, int *optlen)
2263{
2264 mm_segment_t oldfs = get_fs();
2265 int err;
2266
2267 set_fs(KERNEL_DS);
2268 if (level == SOL_SOCKET)
2269 err = sock_getsockopt(sock, level, optname, optval, optlen);
2270 else
2271 err = sock->ops->getsockopt(sock, level, optname, optval,
2272 optlen);
2273 set_fs(oldfs);
2274 return err;
2275}
2276
2277int kernel_setsockopt(struct socket *sock, int level, int optname,
2278 char *optval, int optlen)
2279{
2280 mm_segment_t oldfs = get_fs();
2281 int err;
2282
2283 set_fs(KERNEL_DS);
2284 if (level == SOL_SOCKET)
2285 err = sock_setsockopt(sock, level, optname, optval, optlen);
2286 else
2287 err = sock->ops->setsockopt(sock, level, optname, optval,
2288 optlen);
2289 set_fs(oldfs);
2290 return err;
2291}
2292
2293int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2294 size_t size, int flags)
2295{
2296 if (sock->ops->sendpage)
2297 return sock->ops->sendpage(sock, page, offset, size, flags);
2298
2299 return sock_no_sendpage(sock, page, offset, size, flags);
2300}
2301
2302int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2303{
2304 mm_segment_t oldfs = get_fs();
2305 int err;
2306
2307 set_fs(KERNEL_DS);
2308 err = sock->ops->ioctl(sock, cmd, arg);
2309 set_fs(oldfs);
2310
2311 return err;
2312}
2313
1da177e4
LT
/* ABI emulation layers need these two address-copy helpers. */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message entry points. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() wrappers for in-kernel socket users. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2339EXPORT_SYMBOL(kernel_sock_ioctl);