net: drop capability from protocol definitions
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
1da177e4
LT
90
91#include <asm/uaccess.h>
92#include <asm/unistd.h>
93
94#include <net/compat.h>
87de87d5 95#include <net/wext.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
100static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
101static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
102 unsigned long nr_segs, loff_t pos);
103static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
104 unsigned long nr_segs, loff_t pos);
89bddce5 105static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
106
107static int sock_close(struct inode *inode, struct file *file);
108static unsigned int sock_poll(struct file *file,
109 struct poll_table_struct *wait);
89bddce5 110static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
111#ifdef CONFIG_COMPAT
112static long compat_sock_ioctl(struct file *file,
89bddce5 113 unsigned int cmd, unsigned long arg);
89bbfc95 114#endif
1da177e4 115static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
116static ssize_t sock_sendpage(struct file *file, struct page *page,
117 int offset, size_t size, loff_t *ppos, int more);
9c55e01c
JA
118static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
119 struct pipe_inode_info *pipe, size_t len,
120 unsigned int flags);
1da177e4 121
1da177e4
LT
122/*
123 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
124 * in the operation structures but are done directly via the socketcall() multiplexor.
125 */
126
da7071d7 127static const struct file_operations socket_file_ops = {
1da177e4
LT
128 .owner = THIS_MODULE,
129 .llseek = no_llseek,
130 .aio_read = sock_aio_read,
131 .aio_write = sock_aio_write,
132 .poll = sock_poll,
133 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
134#ifdef CONFIG_COMPAT
135 .compat_ioctl = compat_sock_ioctl,
136#endif
1da177e4
LT
137 .mmap = sock_mmap,
138 .open = sock_no_open, /* special open code to disallow open via /proc */
139 .release = sock_close,
140 .fasync = sock_fasync,
5274f052
JA
141 .sendpage = sock_sendpage,
142 .splice_write = generic_splice_sendpage,
9c55e01c 143 .splice_read = sock_splice_read,
1da177e4
LT
144};
145
146/*
147 * The protocol list. Each protocol is registered in here.
148 */
149
1da177e4 150static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 151static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 152
1da177e4
LT
153/*
154 * Statistics counters of the socket lists
155 */
156
157static DEFINE_PER_CPU(int, sockets_in_use) = 0;
158
159/*
89bddce5
SH
160 * Support routines.
161 * Move socket addresses back and forth across the kernel/user
162 * divide and look after the messy bits.
1da177e4
LT
163 */
164
/*
 * Size budget for a socket address, in bytes:
 *	108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 *	about 80 for AX.25.
 * Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 173
1da177e4
LT
174/**
175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space
179 *
180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */
184
230b1839 185int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 186{
230b1839 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 188 return -EINVAL;
89bddce5 189 if (ulen == 0)
1da177e4 190 return 0;
89bddce5 191 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 192 return -EFAULT;
3ec3b2fb 193 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
194}
195
196/**
197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address
199 * @klen: length of address in kernel
200 * @uaddr: user space address
201 * @ulen: pointer to user length field
202 *
203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not
207 * accessible.
208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success.
211 */
89bddce5 212
230b1839 213int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 214 int __user *ulen)
1da177e4
LT
215{
216 int err;
217 int len;
218
89bddce5
SH
219 err = get_user(len, ulen);
220 if (err)
1da177e4 221 return err;
89bddce5
SH
222 if (len > klen)
223 len = klen;
230b1839 224 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 225 return -EINVAL;
89bddce5 226 if (len) {
d6fe3945
SG
227 if (audit_sockaddr(klen, kaddr))
228 return -ENOMEM;
89bddce5 229 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
230 return -EFAULT;
231 }
232 /*
89bddce5
SH
233 * "fromlen shall refer to the value before truncation.."
234 * 1003.1g
1da177e4
LT
235 */
236 return __put_user(klen, ulen);
237}
238
e18b890b 239static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
240
241static struct inode *sock_alloc_inode(struct super_block *sb)
242{
243 struct socket_alloc *ei;
89bddce5 244
e94b1766 245 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
246 if (!ei)
247 return NULL;
248 init_waitqueue_head(&ei->socket.wait);
89bddce5 249
1da177e4
LT
250 ei->socket.fasync_list = NULL;
251 ei->socket.state = SS_UNCONNECTED;
252 ei->socket.flags = 0;
253 ei->socket.ops = NULL;
254 ei->socket.sk = NULL;
255 ei->socket.file = NULL;
1da177e4
LT
256
257 return &ei->vfs_inode;
258}
259
260static void sock_destroy_inode(struct inode *inode)
261{
262 kmem_cache_free(sock_inode_cachep,
263 container_of(inode, struct socket_alloc, vfs_inode));
264}
265
51cc5068 266static void init_once(void *foo)
1da177e4 267{
89bddce5 268 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 269
a35afb83 270 inode_init_once(&ei->vfs_inode);
1da177e4 271}
89bddce5 272
1da177e4
LT
273static int init_inodecache(void)
274{
275 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
276 sizeof(struct socket_alloc),
277 0,
278 (SLAB_HWCACHE_ALIGN |
279 SLAB_RECLAIM_ACCOUNT |
280 SLAB_MEM_SPREAD),
20c2df83 281 init_once);
1da177e4
LT
282 if (sock_inode_cachep == NULL)
283 return -ENOMEM;
284 return 0;
285}
286
b87221de 287static const struct super_operations sockfs_ops = {
1da177e4
LT
288 .alloc_inode = sock_alloc_inode,
289 .destroy_inode =sock_destroy_inode,
290 .statfs = simple_statfs,
291};
292
454e2398 293static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
294 int flags, const char *dev_name, void *data,
295 struct vfsmount *mnt)
1da177e4 296{
454e2398
DH
297 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
298 mnt);
1da177e4
LT
299}
300
ba89966c 301static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
302
303static struct file_system_type sock_fs_type = {
304 .name = "sockfs",
305 .get_sb = sockfs_get_sb,
306 .kill_sb = kill_anon_super,
307};
89bddce5 308
1da177e4
LT
309static int sockfs_delete_dentry(struct dentry *dentry)
310{
304e61e6
ED
311 /*
312 * At creation time, we pretended this dentry was hashed
313 * (by clearing DCACHE_UNHASHED bit in d_flags)
314 * At delete time, we restore the truth : not hashed.
315 * (so that dput() can proceed correctly)
316 */
317 dentry->d_flags |= DCACHE_UNHASHED;
318 return 0;
1da177e4 319}
c23fbb6b
ED
320
321/*
322 * sockfs_dname() is called from d_path().
323 */
324static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
325{
326 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
327 dentry->d_inode->i_ino);
328}
329
3ba13d17 330static const struct dentry_operations sockfs_dentry_operations = {
89bddce5 331 .d_delete = sockfs_delete_dentry,
c23fbb6b 332 .d_dname = sockfs_dname,
1da177e4
LT
333};
334
335/*
336 * Obtains the first available file descriptor and sets it up for use.
337 *
39d8c1b6
DM
338 * These functions create file structures and maps them to fd space
339 * of the current process. On success it returns file descriptor
1da177e4
LT
340 * and file struct implicitly stored in sock->file.
341 * Note that another thread may close file descriptor before we return
342 * from this function. We use the fact that now we do not refer
343 * to socket after mapping. If one day we will need it, this
344 * function will increment ref. count on file by 1.
345 *
346 * In any case returned fd MAY BE not valid!
347 * This race condition is unavoidable
348 * with shared fd spaces, we cannot solve it inside kernel,
349 * but we take care of internal coherence yet.
350 */
351
a677a039 352static int sock_alloc_fd(struct file **filep, int flags)
1da177e4
LT
353{
354 int fd;
1da177e4 355
a677a039 356 fd = get_unused_fd_flags(flags);
39d8c1b6 357 if (likely(fd >= 0)) {
1da177e4
LT
358 struct file *file = get_empty_filp();
359
39d8c1b6
DM
360 *filep = file;
361 if (unlikely(!file)) {
1da177e4 362 put_unused_fd(fd);
39d8c1b6 363 return -ENFILE;
1da177e4 364 }
39d8c1b6
DM
365 } else
366 *filep = NULL;
367 return fd;
368}
1da177e4 369
77d27200 370static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
39d8c1b6 371{
ce8d2cdf 372 struct dentry *dentry;
c23fbb6b 373 struct qstr name = { .name = "" };
39d8c1b6 374
ce8d2cdf
DH
375 dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
376 if (unlikely(!dentry))
39d8c1b6
DM
377 return -ENOMEM;
378
ce8d2cdf 379 dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
380 /*
381 * We dont want to push this dentry into global dentry hash table.
382 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
383 * This permits a working /proc/$pid/fd/XXX on sockets
384 */
ce8d2cdf
DH
385 dentry->d_flags &= ~DCACHE_UNHASHED;
386 d_instantiate(dentry, SOCK_INODE(sock));
39d8c1b6
DM
387
388 sock->file = file;
ce8d2cdf
DH
389 init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
390 &socket_file_ops);
391 SOCK_INODE(sock)->i_fop = &socket_file_ops;
77d27200 392 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
393 file->f_pos = 0;
394 file->private_data = sock;
1da177e4 395
39d8c1b6
DM
396 return 0;
397}
398
/*
 * Allocate a descriptor and file for @sock, attach them and install the
 * file in the descriptor table. Returns the new descriptor, or a negative
 * errno; on an attach failure both the file and the descriptor are
 * released again.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int err;
	int fd = sock_alloc_fd(&newfile, flags);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile, flags);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
416
6cb153ca
BL
417static struct socket *sock_from_file(struct file *file, int *err)
418{
6cb153ca
BL
419 if (file->f_op == &socket_file_ops)
420 return file->private_data; /* set in sock_map_fd */
421
23bb80d2
ED
422 *err = -ENOTSOCK;
423 return NULL;
6cb153ca
BL
424}
425
1da177e4
LT
426/**
427 * sockfd_lookup - Go from a file number to its socket slot
428 * @fd: file handle
429 * @err: pointer to an error code return
430 *
431 * The file handle passed in is locked and the socket it is bound
432 * too is returned. If an error occurs the err pointer is overwritten
433 * with a negative errno code and NULL is returned. The function checks
434 * for both invalid handles and passing a handle which is not a socket.
435 *
436 * On a success the socket object pointer is returned.
437 */
438
439struct socket *sockfd_lookup(int fd, int *err)
440{
441 struct file *file;
1da177e4
LT
442 struct socket *sock;
443
89bddce5
SH
444 file = fget(fd);
445 if (!file) {
1da177e4
LT
446 *err = -EBADF;
447 return NULL;
448 }
89bddce5 449
6cb153ca
BL
450 sock = sock_from_file(file, err);
451 if (!sock)
1da177e4 452 fput(file);
6cb153ca
BL
453 return sock;
454}
1da177e4 455
6cb153ca
BL
456static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
457{
458 struct file *file;
459 struct socket *sock;
460
3672558c 461 *err = -EBADF;
6cb153ca
BL
462 file = fget_light(fd, fput_needed);
463 if (file) {
464 sock = sock_from_file(file, err);
465 if (sock)
466 return sock;
467 fput_light(file, *fput_needed);
1da177e4 468 }
6cb153ca 469 return NULL;
1da177e4
LT
470}
471
472/**
473 * sock_alloc - allocate a socket
89bddce5 474 *
1da177e4
LT
475 * Allocate a new inode and socket object. The two are bound together
476 * and initialised. The socket is then returned. If we are out of inodes
477 * NULL is returned.
478 */
479
480static struct socket *sock_alloc(void)
481{
89bddce5
SH
482 struct inode *inode;
483 struct socket *sock;
1da177e4
LT
484
485 inode = new_inode(sock_mnt->mnt_sb);
486 if (!inode)
487 return NULL;
488
489 sock = SOCKET_I(inode);
490
29a020d3 491 kmemcheck_annotate_bitfield(sock, type);
89bddce5 492 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
493 inode->i_uid = current_fsuid();
494 inode->i_gid = current_fsgid();
1da177e4 495
4e69489a 496 percpu_add(sockets_in_use, 1);
1da177e4
LT
497 return sock;
498}
499
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
510
4b6f5d20 511const struct file_operations bad_sock_fops = {
1da177e4
LT
512 .owner = THIS_MODULE,
513 .open = sock_no_open,
514};
515
516/**
517 * sock_release - close a socket
518 * @sock: socket to close
519 *
520 * The socket is released from the protocol stack if it has a release
521 * callback, and the inode is then released if the socket is bound to
89bddce5 522 * an inode not a file.
1da177e4 523 */
89bddce5 524
1da177e4
LT
525void sock_release(struct socket *sock)
526{
527 if (sock->ops) {
528 struct module *owner = sock->ops->owner;
529
530 sock->ops->release(sock);
531 sock->ops = NULL;
532 module_put(owner);
533 }
534
535 if (sock->fasync_list)
536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
537
4e69489a 538 percpu_sub(sockets_in_use, 1);
1da177e4
LT
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4
LT
544}
545
20d49473
PO
546int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
547 union skb_shared_tx *shtx)
548{
549 shtx->flags = 0;
550 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
551 shtx->hardware = 1;
552 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
553 shtx->software = 1;
554 return 0;
555}
556EXPORT_SYMBOL(sock_tx_timestamp);
557
89bddce5 558static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
559 struct msghdr *msg, size_t size)
560{
561 struct sock_iocb *si = kiocb_to_siocb(iocb);
562 int err;
563
564 si->sock = sock;
565 si->scm = NULL;
566 si->msg = msg;
567 si->size = size;
568
569 err = security_socket_sendmsg(sock, msg, size);
570 if (err)
571 return err;
572
573 return sock->ops->sendmsg(iocb, sock, msg, size);
574}
575
576int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
577{
578 struct kiocb iocb;
579 struct sock_iocb siocb;
580 int ret;
581
582 init_sync_kiocb(&iocb, NULL);
583 iocb.private = &siocb;
584 ret = __sock_sendmsg(&iocb, sock, msg, size);
585 if (-EIOCBQUEUED == ret)
586 ret = wait_on_sync_kiocb(&iocb);
587 return ret;
588}
589
590int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
591 struct kvec *vec, size_t num, size_t size)
592{
593 mm_segment_t oldfs = get_fs();
594 int result;
595
596 set_fs(KERNEL_DS);
597 /*
598 * the following is safe, since for compiler definitions of kvec and
599 * iovec are identical, yielding the same in-core layout and alignment
600 */
89bddce5 601 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
602 msg->msg_iovlen = num;
603 result = sock_sendmsg(sock, msg, size);
604 set_fs(oldfs);
605 return result;
606}
607
20d49473
PO
608static int ktime2ts(ktime_t kt, struct timespec *ts)
609{
610 if (kt.tv64) {
611 *ts = ktime_to_timespec(kt);
612 return 1;
613 } else {
614 return 0;
615 }
616}
617
92f37fd2
ED
618/*
619 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
620 */
621void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
622 struct sk_buff *skb)
623{
20d49473
PO
624 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
625 struct timespec ts[3];
626 int empty = 1;
627 struct skb_shared_hwtstamps *shhwtstamps =
628 skb_hwtstamps(skb);
629
630 /* Race occurred between timestamp enabling and packet
631 receiving. Fill in the current time for now. */
632 if (need_software_tstamp && skb->tstamp.tv64 == 0)
633 __net_timestamp(skb);
634
635 if (need_software_tstamp) {
636 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
637 struct timeval tv;
638 skb_get_timestamp(skb, &tv);
639 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
640 sizeof(tv), &tv);
641 } else {
642 struct timespec ts;
643 skb_get_timestampns(skb, &ts);
644 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
645 sizeof(ts), &ts);
646 }
647 }
648
649
650 memset(ts, 0, sizeof(ts));
651 if (skb->tstamp.tv64 &&
652 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
653 skb_get_timestampns(skb, ts + 0);
654 empty = 0;
655 }
656 if (shhwtstamps) {
657 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
658 ktime2ts(shhwtstamps->syststamp, ts + 1))
659 empty = 0;
660 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
661 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
662 empty = 0;
92f37fd2 663 }
20d49473
PO
664 if (!empty)
665 put_cmsg(msg, SOL_SOCKET,
666 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2
ED
667}
668
7c81fd8b
ACM
669EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
670
3b885787
NH
671inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
672{
673 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
674 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
675 sizeof(__u32), &skb->dropcount);
676}
677
/*
 * Convenience wrapper: attach the receive timestamp first, then the drop
 * count of @skb, to @msg.
 */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
685
a2e27255
ACM
686static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
687 struct msghdr *msg, size_t size, int flags)
1da177e4 688{
1da177e4
LT
689 struct sock_iocb *si = kiocb_to_siocb(iocb);
690
691 si->sock = sock;
692 si->scm = NULL;
693 si->msg = msg;
694 si->size = size;
695 si->flags = flags;
696
1da177e4
LT
697 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
698}
699
a2e27255
ACM
700static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
701 struct msghdr *msg, size_t size, int flags)
702{
703 int err = security_socket_recvmsg(sock, msg, size, flags);
704
705 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
706}
707
89bddce5 708int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
709 size_t size, int flags)
710{
711 struct kiocb iocb;
712 struct sock_iocb siocb;
713 int ret;
714
89bddce5 715 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
716 iocb.private = &siocb;
717 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
718 if (-EIOCBQUEUED == ret)
719 ret = wait_on_sync_kiocb(&iocb);
720 return ret;
721}
722
a2e27255
ACM
723static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
724 size_t size, int flags)
725{
726 struct kiocb iocb;
727 struct sock_iocb siocb;
728 int ret;
729
730 init_sync_kiocb(&iocb, NULL);
731 iocb.private = &siocb;
732 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
733 if (-EIOCBQUEUED == ret)
734 ret = wait_on_sync_kiocb(&iocb);
735 return ret;
736}
737
89bddce5
SH
738int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
739 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
740{
741 mm_segment_t oldfs = get_fs();
742 int result;
743
744 set_fs(KERNEL_DS);
745 /*
746 * the following is safe, since for compiler definitions of kvec and
747 * iovec are identical, yielding the same in-core layout and alignment
748 */
89bddce5 749 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
750 result = sock_recvmsg(sock, msg, size, flags);
751 set_fs(oldfs);
752 return result;
753}
754
755static void sock_aio_dtor(struct kiocb *iocb)
756{
757 kfree(iocb->private);
758}
759
ce1d4d3e
CH
760static ssize_t sock_sendpage(struct file *file, struct page *page,
761 int offset, size_t size, loff_t *ppos, int more)
1da177e4 762{
1da177e4
LT
763 struct socket *sock;
764 int flags;
765
ce1d4d3e
CH
766 sock = file->private_data;
767
768 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
769 if (more)
770 flags |= MSG_MORE;
771
e6949583 772 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 773}
1da177e4 774
9c55e01c
JA
775static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
776 struct pipe_inode_info *pipe, size_t len,
777 unsigned int flags)
778{
779 struct socket *sock = file->private_data;
780
997b37da
RDC
781 if (unlikely(!sock->ops->splice_read))
782 return -EINVAL;
783
9c55e01c
JA
784 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
785}
786
ce1d4d3e 787static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 788 struct sock_iocb *siocb)
ce1d4d3e
CH
789{
790 if (!is_sync_kiocb(iocb)) {
791 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
792 if (!siocb)
793 return NULL;
1da177e4
LT
794 iocb->ki_dtor = sock_aio_dtor;
795 }
1da177e4 796
ce1d4d3e 797 siocb->kiocb = iocb;
ce1d4d3e
CH
798 iocb->private = siocb;
799 return siocb;
1da177e4
LT
800}
801
ce1d4d3e 802static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
803 struct file *file, const struct iovec *iov,
804 unsigned long nr_segs)
ce1d4d3e
CH
805{
806 struct socket *sock = file->private_data;
807 size_t size = 0;
808 int i;
1da177e4 809
89bddce5
SH
810 for (i = 0; i < nr_segs; i++)
811 size += iov[i].iov_len;
1da177e4 812
ce1d4d3e
CH
813 msg->msg_name = NULL;
814 msg->msg_namelen = 0;
815 msg->msg_control = NULL;
816 msg->msg_controllen = 0;
89bddce5 817 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
818 msg->msg_iovlen = nr_segs;
819 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
820
821 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
822}
823
027445c3
BP
824static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
825 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
826{
827 struct sock_iocb siocb, *x;
828
1da177e4
LT
829 if (pos != 0)
830 return -ESPIPE;
027445c3
BP
831
832 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
833 return 0;
834
027445c3
BP
835
836 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
837 if (!x)
838 return -ENOMEM;
027445c3 839 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
840}
841
ce1d4d3e 842static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
843 struct file *file, const struct iovec *iov,
844 unsigned long nr_segs)
1da177e4 845{
ce1d4d3e
CH
846 struct socket *sock = file->private_data;
847 size_t size = 0;
848 int i;
1da177e4 849
89bddce5
SH
850 for (i = 0; i < nr_segs; i++)
851 size += iov[i].iov_len;
1da177e4 852
ce1d4d3e
CH
853 msg->msg_name = NULL;
854 msg->msg_namelen = 0;
855 msg->msg_control = NULL;
856 msg->msg_controllen = 0;
89bddce5 857 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
858 msg->msg_iovlen = nr_segs;
859 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
860 if (sock->type == SOCK_SEQPACKET)
861 msg->msg_flags |= MSG_EOR;
1da177e4 862
ce1d4d3e 863 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
864}
865
027445c3
BP
866static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
867 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
868{
869 struct sock_iocb siocb, *x;
1da177e4 870
ce1d4d3e
CH
871 if (pos != 0)
872 return -ESPIPE;
027445c3 873
027445c3 874 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
875 if (!x)
876 return -ENOMEM;
1da177e4 877
027445c3 878 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
879}
880
1da177e4
LT
881/*
882 * Atomic setting of ioctl hooks to avoid race
883 * with module unload.
884 */
885
4a3e2f71 886static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 887static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 888
881d966b 889void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 890{
4a3e2f71 891 mutex_lock(&br_ioctl_mutex);
1da177e4 892 br_ioctl_hook = hook;
4a3e2f71 893 mutex_unlock(&br_ioctl_mutex);
1da177e4 894}
89bddce5 895
1da177e4
LT
896EXPORT_SYMBOL(brioctl_set);
897
4a3e2f71 898static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 899static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 900
881d966b 901void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 902{
4a3e2f71 903 mutex_lock(&vlan_ioctl_mutex);
1da177e4 904 vlan_ioctl_hook = hook;
4a3e2f71 905 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 906}
89bddce5 907
1da177e4
LT
908EXPORT_SYMBOL(vlan_ioctl_set);
909
4a3e2f71 910static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 911static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 912
89bddce5 913void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 914{
4a3e2f71 915 mutex_lock(&dlci_ioctl_mutex);
1da177e4 916 dlci_ioctl_hook = hook;
4a3e2f71 917 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 918}
89bddce5 919
1da177e4
LT
920EXPORT_SYMBOL(dlci_ioctl_set);
921
922/*
923 * With an ioctl, arg may well be a user mode pointer, but we don't know
924 * what to do with it - that's up to the protocol still.
925 */
926
927static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
928{
929 struct socket *sock;
881d966b 930 struct sock *sk;
1da177e4
LT
931 void __user *argp = (void __user *)arg;
932 int pid, err;
881d966b 933 struct net *net;
1da177e4 934
b69aee04 935 sock = file->private_data;
881d966b 936 sk = sock->sk;
3b1e0a65 937 net = sock_net(sk);
1da177e4 938 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 939 err = dev_ioctl(net, cmd, argp);
1da177e4 940 } else
3d23e349 941#ifdef CONFIG_WEXT_CORE
1da177e4 942 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 943 err = dev_ioctl(net, cmd, argp);
1da177e4 944 } else
3d23e349 945#endif
89bddce5 946 switch (cmd) {
1da177e4
LT
947 case FIOSETOWN:
948 case SIOCSPGRP:
949 err = -EFAULT;
950 if (get_user(pid, (int __user *)argp))
951 break;
952 err = f_setown(sock->file, pid, 1);
953 break;
954 case FIOGETOWN:
955 case SIOCGPGRP:
609d7fa9 956 err = put_user(f_getown(sock->file),
89bddce5 957 (int __user *)argp);
1da177e4
LT
958 break;
959 case SIOCGIFBR:
960 case SIOCSIFBR:
961 case SIOCBRADDBR:
962 case SIOCBRDELBR:
963 err = -ENOPKG;
964 if (!br_ioctl_hook)
965 request_module("bridge");
966
4a3e2f71 967 mutex_lock(&br_ioctl_mutex);
89bddce5 968 if (br_ioctl_hook)
881d966b 969 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 970 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
971 break;
972 case SIOCGIFVLAN:
973 case SIOCSIFVLAN:
974 err = -ENOPKG;
975 if (!vlan_ioctl_hook)
976 request_module("8021q");
977
4a3e2f71 978 mutex_lock(&vlan_ioctl_mutex);
1da177e4 979 if (vlan_ioctl_hook)
881d966b 980 err = vlan_ioctl_hook(net, argp);
4a3e2f71 981 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 982 break;
1da177e4
LT
983 case SIOCADDDLCI:
984 case SIOCDELDLCI:
985 err = -ENOPKG;
986 if (!dlci_ioctl_hook)
987 request_module("dlci");
988
7512cbf6
PE
989 mutex_lock(&dlci_ioctl_mutex);
990 if (dlci_ioctl_hook)
1da177e4 991 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 992 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
993 break;
994 default:
995 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
996
997 /*
998 * If this ioctl is unknown try to hand it down
999 * to the NIC driver.
1000 */
1001 if (err == -ENOIOCTLCMD)
881d966b 1002 err = dev_ioctl(net, cmd, argp);
1da177e4 1003 break;
89bddce5 1004 }
1da177e4
LT
1005 return err;
1006}
1007
1008int sock_create_lite(int family, int type, int protocol, struct socket **res)
1009{
1010 int err;
1011 struct socket *sock = NULL;
89bddce5 1012
1da177e4
LT
1013 err = security_socket_create(family, type, protocol, 1);
1014 if (err)
1015 goto out;
1016
1017 sock = sock_alloc();
1018 if (!sock) {
1019 err = -ENOMEM;
1020 goto out;
1021 }
1022
1da177e4 1023 sock->type = type;
7420ed23
VY
1024 err = security_socket_post_create(sock, family, type, protocol, 1);
1025 if (err)
1026 goto out_release;
1027
1da177e4
LT
1028out:
1029 *res = sock;
1030 return err;
7420ed23
VY
1031out_release:
1032 sock_release(sock);
1033 sock = NULL;
1034 goto out;
1da177e4
LT
1035}
1036
1037/* No kernel lock held - perfect */
89bddce5 1038static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1039{
1040 struct socket *sock;
1041
1042 /*
89bddce5 1043 * We can't return errors to poll, so it's either yes or no.
1da177e4 1044 */
b69aee04 1045 sock = file->private_data;
1da177e4
LT
1046 return sock->ops->poll(file, sock, wait);
1047}
1048
89bddce5 1049static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1050{
b69aee04 1051 struct socket *sock = file->private_data;
1da177e4
LT
1052
1053 return sock->ops->mmap(file, sock, vma);
1054}
1055
20380731 1056static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1057{
1058 /*
89bddce5
SH
1059 * It was possible the inode is NULL we were
1060 * closing an unfinished socket.
1da177e4
LT
1061 */
1062
89bddce5 1063 if (!inode) {
1da177e4
LT
1064 printk(KERN_DEBUG "sock_close: NULL inode\n");
1065 return 0;
1066 }
1da177e4
LT
1067 sock_release(SOCKET_I(inode));
1068 return 0;
1069}
1070
1071/*
1072 * Update the socket async list
1073 *
1074 * Fasync_list locking strategy.
1075 *
1076 * 1. fasync_list is modified only under process context socket lock
1077 * i.e. under semaphore.
1078 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1079 * or under socket lock.
1080 * 3. fasync_list can be used from softirq context, so that
1081 * modification under socket lock have to be enhanced with
1082 * write_lock_bh(&sk->sk_callback_lock).
1083 * --ANK (990710)
1084 */
1085
1086static int sock_fasync(int fd, struct file *filp, int on)
1087{
89bddce5 1088 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
1089 struct socket *sock;
1090 struct sock *sk;
1091
89bddce5 1092 if (on) {
8b3a7005 1093 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1094 if (fna == NULL)
1da177e4
LT
1095 return -ENOMEM;
1096 }
1097
b69aee04 1098 sock = filp->private_data;
1da177e4 1099
89bddce5
SH
1100 sk = sock->sk;
1101 if (sk == NULL) {
1da177e4
LT
1102 kfree(fna);
1103 return -EINVAL;
1104 }
1105
1106 lock_sock(sk);
1107
76398425
JC
1108 spin_lock(&filp->f_lock);
1109 if (on)
1110 filp->f_flags |= FASYNC;
1111 else
1112 filp->f_flags &= ~FASYNC;
1113 spin_unlock(&filp->f_lock);
1114
89bddce5 1115 prev = &(sock->fasync_list);
1da177e4 1116
89bddce5
SH
1117 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1118 if (fa->fa_file == filp)
1da177e4
LT
1119 break;
1120
89bddce5
SH
1121 if (on) {
1122 if (fa != NULL) {
1da177e4 1123 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1124 fa->fa_fd = fd;
1da177e4
LT
1125 write_unlock_bh(&sk->sk_callback_lock);
1126
1127 kfree(fna);
1128 goto out;
1129 }
89bddce5
SH
1130 fna->fa_file = filp;
1131 fna->fa_fd = fd;
1132 fna->magic = FASYNC_MAGIC;
1133 fna->fa_next = sock->fasync_list;
1da177e4 1134 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1135 sock->fasync_list = fna;
bcdce719 1136 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1137 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1138 } else {
1139 if (fa != NULL) {
1da177e4 1140 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1141 *prev = fa->fa_next;
bcdce719
ED
1142 if (!sock->fasync_list)
1143 sock_reset_flag(sk, SOCK_FASYNC);
1da177e4
LT
1144 write_unlock_bh(&sk->sk_callback_lock);
1145 kfree(fa);
1146 }
1147 }
1148
1149out:
1150 release_sock(sock->sk);
1151 return 0;
1152}
1153
1154/* This function may be called only under socket lock or callback_lock */
1155
1156int sock_wake_async(struct socket *sock, int how, int band)
1157{
1158 if (!sock || !sock->fasync_list)
1159 return -1;
89bddce5 1160 switch (how) {
8d8ad9d7 1161 case SOCK_WAKE_WAITD:
1da177e4
LT
1162 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1163 break;
1164 goto call_kill;
8d8ad9d7 1165 case SOCK_WAKE_SPACE:
1da177e4
LT
1166 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1167 break;
1168 /* fall through */
8d8ad9d7 1169 case SOCK_WAKE_IO:
89bddce5 1170call_kill:
1da177e4
LT
1171 __kill_fasync(sock->fasync_list, SIGIO, band);
1172 break;
8d8ad9d7 1173 case SOCK_WAKE_URG:
1da177e4
LT
1174 __kill_fasync(sock->fasync_list, SIGURG, band);
1175 }
1176 return 0;
1177}
1178
1b8d7ae4 1179static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1180 struct socket **res, int kern)
1da177e4
LT
1181{
1182 int err;
1183 struct socket *sock;
55737fda 1184 const struct net_proto_family *pf;
1da177e4
LT
1185
1186 /*
89bddce5 1187 * Check protocol is in range
1da177e4
LT
1188 */
1189 if (family < 0 || family >= NPROTO)
1190 return -EAFNOSUPPORT;
1191 if (type < 0 || type >= SOCK_MAX)
1192 return -EINVAL;
1193
1194 /* Compatibility.
1195
1196 This uglymoron is moved from INET layer to here to avoid
1197 deadlock in module load.
1198 */
1199 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1200 static int warned;
1da177e4
LT
1201 if (!warned) {
1202 warned = 1;
89bddce5
SH
1203 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1204 current->comm);
1da177e4
LT
1205 }
1206 family = PF_PACKET;
1207 }
1208
1209 err = security_socket_create(family, type, protocol, kern);
1210 if (err)
1211 return err;
89bddce5 1212
55737fda
SH
1213 /*
1214 * Allocate the socket and allow the family to set things up. if
1215 * the protocol is 0, the family is instructed to select an appropriate
1216 * default.
1217 */
1218 sock = sock_alloc();
1219 if (!sock) {
1220 if (net_ratelimit())
1221 printk(KERN_WARNING "socket: no more sockets\n");
1222 return -ENFILE; /* Not exactly a match, but its the
1223 closest posix thing */
1224 }
1225
1226 sock->type = type;
1227
95a5afca 1228#ifdef CONFIG_MODULES
89bddce5
SH
1229 /* Attempt to load a protocol module if the find failed.
1230 *
1231 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1232 * requested real, full-featured networking support upon configuration.
1233 * Otherwise module support will break!
1234 */
55737fda 1235 if (net_families[family] == NULL)
89bddce5 1236 request_module("net-pf-%d", family);
1da177e4
LT
1237#endif
1238
55737fda
SH
1239 rcu_read_lock();
1240 pf = rcu_dereference(net_families[family]);
1241 err = -EAFNOSUPPORT;
1242 if (!pf)
1243 goto out_release;
1da177e4
LT
1244
1245 /*
1246 * We will call the ->create function, that possibly is in a loadable
1247 * module, so we have to bump that loadable module refcnt first.
1248 */
55737fda 1249 if (!try_module_get(pf->owner))
1da177e4
LT
1250 goto out_release;
1251
55737fda
SH
1252 /* Now protected by module ref count */
1253 rcu_read_unlock();
1254
1b8d7ae4 1255 err = pf->create(net, sock, protocol);
55737fda 1256 if (err < 0)
1da177e4 1257 goto out_module_put;
a79af59e 1258
1da177e4
LT
1259 /*
1260 * Now to bump the refcnt of the [loadable] module that owns this
1261 * socket at sock_release time we decrement its refcnt.
1262 */
55737fda
SH
1263 if (!try_module_get(sock->ops->owner))
1264 goto out_module_busy;
1265
1da177e4
LT
1266 /*
1267 * Now that we're done with the ->create function, the [loadable]
1268 * module can have its refcnt decremented
1269 */
55737fda 1270 module_put(pf->owner);
7420ed23
VY
1271 err = security_socket_post_create(sock, family, type, protocol, kern);
1272 if (err)
3b185525 1273 goto out_sock_release;
55737fda 1274 *res = sock;
1da177e4 1275
55737fda
SH
1276 return 0;
1277
1278out_module_busy:
1279 err = -EAFNOSUPPORT;
1da177e4 1280out_module_put:
55737fda
SH
1281 sock->ops = NULL;
1282 module_put(pf->owner);
1283out_sock_release:
1da177e4 1284 sock_release(sock);
55737fda
SH
1285 return err;
1286
1287out_release:
1288 rcu_read_unlock();
1289 goto out_sock_release;
1da177e4
LT
1290}
1291
1292int sock_create(int family, int type, int protocol, struct socket **res)
1293{
1b8d7ae4 1294 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1295}
1296
1297int sock_create_kern(int family, int type, int protocol, struct socket **res)
1298{
1b8d7ae4 1299 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1300}
1301
3e0fa65f 1302SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1303{
1304 int retval;
1305 struct socket *sock;
a677a039
UD
1306 int flags;
1307
e38b36f3
UD
1308 /* Check the SOCK_* constants for consistency. */
1309 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1310 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1311 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1312 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1313
a677a039 1314 flags = type & ~SOCK_TYPE_MASK;
77d27200 1315 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1316 return -EINVAL;
1317 type &= SOCK_TYPE_MASK;
1da177e4 1318
aaca0bdc
UD
1319 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1320 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1321
1da177e4
LT
1322 retval = sock_create(family, type, protocol, &sock);
1323 if (retval < 0)
1324 goto out;
1325
77d27200 1326 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1327 if (retval < 0)
1328 goto out_release;
1329
1330out:
1331 /* It may be already another descriptor 8) Not kernel problem. */
1332 return retval;
1333
1334out_release:
1335 sock_release(sock);
1336 return retval;
1337}
1338
1339/*
1340 * Create a pair of connected sockets.
1341 */
1342
3e0fa65f
HC
1343SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1344 int __user *, usockvec)
1da177e4
LT
1345{
1346 struct socket *sock1, *sock2;
1347 int fd1, fd2, err;
db349509 1348 struct file *newfile1, *newfile2;
a677a039
UD
1349 int flags;
1350
1351 flags = type & ~SOCK_TYPE_MASK;
77d27200 1352 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1353 return -EINVAL;
1354 type &= SOCK_TYPE_MASK;
1da177e4 1355
aaca0bdc
UD
1356 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1357 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1358
1da177e4
LT
1359 /*
1360 * Obtain the first socket and check if the underlying protocol
1361 * supports the socketpair call.
1362 */
1363
1364 err = sock_create(family, type, protocol, &sock1);
1365 if (err < 0)
1366 goto out;
1367
1368 err = sock_create(family, type, protocol, &sock2);
1369 if (err < 0)
1370 goto out_release_1;
1371
1372 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1373 if (err < 0)
1da177e4
LT
1374 goto out_release_both;
1375
a677a039 1376 fd1 = sock_alloc_fd(&newfile1, flags & O_CLOEXEC);
bf3c23d1
DM
1377 if (unlikely(fd1 < 0)) {
1378 err = fd1;
db349509 1379 goto out_release_both;
bf3c23d1 1380 }
1da177e4 1381
a677a039 1382 fd2 = sock_alloc_fd(&newfile2, flags & O_CLOEXEC);
db349509 1383 if (unlikely(fd2 < 0)) {
bf3c23d1 1384 err = fd2;
db349509
AV
1385 put_filp(newfile1);
1386 put_unused_fd(fd1);
1da177e4 1387 goto out_release_both;
db349509 1388 }
1da177e4 1389
77d27200 1390 err = sock_attach_fd(sock1, newfile1, flags & O_NONBLOCK);
db349509
AV
1391 if (unlikely(err < 0)) {
1392 goto out_fd2;
1393 }
1394
77d27200 1395 err = sock_attach_fd(sock2, newfile2, flags & O_NONBLOCK);
db349509
AV
1396 if (unlikely(err < 0)) {
1397 fput(newfile1);
1398 goto out_fd1;
1399 }
1400
157cf649 1401 audit_fd_pair(fd1, fd2);
db349509
AV
1402 fd_install(fd1, newfile1);
1403 fd_install(fd2, newfile2);
1da177e4
LT
1404 /* fd1 and fd2 may be already another descriptors.
1405 * Not kernel problem.
1406 */
1407
89bddce5 1408 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1409 if (!err)
1410 err = put_user(fd2, &usockvec[1]);
1411 if (!err)
1412 return 0;
1413
1414 sys_close(fd2);
1415 sys_close(fd1);
1416 return err;
1417
1da177e4 1418out_release_both:
89bddce5 1419 sock_release(sock2);
1da177e4 1420out_release_1:
89bddce5 1421 sock_release(sock1);
1da177e4
LT
1422out:
1423 return err;
db349509
AV
1424
1425out_fd2:
1426 put_filp(newfile1);
1427 sock_release(sock1);
1428out_fd1:
1429 put_filp(newfile2);
1430 sock_release(sock2);
db349509
AV
1431 put_unused_fd(fd1);
1432 put_unused_fd(fd2);
1433 goto out;
1da177e4
LT
1434}
1435
1da177e4
LT
1436/*
1437 * Bind a name to a socket. Nothing much to do here since it's
1438 * the protocol's responsibility to handle the local address.
1439 *
1440 * We move the socket address to kernel space before we call
1441 * the protocol layer (having also checked the address is ok).
1442 */
1443
20f37034 1444SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1445{
1446 struct socket *sock;
230b1839 1447 struct sockaddr_storage address;
6cb153ca 1448 int err, fput_needed;
1da177e4 1449
89bddce5 1450 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1451 if (sock) {
230b1839 1452 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1453 if (err >= 0) {
1454 err = security_socket_bind(sock,
230b1839 1455 (struct sockaddr *)&address,
89bddce5 1456 addrlen);
6cb153ca
BL
1457 if (!err)
1458 err = sock->ops->bind(sock,
89bddce5 1459 (struct sockaddr *)
230b1839 1460 &address, addrlen);
1da177e4 1461 }
6cb153ca 1462 fput_light(sock->file, fput_needed);
89bddce5 1463 }
1da177e4
LT
1464 return err;
1465}
1466
1da177e4
LT
1467/*
1468 * Perform a listen. Basically, we allow the protocol to do anything
1469 * necessary for a listen, and if that works, we mark the socket as
1470 * ready for listening.
1471 */
1472
3e0fa65f 1473SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1474{
1475 struct socket *sock;
6cb153ca 1476 int err, fput_needed;
b8e1f9b5 1477 int somaxconn;
89bddce5
SH
1478
1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1480 if (sock) {
8efa6e93 1481 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1482 if ((unsigned)backlog > somaxconn)
1483 backlog = somaxconn;
1da177e4
LT
1484
1485 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1486 if (!err)
1487 err = sock->ops->listen(sock, backlog);
1da177e4 1488
6cb153ca 1489 fput_light(sock->file, fput_needed);
1da177e4
LT
1490 }
1491 return err;
1492}
1493
1da177e4
LT
1494/*
1495 * For accept, we attempt to create a new socket, set up the link
1496 * with the client, wake up the client, then return the new
1497 * connected fd. We collect the address of the connector in kernel
1498 * space and move it to user at the very end. This is unclean because
1499 * we open the socket then return an error.
1500 *
1501 * 1003.1g adds the ability to recvmsg() to query connection pending
1502 * status to recvmsg. We need to add that support in a way thats
1503 * clean when we restucture accept also.
1504 */
1505
20f37034
HC
1506SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1507 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1508{
1509 struct socket *sock, *newsock;
39d8c1b6 1510 struct file *newfile;
6cb153ca 1511 int err, len, newfd, fput_needed;
230b1839 1512 struct sockaddr_storage address;
1da177e4 1513
77d27200 1514 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1515 return -EINVAL;
1516
1517 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1518 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1519
6cb153ca 1520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1521 if (!sock)
1522 goto out;
1523
1524 err = -ENFILE;
89bddce5 1525 if (!(newsock = sock_alloc()))
1da177e4
LT
1526 goto out_put;
1527
1528 newsock->type = sock->type;
1529 newsock->ops = sock->ops;
1530
1da177e4
LT
1531 /*
1532 * We don't need try_module_get here, as the listening socket (sock)
1533 * has the protocol module (sock->ops->owner) held.
1534 */
1535 __module_get(newsock->ops->owner);
1536
aaca0bdc 1537 newfd = sock_alloc_fd(&newfile, flags & O_CLOEXEC);
39d8c1b6
DM
1538 if (unlikely(newfd < 0)) {
1539 err = newfd;
9a1875e6
DM
1540 sock_release(newsock);
1541 goto out_put;
39d8c1b6
DM
1542 }
1543
77d27200 1544 err = sock_attach_fd(newsock, newfile, flags & O_NONBLOCK);
39d8c1b6 1545 if (err < 0)
79f4f642 1546 goto out_fd_simple;
39d8c1b6 1547
a79af59e
FF
1548 err = security_socket_accept(sock, newsock);
1549 if (err)
39d8c1b6 1550 goto out_fd;
a79af59e 1551
1da177e4
LT
1552 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1553 if (err < 0)
39d8c1b6 1554 goto out_fd;
1da177e4
LT
1555
1556 if (upeer_sockaddr) {
230b1839 1557 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1558 &len, 2) < 0) {
1da177e4 1559 err = -ECONNABORTED;
39d8c1b6 1560 goto out_fd;
1da177e4 1561 }
230b1839
YH
1562 err = move_addr_to_user((struct sockaddr *)&address,
1563 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1564 if (err < 0)
39d8c1b6 1565 goto out_fd;
1da177e4
LT
1566 }
1567
1568 /* File flags are not inherited via accept() unlike another OSes. */
1569
39d8c1b6
DM
1570 fd_install(newfd, newfile);
1571 err = newfd;
1da177e4 1572
1da177e4 1573out_put:
6cb153ca 1574 fput_light(sock->file, fput_needed);
1da177e4
LT
1575out:
1576 return err;
79f4f642
AD
1577out_fd_simple:
1578 sock_release(newsock);
1579 put_filp(newfile);
1580 put_unused_fd(newfd);
1581 goto out_put;
39d8c1b6 1582out_fd:
9606a216 1583 fput(newfile);
39d8c1b6 1584 put_unused_fd(newfd);
1da177e4
LT
1585 goto out_put;
1586}
1587
20f37034
HC
1588SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1589 int __user *, upeer_addrlen)
aaca0bdc 1590{
de11defe 1591 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1592}
1593
1da177e4
LT
1594/*
1595 * Attempt to connect to a socket with the server address. The address
1596 * is in user space so we verify it is OK and move it to kernel space.
1597 *
1598 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1599 * break bindings
1600 *
1601 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1602 * other SEQPACKET protocols that take time to connect() as it doesn't
1603 * include the -EINPROGRESS status for such sockets.
1604 */
1605
20f37034
HC
1606SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1607 int, addrlen)
1da177e4
LT
1608{
1609 struct socket *sock;
230b1839 1610 struct sockaddr_storage address;
6cb153ca 1611 int err, fput_needed;
1da177e4 1612
6cb153ca 1613 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1614 if (!sock)
1615 goto out;
230b1839 1616 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1617 if (err < 0)
1618 goto out_put;
1619
89bddce5 1620 err =
230b1839 1621 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1622 if (err)
1623 goto out_put;
1624
230b1839 1625 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1626 sock->file->f_flags);
1627out_put:
6cb153ca 1628 fput_light(sock->file, fput_needed);
1da177e4
LT
1629out:
1630 return err;
1631}
1632
1633/*
1634 * Get the local address ('name') of a socket object. Move the obtained
1635 * name to user space.
1636 */
1637
20f37034
HC
1638SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1639 int __user *, usockaddr_len)
1da177e4
LT
1640{
1641 struct socket *sock;
230b1839 1642 struct sockaddr_storage address;
6cb153ca 1643 int len, err, fput_needed;
89bddce5 1644
6cb153ca 1645 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1646 if (!sock)
1647 goto out;
1648
1649 err = security_socket_getsockname(sock);
1650 if (err)
1651 goto out_put;
1652
230b1839 1653 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1654 if (err)
1655 goto out_put;
230b1839 1656 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1657
1658out_put:
6cb153ca 1659 fput_light(sock->file, fput_needed);
1da177e4
LT
1660out:
1661 return err;
1662}
1663
1664/*
1665 * Get the remote address ('name') of a socket object. Move the obtained
1666 * name to user space.
1667 */
1668
20f37034
HC
1669SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1670 int __user *, usockaddr_len)
1da177e4
LT
1671{
1672 struct socket *sock;
230b1839 1673 struct sockaddr_storage address;
6cb153ca 1674 int len, err, fput_needed;
1da177e4 1675
89bddce5
SH
1676 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1677 if (sock != NULL) {
1da177e4
LT
1678 err = security_socket_getpeername(sock);
1679 if (err) {
6cb153ca 1680 fput_light(sock->file, fput_needed);
1da177e4
LT
1681 return err;
1682 }
1683
89bddce5 1684 err =
230b1839 1685 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1686 1);
1da177e4 1687 if (!err)
230b1839 1688 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1689 usockaddr_len);
6cb153ca 1690 fput_light(sock->file, fput_needed);
1da177e4
LT
1691 }
1692 return err;
1693}
1694
1695/*
1696 * Send a datagram to a given address. We move the address into kernel
1697 * space and check the user space data area is readable before invoking
1698 * the protocol.
1699 */
1700
3e0fa65f
HC
1701SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1702 unsigned, flags, struct sockaddr __user *, addr,
1703 int, addr_len)
1da177e4
LT
1704{
1705 struct socket *sock;
230b1839 1706 struct sockaddr_storage address;
1da177e4
LT
1707 int err;
1708 struct msghdr msg;
1709 struct iovec iov;
6cb153ca 1710 int fput_needed;
6cb153ca 1711
de0fa95c
PE
1712 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1713 if (!sock)
4387ff75 1714 goto out;
6cb153ca 1715
89bddce5
SH
1716 iov.iov_base = buff;
1717 iov.iov_len = len;
1718 msg.msg_name = NULL;
1719 msg.msg_iov = &iov;
1720 msg.msg_iovlen = 1;
1721 msg.msg_control = NULL;
1722 msg.msg_controllen = 0;
1723 msg.msg_namelen = 0;
6cb153ca 1724 if (addr) {
230b1839 1725 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1726 if (err < 0)
1727 goto out_put;
230b1839 1728 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1729 msg.msg_namelen = addr_len;
1da177e4
LT
1730 }
1731 if (sock->file->f_flags & O_NONBLOCK)
1732 flags |= MSG_DONTWAIT;
1733 msg.msg_flags = flags;
1734 err = sock_sendmsg(sock, &msg, len);
1735
89bddce5 1736out_put:
de0fa95c 1737 fput_light(sock->file, fput_needed);
4387ff75 1738out:
1da177e4
LT
1739 return err;
1740}
1741
1742/*
89bddce5 1743 * Send a datagram down a socket.
1da177e4
LT
1744 */
1745
3e0fa65f
HC
1746SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1747 unsigned, flags)
1da177e4
LT
1748{
1749 return sys_sendto(fd, buff, len, flags, NULL, 0);
1750}
1751
1752/*
89bddce5 1753 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1754 * sender. We verify the buffers are writable and if needed move the
1755 * sender address from kernel to user space.
1756 */
1757
3e0fa65f
HC
1758SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1759 unsigned, flags, struct sockaddr __user *, addr,
1760 int __user *, addr_len)
1da177e4
LT
1761{
1762 struct socket *sock;
1763 struct iovec iov;
1764 struct msghdr msg;
230b1839 1765 struct sockaddr_storage address;
89bddce5 1766 int err, err2;
6cb153ca
BL
1767 int fput_needed;
1768
de0fa95c 1769 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1770 if (!sock)
de0fa95c 1771 goto out;
1da177e4 1772
89bddce5
SH
1773 msg.msg_control = NULL;
1774 msg.msg_controllen = 0;
1775 msg.msg_iovlen = 1;
1776 msg.msg_iov = &iov;
1777 iov.iov_len = size;
1778 iov.iov_base = ubuf;
230b1839
YH
1779 msg.msg_name = (struct sockaddr *)&address;
1780 msg.msg_namelen = sizeof(address);
1da177e4
LT
1781 if (sock->file->f_flags & O_NONBLOCK)
1782 flags |= MSG_DONTWAIT;
89bddce5 1783 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1784
89bddce5 1785 if (err >= 0 && addr != NULL) {
230b1839
YH
1786 err2 = move_addr_to_user((struct sockaddr *)&address,
1787 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1788 if (err2 < 0)
1789 err = err2;
1da177e4 1790 }
de0fa95c
PE
1791
1792 fput_light(sock->file, fput_needed);
4387ff75 1793out:
1da177e4
LT
1794 return err;
1795}
1796
1797/*
89bddce5 1798 * Receive a datagram from a socket.
1da177e4
LT
1799 */
1800
89bddce5
SH
1801asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1802 unsigned flags)
1da177e4
LT
1803{
1804 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1805}
1806
1807/*
1808 * Set a socket option. Because we don't know the option lengths we have
1809 * to pass the user mode parameter for the protocols to sort out.
1810 */
1811
20f37034
HC
1812SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1813 char __user *, optval, int, optlen)
1da177e4 1814{
6cb153ca 1815 int err, fput_needed;
1da177e4
LT
1816 struct socket *sock;
1817
1818 if (optlen < 0)
1819 return -EINVAL;
89bddce5
SH
1820
1821 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1822 if (sock != NULL) {
1823 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1824 if (err)
1825 goto out_put;
1da177e4
LT
1826
1827 if (level == SOL_SOCKET)
89bddce5
SH
1828 err =
1829 sock_setsockopt(sock, level, optname, optval,
1830 optlen);
1da177e4 1831 else
89bddce5
SH
1832 err =
1833 sock->ops->setsockopt(sock, level, optname, optval,
1834 optlen);
6cb153ca
BL
1835out_put:
1836 fput_light(sock->file, fput_needed);
1da177e4
LT
1837 }
1838 return err;
1839}
1840
1841/*
1842 * Get a socket option. Because we don't know the option lengths we have
1843 * to pass a user mode parameter for the protocols to sort out.
1844 */
1845
20f37034
HC
1846SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1847 char __user *, optval, int __user *, optlen)
1da177e4 1848{
6cb153ca 1849 int err, fput_needed;
1da177e4
LT
1850 struct socket *sock;
1851
89bddce5
SH
1852 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1853 if (sock != NULL) {
6cb153ca
BL
1854 err = security_socket_getsockopt(sock, level, optname);
1855 if (err)
1856 goto out_put;
1da177e4
LT
1857
1858 if (level == SOL_SOCKET)
89bddce5
SH
1859 err =
1860 sock_getsockopt(sock, level, optname, optval,
1861 optlen);
1da177e4 1862 else
89bddce5
SH
1863 err =
1864 sock->ops->getsockopt(sock, level, optname, optval,
1865 optlen);
6cb153ca
BL
1866out_put:
1867 fput_light(sock->file, fput_needed);
1da177e4
LT
1868 }
1869 return err;
1870}
1871
1da177e4
LT
1872/*
1873 * Shutdown a socket.
1874 */
1875
754fe8d2 1876SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1877{
6cb153ca 1878 int err, fput_needed;
1da177e4
LT
1879 struct socket *sock;
1880
89bddce5
SH
1881 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1882 if (sock != NULL) {
1da177e4 1883 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1884 if (!err)
1885 err = sock->ops->shutdown(sock, how);
1886 fput_light(sock->file, fput_needed);
1da177e4
LT
1887 }
1888 return err;
1889}
1890
/*
 * Helper macros yielding the address of a msghdr field in either the
 * 32-bit (compat) or the native layout; the fields used have the same
 * type (int / unsigned) on our platforms.
 *
 * They rely on two names being in scope where they are expanded: `flags'
 * (tested for MSG_CMSG_COMPAT) and a pointer named `<msg>_compat'.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1897
1da177e4
LT
1898/*
1899 * BSD sendmsg interface
1900 */
1901
3e0fa65f 1902SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1da177e4 1903{
89bddce5
SH
1904 struct compat_msghdr __user *msg_compat =
1905 (struct compat_msghdr __user *)msg;
1da177e4 1906 struct socket *sock;
230b1839 1907 struct sockaddr_storage address;
1da177e4 1908 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1909 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1910 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1911 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1912 unsigned char *ctl_buf = ctl;
1913 struct msghdr msg_sys;
1914 int err, ctl_len, iov_size, total_len;
6cb153ca 1915 int fput_needed;
89bddce5 1916
1da177e4
LT
1917 err = -EFAULT;
1918 if (MSG_CMSG_COMPAT & flags) {
1919 if (get_compat_msghdr(&msg_sys, msg_compat))
1920 return -EFAULT;
89bddce5
SH
1921 }
1922 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1923 return -EFAULT;
1924
6cb153ca 1925 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1926 if (!sock)
1da177e4
LT
1927 goto out;
1928
1929 /* do not move before msg_sys is valid */
1930 err = -EMSGSIZE;
1931 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1932 goto out_put;
1933
89bddce5 1934 /* Check whether to allocate the iovec area */
1da177e4
LT
1935 err = -ENOMEM;
1936 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1937 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1938 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1939 if (!iov)
1940 goto out_put;
1941 }
1942
1943 /* This will also move the address data into kernel space */
1944 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1945 err = verify_compat_iovec(&msg_sys, iov,
1946 (struct sockaddr *)&address,
1947 VERIFY_READ);
1da177e4 1948 } else
230b1839
YH
1949 err = verify_iovec(&msg_sys, iov,
1950 (struct sockaddr *)&address,
1951 VERIFY_READ);
89bddce5 1952 if (err < 0)
1da177e4
LT
1953 goto out_freeiov;
1954 total_len = err;
1955
1956 err = -ENOBUFS;
1957
1958 if (msg_sys.msg_controllen > INT_MAX)
1959 goto out_freeiov;
89bddce5 1960 ctl_len = msg_sys.msg_controllen;
1da177e4 1961 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1962 err =
1963 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1964 sizeof(ctl));
1da177e4
LT
1965 if (err)
1966 goto out_freeiov;
1967 ctl_buf = msg_sys.msg_control;
8920e8f9 1968 ctl_len = msg_sys.msg_controllen;
1da177e4 1969 } else if (ctl_len) {
89bddce5 1970 if (ctl_len > sizeof(ctl)) {
1da177e4 1971 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1972 if (ctl_buf == NULL)
1da177e4
LT
1973 goto out_freeiov;
1974 }
1975 err = -EFAULT;
1976 /*
1977 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1978 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1979 * checking falls down on this.
1980 */
89bddce5
SH
1981 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1982 ctl_len))
1da177e4
LT
1983 goto out_freectl;
1984 msg_sys.msg_control = ctl_buf;
1985 }
1986 msg_sys.msg_flags = flags;
1987
1988 if (sock->file->f_flags & O_NONBLOCK)
1989 msg_sys.msg_flags |= MSG_DONTWAIT;
1990 err = sock_sendmsg(sock, &msg_sys, total_len);
1991
1992out_freectl:
89bddce5 1993 if (ctl_buf != ctl)
1da177e4
LT
1994 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1995out_freeiov:
1996 if (iov != iovstack)
1997 sock_kfree_s(sock->sk, iov, iov_size);
1998out_put:
6cb153ca 1999 fput_light(sock->file, fput_needed);
89bddce5 2000out:
1da177e4
LT
2001 return err;
2002}
2003
a2e27255
ACM
2004static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
2005 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 2006{
89bddce5
SH
2007 struct compat_msghdr __user *msg_compat =
2008 (struct compat_msghdr __user *)msg;
1da177e4 2009 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2010 struct iovec *iov = iovstack;
1da177e4
LT
2011 unsigned long cmsg_ptr;
2012 int err, iov_size, total_len, len;
2013
2014 /* kernel mode address */
230b1839 2015 struct sockaddr_storage addr;
1da177e4
LT
2016
2017 /* user mode address pointers */
2018 struct sockaddr __user *uaddr;
2019 int __user *uaddr_len;
89bddce5 2020
1da177e4 2021 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2022 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2023 return -EFAULT;
89bddce5 2024 }
a2e27255 2025 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 2026 return -EFAULT;
1da177e4 2027
1da177e4 2028 err = -EMSGSIZE;
a2e27255
ACM
2029 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2030 goto out;
89bddce5
SH
2031
2032 /* Check whether to allocate the iovec area */
1da177e4 2033 err = -ENOMEM;
a2e27255
ACM
2034 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
2035 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
2036 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
2037 if (!iov)
a2e27255 2038 goto out;
1da177e4
LT
2039 }
2040
2041 /*
89bddce5
SH
2042 * Save the user-mode address (verify_iovec will change the
2043 * kernel msghdr to use the kernel address space)
1da177e4 2044 */
89bddce5 2045
a2e27255 2046 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
2047 uaddr_len = COMPAT_NAMELEN(msg);
2048 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2049 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
2050 (struct sockaddr *)&addr,
2051 VERIFY_WRITE);
1da177e4 2052 } else
a2e27255 2053 err = verify_iovec(msg_sys, iov,
230b1839
YH
2054 (struct sockaddr *)&addr,
2055 VERIFY_WRITE);
1da177e4
LT
2056 if (err < 0)
2057 goto out_freeiov;
89bddce5 2058 total_len = err;
1da177e4 2059
a2e27255
ACM
2060 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2061 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2062
1da177e4
LT
2063 if (sock->file->f_flags & O_NONBLOCK)
2064 flags |= MSG_DONTWAIT;
a2e27255
ACM
2065 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2066 total_len, flags);
1da177e4
LT
2067 if (err < 0)
2068 goto out_freeiov;
2069 len = err;
2070
2071 if (uaddr != NULL) {
230b1839 2072 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2073 msg_sys->msg_namelen, uaddr,
89bddce5 2074 uaddr_len);
1da177e4
LT
2075 if (err < 0)
2076 goto out_freeiov;
2077 }
a2e27255 2078 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2079 COMPAT_FLAGS(msg));
1da177e4
LT
2080 if (err)
2081 goto out_freeiov;
2082 if (MSG_CMSG_COMPAT & flags)
a2e27255 2083 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2084 &msg_compat->msg_controllen);
2085 else
a2e27255 2086 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2087 &msg->msg_controllen);
2088 if (err)
2089 goto out_freeiov;
2090 err = len;
2091
2092out_freeiov:
2093 if (iov != iovstack)
2094 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2095out:
2096 return err;
2097}
2098
2099/*
2100 * BSD recvmsg interface
2101 */
2102
2103SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2104 unsigned int, flags)
2105{
2106 int fput_needed, err;
2107 struct msghdr msg_sys;
2108 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2109
2110 if (!sock)
2111 goto out;
2112
2113 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2114
6cb153ca 2115 fput_light(sock->file, fput_needed);
1da177e4
LT
2116out:
2117 return err;
2118}
2119
a2e27255
ACM
2120/*
2121 * Linux recvmmsg interface
2122 */
2123
2124int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2125 unsigned int flags, struct timespec *timeout)
2126{
2127 int fput_needed, err, datagrams;
2128 struct socket *sock;
2129 struct mmsghdr __user *entry;
2130 struct msghdr msg_sys;
2131 struct timespec end_time;
2132
2133 if (timeout &&
2134 poll_select_set_timeout(&end_time, timeout->tv_sec,
2135 timeout->tv_nsec))
2136 return -EINVAL;
2137
2138 datagrams = 0;
2139
2140 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2141 if (!sock)
2142 return err;
2143
2144 err = sock_error(sock->sk);
2145 if (err)
2146 goto out_put;
2147
2148 entry = mmsg;
2149
2150 while (datagrams < vlen) {
2151 /*
2152 * No need to ask LSM for more than the first datagram.
2153 */
2154 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2155 &msg_sys, flags, datagrams);
2156 if (err < 0)
2157 break;
2158 err = put_user(err, &entry->msg_len);
2159 if (err)
2160 break;
2161 ++entry;
2162 ++datagrams;
2163
2164 if (timeout) {
2165 ktime_get_ts(timeout);
2166 *timeout = timespec_sub(end_time, *timeout);
2167 if (timeout->tv_sec < 0) {
2168 timeout->tv_sec = timeout->tv_nsec = 0;
2169 break;
2170 }
2171
2172 /* Timeout, return less than vlen datagrams */
2173 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2174 break;
2175 }
2176
2177 /* Out of band data, return right away */
2178 if (msg_sys.msg_flags & MSG_OOB)
2179 break;
2180 }
2181
2182out_put:
2183 fput_light(sock->file, fput_needed);
1da177e4 2184
a2e27255
ACM
2185 if (err == 0)
2186 return datagrams;
2187
2188 if (datagrams != 0) {
2189 /*
2190 * We may return less entries than requested (vlen) if the
2191 * sock is non block and there aren't enough datagrams...
2192 */
2193 if (err != -EAGAIN) {
2194 /*
2195 * ... or if recvmsg returns an error after we
2196 * received some datagrams, where we record the
2197 * error to return on the next call or if the
2198 * app asks about it using getsockopt(SO_ERROR).
2199 */
2200 sock->sk->sk_err = -err;
2201 }
2202
2203 return datagrams;
2204 }
2205
2206 return err;
2207}
2208
2209SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2210 unsigned int, vlen, unsigned int, flags,
2211 struct timespec __user *, timeout)
2212{
2213 int datagrams;
2214 struct timespec timeout_sys;
2215
2216 if (!timeout)
2217 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2218
2219 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2220 return -EFAULT;
2221
2222 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2223
2224 if (datagrams > 0 &&
2225 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2226 datagrams = -EFAULT;
2227
2228 return datagrams;
2229}
2230
2231#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the
 * socketcall number.  Entry 0 is never used: sys_socketcall rejects
 * call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5)	/* 19 */
};

#undef AL
2242
2243/*
89bddce5 2244 * System call vectors.
1da177e4
LT
2245 *
2246 * Argument checking cleaned up. Saved 20% in size.
2247 * This function doesn't need to set the kernel lock because
89bddce5 2248 * it is set by the callees.
1da177e4
LT
2249 */
2250
3e0fa65f 2251SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4
LT
2252{
2253 unsigned long a[6];
89bddce5 2254 unsigned long a0, a1;
1da177e4 2255 int err;
47379052 2256 unsigned int len;
1da177e4 2257
a2e27255 2258 if (call < 1 || call > SYS_RECVMMSG)
1da177e4
LT
2259 return -EINVAL;
2260
47379052
AV
2261 len = nargs[call];
2262 if (len > sizeof(a))
2263 return -EINVAL;
2264
1da177e4 2265 /* copy_from_user should be SMP safe. */
47379052 2266 if (copy_from_user(a, args, len))
1da177e4 2267 return -EFAULT;
3ec3b2fb 2268
f3298dc4 2269 audit_socketcall(nargs[call] / sizeof(unsigned long), a);
3ec3b2fb 2270
89bddce5
SH
2271 a0 = a[0];
2272 a1 = a[1];
2273
2274 switch (call) {
2275 case SYS_SOCKET:
2276 err = sys_socket(a0, a1, a[2]);
2277 break;
2278 case SYS_BIND:
2279 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2280 break;
2281 case SYS_CONNECT:
2282 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2283 break;
2284 case SYS_LISTEN:
2285 err = sys_listen(a0, a1);
2286 break;
2287 case SYS_ACCEPT:
de11defe
UD
2288 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2289 (int __user *)a[2], 0);
89bddce5
SH
2290 break;
2291 case SYS_GETSOCKNAME:
2292 err =
2293 sys_getsockname(a0, (struct sockaddr __user *)a1,
2294 (int __user *)a[2]);
2295 break;
2296 case SYS_GETPEERNAME:
2297 err =
2298 sys_getpeername(a0, (struct sockaddr __user *)a1,
2299 (int __user *)a[2]);
2300 break;
2301 case SYS_SOCKETPAIR:
2302 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2303 break;
2304 case SYS_SEND:
2305 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2306 break;
2307 case SYS_SENDTO:
2308 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2309 (struct sockaddr __user *)a[4], a[5]);
2310 break;
2311 case SYS_RECV:
2312 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2313 break;
2314 case SYS_RECVFROM:
2315 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2316 (struct sockaddr __user *)a[4],
2317 (int __user *)a[5]);
2318 break;
2319 case SYS_SHUTDOWN:
2320 err = sys_shutdown(a0, a1);
2321 break;
2322 case SYS_SETSOCKOPT:
2323 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2324 break;
2325 case SYS_GETSOCKOPT:
2326 err =
2327 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2328 (int __user *)a[4]);
2329 break;
2330 case SYS_SENDMSG:
2331 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2332 break;
2333 case SYS_RECVMSG:
2334 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2335 break;
a2e27255
ACM
2336 case SYS_RECVMMSG:
2337 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2338 (struct timespec __user *)a[4]);
2339 break;
de11defe
UD
2340 case SYS_ACCEPT4:
2341 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2342 (int __user *)a[2], a[3]);
aaca0bdc 2343 break;
89bddce5
SH
2344 default:
2345 err = -EINVAL;
2346 break;
1da177e4
LT
2347 }
2348 return err;
2349}
2350
89bddce5 2351#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2352
55737fda
SH
2353/**
2354 * sock_register - add a socket protocol handler
2355 * @ops: description of protocol
2356 *
1da177e4
LT
2357 * This function is called by a protocol handler that wants to
2358 * advertise its address family, and have it linked into the
55737fda
SH
2359 * socket interface. The value ops->family coresponds to the
2360 * socket system call protocol family.
1da177e4 2361 */
f0fd27d4 2362int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2363{
2364 int err;
2365
2366 if (ops->family >= NPROTO) {
89bddce5
SH
2367 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2368 NPROTO);
1da177e4
LT
2369 return -ENOBUFS;
2370 }
55737fda
SH
2371
2372 spin_lock(&net_family_lock);
2373 if (net_families[ops->family])
2374 err = -EEXIST;
2375 else {
89bddce5 2376 net_families[ops->family] = ops;
1da177e4
LT
2377 err = 0;
2378 }
55737fda
SH
2379 spin_unlock(&net_family_lock);
2380
89bddce5 2381 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2382 return err;
2383}
2384
55737fda
SH
2385/**
2386 * sock_unregister - remove a protocol handler
2387 * @family: protocol family to remove
2388 *
1da177e4
LT
2389 * This function is called by a protocol handler that wants to
2390 * remove its address family, and have it unlinked from the
55737fda
SH
2391 * new socket creation.
2392 *
2393 * If protocol handler is a module, then it can use module reference
2394 * counts to protect against new references. If protocol handler is not
2395 * a module then it needs to provide its own protection in
2396 * the ops->create routine.
1da177e4 2397 */
f0fd27d4 2398void sock_unregister(int family)
1da177e4 2399{
f0fd27d4 2400 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2401
55737fda 2402 spin_lock(&net_family_lock);
89bddce5 2403 net_families[family] = NULL;
55737fda
SH
2404 spin_unlock(&net_family_lock);
2405
2406 synchronize_rcu();
2407
89bddce5 2408 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2409}
2410
77d76ea3 2411static int __init sock_init(void)
1da177e4
LT
2412{
2413 /*
89bddce5 2414 * Initialize sock SLAB cache.
1da177e4 2415 */
89bddce5 2416
1da177e4
LT
2417 sk_init();
2418
1da177e4 2419 /*
89bddce5 2420 * Initialize skbuff SLAB cache
1da177e4
LT
2421 */
2422 skb_init();
1da177e4
LT
2423
2424 /*
89bddce5 2425 * Initialize the protocols module.
1da177e4
LT
2426 */
2427
2428 init_inodecache();
2429 register_filesystem(&sock_fs_type);
2430 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2431
2432 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2433 */
2434
2435#ifdef CONFIG_NETFILTER
2436 netfilter_init();
2437#endif
cbeb321a
DM
2438
2439 return 0;
1da177e4
LT
2440}
2441
77d76ea3
AK
2442core_initcall(sock_init); /* early initcall */
2443
1da177e4
LT
2444#ifdef CONFIG_PROC_FS
2445void socket_seq_show(struct seq_file *seq)
2446{
2447 int cpu;
2448 int counter = 0;
2449
6f912042 2450 for_each_possible_cpu(cpu)
89bddce5 2451 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2452
2453 /* It can be negative, by the way. 8) */
2454 if (counter < 0)
2455 counter = 0;
2456
2457 seq_printf(seq, "sockets: used %d\n", counter);
2458}
89bddce5 2459#endif /* CONFIG_PROC_FS */
1da177e4 2460
89bbfc95
SP
2461#ifdef CONFIG_COMPAT
2462static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2463 unsigned long arg)
89bbfc95
SP
2464{
2465 struct socket *sock = file->private_data;
2466 int ret = -ENOIOCTLCMD;
87de87d5
DM
2467 struct sock *sk;
2468 struct net *net;
2469
2470 sk = sock->sk;
2471 net = sock_net(sk);
89bbfc95
SP
2472
2473 if (sock->ops->compat_ioctl)
2474 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2475
87de87d5
DM
2476 if (ret == -ENOIOCTLCMD &&
2477 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
2478 ret = compat_wext_handle_ioctl(net, cmd, arg);
2479
89bbfc95
SP
2480 return ret;
2481}
2482#endif
2483
ac5a488e
SS
2484int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2485{
2486 return sock->ops->bind(sock, addr, addrlen);
2487}
2488
2489int kernel_listen(struct socket *sock, int backlog)
2490{
2491 return sock->ops->listen(sock, backlog);
2492}
2493
2494int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2495{
2496 struct sock *sk = sock->sk;
2497 int err;
2498
2499 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2500 newsock);
2501 if (err < 0)
2502 goto done;
2503
2504 err = sock->ops->accept(sock, *newsock, flags);
2505 if (err < 0) {
2506 sock_release(*newsock);
fa8705b0 2507 *newsock = NULL;
ac5a488e
SS
2508 goto done;
2509 }
2510
2511 (*newsock)->ops = sock->ops;
1b08534e 2512 __module_get((*newsock)->ops->owner);
ac5a488e
SS
2513
2514done:
2515 return err;
2516}
2517
2518int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2519 int flags)
ac5a488e
SS
2520{
2521 return sock->ops->connect(sock, addr, addrlen, flags);
2522}
2523
2524int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2525 int *addrlen)
2526{
2527 return sock->ops->getname(sock, addr, addrlen, 0);
2528}
2529
2530int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2531 int *addrlen)
2532{
2533 return sock->ops->getname(sock, addr, addrlen, 1);
2534}
2535
2536int kernel_getsockopt(struct socket *sock, int level, int optname,
2537 char *optval, int *optlen)
2538{
2539 mm_segment_t oldfs = get_fs();
2540 int err;
2541
2542 set_fs(KERNEL_DS);
2543 if (level == SOL_SOCKET)
2544 err = sock_getsockopt(sock, level, optname, optval, optlen);
2545 else
2546 err = sock->ops->getsockopt(sock, level, optname, optval,
2547 optlen);
2548 set_fs(oldfs);
2549 return err;
2550}
2551
2552int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 2553 char *optval, unsigned int optlen)
ac5a488e
SS
2554{
2555 mm_segment_t oldfs = get_fs();
2556 int err;
2557
2558 set_fs(KERNEL_DS);
2559 if (level == SOL_SOCKET)
2560 err = sock_setsockopt(sock, level, optname, optval, optlen);
2561 else
2562 err = sock->ops->setsockopt(sock, level, optname, optval,
2563 optlen);
2564 set_fs(oldfs);
2565 return err;
2566}
2567
2568int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2569 size_t size, int flags)
2570{
2571 if (sock->ops->sendpage)
2572 return sock->ops->sendpage(sock, page, offset, size, flags);
2573
2574 return sock_no_sendpage(sock, page, offset, size, flags);
2575}
2576
2577int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2578{
2579 mm_segment_t oldfs = get_fs();
2580 int err;
2581
2582 set_fs(KERNEL_DS);
2583 err = sock->ops->ioctl(sock, cmd, arg);
2584 set_fs(oldfs);
2585
2586 return err;
2587}
2588
91cf45f0
TM
2589int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
2590{
2591 return sock->ops->shutdown(sock, how);
2592}
2593
1da177e4
LT
/* Core socket-layer entry points made available to other kernel code. */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_* helpers. */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
EXPORT_SYMBOL(kernel_sock_shutdown);