/*
 * Imported from a git web viewer; blame context:
 *   [PATCH] splice: page stealing needs to wait_on_page_writeback()
 *   exynos8895/android_kernel_samsung_universal8895.git : fs/pipe.c
 */
/*
 * linux/fs/pipe.c
 *
 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

/*
 * Drop the inode semaphore and wait for a pipe event, atomically.
 *
 * Caller must hold PIPE_MUTEX(*inode); it is released for the duration
 * of the sleep and reacquired before returning.  The prepare_to_wait()
 * happens BEFORE the unlock so a wakeup between unlock and schedule()
 * is not lost.
 */
void pipe_wait(struct inode * inode)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
	mutex_unlock(PIPE_MUTEX(*inode));
	schedule();
	finish_wait(PIPE_WAIT(*inode), &wait);
	mutex_lock(PIPE_MUTEX(*inode));
}

858119e1 54static int
1da177e4
LT
55pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56{
57 unsigned long copy;
58
59 while (len > 0) {
60 while (!iov->iov_len)
61 iov++;
62 copy = min_t(unsigned long, len, iov->iov_len);
63
64 if (copy_from_user(to, iov->iov_base, copy))
65 return -EFAULT;
66 to += copy;
67 len -= copy;
68 iov->iov_base += copy;
69 iov->iov_len -= copy;
70 }
71 return 0;
72}
73
858119e1 74static int
1da177e4
LT
75pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76{
77 unsigned long copy;
78
79 while (len > 0) {
80 while (!iov->iov_len)
81 iov++;
82 copy = min_t(unsigned long, len, iov->iov_len);
83
84 if (copy_to_user(iov->iov_base, from, copy))
85 return -EFAULT;
86 from += copy;
87 len -= copy;
88 iov->iov_base += copy;
89 iov->iov_len -= copy;
90 }
91 return 0;
92}
93
/*
 * Release an anonymous pipe buffer's page once the reader has drained it.
 * The pipe keeps a one-deep allocation cache (info->tmp_page) so the
 * common write/read ping-pong reuses a single page instead of hitting
 * the page allocator every time.
 */
static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache
	 */
	if (page_count(page) == 1 && !info->tmp_page) {
		info->tmp_page = page;
		return;
	}

	/*
	 * Otherwise just release our reference to it
	 */
	page_cache_release(page);
}

/* Map the buffer's page into kernel address space (kmap may sleep). */
static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return kmap(buf->page);
}

/* Undo anon_pipe_buf_map(). */
static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	kunmap(buf->page);
}

/*
 * Attempt to steal the page backing this buffer (used by splice).
 * Anonymous pipe pages have no other owner, so stealing always
 * succeeds: return 0 for success.
 */
static int anon_pipe_buf_steal(struct pipe_inode_info *info,
			       struct pipe_buffer *buf)
{
	return 0;
}

/*
 * Buffer operations for ordinary (anonymous) pipe data pages.
 * can_merge = 1 lets pipe_writev() append a small write onto the
 * tail buffer instead of consuming a new slot.
 */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
};

/*
 * Read up to iov_length() bytes from the pipe into the user iovec.
 *
 * Runs under PIPE_MUTEX.  Loops draining ring buffers; when the ring is
 * empty it either returns what it has, fails with -EAGAIN (O_NONBLOCK),
 * or sleeps in pipe_wait().  Returns bytes read, 0 at EOF (no writers),
 * or a negative errno.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;
	for (;;) {
		int bufs = info->nrbufs;
		if (bufs) {
			int curbuf = info->curbuf;
			struct pipe_buffer *buf = info->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				/* Preserve any bytes already delivered. */
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(info, buf);
			if (unlikely(error)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				/* Buffer fully drained: release it and
				 * advance the ring head. */
				buf->ops = NULL;
				ops->release(info, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				info->curbuf = curbuf;
				info->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		/* Ring empty and no writers left: EOF (ret may be 0). */
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* Tell sleeping writers a slot opened up before
			 * we go to sleep ourselves. */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode);
	}
	mutex_unlock(PIPE_MUTEX(*inode));
	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

236static ssize_t
237pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
238{
239 struct iovec iov = { .iov_base = buf, .iov_len = count };
240 return pipe_readv(filp, &iov, 1, ppos);
241}
242
/*
 * Write the user iovec into the pipe.
 *
 * Runs under PIPE_MUTEX.  First tries to append a sub-page tail onto the
 * last buffer (if its ops allow merging), then fills fresh PAGE_SIZE
 * buffers until done, the ring is full (sleep or -EAGAIN), or an error
 * occurs.  Raises SIGPIPE/-EPIPE when there are no readers.  Returns
 * bytes written or a negative errno.
 */
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;

	if (!PIPE_READERS(*inode)) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (info->nrbufs && chars != 0) {
		int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
		struct pipe_buffer *buf = info->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;
		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			void *addr;
			int error;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				error = PTR_ERR(addr);
				goto out;
			}
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars);
			ops->unmap(info, buf);
			ret = error;
			do_wakeup = 1;
			if (error)
				goto out;
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;
		/* Re-check readers each iteration: they may vanish while
		 * we sleep in pipe_wait(). */
		if (!PIPE_READERS(*inode)) {
			send_sig(SIGPIPE, current, 0);
			if (!ret) ret = -EPIPE;
			break;
		}
		bufs = info->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = info->bufs + newbuf;
			struct page *page = info->tmp_page;
			int error;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				info->tmp_page = page;
			}
			/* Always wakeup, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
			kunmap(page);
			if (unlikely(error)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			info->nrbufs = ++bufs;
			/* Page is now owned by the ring, not the cache. */
			info->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret) ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		/* Let readers know a writer sleeps in-kernel (enables the
		 * POSIX-safe syscall merging on the read side). */
		PIPE_WAITING_WRITERS(*inode)++;
		pipe_wait(inode);
		PIPE_WAITING_WRITERS(*inode)--;
	}
out:
	mutex_unlock(PIPE_MUTEX(*inode));
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}

384static ssize_t
385pipe_write(struct file *filp, const char __user *buf,
386 size_t count, loff_t *ppos)
387{
388 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
389 return pipe_writev(filp, &iov, 1, ppos);
390}
391
/* ->read stub for a pipe end opened write-only: always -EBADF. */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

/* ->write stub for a pipe end opened read-only: always -EBADF. */
static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

404static int
405pipe_ioctl(struct inode *pino, struct file *filp,
406 unsigned int cmd, unsigned long arg)
407{
408 struct inode *inode = filp->f_dentry->d_inode;
409 struct pipe_inode_info *info;
410 int count, buf, nrbufs;
411
412 switch (cmd) {
413 case FIONREAD:
1b1dcc1b 414 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
415 info = inode->i_pipe;
416 count = 0;
417 buf = info->curbuf;
418 nrbufs = info->nrbufs;
419 while (--nrbufs >= 0) {
420 count += info->bufs[buf].len;
421 buf = (buf+1) & (PIPE_BUFFERS-1);
422 }
1b1dcc1b 423 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
424 return put_user(count, (int __user *)arg);
425 default:
426 return -EINVAL;
427 }
428}
429
/*
 * Poll a pipe end.  No kernel lock held - fine: nrbufs is read once
 * without the mutex; a stale snapshot only yields a spurious or missed
 * wakeup, which poll semantics tolerate.
 */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, PIPE_WAIT(*inode), wait);

	/* Reading only -- no need for acquiring the semaphore. */
	nrbufs = info->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		/* f_version vs WCOUNTER: appears to detect writers that
		 * came and went since this reader opened (FIFO hangup
		 * detection) -- NOTE(review): confirm against fs/fifo.c. */
		if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!PIPE_READERS(*inode))
			mask |= POLLERR;
	}

	return mask;
}

/*
 * Common release path: drop 'decr' reader and 'decw' writer references.
 * Frees the pipe when the last user goes away; otherwise wakes the
 * remaining sleepers so they can notice EOF/EPIPE conditions.
 */
static int
pipe_release(struct inode *inode, int decr, int decw)
{
	mutex_lock(PIPE_MUTEX(*inode));
	PIPE_READERS(*inode) -= decr;
	PIPE_WRITERS(*inode) -= decw;
	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	mutex_unlock(PIPE_MUTEX(*inode));

	return 0;
}

481static int
482pipe_read_fasync(int fd, struct file *filp, int on)
483{
484 struct inode *inode = filp->f_dentry->d_inode;
485 int retval;
486
1b1dcc1b 487 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 488 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
1b1dcc1b 489 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
490
491 if (retval < 0)
492 return retval;
493
494 return 0;
495}
496
497
498static int
499pipe_write_fasync(int fd, struct file *filp, int on)
500{
501 struct inode *inode = filp->f_dentry->d_inode;
502 int retval;
503
1b1dcc1b 504 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 505 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
1b1dcc1b 506 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
507
508 if (retval < 0)
509 return retval;
510
511 return 0;
512}
513
514
515static int
516pipe_rdwr_fasync(int fd, struct file *filp, int on)
517{
518 struct inode *inode = filp->f_dentry->d_inode;
519 int retval;
520
1b1dcc1b 521 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
522
523 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
524
525 if (retval >= 0)
526 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
527
1b1dcc1b 528 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
529
530 if (retval < 0)
531 return retval;
532
533 return 0;
534}
535
536
/* ->release for the read end: drop fasync registration, then one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, /*decr=*/1, /*decw=*/0);
}

/* ->release for the write end: drop fasync registration, then one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, /*decr=*/0, /*decw=*/1);
}

551static int
552pipe_rdwr_release(struct inode *inode, struct file *filp)
553{
554 int decr, decw;
555
556 pipe_rdwr_fasync(-1, filp, 0);
557 decr = (filp->f_mode & FMODE_READ) != 0;
558 decw = (filp->f_mode & FMODE_WRITE) != 0;
559 return pipe_release(inode, decr, decw);
560}
561
562static int
563pipe_read_open(struct inode *inode, struct file *filp)
564{
565 /* We could have perhaps used atomic_t, but this and friends
566 below are the only places. So it doesn't seem worthwhile. */
1b1dcc1b 567 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 568 PIPE_READERS(*inode)++;
1b1dcc1b 569 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
570
571 return 0;
572}
573
574static int
575pipe_write_open(struct inode *inode, struct file *filp)
576{
1b1dcc1b 577 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 578 PIPE_WRITERS(*inode)++;
1b1dcc1b 579 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
580
581 return 0;
582}
583
584static int
585pipe_rdwr_open(struct inode *inode, struct file *filp)
586{
1b1dcc1b 587 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
588 if (filp->f_mode & FMODE_READ)
589 PIPE_READERS(*inode)++;
590 if (filp->f_mode & FMODE_WRITE)
591 PIPE_WRITERS(*inode)++;
1b1dcc1b 592 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
593
594 return 0;
595}
596
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
/* FIFO opened read-only: writes rejected via bad_pipe_w. */
const struct file_operations read_fifo_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = bad_pipe_w,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_read_open,
	.release = pipe_read_release,
	.fasync = pipe_read_fasync,
};

/* FIFO opened write-only: reads rejected via bad_pipe_r. */
const struct file_operations write_fifo_fops = {
	.llseek = no_llseek,
	.read = bad_pipe_r,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_write_open,
	.release = pipe_write_release,
	.fasync = pipe_write_fasync,
};

/* FIFO opened read/write: both directions allowed. */
const struct file_operations rdwr_fifo_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_rdwr_open,
	.release = pipe_rdwr_release,
	.fasync = pipe_rdwr_fasync,
};

/* Anonymous pipe, read end (installed on fd[0] by do_pipe()). */
static struct file_operations read_pipe_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = bad_pipe_w,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_read_open,
	.release = pipe_read_release,
	.fasync = pipe_read_fasync,
};

/* Anonymous pipe, write end (installed on fd[1] by do_pipe()). */
static struct file_operations write_pipe_fops = {
	.llseek = no_llseek,
	.read = bad_pipe_r,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_write_open,
	.release = pipe_write_release,
	.fasync = pipe_write_fasync,
};

/* Anonymous pipe inode default ops (set in get_pipe_inode()). */
static struct file_operations rdwr_pipe_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_rdwr_open,
	.release = pipe_rdwr_release,
	.fasync = pipe_rdwr_fasync,
};

675void free_pipe_info(struct inode *inode)
676{
677 int i;
678 struct pipe_inode_info *info = inode->i_pipe;
679
680 inode->i_pipe = NULL;
681 for (i = 0; i < PIPE_BUFFERS; i++) {
682 struct pipe_buffer *buf = info->bufs + i;
683 if (buf->ops)
684 buf->ops->release(info, buf);
685 }
686 if (info->tmp_page)
687 __free_page(info->tmp_page);
688 kfree(info);
689}
690
691struct inode* pipe_new(struct inode* inode)
692{
693 struct pipe_inode_info *info;
694
11b0b5ab 695 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
1da177e4
LT
696 if (!info)
697 goto fail_page;
1da177e4
LT
698 inode->i_pipe = info;
699
700 init_waitqueue_head(PIPE_WAIT(*inode));
701 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
702
703 return inode;
704fail_page:
705 return NULL;
706}
707
/* The internal pipefs mount; pipe inodes live on its superblock. */
static struct vfsmount *pipe_mnt __read_mostly;

/* Pipe dentries are never cached: always delete on last dput. */
static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete = pipefs_delete_dentry,
};

/*
 * Allocate a pipefs inode with an attached pipe, one reader and one
 * writer pre-accounted (do_pipe() creates both ends at once).
 * Returns NULL on failure.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);

	if (!inode)
		goto fail_inode;

	if(!pipe_new(inode))
		goto fail_iput;
	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;
	return inode;

fail_iput:
	iput(inode);
fail_inode:
	return NULL;
}

/*
 * Implement the pipe(2) system call body: create a pipe inode plus two
 * struct files sharing one dentry, and install them as fd[0] (read end)
 * and fd[1] (write end).  Returns 0 on success or a negative errno,
 * unwinding every resource acquired so far via the goto ladder.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i,j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/* Nested mntget(): both files need their own mount reference. */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

/* Mount callback: pipefs is a pseudo filesystem named "pipe:". */
static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
					 int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
}

/* Filesystem type for the kernel-internal pipefs mount. */
static struct file_system_type pipe_fs_type = {
	.name = "pipefs",
	.get_sb = pipefs_get_sb,
	.kill_sb = kill_anon_super,
};

849static int __init init_pipe_fs(void)
850{
851 int err = register_filesystem(&pipe_fs_type);
852 if (!err) {
853 pipe_mnt = kern_mount(&pipe_fs_type);
854 if (IS_ERR(pipe_mnt)) {
855 err = PTR_ERR(pipe_mnt);
856 unregister_filesystem(&pipe_fs_type);
857 }
858 }
859 return err;
860}
861
/* Unregister pipefs and drop the internal mount reference. */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);