Merge branch 'rwsem-optimizations'
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / fuse / dev.c
1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/init.h>
12 #include <linux/module.h>
13 #include <linux/poll.h>
14 #include <linux/uio.h>
15 #include <linux/miscdevice.h>
16 #include <linux/pagemap.h>
17 #include <linux/file.h>
18 #include <linux/slab.h>
19 #include <linux/pipe_fs_i.h>
20 #include <linux/swap.h>
21 #include <linux/splice.h>
22
23 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
24 MODULE_ALIAS("devname:fuse");
25
26 static struct kmem_cache *fuse_req_cachep;
27
28 static struct fuse_conn *fuse_get_conn(struct file *file)
29 {
30 /*
31 * Lockless access is OK, because file->private data is set
32 * once during mount and is valid until the file is released.
33 */
34 return file->private_data;
35 }
36
37 static void fuse_request_init(struct fuse_req *req, struct page **pages,
38 struct fuse_page_desc *page_descs,
39 unsigned npages)
40 {
41 memset(req, 0, sizeof(*req));
42 memset(pages, 0, sizeof(*pages) * npages);
43 memset(page_descs, 0, sizeof(*page_descs) * npages);
44 INIT_LIST_HEAD(&req->list);
45 INIT_LIST_HEAD(&req->intr_entry);
46 init_waitqueue_head(&req->waitq);
47 atomic_set(&req->count, 1);
48 req->pages = pages;
49 req->page_descs = page_descs;
50 req->max_pages = npages;
51 }
52
53 static struct fuse_req *__fuse_request_alloc(unsigned npages, gfp_t flags)
54 {
55 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, flags);
56 if (req) {
57 struct page **pages;
58 struct fuse_page_desc *page_descs;
59
60 if (npages <= FUSE_REQ_INLINE_PAGES) {
61 pages = req->inline_pages;
62 page_descs = req->inline_page_descs;
63 } else {
64 pages = kmalloc(sizeof(struct page *) * npages, flags);
65 page_descs = kmalloc(sizeof(struct fuse_page_desc) *
66 npages, flags);
67 }
68
69 if (!pages || !page_descs) {
70 kfree(pages);
71 kfree(page_descs);
72 kmem_cache_free(fuse_req_cachep, req);
73 return NULL;
74 }
75
76 fuse_request_init(req, pages, page_descs, npages);
77 }
78 return req;
79 }
80
81 struct fuse_req *fuse_request_alloc(unsigned npages)
82 {
83 return __fuse_request_alloc(npages, GFP_KERNEL);
84 }
85 EXPORT_SYMBOL_GPL(fuse_request_alloc);
86
87 struct fuse_req *fuse_request_alloc_nofs(unsigned npages)
88 {
89 return __fuse_request_alloc(npages, GFP_NOFS);
90 }
91
92 void fuse_request_free(struct fuse_req *req)
93 {
94 if (req->pages != req->inline_pages) {
95 kfree(req->pages);
96 kfree(req->page_descs);
97 }
98 kmem_cache_free(fuse_req_cachep, req);
99 }
100
101 static void block_sigs(sigset_t *oldset)
102 {
103 sigset_t mask;
104
105 siginitsetinv(&mask, sigmask(SIGKILL));
106 sigprocmask(SIG_BLOCK, &mask, oldset);
107 }
108
109 static void restore_sigs(sigset_t *oldset)
110 {
111 sigprocmask(SIG_SETMASK, oldset, NULL);
112 }
113
114 static void __fuse_get_request(struct fuse_req *req)
115 {
116 atomic_inc(&req->count);
117 }
118
119 /* Must be called with > 1 refcount */
120 static void __fuse_put_request(struct fuse_req *req)
121 {
122 BUG_ON(atomic_read(&req->count) < 2);
123 atomic_dec(&req->count);
124 }
125
126 static void fuse_req_init_context(struct fuse_req *req)
127 {
128 req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid());
129 req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid());
130 req->in.h.pid = current->pid;
131 }
132
133 struct fuse_req *fuse_get_req(struct fuse_conn *fc, unsigned npages)
134 {
135 struct fuse_req *req;
136 sigset_t oldset;
137 int intr;
138 int err;
139
140 atomic_inc(&fc->num_waiting);
141 block_sigs(&oldset);
142 intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
143 restore_sigs(&oldset);
144 err = -EINTR;
145 if (intr)
146 goto out;
147
148 err = -ENOTCONN;
149 if (!fc->connected)
150 goto out;
151
152 req = fuse_request_alloc(npages);
153 err = -ENOMEM;
154 if (!req)
155 goto out;
156
157 fuse_req_init_context(req);
158 req->waiting = 1;
159 return req;
160
161 out:
162 atomic_dec(&fc->num_waiting);
163 return ERR_PTR(err);
164 }
165 EXPORT_SYMBOL_GPL(fuse_get_req);
166
167 /*
168 * Return request in fuse_file->reserved_req. However that may
169 * currently be in use. If that is the case, wait for it to become
170 * available.
171 */
172 static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
173 struct file *file)
174 {
175 struct fuse_req *req = NULL;
176 struct fuse_file *ff = file->private_data;
177
178 do {
179 wait_event(fc->reserved_req_waitq, ff->reserved_req);
180 spin_lock(&fc->lock);
181 if (ff->reserved_req) {
182 req = ff->reserved_req;
183 ff->reserved_req = NULL;
184 req->stolen_file = get_file(file);
185 }
186 spin_unlock(&fc->lock);
187 } while (!req);
188
189 return req;
190 }
191
192 /*
193 * Put stolen request back into fuse_file->reserved_req
194 */
195 static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
196 {
197 struct file *file = req->stolen_file;
198 struct fuse_file *ff = file->private_data;
199
200 spin_lock(&fc->lock);
201 fuse_request_init(req, req->pages, req->page_descs, req->max_pages);
202 BUG_ON(ff->reserved_req);
203 ff->reserved_req = req;
204 wake_up_all(&fc->reserved_req_waitq);
205 spin_unlock(&fc->lock);
206 fput(file);
207 }
208
209 /*
210 * Gets a requests for a file operation, always succeeds
211 *
212 * This is used for sending the FLUSH request, which must get to
213 * userspace, due to POSIX locks which may need to be unlocked.
214 *
215 * If allocation fails due to OOM, use the reserved request in
216 * fuse_file.
217 *
218 * This is very unlikely to deadlock accidentally, since the
219 * filesystem should not have it's own file open. If deadlock is
220 * intentional, it can still be broken by "aborting" the filesystem.
221 */
222 struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
223 struct file *file)
224 {
225 struct fuse_req *req;
226
227 atomic_inc(&fc->num_waiting);
228 wait_event(fc->blocked_waitq, !fc->blocked);
229 req = fuse_request_alloc(0);
230 if (!req)
231 req = get_reserved_req(fc, file);
232
233 fuse_req_init_context(req);
234 req->waiting = 1;
235 return req;
236 }
237
238 void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
239 {
240 if (atomic_dec_and_test(&req->count)) {
241 if (req->waiting)
242 atomic_dec(&fc->num_waiting);
243
244 if (req->stolen_file)
245 put_reserved_req(fc, req);
246 else
247 fuse_request_free(req);
248 }
249 }
250 EXPORT_SYMBOL_GPL(fuse_put_request);
251
252 static unsigned len_args(unsigned numargs, struct fuse_arg *args)
253 {
254 unsigned nbytes = 0;
255 unsigned i;
256
257 for (i = 0; i < numargs; i++)
258 nbytes += args[i].size;
259
260 return nbytes;
261 }
262
263 static u64 fuse_get_unique(struct fuse_conn *fc)
264 {
265 fc->reqctr++;
266 /* zero is special */
267 if (fc->reqctr == 0)
268 fc->reqctr = 1;
269
270 return fc->reqctr;
271 }
272
273 static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
274 {
275 req->in.h.len = sizeof(struct fuse_in_header) +
276 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
277 list_add_tail(&req->list, &fc->pending);
278 req->state = FUSE_REQ_PENDING;
279 if (!req->waiting) {
280 req->waiting = 1;
281 atomic_inc(&fc->num_waiting);
282 }
283 wake_up(&fc->waitq);
284 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
285 }
286
287 void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
288 u64 nodeid, u64 nlookup)
289 {
290 forget->forget_one.nodeid = nodeid;
291 forget->forget_one.nlookup = nlookup;
292
293 spin_lock(&fc->lock);
294 if (fc->connected) {
295 fc->forget_list_tail->next = forget;
296 fc->forget_list_tail = forget;
297 wake_up(&fc->waitq);
298 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
299 } else {
300 kfree(forget);
301 }
302 spin_unlock(&fc->lock);
303 }
304
305 static void flush_bg_queue(struct fuse_conn *fc)
306 {
307 while (fc->active_background < fc->max_background &&
308 !list_empty(&fc->bg_queue)) {
309 struct fuse_req *req;
310
311 req = list_entry(fc->bg_queue.next, struct fuse_req, list);
312 list_del(&req->list);
313 fc->active_background++;
314 req->in.h.unique = fuse_get_unique(fc);
315 queue_request(fc, req);
316 }
317 }
318
319 /*
320 * This function is called when a request is finished. Either a reply
321 * has arrived or it was aborted (and not yet sent) or some error
322 * occurred during communication with userspace, or the device file
323 * was closed. The requester thread is woken up (if still waiting),
324 * the 'end' callback is called if given, else the reference to the
325 * request is released
326 *
327 * Called with fc->lock, unlocks it
328 */
329 static void request_end(struct fuse_conn *fc, struct fuse_req *req)
330 __releases(fc->lock)
331 {
332 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
333 req->end = NULL;
334 list_del(&req->list);
335 list_del(&req->intr_entry);
336 req->state = FUSE_REQ_FINISHED;
337 if (req->background) {
338 if (fc->num_background == fc->max_background) {
339 fc->blocked = 0;
340 wake_up_all(&fc->blocked_waitq);
341 }
342 if (fc->num_background == fc->congestion_threshold &&
343 fc->connected && fc->bdi_initialized) {
344 clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
345 clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
346 }
347 fc->num_background--;
348 fc->active_background--;
349 flush_bg_queue(fc);
350 }
351 spin_unlock(&fc->lock);
352 wake_up(&req->waitq);
353 if (end)
354 end(fc, req);
355 fuse_put_request(fc, req);
356 }
357
358 static void wait_answer_interruptible(struct fuse_conn *fc,
359 struct fuse_req *req)
360 __releases(fc->lock)
361 __acquires(fc->lock)
362 {
363 if (signal_pending(current))
364 return;
365
366 spin_unlock(&fc->lock);
367 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
368 spin_lock(&fc->lock);
369 }
370
371 static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
372 {
373 list_add_tail(&req->intr_entry, &fc->interrupts);
374 wake_up(&fc->waitq);
375 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
376 }
377
378 static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
379 __releases(fc->lock)
380 __acquires(fc->lock)
381 {
382 if (!fc->no_interrupt) {
383 /* Any signal may interrupt this */
384 wait_answer_interruptible(fc, req);
385
386 if (req->aborted)
387 goto aborted;
388 if (req->state == FUSE_REQ_FINISHED)
389 return;
390
391 req->interrupted = 1;
392 if (req->state == FUSE_REQ_SENT)
393 queue_interrupt(fc, req);
394 }
395
396 if (!req->force) {
397 sigset_t oldset;
398
399 /* Only fatal signals may interrupt this */
400 block_sigs(&oldset);
401 wait_answer_interruptible(fc, req);
402 restore_sigs(&oldset);
403
404 if (req->aborted)
405 goto aborted;
406 if (req->state == FUSE_REQ_FINISHED)
407 return;
408
409 /* Request is not yet in userspace, bail out */
410 if (req->state == FUSE_REQ_PENDING) {
411 list_del(&req->list);
412 __fuse_put_request(req);
413 req->out.h.error = -EINTR;
414 return;
415 }
416 }
417
418 /*
419 * Either request is already in userspace, or it was forced.
420 * Wait it out.
421 */
422 spin_unlock(&fc->lock);
423 wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
424 spin_lock(&fc->lock);
425
426 if (!req->aborted)
427 return;
428
429 aborted:
430 BUG_ON(req->state != FUSE_REQ_FINISHED);
431 if (req->locked) {
432 /* This is uninterruptible sleep, because data is
433 being copied to/from the buffers of req. During
434 locked state, there mustn't be any filesystem
435 operation (e.g. page fault), since that could lead
436 to deadlock */
437 spin_unlock(&fc->lock);
438 wait_event(req->waitq, !req->locked);
439 spin_lock(&fc->lock);
440 }
441 }
442
/*
 * Send a request and wait for it to complete.
 *
 * Fails immediately with -ENOTCONN on a disconnected connection and
 * with -ECONNREFUSED when the connection is in the error state; the
 * result is reported through req->out.h.error.
 */
static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	spin_lock(&fc->lock);
	if (!fc->connected) {
		req->out.h.error = -ENOTCONN;
	} else if (fc->conn_error) {
		req->out.h.error = -ECONNREFUSED;
	} else {
		req->in.h.unique = fuse_get_unique(fc);
		queue_request(fc, req);
		/*
		 * Extra reference: the request is still needed by the
		 * caller after request_end() has run.
		 */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fc->lock);
}
461
462 void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
463 {
464 req->isreply = 1;
465 __fuse_request_send(fc, req);
466 }
467 EXPORT_SYMBOL_GPL(fuse_request_send);
468
469 static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
470 struct fuse_req *req)
471 {
472 req->background = 1;
473 fc->num_background++;
474 if (fc->num_background == fc->max_background)
475 fc->blocked = 1;
476 if (fc->num_background == fc->congestion_threshold &&
477 fc->bdi_initialized) {
478 set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
479 set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
480 }
481 list_add_tail(&req->list, &fc->bg_queue);
482 flush_bg_queue(fc);
483 }
484
485 static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
486 {
487 spin_lock(&fc->lock);
488 if (fc->connected) {
489 fuse_request_send_nowait_locked(fc, req);
490 spin_unlock(&fc->lock);
491 } else {
492 req->out.h.error = -ENOTCONN;
493 request_end(fc, req);
494 }
495 }
496
497 void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
498 {
499 req->isreply = 1;
500 fuse_request_send_nowait(fc, req);
501 }
502 EXPORT_SYMBOL_GPL(fuse_request_send_background);
503
504 static int fuse_request_send_notify_reply(struct fuse_conn *fc,
505 struct fuse_req *req, u64 unique)
506 {
507 int err = -ENODEV;
508
509 req->isreply = 0;
510 req->in.h.unique = unique;
511 spin_lock(&fc->lock);
512 if (fc->connected) {
513 queue_request(fc, req);
514 err = 0;
515 }
516 spin_unlock(&fc->lock);
517
518 return err;
519 }
520
521 /*
522 * Called under fc->lock
523 *
524 * fc->connected must have been checked previously
525 */
526 void fuse_request_send_background_locked(struct fuse_conn *fc,
527 struct fuse_req *req)
528 {
529 req->isreply = 1;
530 fuse_request_send_nowait_locked(fc, req);
531 }
532
533 void fuse_force_forget(struct file *file, u64 nodeid)
534 {
535 struct inode *inode = file_inode(file);
536 struct fuse_conn *fc = get_fuse_conn(inode);
537 struct fuse_req *req;
538 struct fuse_forget_in inarg;
539
540 memset(&inarg, 0, sizeof(inarg));
541 inarg.nlookup = 1;
542 req = fuse_get_req_nofail_nopages(fc, file);
543 req->in.h.opcode = FUSE_FORGET;
544 req->in.h.nodeid = nodeid;
545 req->in.numargs = 1;
546 req->in.args[0].size = sizeof(inarg);
547 req->in.args[0].value = &inarg;
548 req->isreply = 0;
549 __fuse_request_send(fc, req);
550 /* ignore errors */
551 fuse_put_request(fc, req);
552 }
553
554 /*
555 * Lock the request. Up to the next unlock_request() there mustn't be
556 * anything that could cause a page-fault. If the request was already
557 * aborted bail out.
558 */
559 static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
560 {
561 int err = 0;
562 if (req) {
563 spin_lock(&fc->lock);
564 if (req->aborted)
565 err = -ENOENT;
566 else
567 req->locked = 1;
568 spin_unlock(&fc->lock);
569 }
570 return err;
571 }
572
573 /*
574 * Unlock request. If it was aborted during being locked, the
575 * requester thread is currently waiting for it to be unlocked, so
576 * wake it up.
577 */
578 static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
579 {
580 if (req) {
581 spin_lock(&fc->lock);
582 req->locked = 0;
583 if (req->aborted)
584 wake_up(&req->waitq);
585 spin_unlock(&fc->lock);
586 }
587 }
588
/* Cursor state for copying request data to/from a userspace buffer or pipe */
struct fuse_copy_state {
	struct fuse_conn *fc;		/* connection the copy belongs to */
	int write;			/* non-zero: kernel data goes into the buffer */
	struct fuse_req *req;		/* request locked around copies, may be NULL */
	const struct iovec *iov;	/* next iovec segment to consume */
	struct pipe_buffer *pipebufs;	/* next pipe buffer; NULL in iovec mode */
	struct pipe_buffer *currbuf;	/* pipe buffer currently mapped, if any */
	struct pipe_inode_info *pipe;	/* pipe the buffers belong to */
	unsigned long nr_segs;		/* iovec segments left, or pipe buffers left/used */
	unsigned long seglen;		/* bytes left in the current iovec segment */
	unsigned long addr;		/* user address of the next byte */
	struct page *pg;		/* user page currently pinned and mapped */
	void *mapaddr;			/* kernel mapping of the current page */
	void *buf;			/* current position inside the mapping */
	unsigned len;			/* bytes left in the current chunk */
	unsigned move_pages:1;		/* try to move whole pages instead of copying */
};
606
607 static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
608 int write,
609 const struct iovec *iov, unsigned long nr_segs)
610 {
611 memset(cs, 0, sizeof(*cs));
612 cs->fc = fc;
613 cs->write = write;
614 cs->iov = iov;
615 cs->nr_segs = nr_segs;
616 }
617
618 /* Unmap and put previous page of userspace buffer */
619 static void fuse_copy_finish(struct fuse_copy_state *cs)
620 {
621 if (cs->currbuf) {
622 struct pipe_buffer *buf = cs->currbuf;
623
624 if (!cs->write) {
625 buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
626 } else {
627 kunmap(buf->page);
628 buf->len = PAGE_SIZE - cs->len;
629 }
630 cs->currbuf = NULL;
631 cs->mapaddr = NULL;
632 } else if (cs->mapaddr) {
633 kunmap(cs->pg);
634 if (cs->write) {
635 flush_dcache_page(cs->pg);
636 set_page_dirty_lock(cs->pg);
637 }
638 put_page(cs->pg);
639 cs->mapaddr = NULL;
640 }
641 }
642
643 /*
644 * Get another pagefull of userspace buffer, and map it to kernel
645 * address space, and lock request
646 */
647 static int fuse_copy_fill(struct fuse_copy_state *cs)
648 {
649 unsigned long offset;
650 int err;
651
652 unlock_request(cs->fc, cs->req);
653 fuse_copy_finish(cs);
654 if (cs->pipebufs) {
655 struct pipe_buffer *buf = cs->pipebufs;
656
657 if (!cs->write) {
658 err = buf->ops->confirm(cs->pipe, buf);
659 if (err)
660 return err;
661
662 BUG_ON(!cs->nr_segs);
663 cs->currbuf = buf;
664 cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
665 cs->len = buf->len;
666 cs->buf = cs->mapaddr + buf->offset;
667 cs->pipebufs++;
668 cs->nr_segs--;
669 } else {
670 struct page *page;
671
672 if (cs->nr_segs == cs->pipe->buffers)
673 return -EIO;
674
675 page = alloc_page(GFP_HIGHUSER);
676 if (!page)
677 return -ENOMEM;
678
679 buf->page = page;
680 buf->offset = 0;
681 buf->len = 0;
682
683 cs->currbuf = buf;
684 cs->mapaddr = kmap(page);
685 cs->buf = cs->mapaddr;
686 cs->len = PAGE_SIZE;
687 cs->pipebufs++;
688 cs->nr_segs++;
689 }
690 } else {
691 if (!cs->seglen) {
692 BUG_ON(!cs->nr_segs);
693 cs->seglen = cs->iov[0].iov_len;
694 cs->addr = (unsigned long) cs->iov[0].iov_base;
695 cs->iov++;
696 cs->nr_segs--;
697 }
698 err = get_user_pages_fast(cs->addr, 1, cs->write, &cs->pg);
699 if (err < 0)
700 return err;
701 BUG_ON(err != 1);
702 offset = cs->addr % PAGE_SIZE;
703 cs->mapaddr = kmap(cs->pg);
704 cs->buf = cs->mapaddr + offset;
705 cs->len = min(PAGE_SIZE - offset, cs->seglen);
706 cs->seglen -= cs->len;
707 cs->addr += cs->len;
708 }
709
710 return lock_request(cs->fc, cs->req);
711 }
712
713 /* Do as much copy to/from userspace buffer as we can */
714 static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
715 {
716 unsigned ncpy = min(*size, cs->len);
717 if (val) {
718 if (cs->write)
719 memcpy(cs->buf, *val, ncpy);
720 else
721 memcpy(*val, cs->buf, ncpy);
722 *val += ncpy;
723 }
724 *size -= ncpy;
725 cs->len -= ncpy;
726 cs->buf += ncpy;
727 return ncpy;
728 }
729
730 static int fuse_check_page(struct page *page)
731 {
732 if (page_mapcount(page) ||
733 page->mapping != NULL ||
734 page_count(page) != 1 ||
735 (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
736 ~(1 << PG_locked |
737 1 << PG_referenced |
738 1 << PG_uptodate |
739 1 << PG_lru |
740 1 << PG_active |
741 1 << PG_reclaim))) {
742 printk(KERN_WARNING "fuse: trying to steal weird page\n");
743 printk(KERN_WARNING " page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
744 return 1;
745 }
746 return 0;
747 }
748
749 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
750 {
751 int err;
752 struct page *oldpage = *pagep;
753 struct page *newpage;
754 struct pipe_buffer *buf = cs->pipebufs;
755
756 unlock_request(cs->fc, cs->req);
757 fuse_copy_finish(cs);
758
759 err = buf->ops->confirm(cs->pipe, buf);
760 if (err)
761 return err;
762
763 BUG_ON(!cs->nr_segs);
764 cs->currbuf = buf;
765 cs->len = buf->len;
766 cs->pipebufs++;
767 cs->nr_segs--;
768
769 if (cs->len != PAGE_SIZE)
770 goto out_fallback;
771
772 if (buf->ops->steal(cs->pipe, buf) != 0)
773 goto out_fallback;
774
775 newpage = buf->page;
776
777 if (WARN_ON(!PageUptodate(newpage)))
778 return -EIO;
779
780 ClearPageMappedToDisk(newpage);
781
782 if (fuse_check_page(newpage) != 0)
783 goto out_fallback_unlock;
784
785 /*
786 * This is a new and locked page, it shouldn't be mapped or
787 * have any special flags on it
788 */
789 if (WARN_ON(page_mapped(oldpage)))
790 goto out_fallback_unlock;
791 if (WARN_ON(page_has_private(oldpage)))
792 goto out_fallback_unlock;
793 if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
794 goto out_fallback_unlock;
795 if (WARN_ON(PageMlocked(oldpage)))
796 goto out_fallback_unlock;
797
798 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
799 if (err) {
800 unlock_page(newpage);
801 return err;
802 }
803
804 page_cache_get(newpage);
805
806 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
807 lru_cache_add_file(newpage);
808
809 err = 0;
810 spin_lock(&cs->fc->lock);
811 if (cs->req->aborted)
812 err = -ENOENT;
813 else
814 *pagep = newpage;
815 spin_unlock(&cs->fc->lock);
816
817 if (err) {
818 unlock_page(newpage);
819 page_cache_release(newpage);
820 return err;
821 }
822
823 unlock_page(oldpage);
824 page_cache_release(oldpage);
825 cs->len = 0;
826
827 return 0;
828
829 out_fallback_unlock:
830 unlock_page(newpage);
831 out_fallback:
832 cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
833 cs->buf = cs->mapaddr + buf->offset;
834
835 err = lock_request(cs->fc, cs->req);
836 if (err)
837 return err;
838
839 return 1;
840 }
841
842 static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
843 unsigned offset, unsigned count)
844 {
845 struct pipe_buffer *buf;
846
847 if (cs->nr_segs == cs->pipe->buffers)
848 return -EIO;
849
850 unlock_request(cs->fc, cs->req);
851 fuse_copy_finish(cs);
852
853 buf = cs->pipebufs;
854 page_cache_get(page);
855 buf->page = page;
856 buf->offset = offset;
857 buf->len = count;
858
859 cs->pipebufs++;
860 cs->nr_segs++;
861 cs->len = 0;
862
863 return 0;
864 }
865
866 /*
867 * Copy a page in the request to/from the userspace buffer. Must be
868 * done atomically
869 */
870 static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
871 unsigned offset, unsigned count, int zeroing)
872 {
873 int err;
874 struct page *page = *pagep;
875
876 if (page && zeroing && count < PAGE_SIZE)
877 clear_highpage(page);
878
879 while (count) {
880 if (cs->write && cs->pipebufs && page) {
881 return fuse_ref_page(cs, page, offset, count);
882 } else if (!cs->len) {
883 if (cs->move_pages && page &&
884 offset == 0 && count == PAGE_SIZE) {
885 err = fuse_try_move_page(cs, pagep);
886 if (err <= 0)
887 return err;
888 } else {
889 err = fuse_copy_fill(cs);
890 if (err)
891 return err;
892 }
893 }
894 if (page) {
895 void *mapaddr = kmap_atomic(page);
896 void *buf = mapaddr + offset;
897 offset += fuse_copy_do(cs, &buf, &count);
898 kunmap_atomic(mapaddr);
899 } else
900 offset += fuse_copy_do(cs, NULL, &count);
901 }
902 if (page && !cs->write)
903 flush_dcache_page(page);
904 return 0;
905 }
906
907 /* Copy pages in the request to/from userspace buffer */
908 static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
909 int zeroing)
910 {
911 unsigned i;
912 struct fuse_req *req = cs->req;
913
914 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
915 int err;
916 unsigned offset = req->page_descs[i].offset;
917 unsigned count = min(nbytes, req->page_descs[i].length);
918
919 err = fuse_copy_page(cs, &req->pages[i], offset, count,
920 zeroing);
921 if (err)
922 return err;
923
924 nbytes -= count;
925 }
926 return 0;
927 }
928
929 /* Copy a single argument in the request to/from userspace buffer */
930 static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
931 {
932 while (size) {
933 if (!cs->len) {
934 int err = fuse_copy_fill(cs);
935 if (err)
936 return err;
937 }
938 fuse_copy_do(cs, &val, &size);
939 }
940 return 0;
941 }
942
943 /* Copy request arguments to/from userspace buffer */
944 static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
945 unsigned argpages, struct fuse_arg *args,
946 int zeroing)
947 {
948 int err = 0;
949 unsigned i;
950
951 for (i = 0; !err && i < numargs; i++) {
952 struct fuse_arg *arg = &args[i];
953 if (i == numargs - 1 && argpages)
954 err = fuse_copy_pages(cs, arg->size, zeroing);
955 else
956 err = fuse_copy_one(cs, arg->value, arg->size);
957 }
958 return err;
959 }
960
961 static int forget_pending(struct fuse_conn *fc)
962 {
963 return fc->forget_list_head.next != NULL;
964 }
965
966 static int request_pending(struct fuse_conn *fc)
967 {
968 return !list_empty(&fc->pending) || !list_empty(&fc->interrupts) ||
969 forget_pending(fc);
970 }
971
972 /* Wait until a request is available on the pending list */
973 static void request_wait(struct fuse_conn *fc)
974 __releases(fc->lock)
975 __acquires(fc->lock)
976 {
977 DECLARE_WAITQUEUE(wait, current);
978
979 add_wait_queue_exclusive(&fc->waitq, &wait);
980 while (fc->connected && !request_pending(fc)) {
981 set_current_state(TASK_INTERRUPTIBLE);
982 if (signal_pending(current))
983 break;
984
985 spin_unlock(&fc->lock);
986 schedule();
987 spin_lock(&fc->lock);
988 }
989 set_current_state(TASK_RUNNING);
990 remove_wait_queue(&fc->waitq, &wait);
991 }
992
993 /*
994 * Transfer an interrupt request to userspace
995 *
996 * Unlike other requests this is assembled on demand, without a need
997 * to allocate a separate fuse_req structure.
998 *
999 * Called with fc->lock held, releases it
1000 */
1001 static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_copy_state *cs,
1002 size_t nbytes, struct fuse_req *req)
1003 __releases(fc->lock)
1004 {
1005 struct fuse_in_header ih;
1006 struct fuse_interrupt_in arg;
1007 unsigned reqsize = sizeof(ih) + sizeof(arg);
1008 int err;
1009
1010 list_del_init(&req->intr_entry);
1011 req->intr_unique = fuse_get_unique(fc);
1012 memset(&ih, 0, sizeof(ih));
1013 memset(&arg, 0, sizeof(arg));
1014 ih.len = reqsize;
1015 ih.opcode = FUSE_INTERRUPT;
1016 ih.unique = req->intr_unique;
1017 arg.unique = req->in.h.unique;
1018
1019 spin_unlock(&fc->lock);
1020 if (nbytes < reqsize)
1021 return -EINVAL;
1022
1023 err = fuse_copy_one(cs, &ih, sizeof(ih));
1024 if (!err)
1025 err = fuse_copy_one(cs, &arg, sizeof(arg));
1026 fuse_copy_finish(cs);
1027
1028 return err ? err : reqsize;
1029 }
1030
1031 static struct fuse_forget_link *dequeue_forget(struct fuse_conn *fc,
1032 unsigned max,
1033 unsigned *countp)
1034 {
1035 struct fuse_forget_link *head = fc->forget_list_head.next;
1036 struct fuse_forget_link **newhead = &head;
1037 unsigned count;
1038
1039 for (count = 0; *newhead != NULL && count < max; count++)
1040 newhead = &(*newhead)->next;
1041
1042 fc->forget_list_head.next = *newhead;
1043 *newhead = NULL;
1044 if (fc->forget_list_head.next == NULL)
1045 fc->forget_list_tail = &fc->forget_list_head;
1046
1047 if (countp != NULL)
1048 *countp = count;
1049
1050 return head;
1051 }
1052
1053 static int fuse_read_single_forget(struct fuse_conn *fc,
1054 struct fuse_copy_state *cs,
1055 size_t nbytes)
1056 __releases(fc->lock)
1057 {
1058 int err;
1059 struct fuse_forget_link *forget = dequeue_forget(fc, 1, NULL);
1060 struct fuse_forget_in arg = {
1061 .nlookup = forget->forget_one.nlookup,
1062 };
1063 struct fuse_in_header ih = {
1064 .opcode = FUSE_FORGET,
1065 .nodeid = forget->forget_one.nodeid,
1066 .unique = fuse_get_unique(fc),
1067 .len = sizeof(ih) + sizeof(arg),
1068 };
1069
1070 spin_unlock(&fc->lock);
1071 kfree(forget);
1072 if (nbytes < ih.len)
1073 return -EINVAL;
1074
1075 err = fuse_copy_one(cs, &ih, sizeof(ih));
1076 if (!err)
1077 err = fuse_copy_one(cs, &arg, sizeof(arg));
1078 fuse_copy_finish(cs);
1079
1080 if (err)
1081 return err;
1082
1083 return ih.len;
1084 }
1085
1086 static int fuse_read_batch_forget(struct fuse_conn *fc,
1087 struct fuse_copy_state *cs, size_t nbytes)
1088 __releases(fc->lock)
1089 {
1090 int err;
1091 unsigned max_forgets;
1092 unsigned count;
1093 struct fuse_forget_link *head;
1094 struct fuse_batch_forget_in arg = { .count = 0 };
1095 struct fuse_in_header ih = {
1096 .opcode = FUSE_BATCH_FORGET,
1097 .unique = fuse_get_unique(fc),
1098 .len = sizeof(ih) + sizeof(arg),
1099 };
1100
1101 if (nbytes < ih.len) {
1102 spin_unlock(&fc->lock);
1103 return -EINVAL;
1104 }
1105
1106 max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
1107 head = dequeue_forget(fc, max_forgets, &count);
1108 spin_unlock(&fc->lock);
1109
1110 arg.count = count;
1111 ih.len += count * sizeof(struct fuse_forget_one);
1112 err = fuse_copy_one(cs, &ih, sizeof(ih));
1113 if (!err)
1114 err = fuse_copy_one(cs, &arg, sizeof(arg));
1115
1116 while (head) {
1117 struct fuse_forget_link *forget = head;
1118
1119 if (!err) {
1120 err = fuse_copy_one(cs, &forget->forget_one,
1121 sizeof(forget->forget_one));
1122 }
1123 head = forget->next;
1124 kfree(forget);
1125 }
1126
1127 fuse_copy_finish(cs);
1128
1129 if (err)
1130 return err;
1131
1132 return ih.len;
1133 }
1134
1135 static int fuse_read_forget(struct fuse_conn *fc, struct fuse_copy_state *cs,
1136 size_t nbytes)
1137 __releases(fc->lock)
1138 {
1139 if (fc->minor < 16 || fc->forget_list_head.next->next == NULL)
1140 return fuse_read_single_forget(fc, cs, nbytes);
1141 else
1142 return fuse_read_batch_forget(fc, cs, nbytes);
1143 }
1144
1145 /*
1146 * Read a single request into the userspace filesystem's buffer. This
1147 * function waits until a request is available, then removes it from
1148 * the pending list and copies request data to userspace buffer. If
1149 * no reply is needed (FORGET) or request has been aborted or there
1150 * was an error during the copying then it's finished by calling
1151 * request_end(). Otherwise add it to the processing list, and set
1152 * the 'sent' flag.
1153 */
1154 static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
1155 struct fuse_copy_state *cs, size_t nbytes)
1156 {
1157 int err;
1158 struct fuse_req *req;
1159 struct fuse_in *in;
1160 unsigned reqsize;
1161
1162 restart:
1163 spin_lock(&fc->lock);
1164 err = -EAGAIN;
1165 if ((file->f_flags & O_NONBLOCK) && fc->connected &&
1166 !request_pending(fc))
1167 goto err_unlock;
1168
1169 request_wait(fc);
1170 err = -ENODEV;
1171 if (!fc->connected)
1172 goto err_unlock;
1173 err = -ERESTARTSYS;
1174 if (!request_pending(fc))
1175 goto err_unlock;
1176
1177 if (!list_empty(&fc->interrupts)) {
1178 req = list_entry(fc->interrupts.next, struct fuse_req,
1179 intr_entry);
1180 return fuse_read_interrupt(fc, cs, nbytes, req);
1181 }
1182
1183 if (forget_pending(fc)) {
1184 if (list_empty(&fc->pending) || fc->forget_batch-- > 0)
1185 return fuse_read_forget(fc, cs, nbytes);
1186
1187 if (fc->forget_batch <= -8)
1188 fc->forget_batch = 16;
1189 }
1190
1191 req = list_entry(fc->pending.next, struct fuse_req, list);
1192 req->state = FUSE_REQ_READING;
1193 list_move(&req->list, &fc->io);
1194
1195 in = &req->in;
1196 reqsize = in->h.len;
1197 /* If request is too large, reply with an error and restart the read */
1198 if (nbytes < reqsize) {
1199 req->out.h.error = -EIO;
1200 /* SETXATTR is special, since it may contain too large data */
1201 if (in->h.opcode == FUSE_SETXATTR)
1202 req->out.h.error = -E2BIG;
1203 request_end(fc, req);
1204 goto restart;
1205 }
1206 spin_unlock(&fc->lock);
1207 cs->req = req;
1208 err = fuse_copy_one(cs, &in->h, sizeof(in->h));
1209 if (!err)
1210 err = fuse_copy_args(cs, in->numargs, in->argpages,
1211 (struct fuse_arg *) in->args, 0);
1212 fuse_copy_finish(cs);
1213 spin_lock(&fc->lock);
1214 req->locked = 0;
1215 if (req->aborted) {
1216 request_end(fc, req);
1217 return -ENODEV;
1218 }
1219 if (err) {
1220 req->out.h.error = -EIO;
1221 request_end(fc, req);
1222 return err;
1223 }
1224 if (!req->isreply)
1225 request_end(fc, req);
1226 else {
1227 req->state = FUSE_REQ_SENT;
1228 list_move_tail(&req->list, &fc->processing);
1229 if (req->interrupted)
1230 queue_interrupt(fc, req);
1231 spin_unlock(&fc->lock);
1232 }
1233 return reqsize;
1234
1235 err_unlock:
1236 spin_unlock(&fc->lock);
1237 return err;
1238 }
1239
1240 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
1241 unsigned long nr_segs, loff_t pos)
1242 {
1243 struct fuse_copy_state cs;
1244 struct file *file = iocb->ki_filp;
1245 struct fuse_conn *fc = fuse_get_conn(file);
1246 if (!fc)
1247 return -EPERM;
1248
1249 fuse_copy_init(&cs, fc, 1, iov, nr_segs);
1250
1251 return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
1252 }
1253
/*
 * ->steal callback for pipe buffers created by fuse_dev_splice_read().
 * Unconditionally returns 1, regardless of the pipe or buffer passed in.
 */
static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
				   struct pipe_buffer *buf)
{
	return 1;
}
1259
1260 static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
1261 .can_merge = 0,
1262 .map = generic_pipe_buf_map,
1263 .unmap = generic_pipe_buf_unmap,
1264 .confirm = generic_pipe_buf_confirm,
1265 .release = generic_pipe_buf_release,
1266 .steal = fuse_dev_pipe_buf_steal,
1267 .get = generic_pipe_buf_get,
1268 };
1269
1270 static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1271 struct pipe_inode_info *pipe,
1272 size_t len, unsigned int flags)
1273 {
1274 int ret;
1275 int page_nr = 0;
1276 int do_wakeup = 0;
1277 struct pipe_buffer *bufs;
1278 struct fuse_copy_state cs;
1279 struct fuse_conn *fc = fuse_get_conn(in);
1280 if (!fc)
1281 return -EPERM;
1282
1283 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1284 if (!bufs)
1285 return -ENOMEM;
1286
1287 fuse_copy_init(&cs, fc, 1, NULL, 0);
1288 cs.pipebufs = bufs;
1289 cs.pipe = pipe;
1290 ret = fuse_dev_do_read(fc, in, &cs, len);
1291 if (ret < 0)
1292 goto out;
1293
1294 ret = 0;
1295 pipe_lock(pipe);
1296
1297 if (!pipe->readers) {
1298 send_sig(SIGPIPE, current, 0);
1299 if (!ret)
1300 ret = -EPIPE;
1301 goto out_unlock;
1302 }
1303
1304 if (pipe->nrbufs + cs.nr_segs > pipe->buffers) {
1305 ret = -EIO;
1306 goto out_unlock;
1307 }
1308
1309 while (page_nr < cs.nr_segs) {
1310 int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
1311 struct pipe_buffer *buf = pipe->bufs + newbuf;
1312
1313 buf->page = bufs[page_nr].page;
1314 buf->offset = bufs[page_nr].offset;
1315 buf->len = bufs[page_nr].len;
1316 buf->ops = &fuse_dev_pipe_buf_ops;
1317
1318 pipe->nrbufs++;
1319 page_nr++;
1320 ret += buf->len;
1321
1322 if (pipe->files)
1323 do_wakeup = 1;
1324 }
1325
1326 out_unlock:
1327 pipe_unlock(pipe);
1328
1329 if (do_wakeup) {
1330 smp_mb();
1331 if (waitqueue_active(&pipe->wait))
1332 wake_up_interruptible(&pipe->wait);
1333 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
1334 }
1335
1336 out:
1337 for (; page_nr < cs.nr_segs; page_nr++)
1338 page_cache_release(bufs[page_nr].page);
1339
1340 kfree(bufs);
1341 return ret;
1342 }
1343
1344 static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
1345 struct fuse_copy_state *cs)
1346 {
1347 struct fuse_notify_poll_wakeup_out outarg;
1348 int err = -EINVAL;
1349
1350 if (size != sizeof(outarg))
1351 goto err;
1352
1353 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1354 if (err)
1355 goto err;
1356
1357 fuse_copy_finish(cs);
1358 return fuse_notify_poll_wakeup(fc, &outarg);
1359
1360 err:
1361 fuse_copy_finish(cs);
1362 return err;
1363 }
1364
1365 static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
1366 struct fuse_copy_state *cs)
1367 {
1368 struct fuse_notify_inval_inode_out outarg;
1369 int err = -EINVAL;
1370
1371 if (size != sizeof(outarg))
1372 goto err;
1373
1374 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1375 if (err)
1376 goto err;
1377 fuse_copy_finish(cs);
1378
1379 down_read(&fc->killsb);
1380 err = -ENOENT;
1381 if (fc->sb) {
1382 err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
1383 outarg.off, outarg.len);
1384 }
1385 up_read(&fc->killsb);
1386 return err;
1387
1388 err:
1389 fuse_copy_finish(cs);
1390 return err;
1391 }
1392
1393 static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
1394 struct fuse_copy_state *cs)
1395 {
1396 struct fuse_notify_inval_entry_out outarg;
1397 int err = -ENOMEM;
1398 char *buf;
1399 struct qstr name;
1400
1401 buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1402 if (!buf)
1403 goto err;
1404
1405 err = -EINVAL;
1406 if (size < sizeof(outarg))
1407 goto err;
1408
1409 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1410 if (err)
1411 goto err;
1412
1413 err = -ENAMETOOLONG;
1414 if (outarg.namelen > FUSE_NAME_MAX)
1415 goto err;
1416
1417 err = -EINVAL;
1418 if (size != sizeof(outarg) + outarg.namelen + 1)
1419 goto err;
1420
1421 name.name = buf;
1422 name.len = outarg.namelen;
1423 err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1424 if (err)
1425 goto err;
1426 fuse_copy_finish(cs);
1427 buf[outarg.namelen] = 0;
1428 name.hash = full_name_hash(name.name, name.len);
1429
1430 down_read(&fc->killsb);
1431 err = -ENOENT;
1432 if (fc->sb)
1433 err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
1434 up_read(&fc->killsb);
1435 kfree(buf);
1436 return err;
1437
1438 err:
1439 kfree(buf);
1440 fuse_copy_finish(cs);
1441 return err;
1442 }
1443
1444 static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
1445 struct fuse_copy_state *cs)
1446 {
1447 struct fuse_notify_delete_out outarg;
1448 int err = -ENOMEM;
1449 char *buf;
1450 struct qstr name;
1451
1452 buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
1453 if (!buf)
1454 goto err;
1455
1456 err = -EINVAL;
1457 if (size < sizeof(outarg))
1458 goto err;
1459
1460 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1461 if (err)
1462 goto err;
1463
1464 err = -ENAMETOOLONG;
1465 if (outarg.namelen > FUSE_NAME_MAX)
1466 goto err;
1467
1468 err = -EINVAL;
1469 if (size != sizeof(outarg) + outarg.namelen + 1)
1470 goto err;
1471
1472 name.name = buf;
1473 name.len = outarg.namelen;
1474 err = fuse_copy_one(cs, buf, outarg.namelen + 1);
1475 if (err)
1476 goto err;
1477 fuse_copy_finish(cs);
1478 buf[outarg.namelen] = 0;
1479 name.hash = full_name_hash(name.name, name.len);
1480
1481 down_read(&fc->killsb);
1482 err = -ENOENT;
1483 if (fc->sb)
1484 err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
1485 outarg.child, &name);
1486 up_read(&fc->killsb);
1487 kfree(buf);
1488 return err;
1489
1490 err:
1491 kfree(buf);
1492 fuse_copy_finish(cs);
1493 return err;
1494 }
1495
1496 static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
1497 struct fuse_copy_state *cs)
1498 {
1499 struct fuse_notify_store_out outarg;
1500 struct inode *inode;
1501 struct address_space *mapping;
1502 u64 nodeid;
1503 int err;
1504 pgoff_t index;
1505 unsigned int offset;
1506 unsigned int num;
1507 loff_t file_size;
1508 loff_t end;
1509
1510 err = -EINVAL;
1511 if (size < sizeof(outarg))
1512 goto out_finish;
1513
1514 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1515 if (err)
1516 goto out_finish;
1517
1518 err = -EINVAL;
1519 if (size - sizeof(outarg) != outarg.size)
1520 goto out_finish;
1521
1522 nodeid = outarg.nodeid;
1523
1524 down_read(&fc->killsb);
1525
1526 err = -ENOENT;
1527 if (!fc->sb)
1528 goto out_up_killsb;
1529
1530 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1531 if (!inode)
1532 goto out_up_killsb;
1533
1534 mapping = inode->i_mapping;
1535 index = outarg.offset >> PAGE_CACHE_SHIFT;
1536 offset = outarg.offset & ~PAGE_CACHE_MASK;
1537 file_size = i_size_read(inode);
1538 end = outarg.offset + outarg.size;
1539 if (end > file_size) {
1540 file_size = end;
1541 fuse_write_update_size(inode, file_size);
1542 }
1543
1544 num = outarg.size;
1545 while (num) {
1546 struct page *page;
1547 unsigned int this_num;
1548
1549 err = -ENOMEM;
1550 page = find_or_create_page(mapping, index,
1551 mapping_gfp_mask(mapping));
1552 if (!page)
1553 goto out_iput;
1554
1555 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1556 err = fuse_copy_page(cs, &page, offset, this_num, 0);
1557 if (!err && offset == 0 && (num != 0 || file_size == end))
1558 SetPageUptodate(page);
1559 unlock_page(page);
1560 page_cache_release(page);
1561
1562 if (err)
1563 goto out_iput;
1564
1565 num -= this_num;
1566 offset = 0;
1567 index++;
1568 }
1569
1570 err = 0;
1571
1572 out_iput:
1573 iput(inode);
1574 out_up_killsb:
1575 up_read(&fc->killsb);
1576 out_finish:
1577 fuse_copy_finish(cs);
1578 return err;
1579 }
1580
1581 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
1582 {
1583 release_pages(req->pages, req->num_pages, 0);
1584 }
1585
1586 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1587 struct fuse_notify_retrieve_out *outarg)
1588 {
1589 int err;
1590 struct address_space *mapping = inode->i_mapping;
1591 struct fuse_req *req;
1592 pgoff_t index;
1593 loff_t file_size;
1594 unsigned int num;
1595 unsigned int offset;
1596 size_t total_len = 0;
1597 int num_pages;
1598
1599 offset = outarg->offset & ~PAGE_CACHE_MASK;
1600 file_size = i_size_read(inode);
1601
1602 num = outarg->size;
1603 if (outarg->offset > file_size)
1604 num = 0;
1605 else if (outarg->offset + num > file_size)
1606 num = file_size - outarg->offset;
1607
1608 num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1609 num_pages = min(num_pages, FUSE_MAX_PAGES_PER_REQ);
1610
1611 req = fuse_get_req(fc, num_pages);
1612 if (IS_ERR(req))
1613 return PTR_ERR(req);
1614
1615 req->in.h.opcode = FUSE_NOTIFY_REPLY;
1616 req->in.h.nodeid = outarg->nodeid;
1617 req->in.numargs = 2;
1618 req->in.argpages = 1;
1619 req->page_descs[0].offset = offset;
1620 req->end = fuse_retrieve_end;
1621
1622 index = outarg->offset >> PAGE_CACHE_SHIFT;
1623
1624 while (num && req->num_pages < num_pages) {
1625 struct page *page;
1626 unsigned int this_num;
1627
1628 page = find_get_page(mapping, index);
1629 if (!page)
1630 break;
1631
1632 this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
1633 req->pages[req->num_pages] = page;
1634 req->page_descs[req->num_pages].length = this_num;
1635 req->num_pages++;
1636
1637 offset = 0;
1638 num -= this_num;
1639 total_len += this_num;
1640 index++;
1641 }
1642 req->misc.retrieve_in.offset = outarg->offset;
1643 req->misc.retrieve_in.size = total_len;
1644 req->in.args[0].size = sizeof(req->misc.retrieve_in);
1645 req->in.args[0].value = &req->misc.retrieve_in;
1646 req->in.args[1].size = total_len;
1647
1648 err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
1649 if (err)
1650 fuse_retrieve_end(fc, req);
1651
1652 return err;
1653 }
1654
1655 static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
1656 struct fuse_copy_state *cs)
1657 {
1658 struct fuse_notify_retrieve_out outarg;
1659 struct inode *inode;
1660 int err;
1661
1662 err = -EINVAL;
1663 if (size != sizeof(outarg))
1664 goto copy_finish;
1665
1666 err = fuse_copy_one(cs, &outarg, sizeof(outarg));
1667 if (err)
1668 goto copy_finish;
1669
1670 fuse_copy_finish(cs);
1671
1672 down_read(&fc->killsb);
1673 err = -ENOENT;
1674 if (fc->sb) {
1675 u64 nodeid = outarg.nodeid;
1676
1677 inode = ilookup5(fc->sb, nodeid, fuse_inode_eq, &nodeid);
1678 if (inode) {
1679 err = fuse_retrieve(fc, inode, &outarg);
1680 iput(inode);
1681 }
1682 }
1683 up_read(&fc->killsb);
1684
1685 return err;
1686
1687 copy_finish:
1688 fuse_copy_finish(cs);
1689 return err;
1690 }
1691
1692 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1693 unsigned int size, struct fuse_copy_state *cs)
1694 {
1695 switch (code) {
1696 case FUSE_NOTIFY_POLL:
1697 return fuse_notify_poll(fc, size, cs);
1698
1699 case FUSE_NOTIFY_INVAL_INODE:
1700 return fuse_notify_inval_inode(fc, size, cs);
1701
1702 case FUSE_NOTIFY_INVAL_ENTRY:
1703 return fuse_notify_inval_entry(fc, size, cs);
1704
1705 case FUSE_NOTIFY_STORE:
1706 return fuse_notify_store(fc, size, cs);
1707
1708 case FUSE_NOTIFY_RETRIEVE:
1709 return fuse_notify_retrieve(fc, size, cs);
1710
1711 case FUSE_NOTIFY_DELETE:
1712 return fuse_notify_delete(fc, size, cs);
1713
1714 default:
1715 fuse_copy_finish(cs);
1716 return -EINVAL;
1717 }
1718 }
1719
1720 /* Look up request on processing list by unique ID */
1721 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
1722 {
1723 struct list_head *entry;
1724
1725 list_for_each(entry, &fc->processing) {
1726 struct fuse_req *req;
1727 req = list_entry(entry, struct fuse_req, list);
1728 if (req->in.h.unique == unique || req->intr_unique == unique)
1729 return req;
1730 }
1731 return NULL;
1732 }
1733
1734 static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
1735 unsigned nbytes)
1736 {
1737 unsigned reqsize = sizeof(struct fuse_out_header);
1738
1739 if (out->h.error)
1740 return nbytes != reqsize ? -EINVAL : 0;
1741
1742 reqsize += len_args(out->numargs, out->args);
1743
1744 if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
1745 return -EINVAL;
1746 else if (reqsize > nbytes) {
1747 struct fuse_arg *lastarg = &out->args[out->numargs-1];
1748 unsigned diffsize = reqsize - nbytes;
1749 if (diffsize > lastarg->size)
1750 return -EINVAL;
1751 lastarg->size -= diffsize;
1752 }
1753 return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
1754 out->page_zeroing);
1755 }
1756
1757 /*
1758 * Write a single reply to a request. First the header is copied from
1759 * the write buffer. The request is then searched on the processing
1760 * list by the unique ID found in the header. If found, then remove
1761 * it from the list and copy the rest of the buffer to the request.
1762 * The request is finished by calling request_end()
1763 */
1764 static ssize_t fuse_dev_do_write(struct fuse_conn *fc,
1765 struct fuse_copy_state *cs, size_t nbytes)
1766 {
1767 int err;
1768 struct fuse_req *req;
1769 struct fuse_out_header oh;
1770
1771 if (nbytes < sizeof(struct fuse_out_header))
1772 return -EINVAL;
1773
1774 err = fuse_copy_one(cs, &oh, sizeof(oh));
1775 if (err)
1776 goto err_finish;
1777
1778 err = -EINVAL;
1779 if (oh.len != nbytes)
1780 goto err_finish;
1781
1782 /*
1783 * Zero oh.unique indicates unsolicited notification message
1784 * and error contains notification code.
1785 */
1786 if (!oh.unique) {
1787 err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1788 return err ? err : nbytes;
1789 }
1790
1791 err = -EINVAL;
1792 if (oh.error <= -1000 || oh.error > 0)
1793 goto err_finish;
1794
1795 spin_lock(&fc->lock);
1796 err = -ENOENT;
1797 if (!fc->connected)
1798 goto err_unlock;
1799
1800 req = request_find(fc, oh.unique);
1801 if (!req)
1802 goto err_unlock;
1803
1804 if (req->aborted) {
1805 spin_unlock(&fc->lock);
1806 fuse_copy_finish(cs);
1807 spin_lock(&fc->lock);
1808 request_end(fc, req);
1809 return -ENOENT;
1810 }
1811 /* Is it an interrupt reply? */
1812 if (req->intr_unique == oh.unique) {
1813 err = -EINVAL;
1814 if (nbytes != sizeof(struct fuse_out_header))
1815 goto err_unlock;
1816
1817 if (oh.error == -ENOSYS)
1818 fc->no_interrupt = 1;
1819 else if (oh.error == -EAGAIN)
1820 queue_interrupt(fc, req);
1821
1822 spin_unlock(&fc->lock);
1823 fuse_copy_finish(cs);
1824 return nbytes;
1825 }
1826
1827 req->state = FUSE_REQ_WRITING;
1828 list_move(&req->list, &fc->io);
1829 req->out.h = oh;
1830 req->locked = 1;
1831 cs->req = req;
1832 if (!req->out.page_replace)
1833 cs->move_pages = 0;
1834 spin_unlock(&fc->lock);
1835
1836 err = copy_out_args(cs, &req->out, nbytes);
1837 fuse_copy_finish(cs);
1838
1839 spin_lock(&fc->lock);
1840 req->locked = 0;
1841 if (!err) {
1842 if (req->aborted)
1843 err = -ENOENT;
1844 } else if (!req->aborted)
1845 req->out.h.error = -EIO;
1846 request_end(fc, req);
1847
1848 return err ? err : nbytes;
1849
1850 err_unlock:
1851 spin_unlock(&fc->lock);
1852 err_finish:
1853 fuse_copy_finish(cs);
1854 return err;
1855 }
1856
1857 static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
1858 unsigned long nr_segs, loff_t pos)
1859 {
1860 struct fuse_copy_state cs;
1861 struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
1862 if (!fc)
1863 return -EPERM;
1864
1865 fuse_copy_init(&cs, fc, 0, iov, nr_segs);
1866
1867 return fuse_dev_do_write(fc, &cs, iov_length(iov, nr_segs));
1868 }
1869
1870 static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
1871 struct file *out, loff_t *ppos,
1872 size_t len, unsigned int flags)
1873 {
1874 unsigned nbuf;
1875 unsigned idx;
1876 struct pipe_buffer *bufs;
1877 struct fuse_copy_state cs;
1878 struct fuse_conn *fc;
1879 size_t rem;
1880 ssize_t ret;
1881
1882 fc = fuse_get_conn(out);
1883 if (!fc)
1884 return -EPERM;
1885
1886 bufs = kmalloc(pipe->buffers * sizeof(struct pipe_buffer), GFP_KERNEL);
1887 if (!bufs)
1888 return -ENOMEM;
1889
1890 pipe_lock(pipe);
1891 nbuf = 0;
1892 rem = 0;
1893 for (idx = 0; idx < pipe->nrbufs && rem < len; idx++)
1894 rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
1895
1896 ret = -EINVAL;
1897 if (rem < len) {
1898 pipe_unlock(pipe);
1899 goto out;
1900 }
1901
1902 rem = len;
1903 while (rem) {
1904 struct pipe_buffer *ibuf;
1905 struct pipe_buffer *obuf;
1906
1907 BUG_ON(nbuf >= pipe->buffers);
1908 BUG_ON(!pipe->nrbufs);
1909 ibuf = &pipe->bufs[pipe->curbuf];
1910 obuf = &bufs[nbuf];
1911
1912 if (rem >= ibuf->len) {
1913 *obuf = *ibuf;
1914 ibuf->ops = NULL;
1915 pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
1916 pipe->nrbufs--;
1917 } else {
1918 ibuf->ops->get(pipe, ibuf);
1919 *obuf = *ibuf;
1920 obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1921 obuf->len = rem;
1922 ibuf->offset += obuf->len;
1923 ibuf->len -= obuf->len;
1924 }
1925 nbuf++;
1926 rem -= obuf->len;
1927 }
1928 pipe_unlock(pipe);
1929
1930 fuse_copy_init(&cs, fc, 0, NULL, nbuf);
1931 cs.pipebufs = bufs;
1932 cs.pipe = pipe;
1933
1934 if (flags & SPLICE_F_MOVE)
1935 cs.move_pages = 1;
1936
1937 ret = fuse_dev_do_write(fc, &cs, len);
1938
1939 for (idx = 0; idx < nbuf; idx++) {
1940 struct pipe_buffer *buf = &bufs[idx];
1941 buf->ops->release(pipe, buf);
1942 }
1943 out:
1944 kfree(bufs);
1945 return ret;
1946 }
1947
1948 static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
1949 {
1950 unsigned mask = POLLOUT | POLLWRNORM;
1951 struct fuse_conn *fc = fuse_get_conn(file);
1952 if (!fc)
1953 return POLLERR;
1954
1955 poll_wait(file, &fc->waitq, wait);
1956
1957 spin_lock(&fc->lock);
1958 if (!fc->connected)
1959 mask = POLLERR;
1960 else if (request_pending(fc))
1961 mask |= POLLIN | POLLRDNORM;
1962 spin_unlock(&fc->lock);
1963
1964 return mask;
1965 }
1966
1967 /*
1968 * Abort all requests on the given list (pending or processing)
1969 *
1970 * This function releases and reacquires fc->lock
1971 */
1972 static void end_requests(struct fuse_conn *fc, struct list_head *head)
1973 __releases(fc->lock)
1974 __acquires(fc->lock)
1975 {
1976 while (!list_empty(head)) {
1977 struct fuse_req *req;
1978 req = list_entry(head->next, struct fuse_req, list);
1979 req->out.h.error = -ECONNABORTED;
1980 request_end(fc, req);
1981 spin_lock(&fc->lock);
1982 }
1983 }
1984
1985 /*
1986 * Abort requests under I/O
1987 *
1988 * The requests are set to aborted and finished, and the request
1989 * waiter is woken up. This will make request_wait_answer() wait
1990 * until the request is unlocked and then return.
1991 *
1992 * If the request is asynchronous, then the end function needs to be
1993 * called after waiting for the request to be unlocked (if it was
1994 * locked).
1995 */
1996 static void end_io_requests(struct fuse_conn *fc)
1997 __releases(fc->lock)
1998 __acquires(fc->lock)
1999 {
2000 while (!list_empty(&fc->io)) {
2001 struct fuse_req *req =
2002 list_entry(fc->io.next, struct fuse_req, list);
2003 void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
2004
2005 req->aborted = 1;
2006 req->out.h.error = -ECONNABORTED;
2007 req->state = FUSE_REQ_FINISHED;
2008 list_del_init(&req->list);
2009 wake_up(&req->waitq);
2010 if (end) {
2011 req->end = NULL;
2012 __fuse_get_request(req);
2013 spin_unlock(&fc->lock);
2014 wait_event(req->waitq, !req->locked);
2015 end(fc, req);
2016 fuse_put_request(fc, req);
2017 spin_lock(&fc->lock);
2018 }
2019 }
2020 }
2021
2022 static void end_queued_requests(struct fuse_conn *fc)
2023 __releases(fc->lock)
2024 __acquires(fc->lock)
2025 {
2026 fc->max_background = UINT_MAX;
2027 flush_bg_queue(fc);
2028 end_requests(fc, &fc->pending);
2029 end_requests(fc, &fc->processing);
2030 while (forget_pending(fc))
2031 kfree(dequeue_forget(fc, 1, NULL));
2032 }
2033
2034 static void end_polls(struct fuse_conn *fc)
2035 {
2036 struct rb_node *p;
2037
2038 p = rb_first(&fc->polled_files);
2039
2040 while (p) {
2041 struct fuse_file *ff;
2042 ff = rb_entry(p, struct fuse_file, polled_node);
2043 wake_up_interruptible_all(&ff->poll_wait);
2044
2045 p = rb_next(p);
2046 }
2047 }
2048
2049 /*
2050 * Abort all requests.
2051 *
2052 * Emergency exit in case of a malicious or accidental deadlock, or
2053 * just a hung filesystem.
2054 *
2055 * The same effect is usually achievable through killing the
2056 * filesystem daemon and all users of the filesystem. The exception
2057 * is the combination of an asynchronous request and the tricky
2058 * deadlock (see Documentation/filesystems/fuse.txt).
2059 *
2060 * During the aborting, progression of requests from the pending and
2061 * processing lists onto the io list, and progression of new requests
2062 * onto the pending list is prevented by req->connected being false.
2063 *
2064 * Progression of requests under I/O to the processing list is
2065 * prevented by the req->aborted flag being true for these requests.
2066 * For this reason requests on the io list must be aborted first.
2067 */
2068 void fuse_abort_conn(struct fuse_conn *fc)
2069 {
2070 spin_lock(&fc->lock);
2071 if (fc->connected) {
2072 fc->connected = 0;
2073 fc->blocked = 0;
2074 end_io_requests(fc);
2075 end_queued_requests(fc);
2076 end_polls(fc);
2077 wake_up_all(&fc->waitq);
2078 wake_up_all(&fc->blocked_waitq);
2079 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
2080 }
2081 spin_unlock(&fc->lock);
2082 }
2083 EXPORT_SYMBOL_GPL(fuse_abort_conn);
2084
2085 int fuse_dev_release(struct inode *inode, struct file *file)
2086 {
2087 struct fuse_conn *fc = fuse_get_conn(file);
2088 if (fc) {
2089 spin_lock(&fc->lock);
2090 fc->connected = 0;
2091 fc->blocked = 0;
2092 end_queued_requests(fc);
2093 end_polls(fc);
2094 wake_up_all(&fc->blocked_waitq);
2095 spin_unlock(&fc->lock);
2096 fuse_conn_put(fc);
2097 }
2098
2099 return 0;
2100 }
2101 EXPORT_SYMBOL_GPL(fuse_dev_release);
2102
2103 static int fuse_dev_fasync(int fd, struct file *file, int on)
2104 {
2105 struct fuse_conn *fc = fuse_get_conn(file);
2106 if (!fc)
2107 return -EPERM;
2108
2109 /* No locking - fasync_helper does its own locking */
2110 return fasync_helper(fd, file, on, &fc->fasync);
2111 }
2112
2113 const struct file_operations fuse_dev_operations = {
2114 .owner = THIS_MODULE,
2115 .llseek = no_llseek,
2116 .read = do_sync_read,
2117 .aio_read = fuse_dev_read,
2118 .splice_read = fuse_dev_splice_read,
2119 .write = do_sync_write,
2120 .aio_write = fuse_dev_write,
2121 .splice_write = fuse_dev_splice_write,
2122 .poll = fuse_dev_poll,
2123 .release = fuse_dev_release,
2124 .fasync = fuse_dev_fasync,
2125 };
2126 EXPORT_SYMBOL_GPL(fuse_dev_operations);
2127
2128 static struct miscdevice fuse_miscdevice = {
2129 .minor = FUSE_MINOR,
2130 .name = "fuse",
2131 .fops = &fuse_dev_operations,
2132 };
2133
2134 int __init fuse_dev_init(void)
2135 {
2136 int err = -ENOMEM;
2137 fuse_req_cachep = kmem_cache_create("fuse_request",
2138 sizeof(struct fuse_req),
2139 0, 0, NULL);
2140 if (!fuse_req_cachep)
2141 goto out;
2142
2143 err = misc_register(&fuse_miscdevice);
2144 if (err)
2145 goto out_cache_clean;
2146
2147 return 0;
2148
2149 out_cache_clean:
2150 kmem_cache_destroy(fuse_req_cachep);
2151 out:
2152 return err;
2153 }
2154
2155 void fuse_dev_cleanup(void)
2156 {
2157 misc_deregister(&fuse_miscdevice);
2158 kmem_cache_destroy(fuse_req_cachep);
2159 }