From 3cc3f710ce0effe397b830826a1a081fa81f11c7 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 15 Oct 2007 15:40:33 +0100 Subject: [PATCH] [GFS2] Use ->page_mkwrite() for mmap() This cleans up the mmap() code path for GFS2 by implementing the page_mkwrite function for GFS2. We are thus able to use the generic filemap_fault function for our ->fault() implementation. This now means that shared writable mappings will be much more efficiently shared across the cluster if there is a reasonable proportion of read activity (the greater proportion, the better). As a side effect, it also reduces the size of the code, removes special cases from readpage and readpages, and makes the code path easier to follow. Signed-off-by: Steven Whitehouse --- fs/gfs2/Makefile | 2 +- fs/gfs2/glops.c | 9 +-- fs/gfs2/incore.h | 8 -- fs/gfs2/ops_address.c | 45 ++--------- fs/gfs2/ops_file.c | 131 +++++++++++++++++++++++++++++--- fs/gfs2/ops_vm.c | 169 ------------------------------------------ fs/gfs2/ops_vm.h | 18 ----- 7 files changed, 131 insertions(+), 251 deletions(-) delete mode 100644 fs/gfs2/ops_vm.c delete mode 100644 fs/gfs2/ops_vm.h diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 04ad0caebedb..8fff11058cee 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_GFS2_FS) += gfs2.o gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ - ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ + ops_fstype.o ops_inode.o ops_super.o quota.o \ recovery.o rgrp.o super.o sys.o trans.o util.o obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4670dcb2a877..110f03d66f4b 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -86,15 +86,10 @@ static void gfs2_pte_inval(struct gfs2_glock *gl) if (!ip || !S_ISREG(inode->i_mode)) return; - if (!test_bit(GIF_PAGED, 
&ip->i_flags)) - return; - unmap_shared_mapping_range(inode->i_mapping, 0, 0); - if (test_bit(GIF_SW_PAGED, &ip->i_flags)) set_bit(GLF_DIRTY, &gl->gl_flags); - clear_bit(GIF_SW_PAGED, &ip->i_flags); } /** @@ -234,10 +229,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) set_bit(GIF_INVALID, &ip->i_flags); } - if (ip && S_ISREG(ip->i_inode.i_mode)) { + if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); - clear_bit(GIF_PAGED, &ip->i_flags); - } } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 662182bfbff7..55c72f01cf31 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -241,7 +241,6 @@ struct gfs2_alloc { enum { GIF_INVALID = 0, GIF_QD_LOCKED = 1, - GIF_PAGED = 2, GIF_SW_PAGED = 3, }; @@ -289,19 +288,12 @@ static inline struct gfs2_inode *GFS2_I(struct inode *inode) return container_of(inode, struct gfs2_inode, i_inode); } -/* To be removed? */ static inline struct gfs2_sbd *GFS2_SB(struct inode *inode) { return inode->i_sb->s_fs_info; } -enum { - GFF_DID_DIRECT_ALLOC = 0, - GFF_EXLOCK = 1, -}; - struct gfs2_file { - unsigned long f_flags; /* GFF_... */ struct mutex f_fl_mutex; struct gfs2_holder f_fl_gh; }; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 9bb24b1d9c05..1696e5d9d112 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -265,9 +265,7 @@ static int __gfs2_readpage(void *file, struct page *page) * @file: The file to read * @page: The page of the file * - * This deals with the locking required. If the GFF_EXLOCK flags is set - * then we already hold the glock (due to page fault) and thus we call - * __gfs2_readpage() directly. Otherwise we use a trylock in order to + * This deals with the locking required. We use a trylock in order to * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE * in the event that we are unable to get the lock. 
*/ @@ -278,12 +276,6 @@ static int gfs2_readpage(struct file *file, struct page *page) struct gfs2_holder gh; int error; - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - return __gfs2_readpage(file, page); - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); error = gfs2_glock_nq_atime(&gh); if (unlikely(error)) { @@ -354,9 +346,8 @@ int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state, * 2. We don't handle stuffed files here we let readpage do the honours. * 3. mpage_readpages() does most of the heavy lifting in the common case. * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places. - * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as - * well as read-ahead. */ + static int gfs2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { @@ -364,40 +355,20 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - int ret = 0; - int do_unlock = 0; + int ret; - if (file) { - struct gfs2_file *gf = file->private_data; - if (test_bit(GFF_EXLOCK, &gf->f_flags)) - goto skip_lock; - } - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_TRY_1CB|GL_ATIME, &gh); - do_unlock = 1; + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); ret = gfs2_glock_nq_atime(&gh); - if (ret == GLR_TRYFAILED) - goto out_noerror; if (unlikely(ret)) - goto out_unlock; -skip_lock: + goto out_uninit; if (!gfs2_is_stuffed(ip)) ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block); - - if (do_unlock) { - gfs2_glock_dq_m(1, &gh); - gfs2_holder_uninit(&gh); - } -out: + gfs2_glock_dq(&gh); +out_uninit: + gfs2_holder_uninit(&gh); if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) ret = -EIO; return ret; -out_noerror: - ret = 0; -out_unlock: - if (do_unlock) - 
gfs2_holder_uninit(&gh); - goto out; } /** diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index a729c86b8be1..6f3aeb059c61 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -33,7 +33,6 @@ #include "lm.h" #include "log.h" #include "meta_io.h" -#include "ops_vm.h" #include "quota.h" #include "rgrp.h" #include "trans.h" @@ -169,7 +168,7 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) if (put_user(fsflags, ptr)) error = -EFAULT; - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return error; } @@ -293,6 +292,125 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOTTY; } +/** + * gfs2_allocate_page_backing - Use bmap to allocate blocks + * @page: The (locked) page to allocate backing for + * + * We try to allocate all the blocks required for the page in + * one go. This might fail for various reasons, so we keep + * trying until all the blocks to back this page are allocated. + * If some of the blocks are already allocated, thats ok too. + */ + +static int gfs2_allocate_page_backing(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct buffer_head bh; + unsigned long size = PAGE_CACHE_SIZE; + u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); + + do { + bh.b_state = 0; + bh.b_size = size; + gfs2_block_map(inode, lblock, 1, &bh); + if (!buffer_mapped(&bh)) + return -EIO; + size -= bh.b_size; + lblock += (bh.b_size >> inode->i_blkbits); + } while(size > 0); + return 0; +} + +/** + * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable + * @vma: The virtual memory area + * @page: The page which is about to become writable + * + * When the page becomes writable, we need to ensure that we have + * blocks allocated on disk to back that page. 
+ */ + +static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + unsigned long last_index; + u64 pos = (u64)page->index << PAGE_CACHE_SHIFT; /* byte offset of page in file */ + unsigned int data_blocks, ind_blocks, rblocks; + int alloc_required = 0; + struct gfs2_holder gh; + struct gfs2_alloc *al; + int ret; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); + ret = gfs2_glock_nq_atime(&gh); + if (ret) + goto out; + + set_bit(GIF_SW_PAGED, &ip->i_flags); + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); + ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); + if (ret || !alloc_required) + goto out_unlock; + + ip->i_alloc.al_requested = 0; + al = gfs2_alloc_get(ip); + ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (ret) + goto out_alloc_put; + ret = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); + if (ret) + goto out_quota_unlock; + al->al_requested = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip); + if (ret) + goto out_quota_unlock; + + rblocks = RES_DINODE + ind_blocks; + if (gfs2_is_jdata(ip)) + rblocks += data_blocks ? 
data_blocks : 1; + if (ind_blocks || data_blocks) + rblocks += RES_STATFS + RES_QUOTA; + ret = gfs2_trans_begin(sdp, rblocks, 0); + if (ret) + goto out_trans_fail; + + lock_page(page); + ret = -EINVAL; + last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT; + if (page->index > last_index) + goto out_unlock_page; + if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping) + goto out_unlock_page; + if (gfs2_is_stuffed(ip)) { + ret = gfs2_unstuff_dinode(ip, page); + if (ret) + goto out_unlock_page; + } + ret = gfs2_allocate_page_backing(page); + +out_unlock_page: + unlock_page(page); + gfs2_trans_end(sdp); +out_trans_fail: + gfs2_inplace_release(ip); +out_quota_unlock: + gfs2_quota_unlock(ip); +out_alloc_put: + gfs2_alloc_put(ip); +out_unlock: + gfs2_glock_dq(&gh); +out: + gfs2_holder_uninit(&gh); + return ret; +} + +static struct vm_operations_struct gfs2_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = gfs2_page_mkwrite, +}; + /** * gfs2_mmap - @@ -315,14 +433,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) return error; } - /* This is VM_MAYWRITE instead of VM_WRITE because a call - to mprotect() can turn on VM_WRITE later. */ - - if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == - (VM_MAYSHARE | VM_MAYWRITE)) - vma->vm_ops = &gfs2_vm_ops_sharewrite; - else - vma->vm_ops = &gfs2_vm_ops_private; + vma->vm_ops = &gfs2_vm_ops; gfs2_glock_dq_uninit(&i_gh); diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c deleted file mode 100644 index 927d739d4685..000000000000 --- a/fs/gfs2/ops_vm.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gfs2.h" -#include "incore.h" -#include "bmap.h" -#include "glock.h" -#include "inode.h" -#include "ops_vm.h" -#include "quota.h" -#include "rgrp.h" -#include "trans.h" -#include "util.h" - -static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); - - set_bit(GIF_PAGED, &ip->i_flags); - return filemap_fault(vma, vmf); -} - -static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) -{ - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - unsigned long index = page->index; - u64 lblock = index << (PAGE_CACHE_SHIFT - - sdp->sd_sb.sb_bsize_shift); - unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; - struct gfs2_alloc *al; - unsigned int data_blocks, ind_blocks; - unsigned int x; - int error; - - al = gfs2_alloc_get(ip); - - error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto out; - - error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid); - if (error) - goto out_gunlock_q; - - gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - - al->al_requested = data_blocks + ind_blocks; - - error = gfs2_inplace_reserve(ip); - if (error) - goto out_gunlock_q; - - error = gfs2_trans_begin(sdp, al->al_rgd->rd_length + - ind_blocks + RES_DINODE + - RES_STATFS + RES_QUOTA, 0); - if (error) - goto out_ipres; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL); - if (error) - goto out_trans; - } - - for (x = 0; x < blocks; ) { - u64 dblock; - unsigned int extlen; - int new = 1; - - error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen); - if (error) - goto out_trans; - - lblock += extlen; - x += extlen; - } - - gfs2_assert_warn(sdp, al->al_alloced); - -out_trans: - gfs2_trans_end(sdp); -out_ipres: - gfs2_inplace_release(ip); -out_gunlock_q: - gfs2_quota_unlock(ip); -out: - 
gfs2_alloc_put(ip); - return error; -} - -static int gfs2_sharewrite_fault(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct gfs2_file *gf = file->private_data; - struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int alloc_required; - int error; - int ret = 0; - - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); - if (error) - goto out; - - set_bit(GIF_PAGED, &ip->i_flags); - set_bit(GIF_SW_PAGED, &ip->i_flags); - - error = gfs2_write_alloc_required(ip, - (u64)vmf->pgoff << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &alloc_required); - if (error) { - ret = VM_FAULT_OOM; /* XXX: are these right? */ - goto out_unlock; - } - - set_bit(GFF_EXLOCK, &gf->f_flags); - ret = filemap_fault(vma, vmf); - clear_bit(GFF_EXLOCK, &gf->f_flags); - if (ret & VM_FAULT_ERROR) - goto out_unlock; - - if (alloc_required) { - /* XXX: do we need to drop page lock around alloc_page_backing?*/ - error = alloc_page_backing(ip, vmf->page); - if (error) { - /* - * VM_FAULT_LOCKED should always be the case for - * filemap_fault, but it may not be in a future - * implementation. - */ - if (ret & VM_FAULT_LOCKED) - unlock_page(vmf->page); - page_cache_release(vmf->page); - ret = VM_FAULT_OOM; - goto out_unlock; - } - set_page_dirty(vmf->page); - } - -out_unlock: - gfs2_glock_dq_uninit(&i_gh); -out: - return ret; -} - -struct vm_operations_struct gfs2_vm_ops_private = { - .fault = gfs2_private_fault, -}; - -struct vm_operations_struct gfs2_vm_ops_sharewrite = { - .fault = gfs2_sharewrite_fault, -}; - diff --git a/fs/gfs2/ops_vm.h b/fs/gfs2/ops_vm.h deleted file mode 100644 index 4ae8f43ed5e3..000000000000 --- a/fs/gfs2/ops_vm.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 
- * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License version 2. - */ - -#ifndef __OPS_VM_DOT_H__ -#define __OPS_VM_DOT_H__ - -#include - -extern struct vm_operations_struct gfs2_vm_ops_private; -extern struct vm_operations_struct gfs2_vm_ops_sharewrite; - -#endif /* __OPS_VM_DOT_H__ */ -- 2.20.1