From 724bb09fdc06d4ff03757b25d6dba9ef1b133e8f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Jun 2015 12:02:56 -0400 Subject: [PATCH] ufs: don't use lock_ufs() for block pointers tree protection * stores to block pointers are under per-inode seqlock (meta_lock) and mutex (truncate_mutex) * fetches of block pointers are either under truncate_mutex, or wrapped into seqretry loop on meta_lock * all changes of ->i_size are under truncate_mutex and i_mutex * all changes of ->i_lastfrag are under truncate_mutex It's similar to what ext2 is doing; the main difference is that unlike ext2 we can't rely upon the atomicity of stores into block pointers - on UFS2 they are 64bit. So we can't cut the corner when switching a pointer from NULL to non-NULL as we could in ext2_splice_branch() and need to use meta_lock on all modifications. We use seqlock where ext2 uses rwlock; ext2 could probably also benefit from such change... Another non-trivial difference is that with UFS we *cannot* have reader grab truncate_mutex in case of race - it has to keep retrying. That might be possible to change, but not until we lift tail unpacking several levels up in call chain. After that commit we do *NOT* hold fs-wide serialization on accesses to block pointers anymore. Moreover, lock_ufs() can become a normal mutex now - it's only used on statfs, remount and sync_fs and none of those uses are recursive. As the matter of fact, *now* it can be collapsed with ->s_lock, and be eventually replaced with saner per-cylinder-group spinlocks, but that's a separate story. Signed-off-by: Al Viro --- fs/ufs/balloc.c | 4 ++ fs/ufs/inode.c | 138 +++++++++++++++++++++++++++++++--------------- fs/ufs/super.c | 2 + fs/ufs/truncate.c | 22 +++++++- fs/ufs/ufs.h | 2 + 5 files changed, 121 insertions(+), 47 deletions(-) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index a7106eda5024..fb8b54eb77c5 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -417,7 +417,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, if (oldcount == 0) { result = ufs_alloc_fragments (inode, cgno, goal, count, err); if (result) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); + write_sequnlock(&UFS_I(inode)->meta_lock); *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); @@ -473,7 +475,9 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment, ufs_change_blocknr(inode, fragment - oldcount, oldcount, uspi->s_sbbase + tmp, uspi->s_sbbase + result, locked_page); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_cpu_to_data_ptr(sb, p, result); + write_sequnlock(&UFS_I(inode)->meta_lock); *err = 0; UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag, fragment + count); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a4fc3adfdc4c..100f93c6b309 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -41,8 +41,6 @@ #include "swab.h" #include "util.h" -static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock); - static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) { struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi; @@ -75,12 +73,53 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off return n; } +typedef struct { + void *p; + union { + __fs32 key32; + __fs64 key64; + }; + struct buffer_head *bh; +} Indirect; + +static inline int grow_chain32(struct ufs_inode_info *ufsi, + struct buffer_head *bh, __fs32 *v, + Indirect *from, Indirect *to) +{ + Indirect *p; + unsigned seq; + to->bh = bh; + do { + seq = read_seqbegin(&ufsi->meta_lock); + to->key32 = *(__fs32 *)(to->p = v); + for (p = from; p <= to && p->key32 == *(__fs32 *)p->p; p++) + ; + } while (read_seqretry(&ufsi->meta_lock, seq)); + return (p > to); +} + +static inline int grow_chain64(struct ufs_inode_info *ufsi, + struct buffer_head *bh, __fs64 *v, + Indirect *from, Indirect *to) +{ + Indirect *p; + unsigned seq; + to->bh = bh; + do { + seq = read_seqbegin(&ufsi->meta_lock); + to->key64 = *(__fs64 *)(to->p = v); + for (p = from; p <= to && p->key64 == *(__fs64 *)p->p; p++) + ; + } while (read_seqretry(&ufsi->meta_lock, seq)); + return (p > to); +} + /* * Returns the location of the fragment from * the beginning of the filesystem. */ -static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) +static u64 ufs_frag_map(struct inode *inode, sector_t frag) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -88,12 +127,10 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) u64 mask = (u64) uspi->s_apbmask>>uspi->s_fpbshift; int shift = uspi->s_apbshift-uspi->s_fpbshift; sector_t offsets[4], *p; + Indirect chain[4], *q = chain; int depth = ufs_block_to_path(inode, frag >> uspi->s_fpbshift, offsets); - u64 ret = 0L; - __fs32 block; - __fs64 u2_block = 0L; unsigned flags = UFS_SB(sb)->s_flags; - u64 temp = 0L; + u64 res = 0; UFSD(": frag = %llu depth = %d\n", (unsigned long long)frag, depth); UFSD(": uspi->s_fpbshift = %d ,uspi->s_apbmask = %x, mask=%llx\n", @@ -101,59 +138,73 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) (unsigned long long)mask); if (depth == 0) - return 0; + goto no_block; +again: p = offsets; - if (needs_lock) - lock_ufs(sb); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) goto ufs2; - block = ufsi->i_u1.i_data[*p++]; - if (!block) - goto out; + if (!grow_chain32(ufsi, NULL, &ufsi->i_u1.i_data[*p++], chain, q)) + goto changed; + if (!q->key32) + goto no_block; while (--depth) { + __fs32 *ptr; struct buffer_head *bh; sector_t n = *p++; - bh = sb_bread(sb, uspi->s_sbbase + fs32_to_cpu(sb, block)+(n>>shift)); + bh = sb_bread(sb, uspi->s_sbbase + + fs32_to_cpu(sb, q->key32) + (n>>shift)); if (!bh) - goto out; - block = ((__fs32 *) bh->b_data)[n & mask]; - brelse (bh); - if (!block) - goto out; + goto no_block; + ptr = (__fs32 *)bh->b_data + (n & mask); + if (!grow_chain32(ufsi, bh, ptr, chain, ++q)) + goto changed; + if (!q->key32) + goto no_block; } - ret = (u64) (uspi->s_sbbase + fs32_to_cpu(sb, block) + (frag & uspi->s_fpbmask)); - goto out; -ufs2: - u2_block = ufsi->i_u1.u2_i_data[*p++]; - if (!u2_block) - goto out; + res = fs32_to_cpu(sb, q->key32); + goto found; +ufs2: + if (!grow_chain64(ufsi, NULL, &ufsi->i_u1.u2_i_data[*p++], chain, q)) + goto changed; + if (!q->key64) + goto no_block; while (--depth) { + __fs64 *ptr; struct buffer_head *bh; sector_t n = *p++; - - temp = (u64)(uspi->s_sbbase) + fs64_to_cpu(sb, u2_block); - bh = sb_bread(sb, temp +(u64) (n>>shift)); + bh = sb_bread(sb, uspi->s_sbbase + + fs64_to_cpu(sb, q->key64) + (n>>shift)); if (!bh) - goto out; - u2_block = ((__fs64 *)bh->b_data)[n & mask]; - brelse(bh); - if (!u2_block) - goto out; + goto no_block; + ptr = (__fs64 *)bh->b_data + (n & mask); + if (!grow_chain64(ufsi, bh, ptr, chain, ++q)) + goto changed; + if (!q->key64) + goto no_block; + } + res = fs64_to_cpu(sb, q->key64); +found: + res += uspi->s_sbbase + (frag & uspi->s_fpbmask); +no_block: + while (q > chain) { + brelse(q->bh); + q--; } - temp = (u64)uspi->s_sbbase + fs64_to_cpu(sb, u2_block); - ret = temp + (u64) (frag & uspi->s_fpbmask); + return res; -out: - if (needs_lock) - unlock_ufs(sb); - return ret; +changed: + while (q > chain) { + brelse(q->bh); + q--; + } + goto again; } /** @@ -421,10 +472,9 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head int ret, err, new; unsigned long ptr,phys; u64 phys64 = 0; - bool needs_lock = (sbi->mutex_owner != current); if (!create) { - phys64 = ufs_frag_map(inode, fragment, needs_lock); + phys64 = ufs_frag_map(inode, fragment); UFSD("phys64 = %llu\n", (unsigned long long)phys64); if (phys64) map_bh(bh_result, sb, phys64); @@ -438,8 +488,7 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head ret = 0; bh = NULL; - if (needs_lock) - lock_ufs(sb); + mutex_lock(&UFS_I(inode)->truncate_mutex); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); if (fragment > @@ -501,8 +550,7 @@ out: set_buffer_new(bh_result); map_bh(bh_result, sb, phys); abort: - if (needs_lock) - unlock_ufs(sb); + mutex_unlock(&UFS_I(inode)->truncate_mutex); return err; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 250579a80d90..15cd3338340c 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1429,6 +1429,8 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) return NULL; ei->vfs_inode.i_version = 1; + seqlock_init(&ei->meta_lock); + mutex_init(&ei->truncate_mutex); return &ei->vfs_inode; } diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 9908a6045d7a..ad34b7f4b499 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -128,7 +128,9 @@ next1: tmp = ufs_data_ptr_to_cpu(sb, p); if (!tmp) continue; + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); if (free_count == 0) { frag_to_free = tmp; @@ -157,7 +159,9 @@ next1: if (!tmp ) ufs_panic(sb, "ufs_truncate_direct", "internal error"); frag4 = ufs_fragnum (frag4); + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); ufs_free_fragments (inode, tmp, frag4); mark_inode_dirty(inode); @@ -199,7 +203,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) return 1; } if (!ind_ubh) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); return 0; } @@ -210,7 +216,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) if (!tmp) continue; + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, ind); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_mark_buffer_dirty(ind_ubh); if (free_count == 0) { frag_to_free = tmp; @@ -235,7 +243,9 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_bforget(ind_ubh); ufs_free_blocks (inode, tmp, uspi->s_fpb); @@ -278,7 +288,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) return 1; } if (!dind_bh) { + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); return 0; } @@ -297,7 +309,9 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&UFS_I(inode)->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&UFS_I(inode)->meta_lock); ubh_bforget(dind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); @@ -339,7 +353,9 @@ static int ufs_trunc_tindirect(struct inode *inode) return 1; } if (!tind_bh) { + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); return 0; } @@ -355,7 +371,9 @@ static int ufs_trunc_tindirect(struct inode *inode) break; if (i >= uspi->s_apb) { tmp = ufs_data_ptr_to_cpu(sb, p); + write_seqlock(&ufsi->meta_lock); ufs_data_ptr_clear(uspi, p); + write_sequnlock(&ufsi->meta_lock); ubh_bforget(tind_bh); ufs_free_blocks(inode, tmp, uspi->s_fpb); @@ -447,7 +465,7 @@ static void __ufs_truncate_blocks(struct inode *inode) struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; int retry; - lock_ufs(sb); + mutex_lock(&ufsi->truncate_mutex); while (1) { retry = ufs_trunc_direct(inode); retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, @@ -465,7 +483,7 @@ static void __ufs_truncate_blocks(struct inode *inode) } ufsi->i_lastfrag = DIRECT_FRAGMENT; - unlock_ufs(sb); + mutex_unlock(&ufsi->truncate_mutex); } int ufs_truncate(struct inode *inode, loff_t size) diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 43fcab381de1..ea28b73a8b74 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -46,6 +46,8 @@ struct ufs_inode_info { __u32 i_oeftflag; __u16 i_osync; __u64 i_lastfrag; + seqlock_t meta_lock; + struct mutex truncate_mutex; __u32 i_dir_start_lookup; struct inode vfs_inode; }; -- 2.20.1