ocfs2: Remove data locks
authorMark Fasheh <mark.fasheh@oracle.com>
Thu, 18 Oct 2007 22:23:46 +0000 (15:23 -0700)
committerMark Fasheh <mark.fasheh@oracle.com>
Fri, 25 Jan 2008 22:45:57 +0000 (14:45 -0800)
The meta lock now covers both meta data and data, so this just removes the
now-redundant data lock.

Combining locks saves us a round of lock mastery per inode and one less lock
to ping between nodes during read/write.

We don't lose much - since meta locks were always held before a data lock
(and at the same level) ordered writeout mode (the default) ensured that
flushing for the meta data lock also pushed out data anyways.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
fs/ocfs2/aops.c
fs/ocfs2/cluster/tcp_internal.h
fs/ocfs2/dlmglue.c
fs/ocfs2/dlmglue.h
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/inode.h
fs/ocfs2/mmap.c
fs/ocfs2/super.c

index 56f7790cad46d6bb0ca7c7805e62df8561d7dbb0..5fc27cfaee50827136d661967cc6d0744107c9f1 100644 (file)
@@ -305,21 +305,12 @@ static int ocfs2_readpage(struct file *file, struct page *page)
                goto out_alloc;
        }
 
-       ret = ocfs2_data_lock_with_page(inode, 0, page);
-       if (ret != 0) {
-               if (ret == AOP_TRUNCATED_PAGE)
-                       unlock = 0;
-               mlog_errno(ret);
-               goto out_alloc;
-       }
-
        if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
                ret = ocfs2_readpage_inline(inode, page);
        else
                ret = block_read_full_page(page, ocfs2_get_block);
        unlock = 0;
 
-       ocfs2_data_unlock(inode, 0);
 out_alloc:
        up_read(&OCFS2_I(inode)->ip_alloc_sem);
 out_meta_unlock:
@@ -638,34 +629,12 @@ static ssize_t ocfs2_direct_IO(int rw,
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
                return 0;
 
-       if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
-               /*
-                * We get PR data locks even for O_DIRECT.  This
-                * allows concurrent O_DIRECT I/O but doesn't let
-                * O_DIRECT with extending and buffered zeroing writes
-                * race.  If they did race then the buffered zeroing
-                * could be written back after the O_DIRECT I/O.  It's
-                * one thing to tell people not to mix buffered and
-                * O_DIRECT writes, but expecting them to understand
-                * that file extension is also an implicit buffered
-                * write is too much.  By getting the PR we force
-                * writeback of the buffered zeroing before
-                * proceeding.
-                */
-               ret = ocfs2_data_lock(inode, 0);
-               if (ret < 0) {
-                       mlog_errno(ret);
-                       goto out;
-               }
-               ocfs2_data_unlock(inode, 0);
-       }
-
        ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
                                            inode->i_sb->s_bdev, iov, offset,
                                            nr_segs, 
                                            ocfs2_direct_IO_get_blocks,
                                            ocfs2_dio_end_io);
-out:
+
        mlog_exit(ret);
        return ret;
 }
@@ -1769,25 +1738,17 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_fail;
-       }
-
        ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
                                       fsdata, di_bh, NULL);
        if (ret) {
                mlog_errno(ret);
-               goto out_fail_data;
+               goto out_fail;
        }
 
        brelse(di_bh);
 
        return 0;
 
-out_fail_data:
-       ocfs2_data_unlock(inode, 1);
 out_fail:
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
@@ -1908,7 +1869,6 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
 
        ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
 
-       ocfs2_data_unlock(inode, 1);
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
        ocfs2_meta_unlock(inode, 1);
 
index 79bd6665b3ca087c94eb7d2b917637786026b648..b2e832aca5679cc1bd52d05c826d59cff1954fbb 100644 (file)
@@ -38,6 +38,9 @@
  * locking semantics of the file system using the protocol.  It should 
  * be somewhere else, I'm sure, but right now it isn't.
  *
+ * New in version 10:
+ *     - Meta/data locks combined
+ *
  * New in version 9:
  *     - All votes removed
  *
@@ -63,7 +66,7 @@
  *     - full 64 bit i_size in the metadata lock lvbs
  *     - introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 9ULL
+#define O2NET_PROTOCOL_VERSION 10ULL
 struct o2net_handshake {
        __be64  protocol_version;
        __be64  connector_id;
index 7e36abea8f40e056bc226309b8462cfbb88a0125..ecf58c6e2fa346ea6c0ac99b42710afb386ca489 100644 (file)
@@ -232,12 +232,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
        .flags          = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
 };
 
-static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
-       .get_osb        = ocfs2_get_inode_osb,
-       .downconvert_worker = ocfs2_data_convert_worker,
-       .flags          = 0,
-};
-
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
        .flags          = LOCK_TYPE_REQUIRES_REFRESH,
 };
@@ -261,7 +255,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 {
        return lockres->l_type == OCFS2_LOCK_TYPE_META ||
-               lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
                lockres->l_type == OCFS2_LOCK_TYPE_RW ||
                lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
 }
@@ -405,9 +398,6 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
                case OCFS2_LOCK_TYPE_META:
                        ops = &ocfs2_inode_meta_lops;
                        break;
-               case OCFS2_LOCK_TYPE_DATA:
-                       ops = &ocfs2_inode_data_lops;
-                       break;
                case OCFS2_LOCK_TYPE_OPEN:
                        ops = &ocfs2_inode_open_lops;
                        break;
@@ -1154,12 +1144,6 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
                goto bail;
        }
 
-       ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
-       if (ret) {
-               mlog_errno(ret);
-               goto bail;
-       }
-
        ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_open_lockres, 0, 0);
        if (ret) {
                mlog_errno(ret);
@@ -1312,67 +1296,6 @@ out:
        mlog_exit_void();
 }
 
-int ocfs2_data_lock_full(struct inode *inode,
-                        int write,
-                        int arg_flags)
-{
-       int status = 0, level;
-       struct ocfs2_lock_res *lockres;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
-       BUG_ON(!inode);
-
-       mlog_entry_void();
-
-       mlog(0, "inode %llu take %s DATA lock\n",
-            (unsigned long long)OCFS2_I(inode)->ip_blkno,
-            write ? "EXMODE" : "PRMODE");
-
-       /* We'll allow faking a readonly data lock for
-        * rodevices. */
-       if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) {
-               if (write) {
-                       status = -EROFS;
-                       mlog_errno(status);
-               }
-               goto out;
-       }
-
-       if (ocfs2_mount_local(osb))
-               goto out;
-
-       lockres = &OCFS2_I(inode)->ip_data_lockres;
-
-       level = write ? LKM_EXMODE : LKM_PRMODE;
-
-       status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level,
-                                   0, arg_flags);
-       if (status < 0 && status != -EAGAIN)
-               mlog_errno(status);
-
-out:
-       mlog_exit(status);
-       return status;
-}
-
-/* see ocfs2_meta_lock_with_page() */
-int ocfs2_data_lock_with_page(struct inode *inode,
-                             int write,
-                             struct page *page)
-{
-       int ret;
-
-       ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK);
-       if (ret == -EAGAIN) {
-               unlock_page(page);
-               if (ocfs2_data_lock(inode, write) == 0)
-                       ocfs2_data_unlock(inode, write);
-               ret = AOP_TRUNCATED_PAGE;
-       }
-
-       return ret;
-}
-
 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
                                        struct ocfs2_lock_res *lockres)
 {
@@ -1404,26 +1327,6 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
        mlog_exit_void();
 }
 
-void ocfs2_data_unlock(struct inode *inode,
-                      int write)
-{
-       int level = write ? LKM_EXMODE : LKM_PRMODE;
-       struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
-       mlog_entry_void();
-
-       mlog(0, "inode %llu drop %s DATA lock\n",
-            (unsigned long long)OCFS2_I(inode)->ip_blkno,
-            write ? "EXMODE" : "PRMODE");
-
-       if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) &&
-           !ocfs2_mount_local(osb))
-               ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
-
-       mlog_exit_void();
-}
-
 #define OCFS2_SEC_BITS   34
 #define OCFS2_SEC_SHIFT  (64 - 34)
 #define OCFS2_NSEC_MASK  ((1ULL << OCFS2_SEC_SHIFT) - 1)
@@ -2591,13 +2494,6 @@ int ocfs2_drop_inode_locks(struct inode *inode)
 
        status = err;
 
-       err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
-                             &OCFS2_I(inode)->ip_data_lockres);
-       if (err < 0)
-               mlog_errno(err);
-       if (err < 0 && !status)
-               status = err;
-
        err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
                              &OCFS2_I(inode)->ip_meta_lockres);
        if (err < 0)
index 931f6ee55146b7f8953df444644cdbea848ac907..3fd7729daeef080ba5b9b4359d9d00e1adf66933 100644 (file)
@@ -49,7 +49,7 @@ struct ocfs2_meta_lvb {
        __be32       lvb_reserved2;
 };
 
-/* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
+/* ocfs2_meta_lock_full() 'arg_flags' flags */
 /* don't wait on recovery. */
 #define OCFS2_META_LOCK_RECOVERY       (0x01)
 /* Instruct the dlm not to queue ourselves on the other node. */
@@ -69,15 +69,6 @@ void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_drop_inode_locks(struct inode *inode);
-int ocfs2_data_lock_full(struct inode *inode,
-                        int write,
-                        int arg_flags);
-#define ocfs2_data_lock(inode, write) ocfs2_data_lock_full(inode, write, 0)
-int ocfs2_data_lock_with_page(struct inode *inode,
-                             int write,
-                             struct page *page);
-void ocfs2_data_unlock(struct inode *inode,
-                      int write);
 int ocfs2_rw_lock(struct inode *inode, int write);
 void ocfs2_rw_unlock(struct inode *inode, int write);
 int ocfs2_open_lock(struct inode *inode);
index b75b2e1f0e42609e78768c1ab9d72c160639a139..c5c183ac41fe5ffc1f836e9103b89f2e2df5e20b 100644 (file)
@@ -382,18 +382,13 @@ static int ocfs2_truncate_file(struct inode *inode,
 
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       /* This forces other nodes to sync and drop their pages. Do
-        * this even if we have a truncate without allocation change -
-        * ocfs2 cluster sizes can be much greater than page size, so
-        * we have to truncate them anyway.  */
-       status = ocfs2_data_lock(inode, 1);
-       if (status < 0) {
-               up_write(&OCFS2_I(inode)->ip_alloc_sem);
-
-               mlog_errno(status);
-               goto bail;
-       }
-
+       /*
+        * The inode lock forced other nodes to sync and drop their
+        * pages, which (correctly) happens even if we have a truncate
+        * without allocation change - ocfs2 cluster sizes can be much
+        * greater than page size, so we have to truncate them
+        * anyway.
+        */
        unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
        truncate_inode_pages(inode->i_mapping, new_i_size);
 
@@ -403,7 +398,7 @@ static int ocfs2_truncate_file(struct inode *inode,
                if (status)
                        mlog_errno(status);
 
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
        }
 
        /* alright, we're going to need to do a full blown alloc size
@@ -413,25 +408,23 @@ static int ocfs2_truncate_file(struct inode *inode,
        status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
        if (status < 0) {
                mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
        }
 
        status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
        if (status < 0) {
                mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
        }
 
        status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
        if (status < 0) {
                mlog_errno(status);
-               goto bail_unlock_data;
+               goto bail_unlock_sem;
        }
 
        /* TODO: orphan dir cleanup here. */
-bail_unlock_data:
-       ocfs2_data_unlock(inode, 1);
-
+bail_unlock_sem:
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
 bail:
@@ -917,7 +910,7 @@ static int ocfs2_extend_file(struct inode *inode,
                             struct buffer_head *di_bh,
                             u64 new_i_size)
 {
-       int ret = 0, data_locked = 0;
+       int ret = 0;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
        BUG_ON(!di_bh);
@@ -943,20 +936,6 @@ static int ocfs2_extend_file(struct inode *inode,
            && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
                goto out_update_size;
 
-       /* 
-        * protect the pages that ocfs2_zero_extend is going to be
-        * pulling into the page cache.. we do this before the
-        * metadata extend so that we don't get into the situation
-        * where we've extended the metadata but can't get the data
-        * lock to zero.
-        */
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out;
-       }
-       data_locked = 1;
-
        /*
         * The alloc sem blocks people in read/write from reading our
         * allocation until we're done changing it. We depend on
@@ -980,7 +959,7 @@ static int ocfs2_extend_file(struct inode *inode,
                        up_write(&oi->ip_alloc_sem);
 
                        mlog_errno(ret);
-                       goto out_unlock;
+                       goto out;
                }
        }
 
@@ -991,7 +970,7 @@ static int ocfs2_extend_file(struct inode *inode,
 
        if (ret < 0) {
                mlog_errno(ret);
-               goto out_unlock;
+               goto out;
        }
 
 out_update_size:
@@ -999,10 +978,6 @@ out_update_size:
        if (ret < 0)
                mlog_errno(ret);
 
-out_unlock:
-       if (data_locked)
-               ocfs2_data_unlock(inode, 1);
-
 out:
        return ret;
 }
index 86cf073996b567e5962e30c34f68906cbbb96847..8ff201d3705e2acf9691d9297cdff1dd3b9a8eef 100644 (file)
@@ -332,10 +332,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
                                  OCFS2_LOCK_TYPE_RW, inode->i_generation,
                                  inode);
 
-       ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
-                                 OCFS2_LOCK_TYPE_DATA, inode->i_generation,
-                                 inode);
-
        ocfs2_set_inode_flags(inode);
 
        status = 0;
@@ -1014,7 +1010,6 @@ void ocfs2_clear_inode(struct inode *inode)
         * the downconvert thread while waiting to destroy the locks. */
        ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres);
        ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres);
-       ocfs2_mark_lockres_freeing(&oi->ip_data_lockres);
        ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
 
        /* We very well may get a clear_inode before all an inodes
@@ -1038,7 +1033,6 @@ void ocfs2_clear_inode(struct inode *inode)
 
        ocfs2_lock_res_free(&oi->ip_rw_lockres);
        ocfs2_lock_res_free(&oi->ip_meta_lockres);
-       ocfs2_lock_res_free(&oi->ip_data_lockres);
        ocfs2_lock_res_free(&oi->ip_open_lockres);
 
        ocfs2_metadata_cache_purge(inode);
index 70e881c5553621d4e8f6fc380f1b682c5499587d..d1c54da687c92385ce2f37ceeacff3085a764b54 100644 (file)
@@ -35,7 +35,6 @@ struct ocfs2_inode_info
 
        struct ocfs2_lock_res           ip_rw_lockres;
        struct ocfs2_lock_res           ip_meta_lockres;
-       struct ocfs2_lock_res           ip_data_lockres;
        struct ocfs2_lock_res           ip_open_lockres;
 
        /* protects allocation changes on this inode. */
index 98756156d29878b8df182497214b9640d45ac79d..a7f0ccc6fdd81e95c2cff63c331c34633b2d6bb8 100644 (file)
@@ -181,17 +181,8 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
         */
        down_write(&OCFS2_I(inode)->ip_alloc_sem);
 
-       ret = ocfs2_data_lock(inode, 1);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out_meta_unlock;
-       }
-
        ret = __ocfs2_page_mkwrite(inode, di_bh, page);
 
-       ocfs2_data_unlock(inode, 1);
-
-out_meta_unlock:
        up_write(&OCFS2_I(inode)->ip_alloc_sem);
 
        brelse(di_bh);
index 1996820488ccd26513505346941049d483ca6e66..064eba074f1e36d1abccc18ef4aad6f73cfec285 100644 (file)
@@ -1020,7 +1020,6 @@ static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data)
 
        ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
        ocfs2_lock_res_init_once(&oi->ip_meta_lockres);
-       ocfs2_lock_res_init_once(&oi->ip_data_lockres);
        ocfs2_lock_res_init_once(&oi->ip_open_lockres);
 
        ocfs2_metadata_cache_init(&oi->vfs_inode);