[PATCH 2/2] ocfs2: cluster aware flock()
authorMark Fasheh <mark.fasheh@oracle.com>
Fri, 21 Dec 2007 00:49:04 +0000 (16:49 -0800)
committerMark Fasheh <mark.fasheh@oracle.com>
Fri, 25 Jan 2008 23:05:43 +0000 (15:05 -0800)
Hook up ocfs2_flock(), using the new flock lock type in dlmglue.c. A new
mount option, "localflocks", is added so that users can revert to the old
functionality if needed.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Documentation/filesystems/ocfs2.txt
fs/ocfs2/Makefile
fs/ocfs2/file.c
fs/ocfs2/locks.c [new file with mode: 0644]
fs/ocfs2/locks.h [new file with mode: 0644]
fs/ocfs2/ocfs2.h
fs/ocfs2/super.c

index 071fad137eb553b5ba2dd158c615ce185cccd294..c318a8bbb1ef1efdbd68930ed687595f3c397121 100644 (file)
@@ -75,3 +75,4 @@ commit=nrsec  (*)     Ocfs2 can be told to sync all its data and metadata
 localalloc=8(*)                Allows custom localalloc size in MB. If the value is too
                        large, the fs will silently revert it to the default.
                        Localalloc is not enabled for local mounts.
+localflocks            This disables cluster aware flock.
index 3591890b32c682df8f73edbeca23cd8741d0dede..4d4ce48bb42c2eb29f4ecb099b5bd63a97040e2a 100644 (file)
@@ -19,6 +19,7 @@ ocfs2-objs := \
        ioctl.o                 \
        journal.o               \
        localalloc.o            \
+       locks.o                 \
        mmap.o                  \
        namei.o                 \
        resize.o                \
index 432e5f3c478485d3e375df76ce2ba9f57aad9485..caefd571782e9214e62b8799ea6e820f4d0ef2cf 100644 (file)
@@ -51,6 +51,7 @@
 #include "inode.h"
 #include "ioctl.h"
 #include "journal.h"
+#include "locks.h"
 #include "mmap.h"
 #include "suballoc.h"
 #include "super.h"
@@ -63,6 +64,35 @@ static int ocfs2_sync_inode(struct inode *inode)
        return sync_mapping_buffers(inode->i_mapping);
 }
 
+/*
+ * Allocate the per-open ocfs2 state for @file and hang it off
+ * file->private_data.
+ *
+ * The structure is zero-allocated, then its mutex and flock lock
+ * resource are initialised. @inode is not used here.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails, in which
+ * case file->private_data is left untouched.
+ */
+static int ocfs2_init_file_private(struct inode *inode, struct file *file)
+{
+       struct ocfs2_file_private *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+
+       if (priv == NULL)
+               return -ENOMEM;
+
+       mutex_init(&priv->fp_mutex);
+
+       /* Set the back pointer before handing priv to the lock resource init. */
+       priv->fp_file = file;
+       ocfs2_file_lock_res_init(&priv->fp_flock, priv);
+
+       file->private_data = priv;
+       return 0;
+}
+
+/*
+ * Tear down the per-open state attached to @file, if there is any.
+ *
+ * The flock lock resource is dropped and freed, the structure is
+ * released, and file->private_data is reset to NULL. A file with no
+ * private data is left alone.
+ */
+static void ocfs2_free_file_private(struct inode *inode, struct file *file)
+{
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct ocfs2_file_private *priv = file->private_data;
+
+       if (priv == NULL)
+               return;
+
+       ocfs2_simple_drop_lockres(osb, &priv->fp_flock);
+       ocfs2_lock_res_free(&priv->fp_flock);
+       kfree(priv);
+
+       file->private_data = NULL;
+}
+
 static int ocfs2_file_open(struct inode *inode, struct file *file)
 {
        int status;
@@ -89,7 +119,18 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
 
        oi->ip_open_count++;
        spin_unlock(&oi->ip_lock);
-       status = 0;
+
+       status = ocfs2_init_file_private(inode, file);
+       if (status) {
+               /*
+                * We want to set open count back if we're failing the
+                * open.
+                */
+               spin_lock(&oi->ip_lock);
+               oi->ip_open_count--;
+               spin_unlock(&oi->ip_lock);
+       }
+
 leave:
        mlog_exit(status);
        return status;
@@ -108,11 +149,24 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
                oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
        spin_unlock(&oi->ip_lock);
 
+       ocfs2_free_file_private(inode, file);
+
        mlog_exit(0);
 
        return 0;
 }
 
+/*
+ * ->open for directories: only sets up the per-open private data.
+ * Returns whatever ocfs2_init_file_private() returns.
+ */
+static int ocfs2_dir_open(struct inode *inode, struct file *file)
+{
+       int status = ocfs2_init_file_private(inode, file);
+
+       return status;
+}
+
+/*
+ * ->release for directories: frees the per-open private data set up
+ * by ocfs2_dir_open(). Always returns 0.
+ */
+static int ocfs2_dir_release(struct inode *inode, struct file *file)
+{
+       ocfs2_free_file_private(inode, file);
+
+       return 0;
+}
+
 static int ocfs2_sync_file(struct file *file,
                           struct dentry *dentry,
                           int datasync)
@@ -2191,6 +2245,7 @@ const struct file_operations ocfs2_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ocfs2_compat_ioctl,
 #endif
+       .flock          = ocfs2_flock,
        .splice_read    = ocfs2_file_splice_read,
        .splice_write   = ocfs2_file_splice_write,
 };
@@ -2199,8 +2254,11 @@ const struct file_operations ocfs2_dops = {
        .read           = generic_read_dir,
        .readdir        = ocfs2_readdir,
        .fsync          = ocfs2_sync_file,
+       .release        = ocfs2_dir_release,
+       .open           = ocfs2_dir_open,
        .ioctl          = ocfs2_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = ocfs2_compat_ioctl,
 #endif
+       .flock          = ocfs2_flock,
 };
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
new file mode 100644 (file)
index 0000000..203f871
--- /dev/null
@@ -0,0 +1,125 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * locks.c
+ *
+ * Userspace file locking support
+ *
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/fs.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+
+#include "dlmglue.h"
+#include "file.h"
+#include "locks.h"
+
+/*
+ * Acquire a flock on @file: first the cluster lock, then the local
+ * VFS flock record.
+ *
+ * @file:  open file being locked; its private_data carries the flock
+ *         lock resource and the mutex serialising flock operations.
+ * @inode: inode backing @file (not used here).
+ * @cmd:   lock command; anything other than a SETLKW-style command is
+ *         treated as a non-blocking (trylock) request.
+ * @fl:    the VFS lock request. F_WRLCK asks for level 1, any other
+ *         type asks for level 0.
+ *
+ * If a lock is already held at the requested level, nothing is done.
+ * If one is held at the other level, it is dropped and re-acquired at
+ * the new level. That conversion is not atomic.
+ *
+ * Returns 0 on success, -EWOULDBLOCK when a trylock request hits
+ * -EAGAIN from the cluster lock, or another negative error code.
+ */
+static int ocfs2_do_flock(struct file *file, struct inode *inode,
+                         int cmd, struct file_lock *fl)
+{
+       int ret = 0, level = 0, trylock = 0;
+       struct ocfs2_file_private *fp = file->private_data;
+       struct ocfs2_lock_res *lockres = &fp->fp_flock;
+
+       if (fl->fl_type == F_WRLCK)
+               level = 1;
+       if (!IS_SETLKW(cmd))
+               trylock = 1;
+
+       mutex_lock(&fp->fp_mutex);
+
+       if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
+           lockres->l_level > LKM_NLMODE) {
+               int old_level = 0;
+
+               if (lockres->l_level == LKM_EXMODE)
+                       old_level = 1;
+
+               /* Already held at the requested level: nothing to do. */
+               if (level == old_level)
+                       goto out;
+
+               /*
+                * Converting an existing lock is not guaranteed to be
+                * atomic, so we can get away with simply unlocking
+                * here and allowing the lock code to try at the new
+                * level.
+                */
+
+               flock_lock_file_wait(file,
+                                    &(struct file_lock){.fl_type = F_UNLCK});
+
+               ocfs2_file_unlock(file);
+       }
+
+       ret = ocfs2_file_lock(file, level, trylock);
+       if (ret) {
+               if (ret == -EAGAIN && trylock)
+                       ret = -EWOULDBLOCK;
+               else
+                       mlog_errno(ret);
+               goto out;
+       }
+
+       ret = flock_lock_file_wait(file, fl);
+       if (ret) {
+               /*
+                * The local lock was not recorded, so the caller sees
+                * this request as failed. Release the cluster lock we
+                * just took so it is not left held on behalf of a
+                * flock() that did not succeed.
+                */
+               ocfs2_file_unlock(file);
+       }
+
+out:
+       mutex_unlock(&fp->fp_mutex);
+
+       return ret;
+}
+
+/*
+ * Release a flock on @file: drop the cluster lock first, then pass the
+ * unlock request @fl to the local VFS flock code. Both steps run under
+ * the per-open mutex. @cmd is not used.
+ *
+ * Returns the result of flock_lock_file_wait().
+ */
+static int ocfs2_do_funlock(struct file *file, int cmd, struct file_lock *fl)
+{
+       struct ocfs2_file_private *priv = file->private_data;
+       int status;
+
+       mutex_lock(&priv->fp_mutex);
+
+       ocfs2_file_unlock(file);
+       status = flock_lock_file_wait(file, fl);
+
+       mutex_unlock(&priv->fp_mutex);
+
+       return status;
+}
+
+/*
+ * ->flock entry point for ocfs2 files and directories.
+ *
+ * Requests that are not FL_FLOCK, or that target an inode with
+ * mandatory locking, are refused with -ENOLCK. When the "localflocks"
+ * mount option is set or the volume is mounted local, the request goes
+ * straight to the VFS flock code. Otherwise unlock requests go to
+ * ocfs2_do_funlock() and all other requests to ocfs2_do_flock().
+ *
+ * Overall flow of ocfs2_flock() was influenced by gfs2_flock().
+ */
+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
+{
+       struct inode *inode = file->f_mapping->host;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       int local_only;
+
+       if (!(fl->fl_flags & FL_FLOCK) || __mandatory_lock(inode))
+               return -ENOLCK;
+
+       local_only = (osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
+                    ocfs2_mount_local(osb);
+       if (local_only)
+               return flock_lock_file_wait(file, fl);
+
+       if (fl->fl_type != F_UNLCK)
+               return ocfs2_do_flock(file, inode, cmd, fl);
+
+       return ocfs2_do_funlock(file, cmd, fl);
+}
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h
new file mode 100644 (file)
index 0000000..9743ef2
--- /dev/null
@@ -0,0 +1,31 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * locks.h
+ *
+ * Function prototypes for Userspace file locking support
+ *
+ * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef OCFS2_LOCKS_H
+#define OCFS2_LOCKS_H
+
+int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
+
+#endif /* OCFS2_LOCKS_H */
index 63c131e1cc77b4e3eb1218a8990a411cda07f8f5..22e334d125d0fad419bdd6eb21d1828f5102ede5 100644 (file)
@@ -171,6 +171,7 @@ enum ocfs2_mount_options
        OCFS2_MOUNT_NOINTR  = 1 << 2,   /* Don't catch signals */
        OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
        OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
+       OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
 };
 
 #define OCFS2_OSB_SOFT_RO      0x0001
index 1104f14c318366f4da5fc495c682f7798201a53a..4a091f586646a232ddf126c6072307d658e17f71 100644 (file)
@@ -153,6 +153,7 @@ enum {
        Opt_slot,
        Opt_commit,
        Opt_localalloc,
+       Opt_localflocks,
        Opt_err,
 };
 
@@ -170,6 +171,7 @@ static match_table_t tokens = {
        {Opt_slot, "preferred_slot=%u"},
        {Opt_commit, "commit=%u"},
        {Opt_localalloc, "localalloc=%d"},
+       {Opt_localflocks, "localflocks"},
        {Opt_err, NULL}
 };
 
@@ -848,6 +850,20 @@ static int ocfs2_parse_options(struct super_block *sb,
                        if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8))
                                mopt->localalloc_opt = option;
                        break;
+               case Opt_localflocks:
+                       /*
+                        * Changing this during remount could race
+                        * flock() requests, or "unbalance" existing
+                        * ones (e.g., a lock is taken in one mode but
+                        * dropped in the other). If users care enough
+                        * to flip locking modes during remount, we
+                        * could add a "local" flag to individual
+                        * flock structures for proper tracking of
+                        * state.
+                        */
+                       if (!is_remount)
+                               mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
+                       break;
                default:
                        mlog(ML_ERROR,
                             "Unrecognized mount option \"%s\" "
@@ -903,6 +919,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
        if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
                seq_printf(s, ",localalloc=%d", osb->local_alloc_size);
 
+       /* Options are emitted comma-prefixed; no trailing separator. */
+       if (opts & OCFS2_MOUNT_LOCALFLOCKS)
+               seq_printf(s, ",localflocks");
+
        return 0;
 }