Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Tue, 10 Jul 2007 20:56:13 +0000 (13:56 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Tue, 10 Jul 2007 20:56:13 +0000 (13:56 -0700)
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (57 commits)
  [GFS2] Accept old format NFS filehandles
  [GFS2] Small fixes to logging code
  [DLM] dump more lock values
  [GFS2] Remove i_mode passing from NFS File Handle
  [GFS2] Obtaining no_formal_ino from directory entry
  [GFS2] git-gfs2-nmw-build-fix
  [GFS2] System won't suspend with GFS2 file system mounted
  [GFS2] remounting w/o acl option leaves acls enabled
  [GFS2] inode size inconsistency
  [DLM] Telnet to port 21064 can stop all lockspaces
  [GFS2] Fix gfs2_block_truncate_page err return
  [GFS2] Addendum to the journaled file/unmount patch
  [GFS2] Simplify multiple glock aquisition
  [GFS2] assertion failure after writing to journaled file, umount
  [GFS2] Use zero_user_page() in stuffed_readpage()
  [GFS2] Remove bogus '\0' in rgrp.c
  [GFS2] Journaled file write/unstuff bug
  [DLM] don't require FS flag on all nodes
  [GFS2] Fix deallocation issues
  [GFS2] return conflicts for GETLK
  ...

62 files changed:
fs/dlm/Makefile
fs/dlm/config.c
fs/dlm/config.h
fs/dlm/debug_fs.c
fs/dlm/dlm_internal.h
fs/dlm/lock.c
fs/dlm/lock.h
fs/dlm/lockspace.c
fs/dlm/lowcomms.c
fs/dlm/main.c
fs/dlm/member.c
fs/dlm/netlink.c [new file with mode: 0644]
fs/dlm/rcom.c
fs/dlm/recoverd.c
fs/dlm/user.c
fs/gfs2/Makefile
fs/gfs2/bmap.c
fs/gfs2/daemon.c
fs/gfs2/dir.c
fs/gfs2/dir.h
fs/gfs2/eattr.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/inode.h
fs/gfs2/locking/dlm/lock.c
fs/gfs2/locking/dlm/lock_dlm.h
fs/gfs2/locking/dlm/mount.c
fs/gfs2/locking/dlm/plock.c
fs/gfs2/locking/dlm/thread.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/lops.h
fs/gfs2/meta_io.c
fs/gfs2/meta_io.h
fs/gfs2/mount.c
fs/gfs2/ondisk.c [deleted file]
fs/gfs2/ops_address.c
fs/gfs2/ops_address.h
fs/gfs2/ops_dentry.c
fs/gfs2/ops_export.c
fs/gfs2/ops_export.h [deleted file]
fs/gfs2/ops_file.c
fs/gfs2/ops_fstype.c
fs/gfs2/ops_fstype.h
fs/gfs2/ops_inode.c
fs/gfs2/ops_super.c
fs/gfs2/ops_vm.c
fs/gfs2/quota.c
fs/gfs2/recovery.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/super.c
fs/gfs2/super.h
fs/gfs2/util.c
include/linux/Kbuild
include/linux/dlm.h
include/linux/dlm_device.h
include/linux/dlm_netlink.h [new file with mode: 0644]
include/linux/gfs2_ondisk.h

index 604cf7dc5f39856e558b8f8362867e1aaf12323f..d248e60951bac0ce8d04b19f291e380814f3826c 100644 (file)
@@ -8,6 +8,7 @@ dlm-y :=                        ast.o \
                                member.o \
                                memory.o \
                                midcomms.o \
+                               netlink.o \
                                lowcomms.o \
                                rcom.o \
                                recover.o \
index 822abdcd1434ad94638b6ec652e5720bddd74532..5069b2cb5a1f6e3fe109d148e3a7e8cd8b0930c7 100644 (file)
@@ -90,6 +90,7 @@ struct cluster {
        unsigned int cl_scan_secs;
        unsigned int cl_log_debug;
        unsigned int cl_protocol;
+       unsigned int cl_timewarn_cs;
 };
 
 enum {
@@ -103,6 +104,7 @@ enum {
        CLUSTER_ATTR_SCAN_SECS,
        CLUSTER_ATTR_LOG_DEBUG,
        CLUSTER_ATTR_PROTOCOL,
+       CLUSTER_ATTR_TIMEWARN_CS,
 };
 
 struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
 
 static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
        [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
        [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+       [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
        NULL,
 };
 
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
        cl->cl_toss_secs = dlm_config.ci_toss_secs;
        cl->cl_scan_secs = dlm_config.ci_scan_secs;
        cl->cl_log_debug = dlm_config.ci_log_debug;
+       cl->cl_protocol = dlm_config.ci_protocol;
+       cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 
        space_list = &sps->ss_group;
        comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
 
 static struct space *get_space(char *name)
 {
+       struct config_item *i;
+
        if (!space_list)
                return NULL;
-       return to_space(config_group_find_obj(space_list, name));
+
+       down(&space_list->cg_subsys->su_sem);
+       i = config_group_find_obj(space_list, name);
+       up(&space_list->cg_subsys->su_sem);
+
+       return to_space(i);
 }
 
 static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
                        if (cm->nodeid != nodeid)
                                continue;
                        found = 1;
+                       config_item_get(i);
                        break;
                } else {
                        if (!cm->addr_count ||
                            memcmp(cm->addr[0], addr, sizeof(*addr)))
                                continue;
                        found = 1;
+                       config_item_get(i);
                        break;
                }
        }
        up(&clusters_root.subsys.su_sem);
 
-       if (found)
-               config_item_get(i);
-       else
+       if (!found)
                cm = NULL;
        return cm;
 }
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 
 struct dlm_config_info dlm_config = {
        .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
        .ci_toss_secs = DEFAULT_TOSS_SECS,
        .ci_scan_secs = DEFAULT_SCAN_SECS,
        .ci_log_debug = DEFAULT_LOG_DEBUG,
-       .ci_protocol = DEFAULT_PROTOCOL
+       .ci_protocol = DEFAULT_PROTOCOL,
+       .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };
 
index 967cc3d72e5e844a893eb2aab1bf7e23ef966fb3..a3170fe22090589198b21dd6cb7476ff397229ca 100644 (file)
@@ -27,6 +27,7 @@ struct dlm_config_info {
        int ci_scan_secs;
        int ci_log_debug;
        int ci_protocol;
+       int ci_timewarn_cs;
 };
 
 extern struct dlm_config_info dlm_config;
index 61ba670b9e025fde7f8cedb28fd96ceee483de67..12c3bfd5e660fca4ebfac6e1b4146ccc1d9640f5 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 
 #include "dlm_internal.h"
+#include "lock.h"
 
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
 
 struct rsb_iter {
        int entry;
+       int locks;
+       int header;
        struct dlm_ls *ls;
        struct list_head *next;
        struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
        }
 }
 
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
-                      struct dlm_rsb *res)
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
+                               struct dlm_rsb *res)
 {
        seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
 
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        struct dlm_lkb *lkb;
        int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
 
+       lock_rsb(res);
+
        seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
        for (i = 0; i < res->res_length; i++) {
                if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        /* Print the locks attached to this resource */
        seq_printf(s, "Granted Queue\n");
        list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-               print_lock(s, lkb, res);
+               print_resource_lock(s, lkb, res);
 
        seq_printf(s, "Conversion Queue\n");
        list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-               print_lock(s, lkb, res);
+               print_resource_lock(s, lkb, res);
 
        seq_printf(s, "Waiting Queue\n");
        list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-               print_lock(s, lkb, res);
+               print_resource_lock(s, lkb, res);
 
        if (list_empty(&res->res_lookup))
                goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
                seq_printf(s, "\n");
        }
  out:
+       unlock_rsb(res);
+       return 0;
+}
+
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+       struct dlm_user_args *ua;
+       unsigned int waiting = 0;
+       uint64_t xid = 0;
+
+       if (lkb->lkb_flags & DLM_IFL_USER) {
+               ua = (struct dlm_user_args *) lkb->lkb_astparam;
+               if (ua)
+                       xid = ua->xid;
+       }
+
+       if (lkb->lkb_timestamp)
+               waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+       /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+          r_nodeid r_len r_name */
+
+       seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+                  lkb->lkb_id,
+                  lkb->lkb_nodeid,
+                  lkb->lkb_remid,
+                  lkb->lkb_ownpid,
+                  (unsigned long long)xid,
+                  lkb->lkb_exflags,
+                  lkb->lkb_flags,
+                  lkb->lkb_status,
+                  lkb->lkb_grmode,
+                  lkb->lkb_rqmode,
+                  waiting,
+                  r->res_nodeid,
+                  r->res_length,
+                  r->res_name);
+}
+
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+       struct dlm_lkb *lkb;
+
+       lock_rsb(r);
+
+       list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+               print_lock(s, lkb, r);
+
+       list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+               print_lock(s, lkb, r);
+
+       list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+               print_lock(s, lkb, r);
+
+       unlock_rsb(r);
        return 0;
 }
 
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        read_lock(&ls->ls_rsbtbl[i].lock);
                        if (!list_empty(&ls->ls_rsbtbl[i].list)) {
                                ri->next = ls->ls_rsbtbl[i].list.next;
+                               ri->rsb = list_entry(ri->next, struct dlm_rsb,
+                                                       res_hashchain);
+                               dlm_hold_rsb(ri->rsb);
                                read_unlock(&ls->ls_rsbtbl[i].lock);
                                break;
                        }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
                if (ri->entry >= ls->ls_rsbtbl_size)
                        return 1;
        } else {
+               struct dlm_rsb *old = ri->rsb;
                i = ri->entry;
                read_lock(&ls->ls_rsbtbl[i].lock);
                ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        ri->next = NULL;
                        ri->entry++;
                        read_unlock(&ls->ls_rsbtbl[i].lock);
+                       dlm_put_rsb(old);
                        goto top;
                 }
+               ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+               dlm_hold_rsb(ri->rsb);
                read_unlock(&ls->ls_rsbtbl[i].lock);
+               dlm_put_rsb(old);
        }
-       ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
 
        return 0;
 }
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
        struct rsb_iter *ri;
 
-       ri = kmalloc(sizeof *ri, GFP_KERNEL);
+       ri = kzalloc(sizeof *ri, GFP_KERNEL);
        if (!ri)
                return NULL;
 
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
        struct rsb_iter *ri = iter_ptr;
 
-       print_resource(ri->rsb, file);
+       if (ri->locks) {
+               if (ri->header) {
+                       seq_printf(file, "id nodeid remid pid xid exflags flags "
+                                        "sts grmode rqmode time_ms r_nodeid "
+                                        "r_len r_name\n");
+                       ri->header = 0;
+               }
+               print_locks(ri->rsb, file);
+       } else {
+               print_resource(ri->rsb, file);
+       }
 
        return 0;
 }
@@ -295,6 +372,83 @@ static const struct file_operations rsb_fops = {
        .release = seq_release
 };
 
+/*
+ * Dump state in compact per-lock listing
+ */
+
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+       struct rsb_iter *ri;
+
+       ri = kzalloc(sizeof *ri, GFP_KERNEL);
+       if (!ri)
+               return NULL;
+
+       ri->ls = ls;
+       ri->entry = 0;
+       ri->next = NULL;
+       ri->locks = 1;
+
+       if (*pos == 0)
+               ri->header = 1;
+
+       if (rsb_iter_next(ri)) {
+               rsb_iter_free(ri);
+               return NULL;
+       }
+
+       return ri;
+}
+
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+       struct rsb_iter *ri;
+       loff_t n = *pos;
+
+       ri = locks_iter_init(file->private, pos);
+       if (!ri)
+               return NULL;
+
+       while (n--) {
+               if (rsb_iter_next(ri)) {
+                       rsb_iter_free(ri);
+                       return NULL;
+               }
+       }
+
+       return ri;
+}
+
+static struct seq_operations locks_seq_ops = {
+       .start = locks_seq_start,
+       .next  = rsb_seq_next,
+       .stop  = rsb_seq_stop,
+       .show  = rsb_seq_show,
+};
+
+static int locks_open(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &locks_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private;
+
+       return 0;
+}
+
+static const struct file_operations locks_fops = {
+       .owner   = THIS_MODULE,
+       .open    = locks_open,
+       .read    = seq_read,
+       .llseek  = seq_lseek,
+       .release = seq_release
+};
+
 /*
  * dump lkb's on the ls_waiters list
  */
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
                return -ENOMEM;
        }
 
+       memset(name, 0, sizeof(name));
+       snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+       ls->ls_debug_locks_dentry = debugfs_create_file(name,
+                                                       S_IFREG | S_IRUGO,
+                                                       dlm_root,
+                                                       ls,
+                                                       &locks_fops);
+       if (!ls->ls_debug_locks_dentry) {
+               debugfs_remove(ls->ls_debug_waiters_dentry);
+               debugfs_remove(ls->ls_debug_rsb_dentry);
+               return -ENOMEM;
+       }
+
        return 0;
 }
 
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
                debugfs_remove(ls->ls_debug_rsb_dentry);
        if (ls->ls_debug_waiters_dentry)
                debugfs_remove(ls->ls_debug_waiters_dentry);
+       if (ls->ls_debug_locks_dentry)
+               debugfs_remove(ls->ls_debug_locks_dentry);
 }
 
 int dlm_register_debugfs(void)
index 30994d68f6a078d7e87e0a46429093b11fc4f631..74901e981e1089363bdc501fefc9c4ed106c70bb 100644 (file)
@@ -151,6 +151,7 @@ struct dlm_args {
        void                    *bastaddr;
        int                     mode;
        struct dlm_lksb         *lksb;
+       unsigned long           timeout;
 };
 
 
@@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE      0x00200000
+#define DLM_IFL_WATCH_TIMEWARN 0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL        0x01000000
 #define DLM_IFL_USER           0x00000001
 #define DLM_IFL_ORPHAN         0x00000002
 
@@ -243,6 +247,9 @@ struct dlm_lkb {
        struct list_head        lkb_wait_reply; /* waiting for remote reply */
        struct list_head        lkb_astqueue;   /* need ast to be sent */
        struct list_head        lkb_ownqueue;   /* list of locks for a process */
+       struct list_head        lkb_time_list;
+       unsigned long           lkb_timestamp;
+       unsigned long           lkb_timeout_cs;
 
        char                    *lkb_lvbptr;
        struct dlm_lksb         *lkb_lksb;      /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
        struct mutex            ls_orphans_mutex;
        struct list_head        ls_orphans;
 
+       struct mutex            ls_timeout_mutex;
+       struct list_head        ls_timeout;
+
        struct list_head        ls_nodes;       /* current nodes in ls */
        struct list_head        ls_nodes_gone;  /* dead node list, recovery */
        int                     ls_num_nodes;   /* number of nodes in ls */
        int                     ls_low_nodeid;
        int                     ls_total_weight;
        int                     *ls_node_array;
+       gfp_t                   ls_allocation;
 
        struct dlm_rsb          ls_stub_rsb;    /* for returning errors */
        struct dlm_lkb          ls_stub_lkb;    /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
 
        struct dentry           *ls_debug_rsb_dentry; /* debugfs */
        struct dentry           *ls_debug_waiters_dentry; /* debugfs */
+       struct dentry           *ls_debug_locks_dentry; /* debugfs */
 
        wait_queue_head_t       ls_uevent_wait; /* user part of join/leave */
        int                     ls_uevent_result;
+       struct completion       ls_members_done;
+       int                     ls_members_result;
 
        struct miscdevice       ls_device;
 
@@ -472,6 +486,7 @@ struct dlm_ls {
        struct task_struct      *ls_recoverd_task;
        struct mutex            ls_recoverd_active;
        spinlock_t              ls_recover_lock;
+       unsigned long           ls_recover_begin; /* jiffies timestamp */
        uint32_t                ls_recover_status; /* DLM_RS_ */
        uint64_t                ls_recover_seq;
        struct dlm_recover      *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY                3
 #define LSFL_RCOM_WAIT         4
 #define LSFL_UEVENT_WAIT       5
+#define LSFL_TIMEWARN          6
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
        void __user             *castaddr;
        void __user             *bastparam;
        void __user             *bastaddr;
+       uint64_t                xid;
 };
 
 #define DLM_PROC_FLAGS_CLOSING 1
index d8d6e729f96b669b5a6ed16bfb92c776cfc4744c..b455919c19984ad408d4ca498ad72f40a7d33d1f 100644 (file)
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                                    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
 
 /*
  * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
 
 /* Threads cannot use the lockspace while it's being recovered */
 
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
        down_read(&ls->ls_in_recovery);
 }
 
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
        up_read(&ls->ls_in_recovery);
 }
 
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
        return down_read_trylock(&ls->ls_in_recovery);
 }
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
        if (is_master_copy(lkb))
                return;
 
+       del_timeout(lkb);
+
        DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
 
+       /* if the operation was a cancel, then return -DLM_ECANCEL, if a
+          timeout caused the cancel then return -ETIMEDOUT */
+       if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+               lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+               rv = -ETIMEDOUT;
+       }
+
+       if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+               lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+               rv = -EDEADLK;
+       }
+
        lkb->lkb_lksb->sb_status = rv;
        lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
 
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
        kref_init(&lkb->lkb_ref);
        INIT_LIST_HEAD(&lkb->lkb_ownqueue);
        INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+       INIT_LIST_HEAD(&lkb->lkb_time_list);
 
        get_random_bytes(&bucket, sizeof(bucket));
        bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
        int i;
 
-       if (dlm_locking_stopped(ls))
-               return;
-
        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
                shrink_bucket(ls, i);
+               if (dlm_locking_stopped(ls))
+                       break;
                cond_resched();
        }
 }
 
+static void add_timeout(struct dlm_lkb *lkb)
+{
+       struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+       if (is_master_copy(lkb)) {
+               lkb->lkb_timestamp = jiffies;
+               return;
+       }
+
+       if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+           !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+               lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+               goto add_it;
+       }
+       if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+               goto add_it;
+       return;
+
+ add_it:
+       DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+       mutex_lock(&ls->ls_timeout_mutex);
+       hold_lkb(lkb);
+       lkb->lkb_timestamp = jiffies;
+       list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+       mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+static void del_timeout(struct dlm_lkb *lkb)
+{
+       struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+       mutex_lock(&ls->ls_timeout_mutex);
+       if (!list_empty(&lkb->lkb_time_list)) {
+               list_del_init(&lkb->lkb_time_list);
+               unhold_lkb(lkb);
+       }
+       mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_timeout_cs without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+       struct dlm_rsb *r;
+       struct dlm_lkb *lkb;
+       int do_cancel, do_warn;
+
+       for (;;) {
+               if (dlm_locking_stopped(ls))
+                       break;
+
+               do_cancel = 0;
+               do_warn = 0;
+               mutex_lock(&ls->ls_timeout_mutex);
+               list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+
+                       if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+                           time_after_eq(jiffies, lkb->lkb_timestamp +
+                                         lkb->lkb_timeout_cs * HZ/100))
+                               do_cancel = 1;
+
+                       if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+                           time_after_eq(jiffies, lkb->lkb_timestamp +
+                                          dlm_config.ci_timewarn_cs * HZ/100))
+                               do_warn = 1;
+
+                       if (!do_cancel && !do_warn)
+                               continue;
+                       hold_lkb(lkb);
+                       break;
+               }
+               mutex_unlock(&ls->ls_timeout_mutex);
+
+               if (!do_cancel && !do_warn)
+                       break;
+
+               r = lkb->lkb_resource;
+               hold_rsb(r);
+               lock_rsb(r);
+
+               if (do_warn) {
+                       /* clear flag so we only warn once */
+                       lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                       if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+                               del_timeout(lkb);
+                       dlm_timeout_warn(lkb);
+               }
+
+               if (do_cancel) {
+                       log_debug(ls, "timeout cancel %x node %d %s",
+                                 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                       lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                       lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+                       del_timeout(lkb);
+                       _cancel_lock(r, lkb);
+               }
+
+               unlock_rsb(r);
+               unhold_rsb(r);
+               dlm_put_lkb(lkb);
+       }
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+       struct dlm_lkb *lkb;
+       long adj = jiffies - ls->ls_recover_begin;
+
+       ls->ls_recover_begin = 0;
+       mutex_lock(&ls->ls_timeout_mutex);
+       list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+               lkb->lkb_timestamp += adj;
+       mutex_unlock(&ls->ls_timeout_mutex);
+}
+
 /* lkb is master or local copy */
 
 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  * queue for one resource.  The granted mode of each lock blocks the requested
  * mode of the other lock."
  *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
  *
  * Example:
  * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  *
  * The first lock can't be granted because of the granted mode of the second
  * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
  *
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential.  If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
  */
 
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-       struct dlm_lkb *this, *first = NULL, *self = NULL;
+       struct dlm_lkb *lkb1;
+       int lkb_is_ahead = 0;
 
-       list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
-               if (!first)
-                       first = this;
-               if (this == lkb) {
-                       self = lkb;
+       list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+               if (lkb1 == lkb2) {
+                       lkb_is_ahead = 1;
                        continue;
                }
 
-               if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
-                       return 1;
-       }
-
-       /* if lkb is on the convert queue and is preventing the first
-          from being granted, then there's deadlock and we demote lkb.
-          multiple converting locks may need to do this before the first
-          converting lock can be granted. */
-
-       if (self && self != first) {
-               if (!modes_compat(lkb, first) &&
-                   !queue_conflict(&rsb->res_grantqueue, first))
-                       return 1;
+               if (!lkb_is_ahead) {
+                       if (!modes_compat(lkb2, lkb1))
+                               return 1;
+               } else {
+                       if (!modes_compat(lkb2, lkb1) &&
+                           !modes_compat(lkb1, lkb2))
+                               return 1;
+               }
        }
-
        return 0;
 }
 
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        if (!now && !conv && list_empty(&r->res_convertqueue) &&
            first_in_list(lkb, &r->res_waitqueue))
                return 1;
-
  out:
-       /*
-        * The following, enabled by CONVDEADLK, departs from VMS.
-        */
-
-       if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-           conversion_deadlock_detect(r, lkb)) {
-               lkb->lkb_grmode = DLM_LOCK_NL;
-               lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-       }
-
        return 0;
 }
 
-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+                         int *err)
 {
-       uint32_t flags = lkb->lkb_exflags;
        int rv;
        int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+       int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+       if (err)
+               *err = 0;
 
        rv = _can_be_granted(r, lkb, now);
        if (rv)
                goto out;
 
-       if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+       /*
+        * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+        * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+        * cancels one of the locks.
+        */
+
+       if (is_convert && can_be_queued(lkb) &&
+           conversion_deadlock_detect(r, lkb)) {
+               if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+                       lkb->lkb_grmode = DLM_LOCK_NL;
+                       lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+               } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+                       if (err)
+                               *err = -EDEADLK;
+                       else {
+                               log_print("can_be_granted deadlock %x now %d",
+                                         lkb->lkb_id, now);
+                               dlm_dump_rsb(r);
+                       }
+               }
                goto out;
+       }
 
-       if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+       /*
+        * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+        * to grant a request in a mode other than the normal rqmode.  It's a
+        * simple way to provide a big optimization to applications that can
+        * use them.
+        */
+
+       if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
                alt = DLM_LOCK_PR;
-       else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+       else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
                alt = DLM_LOCK_CW;
 
        if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        return rv;
 }
 
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
        struct dlm_lkb *lkb, *s;
        int hi, demoted, quit, grant_restart, demote_restart;
+       int deadlk;
 
        quit = 0;
  restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
 
        list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
                demoted = is_demoted(lkb);
-               if (can_be_granted(r, lkb, 0)) {
+               deadlk = 0;
+
+               if (can_be_granted(r, lkb, 0, &deadlk)) {
                        grant_lock_pending(r, lkb);
                        grant_restart = 1;
-               } else {
-                       hi = max_t(int, lkb->lkb_rqmode, hi);
-                       if (!demoted && is_demoted(lkb))
-                               demote_restart = 1;
+                       continue;
                }
+
+               if (!demoted && is_demoted(lkb)) {
+                       log_print("WARN: pending demoted %x node %d %s",
+                                 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                       demote_restart = 1;
+                       continue;
+               }
+
+               if (deadlk) {
+                       log_print("WARN: pending deadlock %x node %d %s",
+                                 lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                       dlm_dump_rsb(r);
+                       continue;
+               }
+
+               hi = max_t(int, lkb->lkb_rqmode, hi);
        }
 
        if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
        struct dlm_lkb *lkb, *s;
 
        list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-               if (can_be_granted(r, lkb, 0))
+               if (can_be_granted(r, lkb, 0, NULL))
                        grant_lock_pending(r, lkb);
                 else
                        high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }
 
 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-                        int namelen, uint32_t parent_lkid, void *ast,
+                        int namelen, unsigned long timeout_cs, void *ast,
                         void *astarg, void *bast, struct dlm_args *args)
 {
        int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
                goto out;
 
-       /* parent/child locks not yet supported */
-       if (parent_lkid)
-               goto out;
-
        if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
                goto out;
 
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        args->astaddr = ast;
        args->astparam = (long) astarg;
        args->bastaddr = bast;
+       args->timeout = timeout_cs;
        args->mode = mode;
        args->lksb = lksb;
        rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
        lkb->lkb_lksb = args->lksb;
        lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
        lkb->lkb_ownpid = (int) current->pid;
+       lkb->lkb_timeout_cs = args->timeout;
        rv = 0;
  out:
        return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap(lkb))
                        goto out;
 
+               /* don't let scand try to do a cancel */
+               del_timeout(lkb);
+
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
                        rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap_unlock(lkb))
                        goto out;
 
+               /* don't let scand try to do a cancel */
+               del_timeout(lkb);
+
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
                        rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
 
-       if (can_be_granted(r, lkb, 1)) {
+       if (can_be_granted(r, lkb, 1, NULL)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
                error = -EINPROGRESS;
                add_lkb(r, lkb, DLM_LKSTS_WAITING);
                send_blocking_asts(r, lkb);
+               add_timeout(lkb);
                goto out;
        }
 
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
+       int deadlk = 0;
 
        /* changing an existing lock may allow others to be granted */
 
-       if (can_be_granted(r, lkb, 1)) {
+       if (can_be_granted(r, lkb, 1, &deadlk)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                grant_pending_locks(r);
                goto out;
        }
 
+       /* can_be_granted() detected that this lock would block in a conversion
+          deadlock, so we leave it on the granted queue and return EDEADLK in
+          the ast for the convert. */
+
+       if (deadlk) {
+               /* it's left on the granted queue */
+               log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+                         lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+                         lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+               revert_lock(r, lkb);
+               queue_cast(r, lkb, -EDEADLK);
+               error = -EDEADLK;
+               goto out;
+       }
+
        /* is_demoted() means the can_be_granted() above set the grmode
           to NL, and left us on the granted queue.  This auto-demotion
           (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
                send_blocking_asts(r, lkb);
+               add_timeout(lkb);
                goto out;
        }
 
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        if (convert)
                error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (error)
                goto out;
 
-       error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+       error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
                              astarg, bast, &args);
        if (error)
                goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
  out_put:
        if (convert || error)
                __put_lkb(ls, lkb);
-       if (error == -EAGAIN)
+       if (error == -EAGAIN || error == -EDEADLK)
                error = 0;
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
  out_put:
        dlm_put_lkb(lkb);
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
           pass into lowcomms_commit and a message buffer (mb) that we
           write our data into */
 
-       mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+       mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh)
                return -ENOBUFS;
 
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
                lkb->lkb_remid = ms->m_lkid;
                if (is_altmode(lkb))
                        munge_altmode(lkb, ms);
-               if (result)
+               if (result) {
                        add_lkb(r, lkb, DLM_LKSTS_WAITING);
-               else {
+                       add_timeout(lkb);
+               } else {
                        grant_lock_pc(r, lkb, ms);
                        queue_cast(r, lkb, 0);
                }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                queue_cast(r, lkb, -EAGAIN);
                break;
 
+       case -EDEADLK:
+               receive_flags_reply(lkb, ms);
+               revert_lock_pc(r, lkb);
+               queue_cast(r, lkb, -EDEADLK);
+               break;
+
        case -EINPROGRESS:
                /* convert was queued on remote master */
                receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                        munge_demoted(lkb, ms);
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+               add_timeout(lkb);
                break;
 
        case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        case -DLM_ECANCEL:
                receive_flags_reply(lkb, ms);
                revert_lock_pc(r, lkb);
-               if (ms->m_result)
-                       queue_cast(r, lkb, -DLM_ECANCEL);
+               queue_cast(r, lkb, -DLM_ECANCEL);
                break;
        case 0:
                break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                        }
                }
 
-               if (lock_recovery_try(ls))
+               if (dlm_lock_recovery_try(ls))
                        break;
                schedule();
        }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                log_error(ls, "unknown message type %d", ms->m_type);
        }
 
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
  out:
        dlm_put_lockspace(ls);
        dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
                     int mode, uint32_t flags, void *name, unsigned int namelen,
-                    uint32_t parent_lkid)
+                    unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        int error;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        error = create_lkb(ls, &lkb);
        if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
           When DLM_IFL_USER is set, the dlm knows that this is a userspace
           lock and that lkb_astparam is the dlm_user_args structure. */
 
-       error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+       error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
                              DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        lkb->lkb_flags |= DLM_IFL_USER;
        ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
        list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
        spin_unlock(&ua->proc->locks_spin);
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        return error;
 }
 
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-                    int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+                    int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+                    unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        struct dlm_user_args *ua;
        int error;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        if (lvb_in && ua->lksb.sb_lvbptr)
                memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
 
+       ua->xid = ua_tmp->xid;
        ua->castparam = ua_tmp->castparam;
        ua->castaddr = ua_tmp->castaddr;
        ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        ua->user_lksb = ua_tmp->user_lksb;
        ua->old_mode = lkb->lkb_grmode;
 
-       error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
-                             ua, DLM_FAKE_USER_AST, &args);
+       error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
+                             DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        if (error)
                goto out_put;
 
        error = convert_lock(ls, lkb, &args);
 
-       if (error == -EINPROGRESS || error == -EAGAIN)
+       if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
                error = 0;
  out_put:
        dlm_put_lkb(lkb);
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
        dlm_put_lkb(lkb);
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
        dlm_put_lkb(lkb);
  out:
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
 
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{
+       struct dlm_lkb *lkb;
+       struct dlm_args args;
+       struct dlm_user_args *ua;
+       struct dlm_rsb *r;
+       int error;
+
+       dlm_lock_recovery(ls);  /* released by dlm_unlock_recovery() at out: */
+
+       error = find_lkb(ls, lkid, &lkb);
+       if (error)
+               goto out;  /* bypasses dlm_put_lkb(): the lookup failed */
+
+       ua = (struct dlm_user_args *)lkb->lkb_astparam;
+
+       error = set_unlock_args(flags, ua, &args);
+       if (error)
+               goto out_put;
+
+       /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+       r = lkb->lkb_resource;
+       hold_rsb(r);
+       lock_rsb(r);
+
+       error = validate_unlock_args(lkb, &args);
+       if (error)
+               goto out_r;  /* flag below is set only if validation passed */
+       lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+
+       error = _cancel_lock(r, lkb);
+ out_r:
+       unlock_rsb(r);
+       put_rsb(r);
+
+       if (error == -DLM_ECANCEL)  /* returned to the caller as 0 */
+               error = 0;
+       /* -EBUSY from validate_unlock_args() is also returned as 0 */
+       if (error == -EBUSY)
+               error = 0;
+ out_put:
+       dlm_put_lkb(lkb);
+ out:
+       dlm_unlock_recovery(ls);
+       return error;
+}
+
 /* lkb's that are removed from the waiters list by revert are just left on the
    orphans list with the granted orphan locks, to be freed by purge */
 
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
        struct dlm_lkb *lkb, *safe;
 
-       lock_recovery(ls);
+       dlm_lock_recovery(ls);
 
        while (1) {
                lkb = del_proc_lock(ls, proc);
                if (!lkb)
                        break;
+               del_timeout(lkb);
                if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
                        orphan_proc_lock(ls, lkb);
                else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
        }
 
        mutex_unlock(&ls->ls_clear_proc_locks);
-       unlock_recovery(ls);
+       dlm_unlock_recovery(ls);
 }
 
 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        if (nodeid != dlm_our_nodeid()) {
                error = send_purge(ls, nodeid, pid);
        } else {
-               lock_recovery(ls);
+               dlm_lock_recovery(ls);
                if (pid == current->pid)
                        purge_proc_locks(ls, proc);
                else
                        do_purge(ls, nodeid, pid);
-               unlock_recovery(ls);
+               dlm_unlock_recovery(ls);
        }
        return error;
 }
index 64fc4ec406683b2b855a4e7b059f442c8c7e34c6..1720313c22dfc455fd6c25f47d94fc02db075a64 100644 (file)
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
 
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-       uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+       uint32_t flags, void *name, unsigned int namelen,
+       unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-       int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+       int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+       unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 
 static inline int is_master(struct dlm_rsb *r)
index a677b2a5eed4dff8cc40be9c6f41c298565fa3bf..1dc72105ab125171b4bb3356ea0a09577178ec77 100644 (file)
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
        else
                kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
 
+       log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+       /* dlm_controld will see the uevent, do the necessary group management
+          and then write to sysfs to wake us */
+
        error = wait_event_interruptible(ls->ls_uevent_wait,
                        test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+       log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
        if (error)
                goto out;
 
        error = ls->ls_uevent_result;
  out:
+       if (error)
+               log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+                         error, ls->ls_uevent_result);
        return error;
 }
 
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
        struct dlm_ls *ls;
 
        while (!kthread_should_stop()) {
-               list_for_each_entry(ls, &lslist, ls_list)
-                       dlm_scan_rsbs(ls);
+               list_for_each_entry(ls, &lslist, ls_list) {
+                       if (dlm_lock_recovery_try(ls)) {
+                               dlm_scan_rsbs(ls);
+                               dlm_scan_timeout(ls);
+                               dlm_unlock_recovery(ls);
+                       }
+               }
                schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
        }
        return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
        struct dlm_ls *ls;
        int i, size, error = -ENOMEM;
+       int do_unreg = 0;
 
        if (namelen > DLM_LOCKSPACE_LEN)
                return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
                goto out;
        memcpy(ls->ls_name, name, namelen);
        ls->ls_namelen = namelen;
-       ls->ls_exflags = flags;
        ls->ls_lvblen = lvblen;
        ls->ls_count = 0;
        ls->ls_flags = 0;
 
+       if (flags & DLM_LSFL_TIMEWARN)
+               set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+       if (flags & DLM_LSFL_FS)
+               ls->ls_allocation = GFP_NOFS;
+       else
+               ls->ls_allocation = GFP_KERNEL;
+
+       /* ls_exflags are forced to match among nodes, and we don't
+          need to require all nodes to have TIMEWARN or FS set */
+       ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+
        size = dlm_config.ci_rsbtbl_size;
        ls->ls_rsbtbl_size = size;
 
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        mutex_init(&ls->ls_waiters_mutex);
        INIT_LIST_HEAD(&ls->ls_orphans);
        mutex_init(&ls->ls_orphans_mutex);
+       INIT_LIST_HEAD(&ls->ls_timeout);
+       mutex_init(&ls->ls_timeout_mutex);
 
        INIT_LIST_HEAD(&ls->ls_nodes);
        INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 
        init_waitqueue_head(&ls->ls_uevent_wait);
        ls->ls_uevent_result = 0;
+       init_completion(&ls->ls_members_done);
+       ls->ls_members_result = -1;
 
        ls->ls_recoverd_task = NULL;
        mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        error = dlm_recoverd_start(ls);
        if (error) {
                log_error(ls, "can't start dlm_recoverd %d", error);
-               goto out_rcomfree;
+               goto out_delist;
        }
 
-       dlm_create_debug_file(ls);
-
        error = kobject_setup(ls);
        if (error)
-               goto out_del;
+               goto out_stop;
 
        error = kobject_register(&ls->ls_kobj);
        if (error)
-               goto out_del;
+               goto out_stop;
+
+       /* let kobject handle freeing of ls if there's an error */
+       do_unreg = 1;
+
+       /* This uevent triggers dlm_controld in userspace to add us to the
+          group of nodes that are members of this lockspace (managed by the
+          cluster infrastructure.)  Once it's done that, it tells us who the
+          current lockspace members are (via configfs) and then tells the
+          lockspace to start running (via sysfs) in dlm_ls_start(). */
 
        error = do_uevent(ls, 1);
        if (error)
-               goto out_unreg;
+               goto out_stop;
+
+       wait_for_completion(&ls->ls_members_done);
+       error = ls->ls_members_result;
+       if (error)
+               goto out_members;
+
+       dlm_create_debug_file(ls);
+
+       log_debug(ls, "join complete");
 
        *lockspace = ls;
        return 0;
 
- out_unreg:
-       kobject_unregister(&ls->ls_kobj);
- out_del:
-       dlm_delete_debug_file(ls);
+ out_members:
+       do_uevent(ls, 0);
+       dlm_clear_members(ls);
+       kfree(ls->ls_node_array);
+ out_stop:
        dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
        spin_lock(&lslist_lock);
        list_del(&ls->ls_list);
        spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
  out_rsbfree:
        kfree(ls->ls_rsbtbl);
  out_lsfree:
-       kfree(ls);
+       if (do_unreg)
+               kobject_unregister(&ls->ls_kobj);
+       else
+               kfree(ls);
  out:
        module_put(THIS_MODULE);
        return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
        error = new_lockspace(name, namelen, lockspace, flags, lvblen);
        if (!error)
                ls_count++;
+       else if (!ls_count)
+               threads_stop();
  out:
        mutex_unlock(&ls_lock);
        return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
        dlm_clear_members_gone(ls);
        kfree(ls->ls_node_array);
        kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+       /* The ls structure will be freed when the kobject is done with */
 
        mutex_lock(&ls_lock);
        ls_count--;
index 27970a58d29b255aa1d09ebb71a9c11697f9e0c5..0553a6158dcbcf1bcc89d4e5d0733419ffc88214 100644 (file)
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
        struct connection *con = sock2con(sk);
-       if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+       if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
                queue_work(recv_workqueue, &con->rwork);
 }
 
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
        struct connection *con = sock2con(sk);
 
-       if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+       if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
                queue_work(send_workqueue, &con->swork);
 }
 
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
                        INIT_WORK(&othercon->rwork, process_recv_sockets);
                        set_bit(CF_IS_OTHERCON, &othercon->flags);
                        newcon->othercon = othercon;
+                       othercon->sock = newsock;
+                       newsock->sk->sk_user_data = othercon;
+                       add_sock(newsock, othercon);
+                       addcon = othercon;
+               }
+               else {
+                       printk("Extra connection from node %d attempted\n", nodeid);
+                       result = -EAGAIN;
+                       mutex_unlock(&newcon->sock_mutex);
+                       goto accept_err;
                }
-               othercon->sock = newsock;
-               newsock->sk->sk_user_data = othercon;
-               add_sock(newsock, othercon);
-               addcon = othercon;
        }
        else {
                newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
        down(&connections_lock);
        for (i = 0; i <= max_nodeid; i++) {
                con = __nodeid2con(i, 0);
-               if (con)
+               if (con) {
                        con->flags |= 0xFF;
+                       if (con->sock)
+                               con->sock->sk->sk_user_data = NULL;
+               }
        }
        up(&connections_lock);
 
index 162fbae58fe556df3150f96a8423718e47261d9a..eca2907f2386da93396d19d5195bd59101614b54 100644 (file)
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
 
 static int __init init_dlm(void)
 {
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
        if (error)
                goto out_debug;
 
+       error = dlm_netlink_init();
+       if (error)
+               goto out_user;
+
        printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
 
        return 0;
 
+ out_user:
+       dlm_user_exit();
  out_debug:
        dlm_unregister_debugfs();
  out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
 
 static void __exit exit_dlm(void)
 {
+       dlm_netlink_exit();
        dlm_user_exit();
        dlm_config_exit();
        dlm_memory_exit();
index 85e2897bd7400fc4155948fc8eb1c81cb1ff8e01..073599dced2ab4b2562f8f7df44db66aeaa079cc 100644 (file)
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
        *neg_out = neg;
 
        error = ping_members(ls);
+       if (!error || error == -EPROTO) {
+               /* new_lockspace() may be waiting to know if the config
+                  is good or bad */
+               ls->ls_members_result = error;
+               complete(&ls->ls_members_done);
+       }
        if (error)
                goto out;
 
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
        dlm_recoverd_suspend(ls);
        ls->ls_recover_status = 0;
        dlm_recoverd_resume(ls);
+
+       if (!ls->ls_recover_begin)
+               ls->ls_recover_begin = jiffies;
        return 0;
 }
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644 (file)
index 0000000..863b87d
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+static uint32_t dlm_nl_seqnum;
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+       .id             = GENL_ID_GENERATE,
+       .name           = DLM_GENL_NAME,
+       .version        = DLM_GENL_VERSION,
+};
+
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+       struct sk_buff *skb;
+       void *data;
+
+       skb = genlmsg_new(size, GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       /* add the message headers */
+       data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+       if (!data) {
+               nlmsg_free(skb);
+               return -EINVAL;
+       }
+
+       *skbp = skb;
+       return 0;
+}
+
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+       struct nlattr *ret;
+
+       ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+       if (!ret)
+               return NULL;
+       return nla_data(ret);
+}
+
+static int send_data(struct sk_buff *skb)
+{
+       struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+       void *data = genlmsg_data(genlhdr);
+       int rv;
+
+       rv = genlmsg_end(skb, data);
+       if (rv < 0) {
+               nlmsg_free(skb);
+               return rv;
+       }
+
+       return genlmsg_unicast(skb, listener_nlpid);
+}
+
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+       listener_nlpid = info->snd_pid;
+       printk("user_cmd nlpid %u\n", listener_nlpid);
+       return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+       .cmd            = DLM_CMD_HELLO,
+       .doit           = user_cmd,
+};
+
+int dlm_netlink_init(void)
+{
+       int rv;
+
+       rv = genl_register_family(&family);
+       if (rv)
+               return rv;
+
+       rv = genl_register_ops(&family, &dlm_nl_ops);
+       if (rv < 0)
+               goto err;
+       return 0;
+ err:
+       genl_unregister_family(&family);
+       return rv;
+}
+
+void dlm_netlink_exit(void)
+{
+       genl_unregister_ops(&family, &dlm_nl_ops);
+       genl_unregister_family(&family);
+}
+
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+       struct dlm_rsb *r = lkb->lkb_resource;
+       struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+
+       memset(data, 0, sizeof(struct dlm_lock_data));
+
+       data->version = DLM_LOCK_DATA_VERSION;
+       data->nodeid = lkb->lkb_nodeid;
+       data->ownpid = lkb->lkb_ownpid;
+       data->id = lkb->lkb_id;
+       data->remid = lkb->lkb_remid;
+       data->status = lkb->lkb_status;
+       data->grmode = lkb->lkb_grmode;
+       data->rqmode = lkb->lkb_rqmode;
+       data->timestamp = lkb->lkb_timestamp;
+       if (ua)
+               data->xid = ua->xid;
+       if (r) {
+               data->lockspace_id = r->res_ls->ls_global_id;
+               data->resource_namelen = r->res_length;
+               memcpy(data->resource_name, r->res_name, r->res_length);
+       }
+}
+
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+       struct dlm_lock_data *data;
+       struct sk_buff *send_skb;
+       size_t size;
+       int rv;
+
+       size = nla_total_size(sizeof(struct dlm_lock_data)) +
+              nla_total_size(0); /* why this? */
+
+       rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+       if (rv < 0)
+               return;
+
+       data = mk_data(send_skb);
+       if (!data) {
+               nlmsg_free(send_skb);
+               return;
+       }
+
+       fill_data(data, lkb);
+
+       send_data(send_skb);
+}
+
index 6bfbd61538094f72cb1001bc782cc246faf55552..e3a1527cbdbe7c8cd4bb0a8b6605e486b8ea538c 100644 (file)
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
        char *mb;
        int mb_len = sizeof(struct dlm_rcom) + len;
 
-       mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+       mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh) {
                log_print("create_rcom to %d type %d len %d ENOBUFS",
                          to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "version mismatch: %x nodeid %d: %x",
                          DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
                          rc->rc_header.h_version);
-               return -EINVAL;
+               return -EPROTO;
        }
 
        if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
                          ls->ls_lvblen, ls->ls_exflags,
                          nodeid, rf->rf_lvblen, rf->rf_lsflags);
-               return -EINVAL;
+               return -EPROTO;
        }
        return 0;
 }
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
        dlm_recover_process_copy(ls, rc_in);
 }
 
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+                            struct dlm_rcom *rc_in)
 {
        struct dlm_rcom *rc;
        struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
        char *mb;
        int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
-       mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+       mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh)
                return -ENOBUFS;
        memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
                log_print("lockspace %x from %d type %x not found",
                          hd->h_lockspace, nodeid, rc->rc_type);
                if (rc->rc_type == DLM_RCOM_STATUS)
-                       send_ls_not_ready(nodeid, rc);
+                       send_ls_not_ready(ls, nodeid, rc);
                return;
        }
 
index 3cb636d6024912b8aa96b55f6e0c731c11d56b82..66575997861cad1da5e02e9528deaaf6240751b7 100644 (file)
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
        dlm_clear_members_gone(ls);
 
+       dlm_adjust_timeouts(ls);
+
        error = enable_locking(ls, rv->seq);
        if (error) {
                log_debug(ls, "enable_locking failed %d", error);
index b0201ec325a79578761a686c85ab376944ec2abe..6438941ab1f8baf21e4ca88ce5d05b120b525f8b 100644 (file)
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
        __u8 mode;
        __u8 namelen;
-       __u16 flags;
+       __u16 unused;
+       __u32 flags;
        __u32 lkid;
        __u32 parent;
-
+       __u64 xid;
+       __u64 timeout;
        __u32 castparam;
        __u32 castaddr;
        __u32 bastparam;
        __u32 bastaddr;
        __u32 lksb;
-
        char lvb[DLM_USER_LVB_LEN];
        char name[0];
 };
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };
 
 struct dlm_lock_result32 {
+       __u32 version[3];
        __u32 length;
        __u32 user_astaddr;
        __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
                kb->i.lock.flags = kb32->i.lock.flags;
                kb->i.lock.lkid = kb32->i.lock.lkid;
                kb->i.lock.parent = kb32->i.lock.parent;
+               kb->i.lock.xid = kb32->i.lock.xid;
+               kb->i.lock.timeout = kb32->i.lock.timeout;
                kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
                kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
                kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
                          struct dlm_lock_result32 *res32)
 {
+       res32->version[0] = res->version[0];
+       res32->version[1] = res->version[1];
+       res32->version[2] = res->version[2];
+
        res32->user_astaddr = (__u32)(long)res->user_astaddr;
        res32->user_astparam = (__u32)(long)res->user_astparam;
        res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif
 
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+       switch (sb_status) {
+       case -DLM_EUNLOCK:
+               return 1;
+       case -DLM_ECANCEL:
+       case -ETIMEDOUT:
+       case -EDEADLK:
+               if (lkb->lkb_grmode == DLM_LOCK_IV)
+                       return 1;
+               break;
+       case -EAGAIN:
+               if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+                       return 1;
+               break;
+       }
+       return 0;
+}
+
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
    being removed and then remove that lkb from the orphans list and free it */
 
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
                log_debug(ls, "ast overlap %x status %x %x",
                          lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
 
-       /* Figure out if this lock is at the end of its life and no longer
-          available for the application to use.  The lkb still exists until
-          the final ast is read.  A lock becomes EOL in three situations:
-            1. a noqueue request fails with EAGAIN
-            2. an unlock completes with EUNLOCK
-            3. a cancel of a waiting request completes with ECANCEL
-          An EOL lock needs to be removed from the process's list of locks.
-          And we can't allow any new operation on an EOL lock.  This is
-          not related to the lifetime of the lkb struct which is managed
-          entirely by refcount. */
-
-       if (type == AST_COMP &&
-           lkb->lkb_grmode == DLM_LOCK_IV &&
-           ua->lksb.sb_status == -EAGAIN)
-               eol = 1;
-       else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-           (ua->lksb.sb_status == -DLM_ECANCEL &&
-            lkb->lkb_grmode == DLM_LOCK_IV))
-               eol = 1;
+       eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
        if (eol) {
                lkb->lkb_ast_type &= ~AST_BAST;
                lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
        ua->castaddr = params->castaddr;
        ua->bastparam = params->bastparam;
        ua->bastaddr = params->bastaddr;
+       ua->xid = params->xid;
 
        if (params->flags & DLM_LKF_CONVERT)
                error = dlm_user_convert(ls, ua,
                                         params->mode, params->flags,
-                                        params->lkid, params->lvb);
+                                        params->lkid, params->lvb,
+                                        (unsigned long) params->timeout);
        else {
                error = dlm_user_request(ls, ua,
                                         params->mode, params->flags,
                                         params->name, params->namelen,
-                                        params->parent);
+                                        (unsigned long) params->timeout);
                if (!error)
                        error = ua->lksb.sb_lkid;
        }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
        return error;
 }
 
+static int device_user_deadlock(struct dlm_user_proc *proc,
+                               struct dlm_lock_params *params)
+{
+       struct dlm_ls *ls;
+       int error;
+
+       ls = dlm_find_lockspace_local(proc->lockspace);
+       if (!ls)
+               return -ENOENT;
+
+       error = dlm_user_deadlock(ls, params->flags, params->lkid);
+
+       dlm_put_lockspace(ls);
+       return error;
+}
+
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
        int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
                return -EPERM;
 
        error = dlm_new_lockspace(params->name, strlen(params->name),
-                                 &lockspace, 0, DLM_USER_LVB_LEN);
+                                 &lockspace, params->flags, DLM_USER_LVB_LEN);
        if (error)
                return error;
 
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                error = device_user_unlock(proc, &kbuf->i.lock);
                break;
 
+       case DLM_USER_DEADLOCK:
+               if (!proc) {
+                       log_print("no locking on control device");
+                       goto out_sig;
+               }
+               error = device_user_deadlock(proc, &kbuf->i.lock);
+               break;
+
        case DLM_USER_CREATE_LOCKSPACE:
                if (proc) {
                        log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        int struct_len;
 
        memset(&result, 0, sizeof(struct dlm_lock_result));
+       result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+       result.version[1] = DLM_DEVICE_VERSION_MINOR;
+       result.version[2] = DLM_DEVICE_VERSION_PATCH;
        memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
        result.user_lksb = ua->user_lksb;
 
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        return error;
 }
 
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+       struct dlm_device_version ver;
+
+       memset(&ver, 0, sizeof(struct dlm_device_version));
+       ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+       ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+       ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+
+       if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+               return -EFAULT;
+       return sizeof(struct dlm_device_version);
+}
+
 /* a read returns a single ast described in a struct dlm_lock_result */
 
 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
        DECLARE_WAITQUEUE(wait, current);
        int error, type=0, bmode=0, removed = 0;
 
+       if (count == sizeof(struct dlm_device_version)) {
+               error = copy_version_to_user(buf, count);
+               return error;
+       }
+
+       if (!proc) {
+               log_print("non-version read from control device %zu", count);
+               return -EINVAL;
+       }
+
 #ifdef CONFIG_COMPAT
        if (count < sizeof(struct dlm_lock_result32))
 #else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
                }
        }
 
-       if (list_empty(&proc->asts)) {
-               spin_unlock(&proc->asts_spin);
-               return -EAGAIN;
-       }
-
        /* there may be both completion and blocking asts to return for
           the lkb, don't remove lkb from asts list unless no asts remain */
 
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
        .open    = ctl_device_open,
        .release = ctl_device_close,
+       .read    = device_read,
        .write   = device_write,
        .owner   = THIS_MODULE,
 };
index e3f1ada643ac110008cfdfdfe28f65d7e4374172..04ad0caebedb40ff302eaf56dbbdd90232d45c83 100644 (file)
@@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
        glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
-       mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+       mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
        ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
        recovery.o rgrp.o super.o sys.o trans.o util.o
 
index c53a5d2d0590ea0a90fc38ff5d470df88736b57e..cd805a66880ddd3f24459be574bd868d29f2d801 100644 (file)
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-               rg_blocks += rgd->rd_ri.ri_length;
+               rg_blocks += rgd->rd_length;
        }
 
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                        gfs2_free_data(ip, bstart, blen);
        }
 
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
        gfs2_dinode_out(ip, dibh->b_data);
 
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
                goto out_gunlock_q;
 
        error = gfs2_trans_begin(sdp,
-                       sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+                       sdp->sd_max_height + al->al_rgd->rd_length +
                        RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
        }
 
        ip->i_di.di_size = size;
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
        unsigned blocksize, iblock, length, pos;
        struct buffer_head *bh;
        struct page *page;
-       void *kaddr;
        int err;
 
        page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
+               err = 0;
        }
 
        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
-       kaddr = kmap_atomic(page, KM_USER0);
-       memset(kaddr + offset, 0, length);
-       flush_dcache_page(page);
-       kunmap_atomic(kaddr, KM_USER0);
+       zero_user_page(page, offset, length, KM_USER0);
 
 unlock:
        unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
        if (gfs2_is_stuffed(ip)) {
                ip->i_di.di_size = size;
-               ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+               ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
 
                if (!error) {
                        ip->i_di.di_size = size;
-                       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
                ip->i_di.di_height = 0;
                ip->i_di.di_goal_meta =
                        ip->i_di.di_goal_data =
-                       ip->i_num.no_addr;
+                       ip->i_no_addr;
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
        }
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
index 683cb5bda870fc372e79500c105ddf16c9ab93b9..3548d9f31e0d5c6918d74115a5fd0f9f2b133f48 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/freezer.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
        while (!kthread_should_stop()) {
                gfs2_scand_internal(sdp);
                t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+               if (freezing(current))
+                       refrigerator();
                schedule_timeout_interruptible(t);
        }
 
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
                wait_event_interruptible(sdp->sd_reclaim_wq,
                                         (atomic_read(&sdp->sd_reclaim_count) ||
                                         kthread_should_stop()));
+               if (freezing(current))
+                       refrigerator();
        }
 
        return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
        while (!kthread_should_stop()) {
                gfs2_check_journals(sdp);
                t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+               if (freezing(current))
+                       refrigerator();
                schedule_timeout_interruptible(t);
        }
 
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
                }
 
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+               if (freezing(current))
+                       refrigerator();
                schedule_timeout_interruptible(t);
        }
 
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
                gfs2_quota_scan(sdp);
 
                t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+               if (freezing(current))
+                       refrigerator();
                schedule_timeout_interruptible(t);
        }
 
index a96fa07b3f3bfdd382b0e41c565bfb26be71f82e..2beb2f401aa24a2b23cf511d813d21db06e521b7 100644 (file)
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
        if (ip->i_di.di_size < offset + size)
                ip->i_di.di_size = offset + size;
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(ip, dibh->b_data);
 
        brelse(dibh);
@@ -228,7 +228,7 @@ out:
 
        if (ip->i_di.di_size < offset + copied)
                ip->i_di.di_size = offset + copied;
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
 
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                if (dip->i_di.di_entries != g.offset) {
                        fs_warn(sdp, "Number of entries corrupt in dir %llu, "
                                "ip->i_di.di_entries (%u) != g.offset (%u)\n",
-                               (unsigned long long)dip->i_num.no_addr,
+                               (unsigned long long)dip->i_no_addr,
                                dip->i_di.di_entries,
                                g.offset);
                        error = -EIO;
@@ -1488,24 +1488,55 @@ out:
  * Returns: errno
  */
 
-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
-                   struct gfs2_inum_host *inum, unsigned int *type)
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
 {
        struct buffer_head *bh;
        struct gfs2_dirent *dent;
+       struct inode *inode;
+
+       dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+       if (dent) {
+               if (IS_ERR(dent))
+                       return ERR_PTR(PTR_ERR(dent));
+               inode = gfs2_inode_lookup(dir->i_sb, 
+                               be16_to_cpu(dent->de_type),
+                               be64_to_cpu(dent->de_inum.no_addr),
+                               be64_to_cpu(dent->de_inum.no_formal_ino));
+               brelse(bh);
+               return inode;
+       }
+       return ERR_PTR(-ENOENT);
+}
+
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+                  const struct gfs2_inode *ip)
+{
+       struct buffer_head *bh;
+       struct gfs2_dirent *dent;
+       int ret = -ENOENT;
 
        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
        if (dent) {
                if (IS_ERR(dent))
                        return PTR_ERR(dent);
-               if (inum)
-                       gfs2_inum_in(inum, (char *)&dent->de_inum);
-               if (type)
-                       *type = be16_to_cpu(dent->de_type);
+               if (ip) {
+                       if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
+                               goto out;
+                       if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+                           ip->i_no_formal_ino)
+                               goto out;
+                       if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+                           be16_to_cpu(dent->de_type))) {
+                               gfs2_consist_inode(GFS2_I(dir));
+                               ret = -EIO;
+                               goto out;
+                       }
+               }
+               ret = 0;
+out:
                brelse(bh);
-               return 0;
        }
-       return -ENOENT;
+       return ret;
 }
 
 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
  */
 
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-                const struct gfs2_inum_host *inum, unsigned type)
+                const struct gfs2_inode *nip, unsigned type)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        if (IS_ERR(dent))
                                return PTR_ERR(dent);
                        dent = gfs2_init_dirent(inode, dent, name, bh);
-                       gfs2_inum_out(inum, (char *)&dent->de_inum);
+                       gfs2_inum_out(nip, dent);
                        dent->de_type = cpu_to_be16(type);
                        if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
                                leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                                break;
                        gfs2_trans_add_bh(ip->i_gl, bh, 1);
                        ip->i_di.di_entries++;
-                       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                       ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        gfs2_dinode_out(ip, bh->b_data);
                        brelse(bh);
                        error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
                gfs2_consist_inode(dip);
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
        dip->i_di.di_entries--;
-       dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
  */
 
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                  struct gfs2_inum_host *inum, unsigned int new_type)
+                  const struct gfs2_inode *nip, unsigned int new_type)
 {
        struct buffer_head *bh;
        struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                return PTR_ERR(dent);
 
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
-       gfs2_inum_out(inum, (char *)&dent->de_inum);
+       gfs2_inum_out(nip, dent);
        dent->de_type = cpu_to_be16(new_type);
 
        if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                gfs2_trans_add_bh(dip->i_gl, bh, 1);
        }
 
-       dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-               rg_blocks += rgd->rd_ri.ri_length;
+               rg_blocks += rgd->rd_length;
        }
 
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
index 48fe89046bbad39af0277bb3a38d0e1f7f5de813..8a468cac9328abaa4a3b0444bf553caddcdac1ed 100644 (file)
@@ -16,15 +16,16 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;
 
-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
-                   struct gfs2_inum_host *inum, unsigned int *type);
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+                  const struct gfs2_inode *ip);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-                const struct gfs2_inum_host *inum, unsigned int type);
+                const struct gfs2_inode *ip, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                  filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                  struct gfs2_inum_host *new_inum, unsigned int new_type);
+                  const struct gfs2_inode *nip, unsigned int new_type);
 
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
 
index 5b83ca6acab1e5f93829722844a7333f43c305fd..2a7435b5c4dc54442fd464d2053742bea923a56d 100644 (file)
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        if (error)
                return error;
 
-       error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+       error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
                                 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
        if (error)
                goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
 
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-               ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+               ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                goto out_gunlock_q;
 
        error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-                                blks + al->al_rgd->rd_ri.ri_length +
+                                blks + al->al_rgd->rd_length +
                                 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                                            (er->er_mode & S_IFMT));
                        ip->i_inode.i_mode = er->er_mode;
                }
-               ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+               ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
                        (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
                ip->i_inode.i_mode = er->er_mode;
        }
-       ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
 
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-               ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+               ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-               rg_blocks += rgd->rd_ri.ri_length;
+               rg_blocks += rgd->rd_length;
        }
 
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
index 1815429a29789c7c507752cbb1be118b9954c832..3f0974e1afef8bbd2b706a418f616f38e2817f71 100644 (file)
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
        clear_bit(HIF_WAIT, &gh->gh_iflags);
-       smp_mb();
+       smp_mb__after_clear_bit();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
 
-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
         schedule();
         return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
 static void wait_on_holder(struct gfs2_holder *gh)
 {
        might_sleep();
-       wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+       wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+
+static void gfs2_demote_wake(struct gfs2_glock *gl) /* clear GLF_DEMOTE on @gl and wake anyone waiting on that bit */
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit(); /* barrier between the bit clear above and the wakeup below */
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE); /* wait_on_demote() waits on this same word and bit */
+}
+
+static void wait_on_demote(struct gfs2_glock *gl) /* wait until GLF_DEMOTE is clear in gl->gl_flags */
+{
+       might_sleep(); /* this function can block, so flag callers that are not allowed to sleep */
+       wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); /* just_schedule() only calls schedule() and returns 0 */
 }
 
 /**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
 
        if (gl->gl_state == gl->gl_demote_state ||
            gl->gl_state == LM_ST_UNLOCKED) {
-               clear_bit(GLF_DEMOTE, &gl->gl_flags);
+               gfs2_demote_wake(gl);
                return 0;
        }
        set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
  * practise: LM_ST_SHARED and LM_ST_UNLOCKED
  */
 
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
        spin_lock(&gl->gl_spin);
        if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
                gl->gl_demote_state = state;
                gl->gl_demote_time = jiffies;
+               if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+                   gl->gl_object) {
+                       struct inode *inode = igrab(gl->gl_object);
+                       spin_unlock(&gl->gl_spin);
+                       if (inode) {
+                               d_prune_aliases(inode);
+                               iput(inode);
+                       }
+                       return;
+               }
        } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
                gl->gl_demote_state = state;
        }
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
                if (ret & LM_OUT_CANCELED)
                        op_done = 0;
                else
-                       clear_bit(GLF_DEMOTE, &gl->gl_flags);
+                       gfs2_demote_wake(gl);
        } else {
                spin_lock(&gl->gl_spin);
                list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
        gfs2_assert_warn(sdp, !ret);
 
        state_change(gl, LM_ST_UNLOCKED);
-       clear_bit(GLF_DEMOTE, &gl->gl_flags);
+       gfs2_demote_wake(gl);
 
        if (glops->go_inval)
                glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        const struct gfs2_glock_operations *glops = gl->gl_ops;
 
        if (gh->gh_flags & GL_NOCACHE)
-               handle_callback(gl, LM_ST_UNLOCKED);
+               handle_callback(gl, LM_ST_UNLOCKED, 0);
 
        gfs2_glmutex_lock(gl);
 
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        spin_unlock(&gl->gl_spin);
 }
 
+void gfs2_glock_dq_wait(struct gfs2_holder *gh) /* dequeue @gh, then wait until GLF_DEMOTE is clear on its glock */
+{
+       struct gfs2_glock *gl = gh->gh_gl; /* read before the dequeue; gh is not dereferenced afterwards */
+       gfs2_glock_dq(gh);
+       wait_on_demote(gl); /* may sleep in TASK_UNINTERRUPTIBLE */
+}
+
 /**
  * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
  * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
  * @num_gh: the number of structures
  * @ghs: an array of struct gfs2_holder structures
  *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
  *
  * Returns: 0 on success (all glocks acquired),
  *          errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-       int *e;
-       unsigned int x;
-       int borked = 0, serious = 0;
+       struct gfs2_holder *tmp[4]; /* on-stack pointer array, used when 2 <= num_gh <= 4 */
+       struct gfs2_holder **pph = tmp; /* scratch array handed to nq_m_sync() */
        int error = 0;
 
-       if (!num_gh)
+       switch(num_gh) {
+       case 0: /* nothing to acquire */
                return 0;
-
-       if (num_gh == 1) {
+       case 1: /* single holder: enqueue it directly with TRY/ASYNC cleared */
                ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
                return gfs2_glock_nq(ghs);
-       }
-
-       e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-       if (!e)
-               return -ENOMEM;
-
-       for (x = 0; x < num_gh; x++) {
-               ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
-               error = gfs2_glock_nq(&ghs[x]);
-               if (error) {
-                       borked = 1;
-                       serious = error;
-                       num_gh = x;
+       default:
+               if (num_gh <= 4) /* small case: tmp[] is large enough */
                        break;
-               }
-       }
-
-       for (x = 0; x < num_gh; x++) {
-               error = e[x] = glock_wait_internal(&ghs[x]);
-               if (error) {
-                       borked = 1;
-                       if (error != GLR_TRYFAILED && error != GLR_CANCELED)
-                               serious = error;
-               }
+               pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS); /* more than 4 holders: heap array, one pointer slot per holder */
+               if (!pph)
+                       return -ENOMEM;
        }
 
-       if (!borked) {
-               kfree(e);
-               return 0;
-       }
-
-       for (x = 0; x < num_gh; x++)
-               if (!e[x])
-                       gfs2_glock_dq(&ghs[x]);
-
-       if (serious)
-               error = serious;
-       else {
-               for (x = 0; x < num_gh; x++)
-                       gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
-                                         &ghs[x]);
-               error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
-       }
+       error = nq_m_sync(num_gh, ghs, pph); /* always goes through nq_m_sync(); the earlier TRY|ASYNC first pass is removed */
 
-       kfree(e);
+       if (pph != tmp) /* free only the kmalloc'd array, never the stack one */
+               kfree(pph);
 
        return error;
 }
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
        if (!gl)
                return;
 
-       handle_callback(gl, state);
+       handle_callback(gl, state, 1);
 
        spin_lock(&gl->gl_spin);
        run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-                       handle_callback(gl, LM_ST_UNLOCKED);
+                       handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
 
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED)
-                       handle_callback(gl, LM_ST_UNLOCKED);
+                       handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
 }
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
 
        print_dbg(gi, "  Inode:\n");
        print_dbg(gi, "    num = %llu/%llu\n",
-                   ip->i_num.no_formal_ino, ip->i_num.no_addr);
+                 (unsigned long long)ip->i_no_formal_ino,
+                 (unsigned long long)ip->i_no_addr);
        print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
        print_dbg(gi, "    i_flags =");
        for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
        }
        if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
                print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-                         gl->gl_demote_state,
-                         (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+                         gl->gl_demote_state, (unsigned long long)
+                         (jiffies - gl->gl_demote_time)*(1000000/HZ));
        }
        if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
                if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
index b3e152db70c8b9b993bd7a03c845495967cf37b4..7721ca3fff9eee2c4789d141f209949a819f21e2 100644 (file)
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 
 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
index 7b82657a991096bea7291522f2bd357a5213d24e..777ca46010e875907a49d5972f937cf5f83cdba1 100644 (file)
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
                ip = NULL;
 
        if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-               gfs2_log_flush(gl->gl_sbd, gl);
                if (ip)
                        filemap_fdatawrite(ip->i_inode.i_mapping);
+               gfs2_log_flush(gl->gl_sbd, gl);
                gfs2_meta_sync(gl);
                if (ip) {
                        struct address_space *mapping = ip->i_inode.i_mapping;
index d995441373abb5132dc4baf792a9ef14c27b985e..170ba93829c037881996c944fc8ae02adfcd2c52 100644 (file)
@@ -28,6 +28,14 @@ struct gfs2_sbd;
 
 typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
 
+struct gfs2_log_header_host { /* NOTE(review): presumably the CPU-byte-order form of the on-disk log header - confirm */
+       u64 lh_sequence;        /* Sequence number of this transaction */
+       u32 lh_flags;           /* GFS2_LOG_HEAD_... */
+       u32 lh_tail;            /* Block number of log tail */
+       u32 lh_blkno;           /* NOTE(review): undocumented; looks like a log block number - confirm */
+       u32 lh_hash;            /* NOTE(review): undocumented; looks like a hash of the header - confirm */
+};
+
 /*
  * Structure of operations that are associated with each
  * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
        u32 bi_len;
 };
 
+struct gfs2_rgrp_host { /* per-resource-group values; embedded in struct gfs2_rgrpd as rd_rg */
+       u32 rg_flags;
+       u32 rg_free;            /* NOTE(review): presumably the number of free blocks - confirm */
+       u32 rg_dinodes;         /* NOTE(review): presumably the number of dinodes - confirm */
+       u64 rg_igeneration;     /* NOTE(review): presumably an inode generation counter - confirm */
+};
+
 struct gfs2_rgrpd {
        struct list_head rd_list;       /* Link with superblock */
        struct list_head rd_list_mru;
        struct list_head rd_recent;     /* Recently used rgrps */
        struct gfs2_glock *rd_gl;       /* Glock for this rgrp */
-       struct gfs2_rindex_host rd_ri;
+       u64 rd_addr;                    /* grp block disk address */
+       u64 rd_data0;                   /* first data location */
+       u32 rd_length;                  /* length of rgrp header in fs blocks */
+       u32 rd_data;                    /* num of data blocks in rgrp */
+       u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        struct gfs2_rgrp_host rd_rg;
        u64 rd_rg_vn;
        struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
        u32 rd_last_alloc_data;
        u32 rd_last_alloc_meta;
        struct gfs2_sbd *rd_sbd;
+       unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };
 
 enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
        GIF_SW_PAGED            = 3,
 };
 
+struct gfs2_dinode_host { /* CPU-byte-order dinode fields; kept in struct gfs2_inode as i_di and filled by gfs2_dinode_in() */
+       u64 di_size;            /* number of bytes in file */
+       u64 di_blocks;          /* number of blocks in file */
+       u64 di_goal_meta;       /* rgrp to alloc from next */
+       u64 di_goal_data;       /* data block goal */
+       u64 di_generation;      /* generation number for NFS */
+       u32 di_flags;           /* GFS2_DIF_... */
+       u16 di_height;          /* height of metadata */
+       /* These only apply to directories  */
+       u16 di_depth;           /* Number of bits in the table */
+       u32 di_entries;         /* The number of entries in the directory */
+       u64 di_eattr;           /* extended attribute block number */
+};
+
 struct gfs2_inode {
        struct inode i_inode;
-       struct gfs2_inum_host i_num;
-
+       u64 i_no_addr;
+       u64 i_no_formal_ino;
        unsigned long i_flags;          /* GIF_... */
 
        struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
        QDF_LOCKED              = 2,
 };
 
-struct gfs2_quota_lvb {
-        __be32 qb_magic;
-        u32 __pad;
-        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
-        __be64 qb_warn;       /* Warn user when alloc is above this # */
-        __be64 qb_value;       /* Current # blocks allocated */
-};
-
 struct gfs2_quota_data {
        struct list_head qd_list;
        unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
 
        unsigned int tr_num_buf;
        unsigned int tr_num_buf_new;
+       unsigned int tr_num_databuf_new;
        unsigned int tr_num_buf_rm;
+       unsigned int tr_num_databuf_rm;
        struct list_head tr_list_buf;
 
        unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
        unsigned int jd_blocks;
 };
 
+struct gfs2_statfs_change_host { /* signed statfs counters; NOTE(review): presumably CPU-byte-order form of an on-disk record - confirm */
+       s64 sc_total;           /* NOTE(review): presumably total blocks - confirm */
+       s64 sc_free;            /* NOTE(review): presumably free blocks - confirm */
+       s64 sc_dinodes;         /* NOTE(review): presumably dinode count - confirm */
+};
+
 #define GFS2_GLOCKD_DEFAULT    1
 #define GFS2_GLOCKD_MAX                16
 
@@ -426,6 +461,28 @@ enum {
 
 #define GFS2_FSNAME_LEN                256
 
+struct gfs2_inum_host { /* inode identifier pair; used for sb_master_dir and sb_root_dir in struct gfs2_sb_host */
+       u64 no_formal_ino;
+       u64 no_addr;            /* NOTE(review): presumably the inode's disk block address - confirm */
+};
+
+struct gfs2_sb_host { /* NOTE(review): presumably the CPU-byte-order copy of the on-disk superblock fields - confirm */
+       u32 sb_magic;
+       u32 sb_type;
+       u32 sb_format;
+
+       u32 sb_fs_format;
+       u32 sb_multihost_format;
+       u32 sb_bsize;
+       u32 sb_bsize_shift;     /* NOTE(review): presumably log2 of sb_bsize - confirm */
+
+       struct gfs2_inum_host sb_master_dir;
+       struct gfs2_inum_host sb_root_dir;
+
+       char sb_lockproto[GFS2_LOCKNAME_LEN]; /* fixed-size buffer, GFS2_LOCKNAME_LEN bytes */
+       char sb_locktable[GFS2_LOCKNAME_LEN]; /* fixed-size buffer, GFS2_LOCKNAME_LEN bytes */
+};
+
 struct gfs2_sbd {
        struct super_block *sd_vfs;
        struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
 
        unsigned int sd_log_blks_reserved;
        unsigned int sd_log_commited_buf;
+       unsigned int sd_log_commited_databuf;
        unsigned int sd_log_commited_revoke;
 
        unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
        unsigned int sd_log_num_rg;
        unsigned int sd_log_num_databuf;
        unsigned int sd_log_num_jdata;
-       unsigned int sd_log_num_hdrs;
 
        struct list_head sd_log_le_gl;
        struct list_head sd_log_le_buf;
index df0b8b3018b934e72a30830b5a2936498c969be9..34f7bcdea1e972e00187013eddd98272d65a40f0 100644 (file)
 #include "trans.h"
 #include "util.h"
 
+struct gfs2_inum_range_host { /* CPU-byte-order form of struct gfs2_inum_range; see gfs2_inum_range_in()/gfs2_inum_range_out() */
+       u64 ir_start;
+       u64 ir_length;
+};
+
 static int iget_test(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-       struct gfs2_inum_host *inum = opaque;
+       u64 *no_addr = opaque;
 
-       if (ip->i_num.no_addr == inum->no_addr &&
+       if (ip->i_no_addr == *no_addr &&
            inode->i_private != NULL)
                return 1;
 
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-       struct gfs2_inum_host *inum = opaque;
+       u64 *no_addr = opaque;
 
-       ip->i_num = *inum;
-       inode->i_ino = inum->no_addr;
+       inode->i_ino = (unsigned long)*no_addr;
+       ip->i_no_addr = *no_addr;
        return 0;
 }
 
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) /* inode-cache lookup keyed by @no_addr, via ilookup5() */
+{
+       unsigned long hash = (unsigned long)no_addr; /* may truncate where long is 32 bits; iget_test() still compares the full u64 */
+       return ilookup5(sb, hash, iget_test, &no_addr);
+}
+
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) /* get the inode keyed by @no_addr, via iget5_locked() */
 {
-       return ilookup5(sb, (unsigned long)inum->no_addr,
-                       iget_test, inum);
+       unsigned long hash = (unsigned long)no_addr; /* may truncate where long is 32 bits; iget_test() still compares the full u64 */
+       return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); /* iget_set() stores no_addr in i_ino and ip->i_no_addr */
 }
 
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * The GFS2 lookup code fills in the VFS inode contents from information
+ * obtained from the directory entry inside gfs2_inode_lookup(). This causes
+ * problems on the NFS code path, whose get_dentry routine does not have the
+ * relevant directory entry when gfs2_inode_lookup() is invoked, so this part
+ * of gfs2_inode_lookup() is split out into gfs2_set_iop().
+ *
+ * Also cleans up I_LOCK and I_NEW (via unlock_new_inode()).
+ **/
+
+void gfs2_set_iop(struct inode *inode) /* pick i_op/i_fop/a_ops from inode->i_mode, then unlock the new inode */
 {
-       return iget5_locked(sb, (unsigned long)inum->no_addr,
-                    iget_test, iget_set, inum);
+       umode_t mode = inode->i_mode; /* i_mode must already be set by the caller */
+
+       if (S_ISREG(mode)) {
+               inode->i_op = &gfs2_file_iops;
+               inode->i_fop = &gfs2_file_fops;
+               inode->i_mapping->a_ops = &gfs2_file_aops; /* only regular files get address-space ops here */
+       } else if (S_ISDIR(mode)) {
+               inode->i_op = &gfs2_dir_iops;
+               inode->i_fop = &gfs2_dir_fops;
+       } else if (S_ISLNK(mode)) {
+               inode->i_op = &gfs2_symlink_iops; /* i_fop is not set for symlinks */
+       } else {
+               inode->i_op = &gfs2_dev_iops; /* every other file type gets the device inode ops */
+       }
+
+       unlock_new_inode(inode); /* NOTE(review): assumes the inode is still I_NEW, as in gfs2_inode_lookup() - confirm other callers */
 }
 
 /**
  * gfs2_inode_lookup - Lookup an inode
  * @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
  * @type: The type of the inode
  *
  * Returns: A VFS inode, or an error
  */
 
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, 
+                               unsigned int type,
+                               u64 no_addr,
+                               u64 no_formal_ino)
 {
-       struct inode *inode = gfs2_iget(sb, inum);
+       struct inode *inode = gfs2_iget(sb, no_addr);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_glock *io_gl;
        int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
 
        if (inode->i_state & I_NEW) {
                struct gfs2_sbd *sdp = GFS2_SB(inode);
-               umode_t mode = DT2IF(type);
                inode->i_private = ip;
-               inode->i_mode = mode;
-
-               if (S_ISREG(mode)) {
-                       inode->i_op = &gfs2_file_iops;
-                       inode->i_fop = &gfs2_file_fops;
-                       inode->i_mapping->a_ops = &gfs2_file_aops;
-               } else if (S_ISDIR(mode)) {
-                       inode->i_op = &gfs2_dir_iops;
-                       inode->i_fop = &gfs2_dir_fops;
-               } else if (S_ISLNK(mode)) {
-                       inode->i_op = &gfs2_symlink_iops;
-               } else {
-                       inode->i_op = &gfs2_dev_iops;
-               }
+               ip->i_no_formal_ino = no_formal_ino;
 
-               error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+               error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
                if (unlikely(error))
                        goto fail;
                ip->i_gl->gl_object = ip;
 
-               error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+               error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
                if (unlikely(error))
                        goto fail_put;
 
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
                error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
                if (unlikely(error))
                        goto fail_iopen;
+               ip->i_iopen_gh.gh_gl->gl_object = ip;
 
                gfs2_glock_put(io_gl);
-               unlock_new_inode(inode);
+
+               if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+                       goto gfs2_nfsbypass;
+
+               inode->i_mode = DT2IF(type);
+
+               /*
+                * We must read the inode in order to work out its type in
+                * this case. Note that this doesn't happen often as we normally
+                * know the type beforehand. This code path only occurs during
+                * unlinked inode recovery (where it is safe to do this glock,
+                * which is not true in the general case).
+                */
+               if (type == DT_UNKNOWN) {
+                       struct gfs2_holder gh;
+                       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+                       if (unlikely(error))
+                               goto fail_glock;
+                       /* Inode is now uptodate */
+                       gfs2_glock_dq_uninit(&gh);
+               }
+
+               gfs2_set_iop(inode);
        }
 
+gfs2_nfsbypass:
        return inode;
+fail_glock:
+       gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
        gfs2_glock_put(io_gl);
 fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        struct gfs2_dinode_host *di = &ip->i_di;
        const struct gfs2_dinode *str = buf;
 
-       if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+       if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
                if (gfs2_consist_inode(ip))
                        gfs2_dinode_print(ip);
                return -EIO;
        }
-       if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
-               return -ESTALE;
-
+       ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
        ip->i_inode.i_rdev = 0;
        switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        di->di_blocks = be64_to_cpu(str->di_blocks);
        gfs2_set_inode_blocks(&ip->i_inode);
        ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-       ip->i_inode.i_atime.tv_nsec = 0;
+       ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-       ip->i_inode.i_mtime.tv_nsec = 0;
+       ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-       ip->i_inode.i_ctime.tv_nsec = 0;
+       ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
 
        di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
        di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
        if (error)
                goto out_qs;
 
-       rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+       rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        if (!rgd) {
                gfs2_consist_inode(ip);
                error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
        else
                drop_nlink(&ip->i_inode);
 
-       ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+       ip->i_inode.i_ctime = CURRENT_TIME;
 
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
        struct super_block *sb = dir->i_sb;
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_holder d_gh;
-       struct gfs2_inum_host inum;
-       unsigned int type;
-       int error;
+       int error = 0;
        struct inode *inode = NULL;
        int unlock = 0;
 
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
                        goto out;
        }
 
-       error = gfs2_dir_search(dir, name, &inum, &type);
-       if (error)
-               goto out;
-
-       inode = gfs2_inode_lookup(sb, &inum, type);
-
+       inode = gfs2_dir_search(dir, name);
+       if (IS_ERR(inode))
+               error = PTR_ERR(inode);
 out:
        if (unlock)
                gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
        return inode ? inode : ERR_PTR(error);
 }
 
+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf) /* decode the big-endian struct gfs2_inum_range at @buf into @ir */
+{
+       const struct gfs2_inum_range *str = buf; /* @buf is interpreted as a struct gfs2_inum_range */
+
+       ir->ir_start = be64_to_cpu(str->ir_start);
+       ir->ir_length = be64_to_cpu(str->ir_length);
+}
+
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf) /* encode @ir into @buf as a big-endian struct gfs2_inum_range */
+{
+       struct gfs2_inum_range *str = buf; /* @buf is written as a struct gfs2_inum_range */
+
+       str->ir_start = cpu_to_be64(ir->ir_start);
+       str->ir_length = cpu_to_be64(ir->ir_length);
+}
+
 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (!dip->i_inode.i_nlink)
                return -EPERM;
 
-       error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+       error = gfs2_dir_check(&dip->i_inode, name, NULL);
        switch (error) {
        case -ENOENT:
                error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
                *gid = current->fsgid;
 }
 
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
-                       u64 *generation)
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
        if (error)
                goto out_ipreserv;
 
-       inum->no_addr = gfs2_alloc_di(dip, generation);
+       *no_addr = gfs2_alloc_di(dip, generation);
 
        gfs2_trans_end(sdp);
 
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_dinode *di;
        struct buffer_head *dibh;
+       struct timespec tv = CURRENT_TIME;
 
        dibh = gfs2_meta_new(gl, inum->no_addr);
        gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_nlink = 0;
        di->di_size = 0;
        di->di_blocks = cpu_to_be64(1);
-       di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+       di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
        di->di_major = cpu_to_be32(MAJOR(dev));
        di->di_minor = cpu_to_be32(MINOR(dev));
        di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_entries = 0;
        memset(&di->__pad4, 0, sizeof(di->__pad4));
        di->di_eattr = 0;
+       di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+       di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+       di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
        memset(&di->di_reserved, 0, sizeof(di->di_reserved));
 
        brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
 
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                        al->al_rgd->rd_ri.ri_length +
+                                        al->al_rgd->rd_length +
                                         2 * RES_DINODE +
                                         RES_STATFS + RES_QUOTA, 0);
                if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
        }
 
-       error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+       error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
        if (error)
                goto fail_end_trans;
 
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                           unsigned int mode, dev_t dev)
 {
-       struct inode *inode;
+       struct inode *inode = NULL;
        struct gfs2_inode *dip = ghs->gh_gl->gl_object;
        struct inode *dir = &dip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-       struct gfs2_inum_host inum;
+       struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
        u64 generation;
 
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock;
 
-       error = alloc_dinode(dip, &inum, &generation);
+       error = alloc_dinode(dip, &inum.no_addr, &generation);
        if (error)
                goto fail_gunlock;
 
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock2;
 
-       inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+       inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+                                       inum.no_addr,
+                                       inum.no_formal_ino);
        if (IS_ERR(inode))
                goto fail_gunlock2;
 
        error = gfs2_inode_refresh(GFS2_I(inode));
        if (error)
-               goto fail_iput;
+               goto fail_gunlock2;
 
        error = gfs2_acl_create(dip, GFS2_I(inode));
        if (error)
-               goto fail_iput;
+               goto fail_gunlock2;
 
        error = gfs2_security_init(dip, GFS2_I(inode));
        if (error)
-               goto fail_iput;
+               goto fail_gunlock2;
 
        error = link_dinode(dip, name, GFS2_I(inode));
        if (error)
-               goto fail_iput;
+               goto fail_gunlock2;
 
        if (!inode)
                return ERR_PTR(-ENOMEM);
        return inode;
 
-fail_iput:
-       iput(inode);
 fail_gunlock2:
        gfs2_glock_dq_uninit(ghs + 1);
+       if (inode)
+               iput(inode);
 fail_gunlock:
        gfs2_glock_dq(ghs);
 fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
  */
 
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                  struct gfs2_inode *ip)
+                  const struct gfs2_inode *ip)
 {
-       struct gfs2_inum_host inum;
-       unsigned int type;
        int error;
 
        if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (error)
                return error;
 
-       error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+       error = gfs2_dir_check(&dip->i_inode, name, ip);
        if (error)
                return error;
 
-       if (!gfs2_inum_equal(&inum, &ip->i_num))
-               return -ENOENT;
-
-       if (IF2DT(ip->i_inode.i_mode) != type) {
-               gfs2_consist_inode(dip);
-               return -EIO;
-       }
-
        return 0;
 }
 
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = gl->gl_object;
-       s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+       s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
        unsigned int state;
        int flags;
        int error;
+       struct timespec tv = CURRENT_TIME;
 
        if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
            gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
            (sdp->sd_vfs->s_flags & MS_RDONLY))
                return 0;
 
-       curtime = get_seconds();
-       if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+       if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
                gfs2_glock_dq(gh);
                gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
                                   gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                /* Verify that atime hasn't been updated while we were
                   trying to get exclusive lock. */
 
-               curtime = get_seconds();
-               if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+               tv = CURRENT_TIME;
+               if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
                        struct buffer_head *dibh;
                        struct gfs2_dinode *di;
 
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                        if (error)
                                goto fail_end_trans;
 
-                       ip->i_inode.i_atime.tv_sec = curtime;
+                       ip->i_inode.i_atime = tv;
 
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        di = (struct gfs2_dinode *)dibh->b_data;
                        di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+                       di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
                        brelse(dibh);
 
                        gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        return error;
 }
 
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{      /* Write @ip's state into the struct gfs2_dinode at @buf, converting every field to big-endian. */
+       const struct gfs2_dinode_host *di = &ip->i_di; /* CPU-order dinode fields held in @ip */
+       struct gfs2_dinode *str = buf;  /* only the members assigned below are written */
+
+       str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);      /* header is built from constants, not from @ip */
+       str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+       str->di_header.__pad0 = 0;
+       str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+       str->di_header.__pad1 = 0;
+       str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);       /* both inode numbers come from @ip directly */
+       str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+       str->di_mode = cpu_to_be32(ip->i_inode.i_mode); /* mode, uid, gid, nlink and times come from ip->i_inode */
+       str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
+       str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
+       str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
+       str->di_size = cpu_to_be64(di->di_size);        /* size and block count come from ip->i_di */
+       str->di_blocks = cpu_to_be64(di->di_blocks);
+       str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);        /* seconds part; nanoseconds are stored further down */
+       str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
+       str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+
+       str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+       str->di_goal_data = cpu_to_be64(di->di_goal_data);
+       str->di_generation = cpu_to_be64(di->di_generation);
+
+       str->di_flags = cpu_to_be32(di->di_flags);
+       str->di_height = cpu_to_be16(di->di_height);
+       str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&     /* GFS2_FORMAT_DE only for a directory ... */
+                                            !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?   /* ... whose GFS2_DIF_EXHASH flag is clear */
+                                            GFS2_FORMAT_DE : 0);       /* every other inode gets 0 */
+       str->di_depth = cpu_to_be16(di->di_depth);
+       str->di_entries = cpu_to_be32(di->di_entries);
+
+       str->di_eattr = cpu_to_be64(di->di_eattr);
+       str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);  /* nanosecond parts of the three timestamps */
+       str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
+       str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+}
+
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{      /* Log @ip's two inode numbers and selected ip->i_di fields, one printk(KERN_INFO) line each. */
+       const struct gfs2_dinode_host *di = &ip->i_di;
+
+       printk(KERN_INFO "  no_formal_ino = %llu\n",
+              (unsigned long long)ip->i_no_formal_ino);        /* cast matches the %llu conversion */
+       printk(KERN_INFO "  no_addr = %llu\n",
+              (unsigned long long)ip->i_no_addr);
+       printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
+       printk(KERN_INFO "  di_blocks = %llu\n",
+              (unsigned long long)di->di_blocks);
+       printk(KERN_INFO "  di_goal_meta = %llu\n",
+              (unsigned long long)di->di_goal_meta);
+       printk(KERN_INFO "  di_goal_data = %llu\n",
+              (unsigned long long)di->di_goal_data);
+       printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);        /* flags shown as 8 hex digits */
+       printk(KERN_INFO "  di_height = %u\n", di->di_height);
+       printk(KERN_INFO "  di_depth = %u\n", di->di_depth);
+       printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+       printk(KERN_INFO "  di_eattr = %llu\n",
+              (unsigned long long)di->di_eattr);
+}
+
index b57f448b15bc42e09c18c2889fe866bdc2efacc6..4517ac82c01c7953b75bc65375e45328262b86dc 100644 (file)
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__
 
-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
        return !ip->i_di.di_height;
 }
 
-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
        return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }
 
-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
 {
        return S_ISDIR(ip->i_inode.i_mode);
 }
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
                (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 }
 
+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+                                 u64 no_formal_ino)
+{      /* Returns non-zero only when both of @ip's inode numbers equal the given pair. */
+       return ip->i_no_addr == no_addr && ip->i_no_formal_ino == no_formal_ino;
+}
+
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+                                struct gfs2_dirent *dent)
+{      /* Store @ip's two inode numbers into @dent->de_inum as big-endian 64-bit values. */
+       dent->de_inum.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+       dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr);
+}
+
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+void gfs2_set_iop(struct inode *inode);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+                               u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
 int gfs2_inode_refresh(struct gfs2_inode *ip);
 
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
                struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                  struct gfs2_inode *ip);
+                  const struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);
 
 #endif /* __INODE_DOT_H__ */
 
index c305255bfe8ade71b4e53b8d0b7c5389da0623a0..542a797ac89a453f8cf9d691ad56c8888ac092cb 100644 (file)
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
        lp->cur = DLM_LOCK_IV;
        lp->lvb = NULL;
        lp->hold_null = NULL;
-       init_completion(&lp->ast_wait);
        INIT_LIST_HEAD(&lp->clist);
        INIT_LIST_HEAD(&lp->blist);
        INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
        lp->lksb.sb_lvbptr = NULL;
 }
 
+static int gdlm_ast_wait(void *word)    /* action callback passed to wait_on_bit() in hold_null_lock() */
+{
+       schedule();     /* give up the CPU; @word is not used */
+       return 0;       /* always reports 0 to the caller */
+}
+
 /* This can do a synchronous dlm request (requiring a lock_dlm thread to get
    the completion) because gfs won't call hold_lvb() during a callback (from
    the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
        lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
        set_bit(LFL_NOBAST, &lpn->flags);
        set_bit(LFL_INLOCK, &lpn->flags);
+       set_bit(LFL_AST_WAIT, &lpn->flags);
 
-       init_completion(&lpn->ast_wait);
        gdlm_do_lock(lpn);
-       wait_for_completion(&lpn->ast_wait);
+       wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
        error = lpn->lksb.sb_status;
        if (error) {
                printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
index d074c6e6f9bfc0cacbbaff393f9c4f6c4a4fd454..24d70f73b65124f3a6960099a82dcf5544b0416d 100644 (file)
@@ -101,6 +101,7 @@ enum {
        LFL_NOBAST              = 10,
        LFL_HEADQUE             = 11,
        LFL_UNLOCK_DELETE       = 12,
+       LFL_AST_WAIT            = 13,
 };
 
 struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
        unsigned long           flags;          /* lock_dlm flags LFL_ */
 
        int                     bast_mode;      /* protected by async_lock */
-       struct completion       ast_wait;
 
        struct list_head        clist;          /* complete */
        struct list_head        blist;          /* blocking */
index 1d8faa3da8af1f984168de88796f89281c8773ea..41c5b04caaba4194712fe955a76750462066d4b2 100644 (file)
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
 
        error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
                                  &ls->dlm_lockspace,
-                                 nodir ? DLM_LSFL_NODIR : 0,
+                                 DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
                                  GDLM_LVB_SIZE);
        if (error) {
                log_error("dlm_new_lockspace error %d", error);
index f82495e18c2d7b32686bbf611177396ff6b10da5..fba1f1d87e4fbe92cceb66fd44eb7ee40a8a6d84 100644 (file)
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        op->info.number         = name->ln_number;
        op->info.start          = fl->fl_start;
        op->info.end            = fl->fl_end;
-
+       op->info.owner          = (__u64)(long) fl->fl_owner;
 
        send_op(op);
        wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        }
        spin_unlock(&ops_lock);
 
+       /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+          -ENOENT if there are no locks on the file */
+
        rv = op->info.rv;
 
        fl->fl_type = F_UNLCK;
        if (rv == -ENOENT)
                rv = 0;
-       else if (rv == 0 && op->info.pid != fl->fl_pid) {
+       else if (rv > 0) {
                fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
                fl->fl_pid = op->info.pid;
                fl->fl_start = op->info.start;
                fl->fl_end = op->info.end;
+               rv = 0;
        }
 
        kfree(op);
index 9cf1f168eaf8b281eacf00778f2c1b3d36642edb..1aca51e45092e0eb6c37691ccfa6c0af3cf51682 100644 (file)
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
        ls->fscb(ls->sdp, cb, &lp->lockname);
 }
 
+static void wake_up_ast(struct gdlm_lock *lp)   /* counterpart of the wait_on_bit() on LFL_AST_WAIT in hold_null_lock() */
+{
+       clear_bit(LFL_AST_WAIT, &lp->flags);    /* clear the bit the waiter is blocked on */
+       smp_mb__after_clear_bit();      /* barrier placed between the clear and the wakeup */
+       wake_up_bit(&lp->flags, LFL_AST_WAIT);  /* wake anything waiting on that bit of lp->flags */
+}
+
 static void process_complete(struct gdlm_lock *lp)
 {
        struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
         */
 
        if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-               complete(&lp->ast_wait);
+               wake_up_ast(lp);
                return;
        }
 
@@ -214,7 +221,7 @@ out:
        if (test_bit(LFL_INLOCK, &lp->flags)) {
                clear_bit(LFL_NOBLOCK, &lp->flags);
                lp->cur = lp->req;
-               complete(&lp->ast_wait);
+               wake_up_ast(lp);
                return;
        }
 
index 291415ddfe51cb291ccd66b23703e63a58b089d7..f49a12e24086d36e6975a733c3c1a8b9793c244d 100644 (file)
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 
                        gfs2_assert(sdp, bd->bd_ail == ai);
 
+                       if (!bh){
+                               list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
+
                        if (!buffer_busy(bh)) {
                                if (!buffer_uptodate(bh)) {
                                        gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
                                         bd_ail_st_list) {
                bh = bd->bd_bh;
 
+               if (!bh){
+                       list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                       continue;
+               }
+
                gfs2_assert(sdp, bd->bd_ail == ai);
 
                if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
  *
- * Note that we never give out the last 6 blocks of the journal. Thats
- * due to the fact that there is are a small number of header blocks
+ * Note that we never give out the last few blocks of the journal. That's
+ * due to the fact that there is a small number of header blocks
  * associated with each log flush. The exact number can't be known until
  * flush time, so we ensure that we have just enough free blocks at all
  * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
        unsigned int try = 0;
+       unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
 
        if (gfs2_assert_warn(sdp, blks) ||
            gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 
        mutex_lock(&sdp->sd_log_reserve_mutex);
        gfs2_log_lock(sdp);
-       while(sdp->sd_log_blks_free <= (blks + 6)) {
+       while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
                gfs2_log_unlock(sdp);
                gfs2_ail1_empty(sdp, 0);
                gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
        return dist;
 }
 
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (502) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{      /* Reads only the sd_log_commited_* counters of @sdp; nothing in @sdp is modified here. */
+       unsigned int reserved = 0;      /* overwritten unconditionally below */
+       unsigned int mbuf_limit, metabufhdrs_needed;
+       unsigned int dbuf_limit, databufhdrs_needed;
+       unsigned int revokes = 0;       /* stays 0 when no revokes are committed */
+
+       mbuf_limit = buf_limit(sdp);
+       metabufhdrs_needed = (sdp->sd_log_commited_buf +
+                             (mbuf_limit - 1)) / mbuf_limit;   /* round-up division: sd_log_commited_buf / mbuf_limit */
+       dbuf_limit = databuf_limit(sdp);
+       databufhdrs_needed = (sdp->sd_log_commited_databuf +
+                             (dbuf_limit - 1)) / dbuf_limit;   /* same round-up division for journaled data buffers */
+
+       if (sdp->sd_log_commited_revoke)
+               revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+                                         sizeof(u64)); /* presumably blocks needed for that many u64 entries; confirm in gfs2_struct2blk() */
+
+       reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+               sdp->sd_log_commited_databuf + databufhdrs_needed +
+               revokes;        /* buffers plus their headers, plus revoke blocks */
+       /* One for the overall header */
+       if (reserved)   /* the extra block is added only when something is committed */
+               reserved++;
+       return reserved;
+}
+
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
        struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
        return bh;
 }
 
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
        unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
 
        ail2_empty(sdp, new_tail);
 
        gfs2_log_lock(sdp);
-       sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+       sdp->sd_log_blks_free += dist;
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
        gfs2_log_unlock(sdp);
 
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        brelse(bh);
 
        if (sdp->sd_log_tail != tail)
-               log_pull_tail(sdp, tail, pull);
+               log_pull_tail(sdp, tail);
        else
                gfs2_assert_withdraw(sdp, !pull);
 
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
        struct list_head *head = &sdp->sd_log_flush_list;
        struct gfs2_log_buf *lb;
        struct buffer_head *bh;
+       int flushcount = 0;
 
        while (!list_empty(head)) {
                lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
                } else
                        brelse(bh);
                kfree(lb);
+               flushcount++;
        }
 
-       log_write_header(sdp, 0, 0);
+       /* If nothing was journaled, the header is unplanned and unwanted. */
+       if (flushcount) {
+               log_write_header(sdp, 0, 0);
+       } else {
+               unsigned int tail;
+               tail = current_tail(sdp);
+
+               gfs2_ail1_empty(sdp, 0);
+               if (sdp->sd_log_tail != tail)
+                       log_pull_tail(sdp, tail);
+       }
 }
 
 /**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        INIT_LIST_HEAD(&ai->ai_ail1_list);
        INIT_LIST_HEAD(&ai->ai_ail2_list);
 
-       gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+       gfs2_assert_withdraw(sdp,
+                            sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+                            sdp->sd_log_commited_buf +
+                            sdp->sd_log_commited_databuf);
        gfs2_assert_withdraw(sdp,
                        sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        lops_before_commit(sdp);
        if (!list_empty(&sdp->sd_log_flush_list))
                log_flush_commit(sdp);
-       else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+       else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+               gfs2_log_lock(sdp);
+               sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+               gfs2_log_unlock(sdp);
                log_write_header(sdp, 0, PULL);
+       }
        lops_after_commit(sdp, ai);
 
        gfs2_log_lock(sdp);
        sdp->sd_log_head = sdp->sd_log_flush_head;
-       sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
        sdp->sd_log_blks_reserved = 0;
        sdp->sd_log_commited_buf = 0;
-       sdp->sd_log_num_hdrs = 0;
+       sdp->sd_log_commited_databuf = 0;
        sdp->sd_log_commited_revoke = 0;
 
        if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-       unsigned int reserved = 0;
+       unsigned int reserved;
        unsigned int old;
 
        gfs2_log_lock(sdp);
 
        sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-       gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+       sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+               tr->tr_num_databuf_rm;
+       gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
+                            (((int)sdp->sd_log_commited_databuf) >= 0));
        sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
        gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
-
-       if (sdp->sd_log_commited_buf)
-               reserved += sdp->sd_log_commited_buf;
-       if (sdp->sd_log_commited_revoke)
-               reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-                                           sizeof(u64));
-       if (reserved)
-               reserved++;
-
+       reserved = calc_reserved(sdp);
        old = sdp->sd_log_blks_free;
        sdp->sd_log_blks_free += tr->tr_reserved -
                                 (reserved - sdp->sd_log_blks_reserved);
 
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-       gfs2_assert_withdraw(sdp,
-                            sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
-                            sdp->sd_log_num_hdrs);
+       gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
+                            sdp->sd_jdesc->jd_blocks);
 
        sdp->sd_log_blks_reserved = reserved;
 
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-       gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
        gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
 
        sdp->sd_log_flush_head = sdp->sd_log_head;
        sdp->sd_log_flush_wrapped = 0;
 
-       log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+       log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+                        (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
 
        gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
        gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
index f82d84d05d233d99806b21c3eb93550759ddbac5..aff70f0698fdd887799e3d23a0057ea715714112 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "gfs2.h"
 #include "incore.h"
+#include "inode.h"
 #include "glock.h"
 #include "log.h"
 #include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
        struct gfs2_log_descriptor *ld;
        struct gfs2_bufdata *bd1 = NULL, *bd2;
        unsigned int total = sdp->sd_log_num_buf;
-       unsigned int offset = sizeof(struct gfs2_log_descriptor);
+       unsigned int offset = BUF_OFFSET;
        unsigned int limit;
        unsigned int num;
        unsigned n;
        __be64 *ptr;
 
-       offset += sizeof(__be64) - 1;
-       offset &= ~(sizeof(__be64) - 1);
-       limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+       limit = buf_limit(sdp);
        /* for 4k blocks, limit = 503 */
 
        bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                if (total > limit)
                        num = limit;
                bh = gfs2_log_get_buf(sdp);
-               sdp->sd_log_num_hdrs++;
                ld = (struct gfs2_log_descriptor *)bh->b_data;
                ptr = (__be64 *)(bh->b_data + offset);
                ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
        struct gfs2_inode *ip = GFS2_I(mapping->host);
 
        gfs2_log_lock(sdp);
+       if (!list_empty(&bd->bd_list_tr)) {
+               gfs2_log_unlock(sdp);
+               return;
+       }
        tr->tr_touched = 1;
-       if (list_empty(&bd->bd_list_tr) &&
-           (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+       if (gfs2_is_jdata(ip)) {
                tr->tr_num_buf++;
                list_add(&bd->bd_list_tr, &tr->tr_list_buf);
-               gfs2_log_unlock(sdp);
-               gfs2_pin(sdp, bd->bd_bh);
-               tr->tr_num_buf_new++;
-       } else {
-               gfs2_log_unlock(sdp);
        }
+       gfs2_log_unlock(sdp);
+       if (!list_empty(&le->le_list))
+               return;
+
        gfs2_trans_add_gl(bd->bd_gl);
-       gfs2_log_lock(sdp);
-       if (list_empty(&le->le_list)) {
-               if (ip->i_di.di_flags & GFS2_DIF_JDATA)
-                       sdp->sd_log_num_jdata++;
-               sdp->sd_log_num_databuf++;
-               list_add(&le->le_list, &sdp->sd_log_le_databuf);
+       if (gfs2_is_jdata(ip)) {
+               sdp->sd_log_num_jdata++;
+               gfs2_pin(sdp, bd->bd_bh);
+               tr->tr_num_databuf_new++;
        }
+       sdp->sd_log_num_databuf++;
+       gfs2_log_lock(sdp);
+       list_add(&le->le_list, &sdp->sd_log_le_databuf);
        gfs2_log_unlock(sdp);
 }
 
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        LIST_HEAD(started);
        struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
        struct buffer_head *bh = NULL,*bh1 = NULL;
-       unsigned int offset = sizeof(struct gfs2_log_descriptor);
        struct gfs2_log_descriptor *ld;
        unsigned int limit;
        unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        unsigned int num, n;
        __be64 *ptr = NULL;
 
-       offset += 2*sizeof(__be64) - 1;
-       offset &= ~(2*sizeof(__be64) - 1);
-       limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
+       limit = databuf_limit(sdp);
 
        /*
         * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                gfs2_log_unlock(sdp);
                                if (!bh) {
                                        bh = gfs2_log_get_buf(sdp);
-                                       sdp->sd_log_num_hdrs++;
                                        ld = (struct gfs2_log_descriptor *)
                                             bh->b_data;
-                                       ptr = (__be64 *)(bh->b_data + offset);
+                                       ptr = (__be64 *)(bh->b_data +
+                                                        DATABUF_OFFSET);
                                        ld->ld_header.mh_magic =
                                                cpu_to_be32(GFS2_MAGIC);
                                        ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                if (unlikely(magic != 0))
                                        set_buffer_escaped(bh1);
                                gfs2_log_lock(sdp);
-                               if (n++ > num)
+                               if (++n >= num)
                                        break;
                        } else if (!bh1) {
                                total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                }
                gfs2_log_unlock(sdp);
                if (bh) {
+                       set_buffer_mapped(bh);
                        set_buffer_dirty(bh);
                        ll_rw_block(WRITE, 1, &bh);
                        bh = NULL;
index 965bc65c7c6432b9f47f3937d4dbd27f5b4c9446..41a00df755879dea4018ae332f40a45155bc9fb6 100644 (file)
 #include <linux/list.h>
 #include "incore.h"
 
+#define BUF_OFFSET \
+       ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+        ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET \
+       ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+        ~(2 * sizeof(__be64) - 1))
+
 extern const struct gfs2_log_operations gfs2_glock_lops;
 extern const struct gfs2_log_operations gfs2_buf_lops;
 extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
 
 extern const struct gfs2_log_operations *gfs2_log_ops[];
 
+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+       unsigned int limit;
+
+       limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
+       return limit;
+}
+
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+       unsigned int limit;
+
+       limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+       return limit;
+}
+
 static inline void lops_init_le(struct gfs2_log_element *le,
                                const struct gfs2_log_operations *lops)
 {
index e62d4f620c5841c73b177f2022324b09442e1a13..8da343b34ae733de759c09cefa64e84b71a887c7 100644 (file)
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 
                        if (test_clear_buffer_pinned(bh)) {
                                struct gfs2_trans *tr = current->journal_info;
+                               struct gfs2_inode *bh_ip =
+                                       GFS2_I(bh->b_page->mapping->host);
+
                                gfs2_log_lock(sdp);
                                list_del_init(&bd->bd_le.le_list);
                                gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
                                sdp->sd_log_num_buf--;
                                gfs2_log_unlock(sdp);
-                               tr->tr_num_buf_rm++;
+                               if (bh_ip->i_inode.i_private != NULL)
+                                       tr->tr_num_databuf_rm++;
+                               else
+                                       tr->tr_num_buf_rm++;
                                brelse(bh);
                        }
                        if (bd) {
index e037425bc0427e6cff31c6bda1bae2daa16cf626..527bf19d9690f5c58203e1ca1c3fbb0b1e5fa8ae 100644 (file)
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
                                         struct buffer_head **bhp)
 {
-       return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+       return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
 }
 
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
index 4864659555d4f94d49132a2727a4acd0b5e1d5e4..6f006a804db33c48310c5af608ed4aa53805625c 100644 (file)
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
        char *options, *o, *v;
        int error = 0;
 
-       if (!remount) {
-               /*  If someone preloaded options, use those instead  */
-               spin_lock(&gfs2_sys_margs_lock);
-               if (gfs2_sys_margs) {
-                       data = gfs2_sys_margs;
-                       gfs2_sys_margs = NULL;
-               }
-               spin_unlock(&gfs2_sys_margs_lock);
-
-               /*  Set some defaults  */
-               args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-               args->ar_quota = GFS2_QUOTA_DEFAULT;
-               args->ar_data = GFS2_DATA_DEFAULT;
+       /*  If someone preloaded options, use those instead  */
+       spin_lock(&gfs2_sys_margs_lock);
+       if (!remount && gfs2_sys_margs) {
+               data = gfs2_sys_margs;
+               gfs2_sys_margs = NULL;
        }
+       spin_unlock(&gfs2_sys_margs_lock);
+
+       /*  Set some defaults  */
+       memset(args, 0, sizeof(struct gfs2_args));
+       args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+       args->ar_quota = GFS2_QUOTA_DEFAULT;
+       args->ar_data = GFS2_DATA_DEFAULT;
 
        /* Split the options into tokens with the "," character and
           process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644 (file)
index d9ecfd2..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-
-#include "gfs2.h"
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include "incore.h"
-
-#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
-                                      struct->member);
-
-/*
- * gfs2_xxx_in - read in an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_out - write out an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_print - print out an xxx struct
- * first arg: the cpu-order structure
- */
-
-void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
-{
-       const struct gfs2_inum *str = buf;
-
-       no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
-       no->no_addr = be64_to_cpu(str->no_addr);
-}
-
-void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
-{
-       struct gfs2_inum *str = buf;
-
-       str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
-       str->no_addr = cpu_to_be64(no->no_addr);
-}
-
-static void gfs2_inum_print(const struct gfs2_inum_host *no)
-{
-       printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
-       printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
-}
-
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
-{
-       const struct gfs2_meta_header *str = buf;
-
-       mh->mh_magic = be32_to_cpu(str->mh_magic);
-       mh->mh_type = be32_to_cpu(str->mh_type);
-       mh->mh_format = be32_to_cpu(str->mh_format);
-}
-
-void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-       const struct gfs2_sb *str = buf;
-
-       gfs2_meta_header_in(&sb->sb_header, buf);
-
-       sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-       sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-       sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-       sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-
-       gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
-       gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
-
-       memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-       memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-
-void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
-{
-       const struct gfs2_rindex *str = buf;
-
-       ri->ri_addr = be64_to_cpu(str->ri_addr);
-       ri->ri_length = be32_to_cpu(str->ri_length);
-       ri->ri_data0 = be64_to_cpu(str->ri_data0);
-       ri->ri_data = be32_to_cpu(str->ri_data);
-       ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
-
-}
-
-void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
-{
-       printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
-       pv(ri, ri_length, "%u");
-
-       printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
-       pv(ri, ri_data, "%u");
-
-       pv(ri, ri_bitbytes, "%u");
-}
-
-void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
-{
-       const struct gfs2_rgrp *str = buf;
-
-       rg->rg_flags = be32_to_cpu(str->rg_flags);
-       rg->rg_free = be32_to_cpu(str->rg_free);
-       rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-       rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
-}
-
-void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
-{
-       struct gfs2_rgrp *str = buf;
-
-       str->rg_flags = cpu_to_be32(rg->rg_flags);
-       str->rg_free = cpu_to_be32(rg->rg_free);
-       str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-       str->__pad = cpu_to_be32(0);
-       str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
-       memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
-}
-
-void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
-{
-       const struct gfs2_quota *str = buf;
-
-       qu->qu_limit = be64_to_cpu(str->qu_limit);
-       qu->qu_warn = be64_to_cpu(str->qu_warn);
-       qu->qu_value = be64_to_cpu(str->qu_value);
-}
-
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-       const struct gfs2_dinode_host *di = &ip->i_di;
-       struct gfs2_dinode *str = buf;
-
-       str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-       str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-       str->di_header.__pad0 = 0;
-       str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-       str->di_header.__pad1 = 0;
-
-       gfs2_inum_out(&ip->i_num, &str->di_num);
-
-       str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-       str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-       str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-       str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-       str->di_size = cpu_to_be64(di->di_size);
-       str->di_blocks = cpu_to_be64(di->di_blocks);
-       str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-       str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-       str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-
-       str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
-       str->di_goal_data = cpu_to_be64(di->di_goal_data);
-       str->di_generation = cpu_to_be64(di->di_generation);
-
-       str->di_flags = cpu_to_be32(di->di_flags);
-       str->di_height = cpu_to_be16(di->di_height);
-       str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-                                            !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
-                                            GFS2_FORMAT_DE : 0);
-       str->di_depth = cpu_to_be16(di->di_depth);
-       str->di_entries = cpu_to_be32(di->di_entries);
-
-       str->di_eattr = cpu_to_be64(di->di_eattr);
-}
-
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-       const struct gfs2_dinode_host *di = &ip->i_di;
-
-       gfs2_inum_print(&ip->i_num);
-
-       printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
-       printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-       printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
-       printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
-
-       pv(di, di_flags, "0x%.8X");
-       pv(di, di_height, "%u");
-
-       pv(di, di_depth, "%u");
-       pv(di, di_entries, "%u");
-
-       printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
-}
-
-void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-       const struct gfs2_log_header *str = buf;
-
-       gfs2_meta_header_in(&lh->lh_header, buf);
-       lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-       lh->lh_flags = be32_to_cpu(str->lh_flags);
-       lh->lh_tail = be32_to_cpu(str->lh_tail);
-       lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-       lh->lh_hash = be32_to_cpu(str->lh_hash);
-}
-
-void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
-       const struct gfs2_inum_range *str = buf;
-
-       ir->ir_start = be64_to_cpu(str->ir_start);
-       ir->ir_length = be64_to_cpu(str->ir_length);
-}
-
-void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
-       struct gfs2_inum_range *str = buf;
-
-       str->ir_start = cpu_to_be64(ir->ir_start);
-       str->ir_length = cpu_to_be64(ir->ir_length);
-}
-
-void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
-{
-       const struct gfs2_statfs_change *str = buf;
-
-       sc->sc_total = be64_to_cpu(str->sc_total);
-       sc->sc_free = be64_to_cpu(str->sc_free);
-       sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
-}
-
-void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
-{
-       struct gfs2_statfs_change *str = buf;
-
-       str->sc_total = cpu_to_be64(sc->sc_total);
-       str->sc_free = cpu_to_be64(sc->sc_free);
-       str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
-}
-
-void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
-{
-       const struct gfs2_quota_change *str = buf;
-
-       qc->qc_change = be64_to_cpu(str->qc_change);
-       qc->qc_flags = be32_to_cpu(str->qc_flags);
-       qc->qc_id = be32_to_cpu(str->qc_id);
-}
-
index 30c15622174fdbc59f3b94ab8cec331001b2a943..26c888890c245bf562931a2ddd54619ac582d36c 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
 #include "trans.h"
 #include "rgrp.h"
 #include "ops_file.h"
+#include "super.h"
 #include "util.h"
 #include "glops.h"
 
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
+               if (gfs2_is_jdata(ip))
+                       set_buffer_uptodate(bh);
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
        }
 }
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
                return 0; /* don't care */
        }
 
-       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+       if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+           PageChecked(page)) {
+               ClearPageChecked(page);
                error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (error)
                        goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
         * so we need to supply one here. It doesn't happen often.
         */
        if (unlikely(page->index)) {
-               kaddr = kmap_atomic(page, KM_USER0);
-               memset(kaddr, 0, PAGE_CACHE_SIZE);
-               kunmap_atomic(kaddr, KM_USER0);
-               flush_dcache_page(page);
-               SetPageUptodate(page);
+               zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
                return 0;
        }
 
@@ -449,6 +450,31 @@ out_uninit:
        return error;
 }
 
+/**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+       struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+       struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+       struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+       u64 fs_total, new_free;
+
+       /* Total up the file system space, according to the latest rindex. */
+       fs_total = gfs2_ri_total(sdp);
+
+       spin_lock(&sdp->sd_statfs_spin);
+       if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+               new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+       else
+               new_free = 0;
+       spin_unlock(&sdp->sd_statfs_spin);
+       fs_warn(sdp, "File system extended by %llu blocks.\n",
+               (unsigned long long)new_free);
+       gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+
 /**
  * gfs2_commit_write - Commit write to a file
  * @file: The file to write to
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
                di->di_size = cpu_to_be64(inode->i_size);
        }
 
+       if (inode == sdp->sd_rindex)
+               adjust_fs_space(inode);
+
        brelse(dibh);
        gfs2_trans_end(sdp);
        if (al->al_requested) {
@@ -542,6 +571,23 @@ fail_nounlock:
        return error;
 }
 
+/**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtied the page, or 0 otherwise
+ */
+static int gfs2_set_page_dirty(struct page *page)
+{
+       struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+       struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+
+       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+               SetPageChecked(page);
+       return __set_page_dirty_buffers(page);
+}
+
 /**
  * gfs2_bmap - Block map function
  * @mapping: Address space info
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
        if (bd) {
                bd->bd_bh = NULL;
                bh->b_private = NULL;
+               if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+                       kmem_cache_free(gfs2_bufdata_cachep, bd);
        }
        gfs2_log_unlock(sdp);
 
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
        unsigned int curr_off = 0;
 
        BUG_ON(!PageLocked(page));
+       if (offset == 0)
+               ClearPageChecked(page);
        if (!page_has_buffers(page))
                return;
 
@@ -728,8 +778,8 @@ static unsigned limit = 0;
                        return;
 
                fs_warn(sdp, "ip = %llu %llu\n",
-                       (unsigned long long)ip->i_num.no_formal_ino,
-                       (unsigned long long)ip->i_num.no_addr);
+                       (unsigned long long)ip->i_no_formal_ino,
+                       (unsigned long long)ip->i_no_addr);
 
                for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
                        fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
        .sync_page = block_sync_page,
        .prepare_write = gfs2_prepare_write,
        .commit_write = gfs2_commit_write,
+       .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
index 35aaee4aa7e1323aea5824948c6ee99529ac2184..fa1b5b3d28b99b50c08d428de760dc6c260b2098 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
index a6fdc52f554a43d5d195a8282319028d63d54873..793e334d098e19518957371c116f8d9c6f75178d 100644 (file)
@@ -21,6 +21,7 @@
 #include "glock.h"
 #include "ops_dentry.h"
 #include "util.h"
+#include "inode.h"
 
 /**
  * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
        struct gfs2_inode *dip = GFS2_I(parent->d_inode);
        struct inode *inode = dentry->d_inode;
        struct gfs2_holder d_gh;
-       struct gfs2_inode *ip;
-       struct gfs2_inum_host inum;
-       unsigned int type;
+       struct gfs2_inode *ip = NULL;
        int error;
        int had_lock=0;
 
-       if (inode && is_bad_inode(inode))
-               goto invalid;
+       if (inode) {
+               if (is_bad_inode(inode))
+                       goto invalid;
+               ip = GFS2_I(inode);
+       }
 
        if (sdp->sd_args.ar_localcaching)
                goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                        goto fail;
        } 
 
-       error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+       error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
        switch (error) {
        case 0:
                if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                goto fail_gunlock;
        }
 
-       ip = GFS2_I(inode);
-
-       if (!gfs2_inum_equal(&ip->i_num, &inum))
-               goto invalid_gunlock;
-
-       if (IF2DT(ip->i_inode.i_mode) != type) {
-               gfs2_consist_inode(dip);
-               goto fail_gunlock;
-       }
-
 valid_gunlock:
        if (!had_lock)
                gfs2_glock_dq_uninit(&d_gh);
index aad918337a469136778f32e5367c3fb586da82b2..99ea5659bc2c58a5a41f482043ef25db8b6284ea 100644 (file)
 #include "glops.h"
 #include "inode.h"
 #include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
 #include "rgrp.h"
 #include "util.h"
 
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
+
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     __u32 *p,
                                     int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     void *context)
 {
        __be32 *fh = (__force __be32 *)p;
-       struct gfs2_fh_obj fh_obj;
-       struct gfs2_inum_host *this, parent;
+       struct gfs2_inum_host inum, parent;
 
-       this            = &fh_obj.this;
-       fh_obj.imode    = DT_UNKNOWN;
        memset(&parent, 0, sizeof(struct gfs2_inum));
 
        switch (fh_len) {
        case GFS2_LARGE_FH_SIZE:
+       case GFS2_OLD_FH_SIZE:
                parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
                parent.no_formal_ino |= be32_to_cpu(fh[5]);
                parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
                parent.no_addr |= be32_to_cpu(fh[7]);
-               fh_obj.imode = be32_to_cpu(fh[8]);
        case GFS2_SMALL_FH_SIZE:
-               this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-               this->no_formal_ino |= be32_to_cpu(fh[1]);
-               this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-               this->no_addr |= be32_to_cpu(fh[3]);
+               inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+               inum.no_formal_ino |= be32_to_cpu(fh[1]);
+               inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+               inum.no_addr |= be32_to_cpu(fh[3]);
                break;
        default:
                return NULL;
        }
 
-       return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+       return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
                                                    acceptable, context);
 }
 
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
            (connectable && *len < GFS2_LARGE_FH_SIZE))
                return 255;
 
-       fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-       fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-       fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
-       fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+       fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+       fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+       fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
+       fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
        *len = GFS2_SMALL_FH_SIZE;
 
        if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
        igrab(inode);
        spin_unlock(&dentry->d_lock);
 
-       fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
-       fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
-       fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
-       fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
-
-       fh[8]  = cpu_to_be32(inode->i_mode);
-       fh[9]  = 0;     /* pad to double word */
+       fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
+       fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
+       fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
+       fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
        *len = GFS2_LARGE_FH_SIZE;
 
        iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
        ip = GFS2_I(inode);
 
        *name = 0;
-       gnfd.inum = ip->i_num;
+       gnfd.inum.no_addr = ip->i_no_addr;
+       gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
        gnfd.name = name;
 
        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-       struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
-       struct gfs2_inum_host *inum = &fh_obj->this;
+       struct gfs2_inum_host *inum = inum_obj;
        struct gfs2_holder i_gh, ri_gh, rgd_gh;
        struct gfs2_rgrpd *rgd;
        struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 
        /* System files? */
 
-       inode = gfs2_ilookup(sb, inum);
+       inode = gfs2_ilookup(sb, inum->no_addr);
        if (inode) {
-               if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+               if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
                        iput(inode);
                        return ERR_PTR(-ESTALE);
                }
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
        gfs2_glock_dq_uninit(&rgd_gh);
        gfs2_glock_dq_uninit(&ri_gh);
 
-       inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+       inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+                                       inum->no_addr,
+                                       0);
        if (!inode)
                goto fail;
        if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
                goto fail;
        }
 
+       /* Pick up the work we bypass in gfs2_inode_lookup */
+       if (inode->i_state & I_NEW) 
+               gfs2_set_iop(inode);
+
+       if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+               iput(inode);
+               goto fail;
+       }
+
        error = -EIO;
        if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
                iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644 (file)
index f925a95..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_EXPORT_DOT_H__
-#define __OPS_EXPORT_DOT_H__
-
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 10
-
-extern struct export_operations gfs2_export_ops;
-struct gfs2_fh_obj {
-       struct gfs2_inum_host this;
-       __u32            imode;
-};
-
-#endif /* __OPS_EXPORT_DOT_H__ */
index 7dc3be10820412f338846f3c08e4408113b9506b..196d83266e34348a66ce2042449a803568d7c07b 100644 (file)
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
        struct lm_lockname name =
-               { .ln_number = ip->i_num.no_addr,
+               { .ln_number = ip->i_no_addr,
                  .ln_type = LM_TYPE_PLOCK };
 
        if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
                gfs2_glock_dq_uninit(fl_gh);
        } else {
                error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-                                     ip->i_num.no_addr, &gfs2_flock_glops,
+                                     ip->i_no_addr, &gfs2_flock_glops,
                                      CREATE, &gl);
                if (error)
                        goto out;
index 2c5f8e7def0dc21413ccd5e1fb2e38939d36d907..cf5aa50505488d95a62719f9728660e742728575 100644 (file)
@@ -27,7 +27,6 @@
 #include "inode.h"
 #include "lm.h"
 #include "mount.h"
-#include "ops_export.h"
 #include "ops_fstype.h"
 #include "ops_super.h"
 #include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
        sb->s_magic = GFS2_MAGIC;
        sb->s_op = &gfs2_super_ops;
        sb->s_export_op = &gfs2_export_ops;
+       sb->s_time_gran = 1;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
 
        if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 
 static int init_names(struct gfs2_sbd *sdp, int silent)
 {
-       struct page *page;
        char *proto, *table;
        int error = 0;
 
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        /*  Try to autodetect  */
 
        if (!proto[0] || !table[0]) {
-               struct gfs2_sb *sb;
-               page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-               if (!page)
-                       return -ENOBUFS;
-               sb = kmap(page);
-               gfs2_sb_in(&sdp->sd_sb, sb);
-               kunmap(page);
-               __free_page(page);
+               error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+               if (error)
+                       return error;
 
                error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
                if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
        snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
 
+       while ((table = strchr(sdp->sd_table_name, '/')))
+               *table = '_';
+
 out:
        return error;
 }
@@ -236,17 +233,17 @@ fail:
        return error;
 }
 
-static struct inode *gfs2_lookup_root(struct super_block *sb,
-                                     struct gfs2_inum_host *inum)
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
+                                            u64 no_addr)
 {
-       return gfs2_inode_lookup(sb, inum, DT_DIR);
+       return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
 }
 
 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
        struct super_block *sb = sdp->sd_vfs;
        struct gfs2_holder sb_gh;
-       struct gfs2_inum_host *inum;
+       u64 no_addr;
        struct inode *inode;
        int error = 0;
 
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
        sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
 
        /* Get the root inode */
-       inum = &sdp->sd_sb.sb_root_dir;
+       no_addr = sdp->sd_sb.sb_root_dir.no_addr;
        if (sb->s_type == &gfs2meta_fs_type)
-               inum = &sdp->sd_sb.sb_master_dir;
-       inode = gfs2_lookup_root(sb, inum);
+               no_addr = sdp->sd_sb.sb_master_dir.no_addr;
+       inode = gfs2_lookup_root(sb, no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
        if (undo)
                goto fail_qinode;
 
-       inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+       inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in master directory: %d\n", error);
index 7cc2c296271be2d7930879a981013c85c90c49a9..407029b3b2b3857b6564126fe191d1bc96094c67 100644 (file)
@@ -14,5 +14,6 @@
 
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;
 
 #endif /* __OPS_FSTYPE_DOT_H__ */
index d85f6e05cb955d99cebcad6aa3bd660871cef49f..911c115b5c6c29ecf16aabe087c2be507bb58f71 100644 (file)
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (error)
                goto out_gunlock;
 
-       error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+       error = gfs2_dir_check(dir, &dentry->d_name, NULL);
        switch (error) {
        case -ENOENT:
                break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_gunlock_q;
 
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                        al->al_rgd->rd_ri.ri_length +
+                                        al->al_rgd->rd_length +
                                         2 * RES_DINODE + RES_STATFS +
                                         RES_QUOTA, 0);
                if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_ipres;
        }
 
-       error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
-                            IF2DT(inode->i_mode));
+       error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
        if (error)
                goto out_end_trans;
 
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-       rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+       rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
 
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
                gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
 
-               gfs2_inum_out(&dip->i_num, &dent->de_inum);
+               gfs2_inum_out(dip, dent);
                dent->de_type = cpu_to_be16(DT_DIR);
 
                gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
 
-       rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+       rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
 
        error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                 * this is the case of the target file already existing
                 * so we unlink before doing the rename
                 */
-               nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+               nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
                if (nrgd)
                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
        }
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_gunlock;
 
-               error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+               error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
                switch (error) {
                case -ENOENT:
                        error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        goto out_gunlock_q;
 
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                        al->al_rgd->rd_ri.ri_length +
+                                        al->al_rgd->rd_length +
                                         4 * RES_DINODE + 4 * RES_LEAF +
                                         RES_STATFS + RES_QUOTA + 4, 0);
                if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_end_trans;
 
-               error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+               error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
                if (error)
                        goto out_end_trans;
        } else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                error = gfs2_meta_inode_buffer(ip, &dibh);
                if (error)
                        goto out_end_trans;
-               ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+               ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        if (error)
                goto out_end_trans;
 
-       error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
-                            IF2DT(ip->i_inode.i_mode));
+       error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
        if (error)
                goto out_end_trans;
 
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
        }
 
        error = gfs2_truncatei(ip, attr->ia_size);
-       if (error)
-               return error;
+       if (error && (inode->i_size != ip->i_di.di_size))
+               i_size_write(inode, ip->i_di.di_size);
 
        return error;
 }
index 485ce3d499230c08b7e741e338e569f454dea88c..603d940f1159c2c6612d4e2c4d65cc08a946d327 100644 (file)
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
                gfs2_glock_schedule_for_reclaim(ip->i_gl);
                gfs2_glock_put(ip->i_gl);
                ip->i_gl = NULL;
-               if (ip->i_iopen_gh.gh_gl)
+               if (ip->i_iopen_gh.gh_gl) {
+                       ip->i_iopen_gh.gh_gl->gl_object = NULL;
                        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+               }
        }
 }
 
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
        if (!inode->i_private)
                goto out;
 
-       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+       error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        if (unlikely(error)) {
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                goto out;
        }
 
-       gfs2_glock_dq(&ip->i_iopen_gh);
+       gfs2_glock_dq_wait(&ip->i_iopen_gh);
        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
        error = gfs2_glock_nq(&ip->i_iopen_gh);
        if (error)
index aa0dbd2aac1bd31bdd3cb4ebf8265fe9b874d47d..404b7cc9f8c4887803d10437c91783a18e655fa0 100644 (file)
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
        if (error)
                goto out_gunlock_q;
 
-       error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+       error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
                                 ind_blocks + RES_DINODE +
                                 RES_STATFS + RES_QUOTA, 0);
        if (error)
index c186857e48a80a49433b81fe2f11986b58c188f7..6e546ee8f3d4d237e01f44aac67db0a35dc0f296 100644 (file)
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0
 
+/* Quota values in CPU byte order; filled by gfs2_quota_in(), written back by gfs2_quota_out(). */
+struct gfs2_quota_host {
+       u64 qu_limit;
+       u64 qu_warn;
+       s64 qu_value; /* signed; gfs2_adjust_quota() adds its change to this field */
+};
+
+/* Quota change record in CPU byte order; filled by gfs2_quota_change_in(). */
+struct gfs2_quota_change_host {
+       u64 qc_change;
+       u32 qc_flags; /* GFS2_QCF_... */
+       u32 qc_id;
+};
+
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
        u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
        mutex_unlock(&sdp->sd_quota_mutex);
 }
 
+/**
+ * gfs2_quota_in - decode a big-endian struct gfs2_quota into host form
+ * @qu: destination, receives the values in CPU byte order
+ * @buf: source buffer, interpreted as a struct gfs2_quota
+ */
+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
+{
+       const struct gfs2_quota *disk = buf;
+
+       qu->qu_value = be64_to_cpu(disk->qu_value);
+       qu->qu_warn = be64_to_cpu(disk->qu_warn);
+       qu->qu_limit = be64_to_cpu(disk->qu_limit);
+}
+
+/**
+ * gfs2_quota_out - encode host-order quota values into a struct gfs2_quota
+ * @qu: source values in CPU byte order
+ * @buf: destination buffer, written as a struct gfs2_quota
+ *
+ * The qu_reserved area of the destination is zeroed.
+ */
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
+{
+       struct gfs2_quota *disk = buf;
+
+       memset(&disk->qu_reserved, 0, sizeof(disk->qu_reserved));
+       disk->qu_value = cpu_to_be64(qu->qu_value);
+       disk->qu_warn = cpu_to_be64(qu->qu_warn);
+       disk->qu_limit = cpu_to_be64(qu->qu_limit);
+}
+
 /**
  * gfs2_adjust_quota
  *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        struct inode *inode = &ip->i_inode;
        struct address_space *mapping = inode->i_mapping;
        unsigned long index = loc >> PAGE_CACHE_SHIFT;
-       unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+       unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
        unsigned blocksize, iblock, pos;
        struct buffer_head *bh;
        struct page *page;
        void *kaddr;
-       __be64 *ptr;
+       char *ptr;
+       struct gfs2_quota_host qp;
        s64 value;
        int err = -EIO;
 
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 
        kaddr = kmap_atomic(page, KM_USER0);
        ptr = kaddr + offset;
-       value = (s64)be64_to_cpu(*ptr) + change;
-       *ptr = cpu_to_be64(value);
+       gfs2_quota_in(&qp, ptr);
+       qp.qu_value += change;
+       value = qp.qu_value;
+       gfs2_quota_out(&qp, ptr);
        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
        err = 0;
        qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
        qd->qd_qb.qb_value = cpu_to_be64(value);
+       ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+       ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
 unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                        goto out_alloc;
 
                error = gfs2_trans_begin(sdp,
-                                        al->al_rgd->rd_ri.ri_length +
+                                        al->al_rgd->rd_length +
                                         num_qd * data_blocks +
                                         nalloc * ind_blocks +
                                         RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                offset = qd2offset(qd);
                error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
                                          (struct gfs2_quota_data *)
-                                         qd->qd_gl->gl_lvb);
+                                         qd);
                if (error)
                        goto out_end_trans;
 
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
        return error;
 }
 
+/**
+ * gfs2_quota_change_in - decode a big-endian struct gfs2_quota_change
+ * @qc: destination, receives the values in CPU byte order
+ * @buf: source buffer, interpreted as a struct gfs2_quota_change
+ */
+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
+{
+       const struct gfs2_quota_change *disk = buf;
+
+       qc->qc_id = be32_to_cpu(disk->qc_id);
+       qc->qc_flags = be32_to_cpu(disk->qc_flags);
+       qc->qc_change = be64_to_cpu(disk->qc_change);
+}
+
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
index 8bc182c7e2ef22c2bad269743f15bab5f27a4914..5ada38c99a2c95e49a1abb99a828fa0f860306eb 100644 (file)
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
        }
 }
 
+/**
+ * gfs2_log_header_in - validate and decode a big-endian log header
+ * @lh: destination, receives the header fields in CPU byte order
+ * @buf: source buffer, interpreted as a struct gfs2_log_header
+ *
+ * Returns: 0 on success, 1 if the magic number or metadata type is not that
+ *          of a log header (@lh is left untouched in that case)
+ */
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+       const struct gfs2_log_header *disk = buf;
+
+       /* Compare in on-disk byte order so the constants are converted, not the data */
+       if (disk->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC))
+               return 1;
+       if (disk->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+               return 1;
+
+       lh->lh_hash = be32_to_cpu(disk->lh_hash);
+       lh->lh_blkno = be32_to_cpu(disk->lh_blkno);
+       lh->lh_tail = be32_to_cpu(disk->lh_tail);
+       lh->lh_flags = be32_to_cpu(disk->lh_flags);
+       lh->lh_sequence = be64_to_cpu(disk->lh_sequence);
+       return 0;
+}
+
 /**
  * get_log_header - read the log header for a given segment
  * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
                                             sizeof(u32));
        hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
        hash ^= (u32)~0;
-       gfs2_log_header_in(&lh, bh->b_data);
+       error = gfs2_log_header_in(&lh, bh->b_data);
        brelse(bh);
 
-       if (lh.lh_header.mh_magic != GFS2_MAGIC ||
-           lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-           lh.lh_blkno != blk || lh.lh_hash != hash)
+       if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
                return 1;
 
        *head = lh;
index 1727f5012efec89ee5724000f7e97ce0c31d2570..e4e040625153aa29b2254e51276eb6ac9807380e 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"
 
 #define BFITNOENT ((u32)~0)
 
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
                1, 0, 0, 0
 };
 
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
+
 /**
  * gfs2_setbit - Set a bit in the bitmaps
  * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi = NULL;
-       u32 length = rgd->rd_ri.ri_length;
+       u32 length = rgd->rd_length;
        u32 count[4], tmp;
        int buf, x;
 
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
                return;
        }
 
-       tmp = rgd->rd_ri.ri_data -
+       tmp = rgd->rd_data -
                rgd->rd_rg.rg_free -
                rgd->rd_rg.rg_dinodes;
        if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 
 }
 
-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 {
-       u64 first = ri->ri_data0;
-       u64 last = first + ri->ri_data;
+       u64 first = rgd->rd_data0;
+       u64 last = first + rgd->rd_data;
        return first <= block && block < last;
 }
 
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
        spin_lock(&sdp->sd_rindex_spin);
 
        list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
-               if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+               if (rgrp_contains_block(rgd, blk)) {
                        list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
                        spin_unlock(&sdp->sd_rindex_spin);
                        return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
        mutex_unlock(&sdp->sd_rindex_mutex);
 }
 
+/* Print the fields of @rgd that gfs2_rindex_in() fills, one per line, at KERN_INFO. */
+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{
+       /* The 64-bit fields are widened once here to match the %llu conversions */
+       const unsigned long long addr = (unsigned long long)rgd->rd_addr;
+       const unsigned long long data0 = (unsigned long long)rgd->rd_data0;
+
+       printk(KERN_INFO "  ri_addr = %llu\n", addr);
+       printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
+       printk(KERN_INFO "  ri_data0 = %llu\n", data0);
+       printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
+       printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
+
 /**
  * gfs2_compute_bitstructs - Compute the bitmap sizes
  * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
-       u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+       u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;
 
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
        if (!rgd->rd_bits)
                return -ENOMEM;
 
-       bytes_left = rgd->rd_ri.ri_bitbytes;
+       bytes_left = rgd->rd_bitbytes;
 
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                } else if (x + 1 == length) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                       bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                       bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                /* other blocks */
                } else {
                        bytes = sdp->sd_sb.sb_bsize -
                                sizeof(struct gfs2_meta_header);
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                       bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                       bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                }
 
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                return -EIO;
        }
        bi = rgd->rd_bits + (length - 1);
-       if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+       if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
                if (gfs2_consist_rgrpd(rgd)) {
-                       gfs2_rindex_print(&rgd->rd_ri);
+                       gfs2_rindex_print(rgd);
                        fs_err(sdp, "start=%u len=%u offset=%u\n",
                               bi->bi_start, bi->bi_len, bi->bi_offset);
                }
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 }
 
 /**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ *
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{
+       struct inode *inode = sdp->sd_rindex;
+       struct gfs2_inode *ip = GFS2_I(inode);
+       char buf[sizeof(struct gfs2_rindex)];
+       const struct gfs2_rindex *ri = (const struct gfs2_rindex *)buf;
+       struct file_ra_state ra_state;
+       u64 total_data = 0;
+       int nread, rgrps = 0;
+
+       /* Sum ri_data over the rindex entries while holding sd_rindex_mutex */
+       mutex_lock(&sdp->sd_rindex_mutex);
+       file_ra_state_init(&ra_state, inode->i_mapping);
+       while (1) {
+               loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+
+               /*
+                * NOTE(review): with ">=" an entry that ends exactly at
+                * di_size is not counted; gfs2_ri_update_special() uses ">"
+                * for its partial-entry test. Confirm this is intended.
+                */
+               if (pos + sizeof(struct gfs2_rindex) >= ip->i_di.di_size)
+                       break;
+               nread = gfs2_internal_read(ip, &ra_state, buf, &pos,
+                                          sizeof(struct gfs2_rindex));
+               /* A short or failed read ends the scan; the partial sum is returned */
+               if (nread != sizeof(struct gfs2_rindex))
+                       break;
+               total_data += be32_to_cpu(ri->ri_data);
+               rgrps++;
+       }
+       mutex_unlock(&sdp->sd_rindex_mutex);
+       return total_data;
+}
+
+/**
+ * gfs2_rindex_in - decode a big-endian struct gfs2_rindex into an rgrp descriptor
+ * @rgd: destination; its rd_addr, rd_length, rd_data0, rd_data and
+ *       rd_bitbytes fields are set in CPU byte order
+ * @buf: source buffer, interpreted as a struct gfs2_rindex
+ */
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{
+       const struct gfs2_rindex *disk = buf;
+
+       /* 64-bit block addresses */
+       rgd->rd_addr = be64_to_cpu(disk->ri_addr);
+       rgd->rd_data0 = be64_to_cpu(disk->ri_data0);
+
+       /* 32-bit sizes */
+       rgd->rd_length = be32_to_cpu(disk->ri_length);
+       rgd->rd_data = be32_to_cpu(disk->ri_data);
+       rgd->rd_bitbytes = be32_to_cpu(disk->ri_bitbytes);
+}
+
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
  * @gl: The glock covering the rindex inode
  *
+ * Returns: 0 on success, error code otherwise
+ */
+
+static int read_rindex_entry(struct gfs2_inode *ip,
+                            struct file_ra_state *ra_state)
+{
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       /* The entry to read is selected by the current sd_rgrps count */
+       loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+       char buf[sizeof(struct gfs2_rindex)];
+       struct gfs2_rgrpd *rgd;
+       int ret;
+
+       ret = gfs2_internal_read(ip, ra_state, buf, &pos,
+                                sizeof(struct gfs2_rindex));
+       /* Nothing read: report success without creating a descriptor */
+       if (ret == 0)
+               return 0;
+       /* A short read becomes -EIO; a negative result is passed through */
+       if (ret != sizeof(struct gfs2_rindex))
+               return (ret > 0) ? -EIO : ret;
+
+       rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+       if (!rgd)
+               return -ENOMEM;
+
+       rgd->rd_sbd = sdp;
+       mutex_init(&rgd->rd_mutex);
+       lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+
+       /*
+        * The descriptor is linked into both lists before the steps that can
+        * fail, and is not freed here on error.
+        * NOTE(review): callers visible in this file call clear_rgrpdi() on
+        * failure; presumably that releases the listed entry - confirm.
+        */
+       list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+       list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+
+       gfs2_rindex_in(rgd, buf);
+
+       ret = compute_bitstructs(rgd);
+       if (ret)
+               return ret;
+
+       ret = gfs2_glock_get(sdp, rgd->rd_addr,
+                            &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+       if (ret)
+               return ret;
+
+       rgd->rd_gl->gl_object = rgd;
+       rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+       rgd->rd_flags |= GFS2_RDF_CHECK;
+       return 0;
+}
+
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
  * Returns: 0 on successful update, error code otherwise
  */
 
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct inode *inode = &ip->i_inode;
-       struct gfs2_rgrpd *rgd;
-       char buf[sizeof(struct gfs2_rindex)];
        struct file_ra_state ra_state;
-       u64 junk = ip->i_di.di_size;
+       u64 rgrp_count = ip->i_di.di_size;
        int error;
 
-       if (do_div(junk, sizeof(struct gfs2_rindex))) {
+       if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
                gfs2_consist_inode(ip);
                return -EIO;
        }
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        clear_rgrpdi(sdp);
 
        file_ra_state_init(&ra_state, inode->i_mapping);
-       for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
-               loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
-               error = gfs2_internal_read(ip, &ra_state, buf, &pos,
-                                           sizeof(struct gfs2_rindex));
-               if (!error)
-                       break;
-               if (error != sizeof(struct gfs2_rindex)) {
-                       if (error > 0)
-                               error = -EIO;
-                       goto fail;
+       for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
+               error = read_rindex_entry(ip, &ra_state);
+               if (error) {
+                       clear_rgrpdi(sdp);
+                       return error;
                }
+       }
 
-               rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
-               error = -ENOMEM;
-               if (!rgd)
-                       goto fail;
-
-               mutex_init(&rgd->rd_mutex);
-               lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
-               rgd->rd_sbd = sdp;
-
-               list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
-               list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-
-               gfs2_rindex_in(&rgd->rd_ri, buf);
-               error = compute_bitstructs(rgd);
-               if (error)
-                       goto fail;
+       sdp->sd_rindex_vn = ip->i_gl->gl_vn;
+       return 0;
+}
 
-               error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
-                                      &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
-               if (error)
-                       goto fail;
+/**
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
+ *
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       struct inode *inode = &ip->i_inode;
+       struct file_ra_state ra_state;
+       int error;
 
-               rgd->rd_gl->gl_object = rgd;
-               rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+       file_ra_state_init(&ra_state, inode->i_mapping);
+       for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+               /* Ignore partials */
+               if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+                   ip->i_di.di_size)
+                       break;
+               error = read_rindex_entry(ip, &ra_state);
+               if (error) {
+                       clear_rgrpdi(sdp);
+                       return error;
+               }
        }
 
        sdp->sd_rindex_vn = ip->i_gl->gl_vn;
        return 0;
-
-fail:
-       clear_rgrpdi(sdp);
-       return error;
 }
 
 /**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
        return error;
 }
 
+/**
+ * gfs2_rgrp_in - decode a big-endian struct gfs2_rgrp into host form
+ * @rg: destination, receives the values in CPU byte order
+ * @buf: source buffer, interpreted as a struct gfs2_rgrp
+ */
+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{
+       const struct gfs2_rgrp *disk = buf;
+
+       rg->rg_igeneration = be64_to_cpu(disk->rg_igeneration);
+       rg->rg_dinodes = be32_to_cpu(disk->rg_dinodes);
+       rg->rg_free = be32_to_cpu(disk->rg_free);
+       rg->rg_flags = be32_to_cpu(disk->rg_flags);
+}
+
+/**
+ * gfs2_rgrp_out - encode host-order rgrp values into a struct gfs2_rgrp
+ * @rg: source values in CPU byte order
+ * @buf: destination buffer, written as a struct gfs2_rgrp
+ *
+ * The __pad field and the rg_reserved area of the destination are zeroed.
+ */
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{
+       struct gfs2_rgrp *disk = buf;
+
+       /* Clear the fields that carry no host-side value */
+       disk->__pad = cpu_to_be32(0);
+       memset(&disk->rg_reserved, 0, sizeof(disk->rg_reserved));
+
+       disk->rg_flags = cpu_to_be32(rg->rg_flags);
+       disk->rg_free = cpu_to_be32(rg->rg_free);
+       disk->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+       disk->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+}
+
 /**
  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
  * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
-       unsigned int length = rgd->rd_ri.ri_length;
+       unsigned int length = rgd->rd_length;
        struct gfs2_bitmap *bi;
        unsigned int x, y;
        int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
-               error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+               error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
                if (error)
                        goto fail;
        }
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-       int x, length = rgd->rd_ri.ri_length;
+       int x, length = rgd->rd_length;
 
        spin_lock(&sdp->sd_rindex_spin);
        gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-       unsigned int length = rgd->rd_ri.ri_length;
+       unsigned int length = rgd->rd_length;
        unsigned int x;
 
        for (x = 0; x < length; x++) {
@@ -721,6 +849,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
        return ret;
 }
 
+/**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ * @last_unlinked: in/out; block address of the last candidate passed to
+ *                 gfs2_inode_lookup(). Candidates at or below it are skipped
+ *                 and it is updated for each new candidate.
+ *
+ * Returns: The inode, if one has been found, otherwise NULL. When the search
+ *          reports no further candidate, GFS2_RDF_CHECK is cleared on @rgd.
+ */
+
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+       struct inode *inode;
+       u32 goal = 0;
+       u64 no_addr;
+
+       for(;;) {
+               goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+                                   GFS2_BLKST_UNLINKED);
+               /*
+                * A result of 0 is treated as "nothing found". Leave the loop
+                * rather than returning, so that the flag update below runs.
+                */
+               if (goal == 0)
+                       break;
+               no_addr = goal + rgd->rd_data0;
+               /*
+                * NOTE(review): goal is not advanced before the next search,
+                * here or after a failed lookup below. Whether rgblk_search()
+                * moves past the same block is not visible from this
+                * function - confirm that the loop always makes progress.
+                */
+               if (no_addr <= *last_unlinked)
+                       continue;
+               *last_unlinked = no_addr;
+               inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+                                       no_addr, -1);
+               if (!IS_ERR(inode))
+                       return inode;
+       }
+
+       rgd->rd_flags &= ~GFS2_RDF_CHECK;
+       return NULL;
+}
+
 /**
  * recent_rgrp_first - get first RG from "recent" list
  * @sdp: The GFS2 superblock
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
                goto first;
 
        list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-               if (rgd->rd_ri.ri_addr == rglast)
+               if (rgd->rd_addr == rglast)
                        goto out;
        }
 
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
  * Returns: errno
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+       struct inode *inode = NULL;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
        struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                       if (rgd->rd_flags & GFS2_RDF_CHECK)
+                               inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                       if (inode)
+                               return inode;
                        rgd = recent_rgrp_next(rgd, 1);
                        break;
 
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
 
                default:
-                       return error;
+                       return ERR_PTR(error);
                }
        }
 
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                       if (rgd->rd_flags & GFS2_RDF_CHECK)
+                               inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                       if (inode)
+                               return inode;
                        break;
 
                case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
 
                default:
-                       return error;
+                       return ERR_PTR(error);
                }
 
                rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
 
                if (rgd == begin) {
                        if (++loops >= 3)
-                               return -ENOSPC;
+                               return ERR_PTR(-ENOSPC);
                        if (!skipped)
                                loops++;
                        flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
        }
 
 out:
-       ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+       ip->i_last_rg_alloc = rgd->rd_addr;
 
        if (begin) {
                recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
                forward_rgrp_set(sdp, rgd);
        }
 
-       return 0;
+       return NULL;
 }
 
 /**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_alloc *al = &ip->i_alloc;
-       int error;
+       struct inode *inode;
+       int error = 0;
+       u64 last_unlinked = 0;
 
        if (gfs2_assert_warn(sdp, al->al_requested))
                return -EINVAL;
 
-       error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+       /* We need to hold the rindex unless the inode we're using is
+          the rindex itself, in which case it's already held. */
+       if (ip != GFS2_I(sdp->sd_rindex))
+               error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+       else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+               error = gfs2_ri_update_special(ip);
+
        if (error)
                return error;
 
-       error = get_local_rgrp(ip);
-       if (error) {
-               gfs2_glock_dq_uninit(&al->al_ri_gh);
-               return error;
+       inode = get_local_rgrp(ip, &last_unlinked);
+       if (inode) {
+               if (ip != GFS2_I(sdp->sd_rindex))
+                       gfs2_glock_dq_uninit(&al->al_ri_gh);
+               if (IS_ERR(inode))
+                       return PTR_ERR(inode);
+               iput(inode);
+               gfs2_log_flush(sdp, NULL);
+               goto try_again;
        }
 
        al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
        al->al_rgd = NULL;
        gfs2_glock_dq_uninit(&al->al_rgd_gh);
-       gfs2_glock_dq_uninit(&al->al_ri_gh);
+       if (ip != GFS2_I(sdp->sd_rindex))
+               gfs2_glock_dq_uninit(&al->al_ri_gh);
 }
 
 /**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
        unsigned int buf;
        unsigned char type;
 
-       length = rgd->rd_ri.ri_length;
-       rgrp_block = block - rgd->rd_ri.ri_data0;
+       length = rgd->rd_length;
+       rgrp_block = block - rgd->rd_data0;
 
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
  */
 
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-                            unsigned char old_state, unsigned char new_state)
+                       unsigned char old_state, unsigned char new_state)
 {
        struct gfs2_bitmap *bi = NULL;
-       u32 length = rgd->rd_ri.ri_length;
+       u32 length = rgd->rd_length;
        u32 blk = 0;
        unsigned int buf, x;
 
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
                goal = 0;
        }
 
-       if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
-               blk = 0;
+       if (old_state != new_state) {
+               gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
 
-       gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-       gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-                   bi->bi_len, blk, new_state);
-       if (bi->bi_clone)
-               gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+               gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+               gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
                            bi->bi_len, blk, new_state);
+               if (bi->bi_clone)
+                       gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+                                   bi->bi_len, blk, new_state);
+       }
 
-       return bi->bi_start * GFS2_NBBY + blk;
+       return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
                return NULL;
        }
 
-       length = rgd->rd_ri.ri_length;
+       length = rgd->rd_length;
 
-       rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+       rgrp_blk = bstart - rgd->rd_data0;
 
        while (blen--) {
                for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
 
-       if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
-               goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+       if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
+               goal = ip->i_di.di_goal_data - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_data;
 
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_data = blk;
 
-       block = rgd->rd_ri.ri_data0 + blk;
+       block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_data = block;
 
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
 
-       if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
-               goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+       if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
+               goal = ip->i_di.di_goal_meta - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_meta;
 
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_meta = blk;
 
-       block = rgd->rd_ri.ri_data0 + blk;
+       block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_meta = block;
 
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
 
        rgd->rd_last_alloc_meta = blk;
 
-       block = rgd->rd_ri.ri_data0 + blk;
+       block = rgd->rd_data0 + blk;
 
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
        rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_rgrpd *rgd;
-       u64 blkno = ip->i_num.no_addr;
+       u64 blkno = ip->i_no_addr;
 
        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
        if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-       gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+       gfs2_free_uninit_di(rgd, ip->i_no_addr);
        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
-       gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+       gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
 
 /**
index b01e0cfc99b5fc8d38a3b04d059b66fa2c7c4542..b4c6adfc6f2ebe447697c9bdadf8f7837ea7170e 100644 (file)
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
                      int flags);
 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 
 #endif /* __RGRP_DOT_H__ */
index 4fdda974dc837e03d930091ba10fdef0adbe7c63..f916b9740c75b454b28aba4aa6157d10d8f8af14 100644 (file)
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
        unsigned int x;
 
-       if (sb->sb_header.mh_magic != GFS2_MAGIC ||
-           sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+       if (sb->sb_magic != GFS2_MAGIC ||
+           sb->sb_type != GFS2_METATYPE_SB) {
                if (!silent)
                        printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
                return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
        return 0;
 }
 
+/**
+ * gfs2_sb_in - Convert an on-disk super block into its in-core form
+ * @sb: The in-core super block to fill in
+ * @buf: Buffer holding the on-disk struct gfs2_sb (big-endian fields)
+ *
+ * The fields of the on-disk meta header (sb_header.mh_*) are stored
+ * directly in @sb as sb_magic, sb_type and sb_format.
+ */
+
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+       const struct gfs2_sb *str = buf;
+
+       sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
+       sb->sb_type = be32_to_cpu(str->sb_header.mh_type);
+       sb->sb_format = be32_to_cpu(str->sb_header.mh_format);
+       sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
+       sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
+       sb->sb_bsize = be32_to_cpu(str->sb_bsize);
+       sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
+       sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr);
+       sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino);
+       sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr);
+       sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino);
+
+       /* The names are copied as raw bytes; no NUL terminator is added here */
+       memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
+       memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
+}
+
 /**
  * gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
  * @sector: The location of the super block
+ * @error: The error code to return
  *
  * This uses the bio functions to read the super block from disk
  * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
  * the master directory (contains pointers to journals etc) and the
  * root directory.
  *
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or a negative errno on failure
  */
 
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 {
+       struct super_block *sb = sdp->sd_vfs;
+       struct gfs2_sb *p;
        struct page *page;
        struct bio *bio;
 
        page = alloc_page(GFP_KERNEL);
        if (unlikely(!page))
-               return NULL;
+               return -ENOBUFS;
 
        ClearPageUptodate(page);
        ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio = bio_alloc(GFP_KERNEL, 1);
        if (unlikely(!bio)) {
                __free_page(page);
-               return NULL;
+               return -ENOBUFS;
        }
 
        bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio_put(bio);
        if (!PageUptodate(page)) {
                __free_page(page);
-               return NULL;
+               return -EIO;
        }
-       return page;
+       p = kmap(page);
+       gfs2_sb_in(&sdp->sd_sb, p);
+       kunmap(page);
+       __free_page(page);
+       return 0;
 }
 
 /**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
        u32 tmp_blocks;
        unsigned int x;
        int error;
-       struct page *page;
-       char *sb;
 
-       page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-       if (!page) {
+       error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+       if (error) {
                if (!silent)
                        fs_err(sdp, "can't read superblock\n");
-               return -EIO;
+               return error;
        }
-       sb = kmap(page);
-       gfs2_sb_in(&sdp->sd_sb, sb);
-       kunmap(page);
-       __free_page(page);
 
        error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
        if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
                name.len = sprintf(buf, "journal%u", sdp->sd_journals);
                name.hash = gfs2_disk_hash(name.name, name.len);
 
-               error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+               error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
                if (error == -ENOENT) {
                        error = 0;
                        break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
        return error;
 }
 
+/**
+ * gfs2_statfs_change_in - Read a statfs change record into in-core form
+ * @sc: The in-core structure to fill in
+ * @buf: Buffer holding the on-disk struct gfs2_statfs_change (big-endian)
+ */
+
+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+       const struct gfs2_statfs_change *str = buf;
+
+       sc->sc_total = be64_to_cpu(str->sc_total);
+       sc->sc_free = be64_to_cpu(str->sc_free);
+       sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
+}
+
+/**
+ * gfs2_statfs_change_out - Write a statfs change record in on-disk form
+ * @sc: The in-core structure to read from
+ * @buf: Buffer receiving the on-disk struct gfs2_statfs_change (big-endian)
+ */
+
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+       struct gfs2_statfs_change *str = buf;
+
+       str->sc_total = cpu_to_be64(sc->sc_total);
+       str->sc_free = cpu_to_be64(sc->sc_free);
+       str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+}
+
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
 {
        gfs2_rgrp_verify(rgd);
-       sc->sc_total += rgd->rd_ri.ri_data;
+       sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_rg.rg_free;
        sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
        return 0;
index e590b2df11dc1c88343743132a64ece6ae83ddd9..60a870e430be921da125b2aee7a412d66231dc9f 100644 (file)
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
 
 int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
 
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 {
index 601eaa1b9ed63f9c50dc72b104af2985cf01a276..424a0774eda81268b1daf3eff28ae82a636c6ad8 100644 (file)
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
                "GFS2: fsid=%s:   inode = %llu %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-               sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
-               (unsigned long long)ip->i_num.no_addr,
+               sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
+               (unsigned long long)ip->i_no_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
                "GFS2: fsid=%s:   RG = %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-               sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+               sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
index f317c270d4bfe9acbefdf3a567f8313480ad4e0e..afae306b177c148c10ea227534e2fb5281f58625 100644 (file)
@@ -49,6 +49,7 @@ header-y += consolemap.h
 header-y += const.h
 header-y += cycx_cfm.h
 header-y += dlm_device.h
+header-y += dlm_netlink.h
 header-y += dm-ioctl.h
 header-y += dn.h
 header-y += dqblk_v1.h
index 1b1dcb9a40bbab9facb21a13ece1cb23cab28534..be9d278761e0ed5ddbb87653d64f34e9108956cb 100644 (file)
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
  * Only relevant to locks originating in userspace.  A persistent lock will not
  * be removed if the process holding the lock exits.
  *
- * DLM_LKF_NODLKWT
+ * DLM_LKF_NODLCKWT
+ *
+ * Do not cancel the lock if it gets into conversion deadlock.
+ * Exclude this lock from being monitored due to DLM_LSFL_TIMEWARN.
+ *
  * DLM_LKF_NODLCKBLK
  *
  * net yet implemented
 #define DLM_LKF_ALTPR          0x00008000
 #define DLM_LKF_ALTCW          0x00010000
 #define DLM_LKF_FORCEUNLOCK    0x00020000
+#define DLM_LKF_TIMEOUT                0x00040000
 
 /*
  * Some return codes that are not in errno.h
@@ -199,11 +204,12 @@ struct dlm_lksb {
        char *   sb_lvbptr;
 };
 
+#define DLM_LSFL_NODIR         0x00000001
+#define DLM_LSFL_TIMEWARN      0x00000002
+#define DLM_LSFL_FS            0x00000004
 
 #ifdef __KERNEL__
 
-#define DLM_LSFL_NODIR         0x00000001
-
 /*
  * dlm_new_lockspace
  *
index c2735cab2ebf2966b61f5d932cf1fd77401d978f..9642277a152a2d3d764317d03400a53d2f5eae94 100644 (file)
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
 #define DLM_USER_LVB_LEN       32
 
 /* Version of the device interface */
-#define DLM_DEVICE_VERSION_MAJOR 5
-#define DLM_DEVICE_VERSION_MINOR 1
+#define DLM_DEVICE_VERSION_MAJOR 6
+#define DLM_DEVICE_VERSION_MINOR 0
 #define DLM_DEVICE_VERSION_PATCH 0
 
 /* struct passed to the lock write */
 struct dlm_lock_params {
        __u8 mode;
        __u8 namelen;
-       __u16 flags;
+       __u16 unused;
+       __u32 flags;
        __u32 lkid;
        __u32 parent;
-        void __user *castparam;
+       __u64 xid;
+       __u64 timeout;
+       void __user *castparam;
        void __user *castaddr;
        void __user *bastparam;
-        void __user *bastaddr;
+       void __user *bastaddr;
        struct dlm_lksb __user *lksb;
        char lvb[DLM_USER_LVB_LEN];
        char name[0];
@@ -62,9 +65,15 @@ struct dlm_write_request {
        } i;
 };
 
+struct dlm_device_version {
+       __u32 version[3];
+};
+
 /* struct read from the "device" fd,
    consists mainly of userspace pointers for the library to use */
+
 struct dlm_lock_result {
+       __u32 version[3];
        __u32 length;
        void __user * user_astaddr;
        void __user * user_astparam;
@@ -83,6 +92,7 @@ struct dlm_lock_result {
 #define DLM_USER_CREATE_LOCKSPACE  4
 #define DLM_USER_REMOVE_LOCKSPACE  5
 #define DLM_USER_PURGE        6
+#define DLM_USER_DEADLOCK     7
 
 /* Arbitrary length restriction */
 #define MAX_LS_NAME_LEN 64
diff --git a/include/linux/dlm_netlink.h b/include/linux/dlm_netlink.h
new file mode 100644 (file)
index 0000000..1927633
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef _DLM_NETLINK_H
+#define _DLM_NETLINK_H
+
+enum {
+       DLM_STATUS_WAITING = 1,
+       DLM_STATUS_GRANTED = 2,
+       DLM_STATUS_CONVERT = 3,
+};
+
+#define DLM_LOCK_DATA_VERSION 1
+
+/*
+ * Per-lock record; presumably the payload carried with the DLM_CMD_*
+ * messages defined in this header (verify against the netlink code).
+ *
+ * NOTE(review): this header is listed under header-y, i.e. exported to
+ * userspace, yet "unsigned long timestamp" has an ABI-dependent width --
+ * confirm that 32-bit userspace on a 64-bit kernel is handled.
+ * NOTE(review): DLM_RESNAME_MAXLEN is not defined in this header and no
+ * #include is present; it must come from a header included earlier.
+ */
+struct dlm_lock_data {
+       uint16_t version;
+       uint32_t lockspace_id;
+       int nodeid;
+       int ownpid;
+       uint32_t id;
+       uint32_t remid;
+       uint64_t xid;
+       int8_t status;
+       int8_t grmode;
+       int8_t rqmode;
+       unsigned long timestamp;
+       int resource_namelen;
+       char resource_name[DLM_RESNAME_MAXLEN];
+};
+
+enum {
+       DLM_CMD_UNSPEC = 0,
+       DLM_CMD_HELLO,          /* user->kernel */
+       DLM_CMD_TIMEOUT,        /* kernel->user */
+       __DLM_CMD_MAX,
+};
+
+#define DLM_CMD_MAX (__DLM_CMD_MAX - 1)
+
+enum {
+       DLM_TYPE_UNSPEC = 0,
+       DLM_TYPE_LOCK,
+       __DLM_TYPE_MAX,
+};
+
+#define DLM_TYPE_MAX (__DLM_TYPE_MAX - 1)
+
+#define DLM_GENL_VERSION 0x1
+#define DLM_GENL_NAME "DLM"
+
+#endif /* _DLM_NETLINK_H */
index 8b7e4c1e32ae2c8f80d2f8182e2452add7c18451..a44a6a078f0a68b487ff6d049c71b6ceea154a05 100644 (file)
@@ -54,18 +54,6 @@ struct gfs2_inum {
        __be64 no_addr;
 };
 
-struct gfs2_inum_host {
-       __u64 no_formal_ino;
-       __u64 no_addr;
-};
-
-static inline int gfs2_inum_equal(const struct gfs2_inum_host *ino1,
-                                 const struct gfs2_inum_host *ino2)
-{
-       return ino1->no_formal_ino == ino2->no_formal_ino &&
-              ino1->no_addr == ino2->no_addr;
-}
-
 /*
  * Generic metadata head structure
  * Every inplace buffer logged in the journal must start with this.
@@ -94,12 +82,6 @@ struct gfs2_meta_header {
        __be32 __pad1;          /* Was incarnation number in gfs1 */
 };
 
-struct gfs2_meta_header_host {
-       __u32 mh_magic;
-       __u32 mh_type;
-       __u32 mh_format;
-};
-
 /*
  * super-block structure
  *
@@ -139,23 +121,6 @@ struct gfs2_sb {
        /* In gfs1, quota and license dinodes followed */
 };
 
-struct gfs2_sb_host {
-       struct gfs2_meta_header_host sb_header;
-
-       __u32 sb_fs_format;
-       __u32 sb_multihost_format;
-
-       __u32 sb_bsize;
-       __u32 sb_bsize_shift;
-
-       struct gfs2_inum_host sb_master_dir; /* Was jindex dinode in gfs1 */
-       struct gfs2_inum_host sb_root_dir;
-
-       char sb_lockproto[GFS2_LOCKNAME_LEN];
-       char sb_locktable[GFS2_LOCKNAME_LEN];
-       /* In gfs1, quota and license dinodes followed */
-};
-
 /*
  * resource index structure
  */
@@ -173,14 +138,6 @@ struct gfs2_rindex {
        __u8 ri_reserved[64];
 };
 
-struct gfs2_rindex_host {
-       __u64 ri_addr;  /* grp block disk address */
-       __u64 ri_data0; /* first data location */
-       __u32 ri_length;        /* length of rgrp header in fs blocks */
-       __u32 ri_data;  /* num of data blocks in rgrp */
-       __u32 ri_bitbytes;      /* number of bytes in data bitmaps */
-};
-
 /*
  * resource group header structure
  */
@@ -212,13 +169,6 @@ struct gfs2_rgrp {
        __u8 rg_reserved[80]; /* Several fields from gfs1 now reserved */
 };
 
-struct gfs2_rgrp_host {
-       __u32 rg_flags;
-       __u32 rg_free;
-       __u32 rg_dinodes;
-       __u64 rg_igeneration;
-};
-
 /*
  * quota structure
  */
@@ -230,12 +180,6 @@ struct gfs2_quota {
        __u8 qu_reserved[64];
 };
 
-struct gfs2_quota_host {
-       __u64 qu_limit;
-       __u64 qu_warn;
-       __u64 qu_value;
-};
-
 /*
  * dinode structure
  */
@@ -315,29 +259,11 @@ struct gfs2_dinode {
        struct gfs2_inum __pad4; /* Unused even in current gfs1 */
 
        __be64 di_eattr;        /* extended attribute block number */
+       __be32 di_atime_nsec;   /* nsec portion of atime */
+       __be32 di_mtime_nsec;   /* nsec portion of mtime */
+       __be32 di_ctime_nsec;   /* nsec portion of ctime */
 
-       __u8 di_reserved[56];
-};
-
-struct gfs2_dinode_host {
-       __u64 di_size;  /* number of bytes in file */
-       __u64 di_blocks;        /* number of blocks in file */
-
-       /* This section varies from gfs1. Padding added to align with
-         * remainder of dinode
-        */
-       __u64 di_goal_meta;     /* rgrp to alloc from next */
-       __u64 di_goal_data;     /* data block goal */
-       __u64 di_generation;    /* generation number for NFS */
-
-       __u32 di_flags; /* GFS2_DIF_... */
-       __u16 di_height;        /* height of metadata */
-
-       /* These only apply to directories  */
-       __u16 di_depth; /* Number of bits in the table */
-       __u32 di_entries;       /* The number of entries in the directory */
-
-       __u64 di_eattr; /* extended attribute block number */
+       __u8 di_reserved[44];
 };
 
 /*
@@ -414,16 +340,6 @@ struct gfs2_log_header {
        __be32 lh_hash;
 };
 
-struct gfs2_log_header_host {
-       struct gfs2_meta_header_host lh_header;
-
-       __u64 lh_sequence;      /* Sequence number of this transaction */
-       __u32 lh_flags; /* GFS2_LOG_HEAD_... */
-       __u32 lh_tail;          /* Block number of log tail */
-       __u32 lh_blkno;
-       __u32 lh_hash;
-};
-
 /*
  * Log type descriptor
  */
@@ -464,11 +380,6 @@ struct gfs2_inum_range {
        __be64 ir_length;
 };
 
-struct gfs2_inum_range_host {
-       __u64 ir_start;
-       __u64 ir_length;
-};
-
 /*
  * Statfs change
  * Describes an change to the pool of free and allocated
@@ -481,12 +392,6 @@ struct gfs2_statfs_change {
        __be64 sc_dinodes;
 };
 
-struct gfs2_statfs_change_host {
-       __u64 sc_total;
-       __u64 sc_free;
-       __u64 sc_dinodes;
-};
-
 /*
  * Quota change
  * Describes an allocation change for a particular
@@ -501,39 +406,12 @@ struct gfs2_quota_change {
        __be32 qc_id;
 };
 
-struct gfs2_quota_change_host {
-       __u64 qc_change;
-       __u32 qc_flags; /* GFS2_QCF_... */
-       __u32 qc_id;
+struct gfs2_quota_lvb {
+        __be32 qb_magic;
+        __u32 __pad;
+        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
+        __be64 qb_warn;       /* Warn user when alloc is above this # */
+        __be64 qb_value;       /* Current # blocks allocated */
 };
 
-#ifdef __KERNEL__
-/* Translation functions */
-
-extern void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf);
-extern void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf);
-extern void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf);
-extern void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf);
-extern void gfs2_rindex_out(const struct gfs2_rindex_host *ri, void *buf);
-extern void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf);
-extern void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf);
-extern void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf);
-struct gfs2_inode;
-extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
-extern void gfs2_ea_header_in(struct gfs2_ea_header *ea, const void *buf);
-extern void gfs2_ea_header_out(const struct gfs2_ea_header *ea, void *buf);
-extern void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf);
-extern void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf);
-extern void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf);
-extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf);
-extern void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf);
-extern void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf);
-
-/* Printing functions */
-
-extern void gfs2_rindex_print(const struct gfs2_rindex_host *ri);
-extern void gfs2_dinode_print(const struct gfs2_inode *ip);
-
-#endif /* __KERNEL__ */
-
 #endif /* __GFS2_ONDISK_DOT_H__ */