[PATCH] 64-bit jbd2 core
authorZach Brown <zach.brown@oracle.com>
Wed, 11 Oct 2006 08:21:08 +0000 (01:21 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Wed, 11 Oct 2006 18:14:16 +0000 (11:14 -0700)
Here is the patch to JBD to handle 64 bit block numbers, originally from Zach
Brown.  This patch is useful only after adding support for 64-bit block
numbers in the filesystem.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jbd2/revoke.c
include/linux/jbd2.h

index b1a4eafc15417688128612e5d21746ad1bf6b183..44d68a113c73bd06560a5b004eba5942ec37c908 100644 (file)
@@ -271,6 +271,14 @@ write_out_data:
        journal_do_submit_data(wbuf, bufs);
 }
 
+static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
+                                  sector_t block)
+{
+       tag->t_blocknr = cpu_to_be32(block & (u32)~0);
+       if (tag_bytes > JBD_TAG_SIZE32)
+               tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
+}
+
 /*
  * jbd2_journal_commit_transaction
  *
@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        int first_tag = 0;
        int tag_flag;
        int i;
+       int tag_bytes = journal_tag_bytes(journal);
 
        /*
         * First job: lock down the current transaction and wait for
@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                        tag_flag |= JBD2_FLAG_SAME_UUID;
 
                tag = (journal_block_tag_t *) tagp;
-               tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr);
+               write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
                tag->t_flags = cpu_to_be32(tag_flag);
-               tagp += sizeof(journal_block_tag_t);
-               space_left -= sizeof(journal_block_tag_t);
+               tagp += tag_bytes;
+               space_left -= tag_bytes;
 
                if (first_tag) {
                        memcpy (tagp, journal->j_uuid, 16);
@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
                if (bufs == journal->j_wbufsize ||
                    commit_transaction->t_buffers == NULL ||
-                   space_left < sizeof(journal_block_tag_t) + 16) {
+                   space_left < tag_bytes + 16) {
 
                        jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
 
index 8d0f71e562feb92da34c2d13bde4b0a6843bef8a..926ebcbf8a7aea8a3ef7a8b455ca4a8e44a430d2 100644 (file)
@@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
        return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 }
 
+/*
+ * helper functions to deal with 32 or 64bit block numbers.
+ */
+size_t journal_tag_bytes(journal_t *journal)
+{
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+               return JBD_TAG_SIZE64;
+       else
+               return JBD_TAG_SIZE32;
+}
+
 /*
  * Simple support for retrying memory allocations.  Introduced to help to
  * debug different VM deadlock avoidance strategies.
index b2012d1124329d769bd8258f085c65d1dd99cb8b..2486843adda0ac8b44182a29bf1c0d2ae1410e34 100644 (file)
@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
  * Count the number of in-use tags in a journal descriptor block.
  */
 
-static int count_tags(struct buffer_head *bh, int size)
+static int count_tags(journal_t *journal, struct buffer_head *bh)
 {
        char *                  tagp;
        journal_block_tag_t *   tag;
-       int                     nr = 0;
+       int                     nr = 0, size = journal->j_blocksize;
+       int                     tag_bytes = journal_tag_bytes(journal);
 
        tagp = &bh->b_data[sizeof(journal_header_t)];
 
-       while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
+       while ((tagp - bh->b_data + tag_bytes) <= size) {
                tag = (journal_block_tag_t *) tagp;
 
                nr++;
-               tagp += sizeof(journal_block_tag_t);
+               tagp += tag_bytes;
                if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
                        tagp += 16;
 
@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal)
        return err;
 }
 
+static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag)
+{
+       sector_t block = be32_to_cpu(tag->t_blocknr);
+       if (tag_bytes > JBD_TAG_SIZE32)
+               block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
+       return block;
+}
+
 static int do_one_pass(journal_t *journal,
                        struct recovery_info *info, enum passtype pass)
 {
@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal,
        struct buffer_head *    bh;
        unsigned int            sequence;
        int                     blocktype;
+       int                     tag_bytes = journal_tag_bytes(journal);
 
        /* Precompute the maximum metadata descriptors in a descriptor block */
        int                     MAX_BLOCKS_PER_DESC;
        MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
-                              / sizeof(journal_block_tag_t));
+                              / tag_bytes);
 
        /*
         * First thing is to establish what we expect to find in the log
@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal,
                         * in pass REPLAY; otherwise, just skip over the
                         * blocks it describes. */
                        if (pass != PASS_REPLAY) {
-                               next_log_block +=
-                                       count_tags(bh, journal->j_blocksize);
+                               next_log_block += count_tags(journal, bh);
                                wrap(journal, next_log_block);
                                brelse(bh);
                                continue;
@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal,
                         * getting done here! */
 
                        tagp = &bh->b_data[sizeof(journal_header_t)];
-                       while ((tagp - bh->b_data +sizeof(journal_block_tag_t))
+                       while ((tagp - bh->b_data + tag_bytes)
                               <= journal->j_blocksize) {
                                unsigned long io_block;
 
@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal,
                                        unsigned long blocknr;
 
                                        J_ASSERT(obh != NULL);
-                                       blocknr = be32_to_cpu(tag->t_blocknr);
+                                       blocknr = read_tag_block(tag_bytes,
+                                                                tag);
 
                                        /* If the block has been
                                         * revoked, then we're all done
@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal,
                                }
 
                        skip_write:
-                               tagp += sizeof(journal_block_tag_t);
+                               tagp += tag_bytes;
                                if (!(flags & JBD2_FLAG_SAME_UUID))
                                        tagp += 16;
 
@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 {
        jbd2_journal_revoke_header_t *header;
        int offset, max;
+       int record_len = 4;
 
        header = (jbd2_journal_revoke_header_t *) bh->b_data;
        offset = sizeof(jbd2_journal_revoke_header_t);
        max = be32_to_cpu(header->r_count);
 
-       while (offset < max) {
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
+               record_len = 8;
+
+       while (offset + record_len <= max) {
                unsigned long blocknr;
                int err;
 
-               blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
-               offset += 4;
+               if (record_len == 4)
+                       blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
+               else
+                       blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
+               offset += record_len;
                err = jbd2_journal_set_revoke(journal, blocknr, sequence);
                if (err)
                        return err;
index 5820a0c5ad26caf01f4ffa8aff6654485da6c3f1..8aac875bd301865c81aa0e8f93e6b5f00c7e1ddc 100644 (file)
@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal,
                *descriptorp = descriptor;
        }
 
-       * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
-               cpu_to_be32(record->blocknr);
-       offset += 4;
+       if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) {
+               * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) =
+                       cpu_to_be64(record->blocknr);
+               offset += 8;
+
+       } else {
+               * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+                       cpu_to_be32(record->blocknr);
+               offset += 4;
+       }
+
        *offsetp = offset;
 }
 
index 3251f7abb57ded75e9af86196bf3ae016fd3258a..5e5aa64f12618a333dab4943a27426d0f655303b 100644 (file)
@@ -150,14 +150,21 @@ typedef struct journal_header_s
 
 
 /*
- * The block tag: used to describe a single buffer in the journal
+ * The block tag: used to describe a single buffer in the journal.
+ * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this
+ * raw struct shouldn't be used for pointer math or sizeof() - use
+ * journal_tag_bytes(journal) instead to compute this.
  */
 typedef struct journal_block_tag_s
 {
        __be32          t_blocknr;      /* The on-disk block number */
        __be32          t_flags;        /* See below */
+       __be32          t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
+#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
+#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t))
+
 /*
  * The revoke descriptor: used on disk to describe a series of blocks to
  * be revoked from the log
@@ -235,11 +242,13 @@ typedef struct journal_superblock_s
         ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
 
 #define JBD2_FEATURE_INCOMPAT_REVOKE   0x00000001
+#define JBD2_FEATURE_INCOMPAT_64BIT    0x00000002
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES     0
 #define JBD2_KNOWN_ROCOMPAT_FEATURES   0
-#define JBD2_KNOWN_INCOMPAT_FEATURES   JBD2_FEATURE_INCOMPAT_REVOKE
+#define JBD2_KNOWN_INCOMPAT_FEATURES   (JBD2_FEATURE_INCOMPAT_REVOKE | \
+                                        JBD2_FEATURE_INCOMPAT_64BIT)
 
 #ifdef __KERNEL__
 
@@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y)
 }
 
 extern int jbd2_journal_blocks_per_page(struct inode *inode);
+extern size_t journal_tag_bytes(journal_t *journal);
 
 /*
  * Return the minimum number of blocks which must be free in the journal