#include <linux/rbtree.h>
#include <linux/delay.h>
#include <linux/random.h>
+#include <linux/log2.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/async_tx.h>
#define SB_MAGIC "integrt"
#define SB_VERSION 1
#define SB_SECTORS 8
+#define MAX_SECTORS_PER_BLOCK 8
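/*
 * The optional block_size argument groups up to MAX_SECTORS_PER_BLOCK
 * 512-byte sectors under a single tag, so the largest supported block is
 * 8 * 512 = 4096 bytes; one sector per block remains the default.
 */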
struct superblock {
__u8 magic[8];
__u32 journal_sections;
__u64 provided_data_sectors; /* userspace uses this value */
__u32 flags;
+ __u8 log2_sectors_per_block;
};
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
} s;
__u64 sector;
} u;
- commit_id_t last_bytes;
- __u8 tag[0];
+ commit_id_t last_bytes[0];
+ /* __u8 tag[0]; */
};
+#define journal_entry_tag(ic, je) ((__u8 *)&(je)->last_bytes[(ic)->sectors_per_block])
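/*
 * A journal entry now ends with one saved commit_id slot per data sector of
 * the block (last_bytes[]) followed immediately by the integrity tag. Since
 * last_bytes[] is sized by sectors_per_block at run time, the tag can no
 * longer be a named member; journal_entry_tag() computes its address.
 */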
+
#if BITS_PER_LONG == 64
#define journal_entry_set_sector(je, x) do { smp_wmb(); ACCESS_ONCE((je)->u.sector) = cpu_to_le64(x); } while (0)
#define journal_entry_get_sector(je) le64_to_cpu((je)->u.sector)
commit_id_t commit_id;
};
-#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, tag))
+#define MAX_TAG_SIZE (JOURNAL_SECTOR_DATA - JOURNAL_MAC_PER_SECTOR - offsetof(struct journal_entry, last_bytes[MAX_SECTORS_PER_BLOCK]))
#define METADATA_PADDING_SECTORS 8
unsigned short journal_entry_size;
unsigned char journal_entries_per_sector;
unsigned char journal_section_entries;
- unsigned char journal_section_sectors;
+ unsigned short journal_section_sectors;
unsigned journal_sections;
unsigned journal_entries;
sector_t device_sectors;
unsigned metadata_run;
__s8 log2_metadata_run;
__u8 log2_buffer_sectors;
+ __u8 sectors_per_block;
unsigned char mode;
bool suspending;
*offset = (unsigned)data_sector & ((1U << log2_interleave_sectors) - 1);
}
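/*
 * sector_to_block() converts a block-aligned sector count or offset into
 * block units in place, e.g. with 4096-byte blocks (sectors_per_block == 8)
 * an offset of 24 sectors becomes block 3; unaligned values are a bug.
 */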
+#define sector_to_block(ic, n) \
+do { \
+ BUG_ON((n) & (unsigned)((ic)->sectors_per_block - 1)); \
+ (n) >>= (ic)->sb->log2_sectors_per_block; \
+} while (0)
+
static __u64 get_metadata_sector_and_offset(struct dm_integrity_c *ic, sector_t area,
sector_t offset, unsigned *metadata_offset)
{
ms += area * ic->metadata_run;
ms >>= ic->log2_buffer_sectors;
+ sector_to_block(ic, offset);
+
if (likely(ic->log2_tag_size >= 0)) {
ms += offset >> (SECTOR_SHIFT + ic->log2_buffer_sectors - ic->log2_tag_size);
mo = (offset << ic->log2_tag_size) & ((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - 1);
static struct journal_sector *access_journal_data(struct dm_integrity_c *ic, unsigned section, unsigned n)
{
- access_journal_check(ic, section, n, true, "access_journal_data");
- return access_journal(ic, section, n + JOURNAL_BLOCK_SECTORS);
+ n <<= ic->sb->log2_sectors_per_block;
+ n += JOURNAL_BLOCK_SECTORS;
+
+ access_journal_check(ic, section, n, false, "access_journal_data");
+
+ return access_journal(ic, section, n);
}
static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result[JOURNAL_MAC_SIZE])
int r;
unsigned sector, pl_index, pl_offset;
+ BUG_ON((target | n_sectors | offset) & (unsigned)(ic->sectors_per_block - 1));
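/*
 * The copy must be block-aligned: the destination sector, the length and
 * the journal offset are all multiples of sectors_per_block.
 */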
+
if (unlikely(dm_integrity_failed(ic))) {
fn(-1UL, data);
return;
struct rb_node **n = &ic->in_progress.rb_node;
struct rb_node *parent;
+ BUG_ON((new_range->logical_sector | new_range->n_sectors) & (unsigned)(ic->sectors_per_block - 1));
+
parent = NULL;
while (*n) {
goto failed;
}
- r = crypto_shash_update(req, data, 1 << SECTOR_SHIFT);
+ r = crypto_shash_update(req, data, ic->sectors_per_block << SECTOR_SHIFT);
if (unlikely(r < 0)) {
dm_integrity_io_error(ic, "crypto_shash_update", r);
goto failed;
if (unlikely(ic->mode == 'R'))
goto skip_io;
- checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT) * ic->tag_size + extra_space,
+ checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
if (!checksums)
checksums = checksums_onstack;
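/*
 * One checksum now covers a whole block: the buffer allocated above holds
 * (PAGE_SIZE / block size) tags and the loop below advances by
 * sectors_per_block sectors for each tag_size-byte checksum.
 */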
do {
integrity_sector_checksum(ic, sector, mem + pos, checksums_ptr);
checksums_ptr += ic->tag_size;
- sectors_to_process--;
- pos += 1 << SECTOR_SHIFT;
- sector++;
+ sectors_to_process -= ic->sectors_per_block;
+ pos += ic->sectors_per_block << SECTOR_SHIFT;
+ sector += ic->sectors_per_block;
} while (pos < bv.bv_len && sectors_to_process && checksums != checksums_onstack);
kunmap_atomic(mem);
if (bip) {
struct bio_vec biv;
struct bvec_iter iter;
- unsigned data_to_process = dio->range.n_sectors * ic->tag_size;
+ unsigned data_to_process = dio->range.n_sectors;
+ sector_to_block(ic, data_to_process);
+ data_to_process *= ic->tag_size;
bip_for_each_vec(biv, bip, iter) {
unsigned char *tag;
{
struct dm_integrity_c *ic = ti->private;
struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
+ struct bio_integrity_payload *bip;
sector_t area, offset;
(unsigned long long)ic->provided_data_sectors);
return -EIO;
}
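/*
 * With blocks larger than one sector, the bio as a whole and every bio_vec
 * in it must be block-aligned: checksums are computed over contiguous
 * block-sized runs of data, so a block may never straddle two vectors.
 */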
+ if (unlikely((dio->range.logical_sector | bio_sectors(bio)) & (unsigned)(ic->sectors_per_block - 1))) {
+ DMERR("Bio not aligned on %u sectors: 0x%llx, 0x%x",
+ ic->sectors_per_block,
+ (unsigned long long)dio->range.logical_sector, bio_sectors(bio));
+ return -EIO;
+ }
+
+ if (ic->sectors_per_block > 1) {
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ bio_for_each_segment(bv, bio, iter) {
+ if (unlikely((bv.bv_offset | bv.bv_len) & ((ic->sectors_per_block << SECTOR_SHIFT) - 1))) {
+ DMERR("Bio vector (%u,%u) is not aligned on %u-sector boundary",
+ bv.bv_offset, bv.bv_len, ic->sectors_per_block);
+ return -EIO;
+ }
+ }
+ }
+
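/*
 * Without an internal hash the tags arrive as a bio integrity payload, one
 * tag_size tag per block, so the expected payload size is
 * (bio_sectors / sectors_per_block) * tag_size bytes. With an internal hash
 * the tags are generated here and an external payload is rejected.
 */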
+ bip = bio_integrity(bio);
+ if (!ic->internal_hash) {
+ if (bip) {
+ unsigned wanted_tag_size = bio_sectors(bio) >> ic->sb->log2_sectors_per_block;
+ if (ic->log2_tag_size >= 0)
+ wanted_tag_size <<= ic->log2_tag_size;
+ else
+ wanted_tag_size *= ic->tag_size;
+ if (unlikely(wanted_tag_size != bip->bip_iter.bi_size)) {
+ DMERR("Invalid integrity data size %u, expected %u", bip->bip_iter.bi_size, wanted_tag_size);
+ return -EIO;
+ }
+ }
+ } else {
+ if (unlikely(bip != NULL)) {
+ DMERR("Unexpected integrity data when using internal hash");
+ return -EIO;
+ }
+ }
if (unlikely(ic->mode == 'R') && unlikely(dio->write))
return -EIO;
if (unlikely(!dio->write)) {
struct journal_sector *js;
+ char *mem_ptr;
+ unsigned s;
if (unlikely(journal_entry_is_inprogress(je))) {
flush_dcache_page(bv.bv_page);
smp_rmb();
BUG_ON(journal_entry_get_sector(je) != logical_sector);
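/*
 * In the journal, the last bytes of every data sector are occupied by a
 * commit_id; the data bytes that belong there are kept in je->last_bytes[].
 * Reading a block that still sits in the journal therefore copies
 * JOURNAL_SECTOR_DATA bytes from each of its sectors_per_block journal
 * sectors and patches the tail back in from last_bytes[]; the write path
 * further down saves those bytes when the block enters the journal.
 */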
js = access_journal_data(ic, journal_section, journal_entry);
- memcpy(mem + bv.bv_offset, js, JOURNAL_SECTOR_DATA);
- memcpy(mem + bv.bv_offset + JOURNAL_SECTOR_DATA, &je->last_bytes, sizeof je->last_bytes);
+ mem_ptr = mem + bv.bv_offset;
+ s = 0;
+ do {
+ memcpy(mem_ptr, js, JOURNAL_SECTOR_DATA);
+ *(commit_id_t *)(mem_ptr + JOURNAL_SECTOR_DATA) = je->last_bytes[s];
+ js++;
+ mem_ptr += 1 << SECTOR_SHIFT;
+ } while (++s < ic->sectors_per_block);
#ifdef INTERNAL_VERIFY
if (ic->internal_hash) {
char checksums_onstack[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
- if (unlikely(memcmp(checksums_onstack, je->tag, ic->tag_size))) {
+ if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
DMERR("Checksum failed when reading from journal, at sector 0x%llx",
(unsigned long long)logical_sector);
}
if (!ic->internal_hash) {
struct bio_integrity_payload *bip = bio_integrity(bio);
unsigned tag_todo = ic->tag_size;
- char *tag_ptr = je->tag;
+ char *tag_ptr = journal_entry_tag(ic, je);
if (bip) do {
struct bio_vec biv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
if (likely(dio->write)) {
struct journal_sector *js;
+ unsigned s;
js = access_journal_data(ic, journal_section, journal_entry);
- memcpy(js, mem + bv.bv_offset, 1 << SECTOR_SHIFT);
- je->last_bytes = js->commit_id;
+ memcpy(js, mem + bv.bv_offset, ic->sectors_per_block << SECTOR_SHIFT);
+
+ s = 0;
+ do {
+ je->last_bytes[s] = js[s].commit_id;
+ } while (++s < ic->sectors_per_block);
if (ic->internal_hash) {
unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
if (unlikely(digest_size > ic->tag_size)) {
char checksums_onstack[digest_size];
integrity_sector_checksum(ic, logical_sector, (char *)js, checksums_onstack);
- memcpy(je->tag, checksums_onstack, ic->tag_size);
+ memcpy(journal_entry_tag(ic, je), checksums_onstack, ic->tag_size);
} else
- integrity_sector_checksum(ic, logical_sector, (char *)js, je->tag);
+ integrity_sector_checksum(ic, logical_sector, (char *)js, journal_entry_tag(ic, je));
}
journal_entry_set_sector(je, logical_sector);
}
- logical_sector++;
+ logical_sector += ic->sectors_per_block;
journal_entry++;
if (unlikely(journal_entry == ic->journal_section_entries)) {
wraparound_section(ic, &journal_section);
}
- bv.bv_offset += 1 << SECTOR_SHIFT;
- } while (bv.bv_len -= 1 << SECTOR_SHIFT);
+ bv.bv_offset += ic->sectors_per_block << SECTOR_SHIFT;
+ } while (bv.bv_len -= ic->sectors_per_block << SECTOR_SHIFT);
if (unlikely(!dio->write))
flush_dcache_page(bv.bv_page);
pos = journal_section * ic->journal_section_entries + journal_entry;
ws = journal_section;
we = journal_entry;
- for (i = 0; i < dio->range.n_sectors; i++) {
+ i = 0;
+ do {
struct journal_entry *je;
add_journal_node(ic, &ic->journal_tree[pos], dio->range.logical_sector + i);
ws++;
wraparound_section(ic, &ws);
}
- }
+ } while ((i += ic->sectors_per_block) < dio->range.n_sectors);
spin_unlock_irq(&ic->endio_wait.lock);
goto journal_read_write;
dio->range.n_sectors = next_sector - dio->range.logical_sector;
} else {
unsigned i;
- for (i = 1; i < dio->range.n_sectors; i++) {
- if (!test_journal_node(ic, journal_read_pos + i, dio->range.logical_sector + i))
+ unsigned jp = journal_read_pos + 1;
+ for (i = ic->sectors_per_block; i < dio->range.n_sectors; i += ic->sectors_per_block, jp++) {
+ if (!test_journal_node(ic, jp, dio->range.logical_sector + i))
break;
}
dio->range.n_sectors = i;
complete_journal_op(comp);
}
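/*
 * Put the data bytes saved in je->last_bytes[] back into the commit_id slot
 * of each journal sector of a block before the block is copied to its final
 * location on the data device.
 */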
+static void restore_last_bytes(struct dm_integrity_c *ic, struct journal_sector *js,
+ struct journal_entry *je)
+{
+ unsigned s = 0;
+ do {
+ js->commit_id = je->last_bytes[s];
+ js++;
+ } while (++s < ic->sectors_per_block);
+}
+
static void do_journal_write(struct dm_integrity_c *ic, unsigned write_start,
unsigned write_sections, bool from_replay)
{
continue;
BUG_ON(unlikely(journal_entry_is_inprogress(je)) && !from_replay);
sec = journal_entry_get_sector(je);
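/*
 * Journal entries seen during replay come straight from disk and are not
 * trusted: a sector that is not block-aligned is reported as an integrity
 * error and rounded down to a block boundary instead of being used as-is.
 */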
+ if (unlikely(from_replay)) {
+ if (unlikely(sec & (unsigned)(ic->sectors_per_block - 1))) {
+ dm_integrity_io_error(ic, "invalid sector in journal", -EIO);
+ sec &= ~(sector_t)(ic->sectors_per_block - 1);
+ }
+ }
get_area_and_offset(ic, sec, &area, &offset);
- access_journal_data(ic, i, j)->commit_id = je->last_bytes;
+ restore_last_bytes(ic, access_journal_data(ic, i, j), je);
for (k = j + 1; k < ic->journal_section_entries; k++) {
struct journal_entry *je2 = access_journal_entry(ic, i, k);
sector_t sec2, area2, offset2;
BUG_ON(unlikely(journal_entry_is_inprogress(je2)) && !from_replay);
sec2 = journal_entry_get_sector(je2);
get_area_and_offset(ic, sec2, &area2, &offset2);
- if (area2 != area || offset2 != offset + (k - j))
+ if (area2 != area || offset2 != offset + ((k - j) << ic->sb->log2_sectors_per_block))
break;
- access_journal_data(ic, i, k)->commit_id = je2->last_bytes;
+ restore_last_bytes(ic, access_journal_data(ic, i, k), je2);
}
next_loop = k - 1;
io = mempool_alloc(ic->journal_io_mempool, GFP_NOIO);
io->comp = &comp;
io->range.logical_sector = sec;
- io->range.n_sectors = k - j;
+ io->range.n_sectors = (k - j) << ic->sb->log2_sectors_per_block;
spin_lock_irq(&ic->endio_wait.lock);
while (unlikely(!add_new_range(ic, &io->range)))
journal_entry_set_unused(je2);
remove_journal_node(ic, &section_node[j]);
j++;
- sec++;
- offset++;
+ sec += ic->sectors_per_block;
+ offset += ic->sectors_per_block;
}
while (j < k && find_newer_committed_node(ic, &section_node[k - 1])) {
struct journal_entry *je2 = access_journal_entry(ic, i, k - 1);
ic->internal_hash) {
char test_tag[max(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)];
- integrity_sector_checksum(ic, sec + (l - j),
+ integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
(char *)access_journal_data(ic, i, l), test_tag);
- if (unlikely(memcmp(test_tag, je2->tag, ic->tag_size)))
+ if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size)))
dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
}
journal_entry_set_unused(je2);
- r = dm_integrity_rw_tag(ic, je2->tag, &metadata_block, &metadata_offset,
+ r = dm_integrity_rw_tag(ic, journal_entry_tag(ic, je2), &metadata_block, &metadata_offset,
ic->tag_size, TAG_WRITE);
if (unlikely(r)) {
dm_integrity_io_error(ic, "writing tags", r);
}
atomic_inc(&comp.in_flight);
- copy_from_journal(ic, i, j, k - j, get_data_sector(ic, area, offset),
+ copy_from_journal(ic, i, j << ic->sb->log2_sectors_per_block,
+ (k - j) << ic->sb->log2_sectors_per_block,
+ get_data_sector(ic, area, offset),
complete_copy_from_journal, io);
skip_io:
j = next_loop;
watermark_percentage += ic->journal_entries / 2;
do_div(watermark_percentage, ic->journal_entries);
arg_count = 5;
+ arg_count += ic->sectors_per_block != 1;
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
DMEMIT(" commit_time:%u", ic->autocommit_msec);
+ if (ic->sectors_per_block != 1)
+ DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
#define EMIT_ALG(a, n) \
do { \
return fn(ti, ic->dev, ic->start + ic->initial_sectors + ic->metadata_run, ti->len, data);
}
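/*
 * Advertise the configured block size as the device's logical and physical
 * block size so that the block layer only submits bios that pass the
 * per-bio alignment checks above.
 */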
+static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ struct dm_integrity_c *ic = ti->private;
+
+ if (ic->sectors_per_block > 1) {
+ limits->logical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
+ limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT;
+ blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT);
+ }
+}
+
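/*
 * A journal section is JOURNAL_BLOCK_SECTORS sectors of journal entries
 * followed by the data they describe; every entry now covers a whole block
 * (sectors_per_block data sectors), hence the shift when computing
 * journal_section_sectors.
 */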
static void calculate_journal_section_size(struct dm_integrity_c *ic)
{
unsigned sector_space = JOURNAL_SECTOR_DATA;
ic->journal_sections = le32_to_cpu(ic->sb->journal_sections);
- ic->journal_entry_size = roundup(offsetof(struct journal_entry, tag) + ic->tag_size,
+ ic->journal_entry_size = roundup(offsetof(struct journal_entry, last_bytes[ic->sectors_per_block]) + ic->tag_size,
JOURNAL_ENTRY_ROUNDUP);
if (ic->sb->flags & cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC))
sector_space -= JOURNAL_MAC_PER_SECTOR;
ic->journal_entries_per_sector = sector_space / ic->journal_entry_size;
ic->journal_section_entries = ic->journal_entries_per_sector * JOURNAL_BLOCK_SECTORS;
- ic->journal_section_sectors = ic->journal_section_entries + JOURNAL_BLOCK_SECTORS;
+ ic->journal_section_sectors = (ic->journal_section_entries << ic->sb->log2_sectors_per_block) + JOURNAL_BLOCK_SECTORS;
ic->journal_entries = ic->journal_section_entries * ic->journal_sections;
}
return -EINVAL;
ic->initial_sectors = initial_sectors;
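/*
 * Metadata stores one tag per block, so an interleaved data area of
 * 2^log2_interleave_sectors sectors needs
 * 2^(log2_interleave_sectors - log2_sectors_per_block) tags, rounded up to
 * the metadata padding.
 */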
- ic->metadata_run = roundup((__u64)ic->tag_size << ic->sb->log2_interleave_sectors,
+ ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
(__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
if (!(ic->metadata_run & (ic->metadata_run - 1)))
ic->log2_metadata_run = __ffs(ic->metadata_run);
memcpy(ic->sb->magic, SB_MAGIC, 8);
ic->sb->version = SB_VERSION;
ic->sb->integrity_tag_size = cpu_to_le16(ic->tag_size);
+ ic->sb->log2_sectors_per_block = __ffs(ic->sectors_per_block);
if (ic->journal_mac_alg.alg_string)
ic->sb->flags |= cpu_to_le32(SB_FLAG_HAVE_JOURNAL_MAC);
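/*
 * The blk_integrity profile carries one tuple of tag_size bytes per block;
 * interval_exp tells the block layer that the protection interval now
 * equals the block size.
 */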
memset(&bi, 0, sizeof(bi));
bi.profile = &dm_integrity_profile;
- bi.tuple_size = ic->tag_size * (queue_logical_block_size(disk->queue) >> SECTOR_SHIFT);
- bi.tag_size = ic->tag_size;
+ bi.tuple_size = ic->tag_size;
+ bi.tag_size = bi.tuple_size;
+ bi.interval_exp = ilog2(ic->sectors_per_block << SECTOR_SHIFT);
blk_integrity_register(disk, &bi);
blk_queue_max_integrity_segments(disk->queue, UINT_MAX);
* internal_hash
* journal_crypt
* journal_mac
+ * block_size
*/
static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
unsigned extra_args;
struct dm_arg_set as;
static struct dm_arg _args[] = {
- {0, 8, "Invalid number of feature args"},
+ {0, 9, "Invalid number of feature args"},
};
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
bool should_write_sb;
buffer_sectors = DEFAULT_BUFFER_SECTORS;
journal_watermark = DEFAULT_JOURNAL_WATERMARK;
sync_msec = DEFAULT_SYNC_MSEC;
+ ic->sectors_per_block = 1;
as.argc = argc - DIRECT_ARGUMENTS;
as.argv = argv + DIRECT_ARGUMENTS;
journal_watermark = val;
else if (sscanf(opt_string, "commit_time:%u%c", &val, &dummy) == 1)
sync_msec = val;
- else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
+ else if (sscanf(opt_string, "block_size:%u%c", &val, &dummy) == 1) {
+ if (val < 1 << SECTOR_SHIFT ||
+ val > MAX_SECTORS_PER_BLOCK << SECTOR_SHIFT ||
+ (val & (val - 1))) {
+ r = -EINVAL;
+ ti->error = "Invalid block_size argument";
+ goto bad;
+ }
+ ic->sectors_per_block = val >> SECTOR_SHIFT;
+ } else if (!memcmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
"Invalid internal_hash argument");
if (r)
}
if (le16_to_cpu(ic->sb->integrity_tag_size) != ic->tag_size) {
r = -EINVAL;
- ti->error = "Invalid tag size";
+ ti->error = "Tag size doesn't match the information in superblock";
+ goto bad;
+ }
+ if (ic->sb->log2_sectors_per_block != __ffs(ic->sectors_per_block)) {
+ r = -EINVAL;
+ ti->error = "Block size doesn't match the information in superblock";
goto bad;
}
/* make sure that ti->max_io_len doesn't overflow */
.resume = dm_integrity_resume,
.status = dm_integrity_status,
.iterate_devices = dm_integrity_iterate_devices,
+ .io_hints = dm_integrity_io_hints,
};
int __init dm_integrity_init(void)