enum superblock_flag_bits {
/* for spotting crashes that would invalidate the dirty bitset */
CLEAN_SHUTDOWN,
+ /* metadata must be checked using the tools */
+ NEEDS_CHECK,
};
/*
struct dm_disk_bitset discard_info;
struct rw_semaphore root_lock;
+ unsigned long flags;
dm_block_t root;
dm_block_t hint_root;
dm_block_t discard_root;
* buffer before the superblock is locked and updated.
*/
__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+
+ /*
+ * Set if a transaction has to be aborted but the attempt to roll
+ * back to the previous (good) transaction failed. The only
+ * metadata operation permissible in this state is the closing of
+ * the device.
+ */
+ bool fail_io:1;
};
/*-------------------------------------------------------------------
static void read_superblock_fields(struct dm_cache_metadata *cmd,
struct cache_disk_superblock *disk_super)
{
+ cmd->flags = le32_to_cpu(disk_super->flags);
cmd->root = le64_to_cpu(disk_super->mapping_root);
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
cmd->discard_root = le64_to_cpu(disk_super->discard_root);
if (mutator)
update_flags(disk_super, mutator);
+ disk_super->flags = cpu_to_le32(cmd->flags);
disk_super->mapping_root = cpu_to_le64(cmd->root);
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
cmd->cache_blocks = 0;
cmd->policy_hint_size = policy_hint_size;
cmd->changed = true;
+ cmd->fail_io = false;
r = __create_persistent_data_objects(cmd, may_format_device);
if (r) {
list_del(&cmd->list);
mutex_unlock(&table_lock);
- __destroy_persistent_data_objects(cmd);
+ if (!cmd->fail_io)
+ __destroy_persistent_data_objects(cmd);
kfree(cmd);
}
}
return 0;
}
+#define WRITE_LOCK(cmd) \
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+ return -EINVAL; \
+ down_write(&cmd->root_lock)
+
+#define WRITE_LOCK_VOID(cmd) \
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) \
+ return; \
+ down_write(&cmd->root_lock)
+
+#define WRITE_UNLOCK(cmd) \
+ up_write(&cmd->root_lock)
+
int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
{
int r;
bool clean;
__le64 null_mapping = pack_value(0, 0);
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
__dm_bless_for_disk(&null_mapping);
if (from_cblock(new_cache_size) < from_cblock(cmd->cache_blocks)) {
cmd->changed = true;
out:
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = dm_bitset_resize(&cmd->discard_info,
cmd->discard_root,
from_dblock(cmd->discard_nr_blocks),
}
cmd->changed = true;
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __discard(cmd, dblock, discard);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __remove(cmd, cblock);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __insert(cmd, cblock, oblock);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __dirty(cmd, cblock, dirty);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd,
struct dm_cache_statistics *stats)
{
- down_write(&cmd->root_lock);
+ WRITE_LOCK_VOID(cmd);
cmd->stats = *stats;
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
}
int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown)
flags_mutator mutator = (clean_shutdown ? set_clean_shutdown :
clear_clean_shutdown);
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = __commit_transaction(cmd, mutator);
if (r)
goto out;
r = __begin_transaction(cmd);
out:
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
int r;
- down_write(&cmd->root_lock);
+ WRITE_LOCK(cmd);
r = write_hints(cmd, policy);
- up_write(&cmd->root_lock);
+ WRITE_UNLOCK(cmd);
return r;
}
{
return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result);
}
+
+void dm_cache_metadata_set_read_only(struct dm_cache_metadata *cmd)
+{
+ WRITE_LOCK_VOID(cmd);
+ dm_bm_set_read_only(cmd->bm);
+ WRITE_UNLOCK(cmd);
+}
+
+void dm_cache_metadata_set_read_write(struct dm_cache_metadata *cmd)
+{
+ WRITE_LOCK_VOID(cmd);
+ dm_bm_set_read_write(cmd->bm);
+ WRITE_UNLOCK(cmd);
+}
+
+int dm_cache_metadata_set_needs_check(struct dm_cache_metadata *cmd)
+{
+ int r;
+ struct dm_block *sblock;
+ struct cache_disk_superblock *disk_super;
+
+ /*
+ * We ignore fail_io for this function.
+ */
+ down_write(&cmd->root_lock);
+ set_bit(NEEDS_CHECK, &cmd->flags);
+
+ r = superblock_lock(cmd, &sblock);
+ if (r) {
+ DMERR("couldn't read superblock");
+ goto out;
+ }
+
+ disk_super = dm_block_data(sblock);
+ disk_super->flags = cpu_to_le32(cmd->flags);
+
+ dm_bm_unlock(sblock);
+
+out:
+ up_write(&cmd->root_lock);
+ return r;
+}
+
+bool dm_cache_metadata_needs_check(struct dm_cache_metadata *cmd)
+{
+ bool needs_check;
+
+ down_read(&cmd->root_lock);
+ needs_check = !!test_bit(NEEDS_CHECK, &cmd->flags);
+ up_read(&cmd->root_lock);
+
+ return needs_check;
+}
+
+int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
+{
+ int r;
+
+ WRITE_LOCK(cmd);
+ __destroy_persistent_data_objects(cmd);
+ r = __create_persistent_data_objects(cmd, false);
+ if (r)
+ cmd->fail_io = true;
+ WRITE_UNLOCK(cmd);
+
+ return r;
+}
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
-/*
- * FIXME: the cache is read/write for the time being.
- */
enum cache_metadata_mode {
CM_WRITE, /* metadata may be changed */
CM_READ_ONLY, /* metadata may not be changed */
+ CM_FAIL
};
enum cache_io_mode {
struct dm_bio_prison_cell *cell2;
};
+static enum cache_metadata_mode get_cache_mode(struct cache *cache);
+
static void wake_worker(struct cache *cache)
{
queue_work(cache->wq, &cache->worker);
{
struct dm_cache_statistics stats;
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return;
+
stats.read_hits = atomic_read(&cache->stats.read_hit);
stats.read_misses = atomic_read(&cache->stats.read_miss);
stats.write_hits = atomic_read(&cache->stats.write_hit);
remap_to_origin_clear_discard(pb->cache, bio, oblock);
}
+/*----------------------------------------------------------------
+ * Failure modes
+ *--------------------------------------------------------------*/
+static enum cache_metadata_mode get_cache_mode(struct cache *cache)
+{
+ return cache->features.mode;
+}
+
+static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
+{
+ const char *descs[] = {
+ "write",
+ "read-only",
+ "fail"
+ };
+
+ dm_table_event(cache->ti->table);
+ DMINFO("switching cache to %s mode", descs[(int)mode]);
+}
+
+static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
+{
+ bool needs_check = dm_cache_metadata_needs_check(cache->cmd);
+ enum cache_metadata_mode old_mode = get_cache_mode(cache);
+
+ if (new_mode == CM_WRITE && needs_check) {
+ DMERR("unable to switch cache to write mode until repaired.");
+ if (old_mode != new_mode)
+ new_mode = old_mode;
+ else
+ new_mode = CM_READ_ONLY;
+ }
+
+ /* Never move out of fail mode */
+ if (old_mode == CM_FAIL)
+ new_mode = CM_FAIL;
+
+ switch (new_mode) {
+ case CM_FAIL:
+ case CM_READ_ONLY:
+ dm_cache_metadata_set_read_only(cache->cmd);
+ break;
+
+ case CM_WRITE:
+ dm_cache_metadata_set_read_write(cache->cmd);
+ break;
+ }
+
+ cache->features.mode = new_mode;
+
+ if (new_mode != old_mode)
+ notify_mode_switch(cache, new_mode);
+}
+
+static void abort_transaction(struct cache *cache)
+{
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return;
+
+ if (dm_cache_metadata_set_needs_check(cache->cmd)) {
+ DMERR("failed to set 'needs_check' flag in metadata");
+ set_cache_mode(cache, CM_FAIL);
+ }
+
+ DMERR_LIMIT("aborting current metadata transaction");
+ if (dm_cache_metadata_abort(cache->cmd)) {
+ DMERR("failed to abort metadata transaction");
+ set_cache_mode(cache, CM_FAIL);
+ }
+}
+
+static void metadata_operation_failed(struct cache *cache, const char *op, int r)
+{
+ DMERR_LIMIT("metadata operation '%s' failed: error = %d", op, r);
+ abort_transaction(cache);
+ set_cache_mode(cache, CM_READ_ONLY);
+}
+
/*----------------------------------------------------------------
* Migration processing
*
static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
+ int r;
unsigned long flags;
struct cache *cache = mg->cache;
return;
} else if (mg->demote) {
- if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
+ r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
+ if (r) {
DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
+ metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
policy_force_mapping(cache->policy, mg->new_oblock,
mg->old_oblock);
if (mg->promote)
return;
}
} else {
- if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
+ r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
+ if (r) {
DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
+ metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
policy_remove_mapping(cache->policy, mg->new_oblock);
free_io_migration(mg);
return;
jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}
+/*
+ * A non-zero return indicates read_only or fail_io mode.
+ */
+static int commit(struct cache *cache, bool clean_shutdown)
+{
+ int r;
+
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return -EINVAL;
+
+ atomic_inc(&cache->stats.commit_count);
+ r = dm_cache_commit(cache->cmd, clean_shutdown);
+ if (r)
+ metadata_operation_failed(cache, "dm_cache_commit", r);
+
+ return r;
+}
+
static int commit_if_needed(struct cache *cache)
{
int r = 0;
if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
dm_cache_changed_this_transaction(cache->cmd)) {
- atomic_inc(&cache->stats.commit_count);
+ r = commit(cache, false);
cache->commit_requested = false;
- r = dm_cache_commit(cache->cmd, false);
cache->last_commit_jiffies = jiffies;
}
r = policy_remove_cblock(cache->policy, to_cblock(begin));
if (!r) {
r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
- if (r)
+ if (r) {
+ metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
break;
+ }
} else if (r == -ENODATA) {
/* harmless, already unmapped */
if (commit_if_needed(cache)) {
process_deferred_flush_bios(cache, false);
process_migrations(cache, &cache->need_commit_migrations, migration_failure);
-
- /*
- * FIXME: rollback metadata or just go into a
- * failure mode and error everything
- */
-
} else {
process_deferred_flush_bios(cache, true);
process_migrations(cache, &cache->need_commit_migrations,
goto bad;
}
cache->cmd = cmd;
+ set_cache_mode(cache, CM_WRITE);
+ if (get_cache_mode(cache) != CM_WRITE) {
+ *error = "Unable to get write access to metadata, please check/repair metadata.";
+ r = -EINVAL;
+ goto bad;
+ }
if (passthrough_mode(&cache->features)) {
bool all_clean;
{
unsigned i, r;
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return -EINVAL;
+
for (i = 0; i < from_cblock(cache->cache_size); i++) {
r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
is_dirty(cache, to_cblock(i)));
- if (r)
+ if (r) {
+ metadata_operation_failed(cache, "dm_cache_set_dirty", r);
return r;
+ }
}
return 0;
{
unsigned i, r;
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return -EINVAL;
+
r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
cache->discard_nr_blocks);
if (r) {
DMERR("could not resize on-disk discard bitset");
+ metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
return r;
}
for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
r = dm_cache_set_discard(cache->cmd, to_dblock(i),
is_discarded(cache, to_dblock(i)));
- if (r)
+ if (r) {
+ metadata_operation_failed(cache, "dm_cache_set_discard", r);
return r;
+ }
+ }
+
+ return 0;
+}
+
+static int write_hints(struct cache *cache)
+{
+ int r;
+
+ if (get_cache_mode(cache) >= CM_READ_ONLY)
+ return -EINVAL;
+
+ r = dm_cache_write_hints(cache->cmd, cache->policy);
+ if (r) {
+ metadata_operation_failed(cache, "dm_cache_write_hints", r);
+ return r;
}
return 0;
save_stats(cache);
- r3 = dm_cache_write_hints(cache->cmd, cache->policy);
+ r3 = write_hints(cache);
if (r3)
DMERR("could not write hints");
* set the clean shutdown flag. This will effectively force every
* dirty bit to be set on reload.
*/
- r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
+ r4 = commit(cache, !r1 && !r2 && !r3);
if (r4)
- DMERR("could not write cache metadata. Data loss may occur.");
+ DMERR("could not write cache metadata.");
return !r1 && !r2 && !r3 && !r4;
}
requeue_deferred_cells(cache);
stop_quiescing(cache);
- (void) sync_metadata(cache);
+ if (get_cache_mode(cache) == CM_WRITE)
+ (void) sync_metadata(cache);
}
static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
r = dm_cache_resize(cache->cmd, new_size);
if (r) {
DMERR("could not resize cache metadata");
+ metadata_operation_failed(cache, "dm_cache_resize", r);
return r;
}
load_mapping, cache);
if (r) {
DMERR("could not load cache mappings");
+ metadata_operation_failed(cache, "dm_cache_load_mappings", r);
return r;
}
r = dm_cache_load_discards(cache->cmd, load_discard, &li);
if (r) {
DMERR("could not load origin discards");
+ metadata_operation_failed(cache, "dm_cache_load_discards", r);
return r;
}
set_discard_range(&li);
* <#demotions> <#promotions> <#dirty>
* <#features> <features>*
* <#core args> <core args>
- * <policy name> <#policy args> <policy args>*
+ * <policy name> <#policy args> <policy args>* <cache metadata mode>
*/
static void cache_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
switch (type) {
case STATUSTYPE_INFO:
- /* Commit to ensure statistics aren't out-of-date */
- if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
- r = dm_cache_commit(cache->cmd, false);
- if (r)
- DMERR("could not commit metadata for accurate status");
+ if (get_cache_mode(cache) == CM_FAIL) {
+ DMEMIT("Fail");
+ break;
}
+ /* Commit to ensure statistics aren't out-of-date */
+ if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
+ (void) commit(cache, false);
+
r = dm_cache_get_free_metadata_block_count(cache->cmd,
&nr_free_blocks_metadata);
if (r) {
DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
if (sz < maxlen) {
- r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
+ r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
if (r)
DMERR("policy_emit_config_values returned %d", r);
}
+ if (get_cache_mode(cache) == CM_READ_ONLY)
+ DMEMIT("ro ");
+ else
+ DMEMIT("rw ");
+
break;
case STATUSTYPE_TABLE:
if (!argc)
return -EINVAL;
+ if (get_cache_mode(cache) >= CM_READ_ONLY) {
+ DMERR("unable to service cache target messages in READ_ONLY or FAIL mode");
+ return -EOPNOTSUPP;
+ }
+
if (!strcasecmp(argv[0], "invalidate_cblocks"))
return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 6, 0},
+ .version = {1, 7, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,