import PULS_20160108
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / ext4 / mmp.c
1 #include <linux/fs.h>
2 #include <linux/random.h>
3 #include <linux/buffer_head.h>
4 #include <linux/utsname.h>
5 #include <linux/kthread.h>
6
7 #include "ext4.h"
8
9 /* Checksumming functions */
10 static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
11 {
12 struct ext4_sb_info *sbi = EXT4_SB(sb);
13 int offset = offsetof(struct mmp_struct, mmp_checksum);
14 __u32 csum;
15
16 csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
17
18 return cpu_to_le32(csum);
19 }
20
21 int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
22 {
23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
25 return 1;
26
27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28 }
29
30 void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31 {
32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
34 return;
35
36 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
37 }
38
39 /*
40 * Write the MMP block using WRITE_SYNC to try to get the block on-disk
41 * faster.
42 */
43 static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
44 {
45 struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46
47 /*
48 * We protect against freezing so that we don't create dirty buffers
49 * on frozen filesystem.
50 */
51 sb_start_write(sb);
52 ext4_mmp_csum_set(sb, mmp);
53 mark_buffer_dirty(bh);
54 lock_buffer(bh);
55 bh->b_end_io = end_buffer_write_sync;
56 get_bh(bh);
57 #ifdef FEATURE_STORAGE_META_LOG
58 if( bh && bh->b_bdev && bh->b_bdev->bd_disk)
59 set_metadata_rw_status(bh->b_bdev->bd_disk->first_minor, WAIT_WRITE_CNT);
60 #endif
61 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
62 wait_on_buffer(bh);
63 sb_end_write(sb);
64 if (unlikely(!buffer_uptodate(bh)))
65 return 1;
66
67 return 0;
68 }
69
70 /*
71 * Read the MMP block. It _must_ be read from disk and hence we clear the
72 * uptodate flag on the buffer.
73 */
74 static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
75 ext4_fsblk_t mmp_block)
76 {
77 struct mmp_struct *mmp;
78
79 if (*bh)
80 clear_buffer_uptodate(*bh);
81
82 /* This would be sb_bread(sb, mmp_block), except we need to be sure
83 * that the MD RAID device cache has been bypassed, and that the read
84 * is not blocked in the elevator. */
85 if (!*bh)
86 *bh = sb_getblk(sb, mmp_block);
87 if (!*bh)
88 return -ENOMEM;
89 if (*bh) {
90 get_bh(*bh);
91 lock_buffer(*bh);
92 (*bh)->b_end_io = end_buffer_read_sync;
93 #ifdef FEATURE_STORAGE_META_LOG
94 if( (*bh) && (*bh)->b_bdev && (*bh)->b_bdev->bd_disk)
95 set_metadata_rw_status((*bh)->b_bdev->bd_disk->first_minor, WAIT_READ_CNT);
96 #endif
97 submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
98 wait_on_buffer(*bh);
99 if (!buffer_uptodate(*bh)) {
100 brelse(*bh);
101 *bh = NULL;
102 }
103 }
104 if (unlikely(!*bh)) {
105 ext4_warning(sb, "Error while reading MMP block %llu",
106 mmp_block);
107 return -EIO;
108 }
109
110 mmp = (struct mmp_struct *)((*bh)->b_data);
111 if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
112 !ext4_mmp_csum_verify(sb, mmp))
113 return -EINVAL;
114
115 return 0;
116 }
117
118 /*
119 * Dump as much information as possible to help the admin.
120 */
121 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
122 const char *function, unsigned int line, const char *msg)
123 {
124 __ext4_warning(sb, function, line, msg);
125 __ext4_warning(sb, function, line,
126 "MMP failure info: last update time: %llu, last update "
127 "node: %s, last update device: %s\n",
128 (long long unsigned int) le64_to_cpu(mmp->mmp_time),
129 mmp->mmp_nodename, mmp->mmp_bdevname);
130 }
131
132 /*
133 * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
134 */
135 static int kmmpd(void *data)
136 {
137 struct super_block *sb = ((struct mmpd_data *) data)->sb;
138 struct buffer_head *bh = ((struct mmpd_data *) data)->bh;
139 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
140 struct mmp_struct *mmp;
141 ext4_fsblk_t mmp_block;
142 u32 seq = 0;
143 unsigned long failed_writes = 0;
144 int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
145 unsigned mmp_check_interval;
146 unsigned long last_update_time;
147 unsigned long diff;
148 int retval;
149
150 mmp_block = le64_to_cpu(es->s_mmp_block);
151 mmp = (struct mmp_struct *)(bh->b_data);
152 mmp->mmp_time = cpu_to_le64(get_seconds());
153 /*
154 * Start with the higher mmp_check_interval and reduce it if
155 * the MMP block is being updated on time.
156 */
157 mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
158 EXT4_MMP_MIN_CHECK_INTERVAL);
159 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
160 bdevname(bh->b_bdev, mmp->mmp_bdevname);
161
162 memcpy(mmp->mmp_nodename, init_utsname()->nodename,
163 sizeof(mmp->mmp_nodename));
164
165 while (!kthread_should_stop()) {
166 if (++seq > EXT4_MMP_SEQ_MAX)
167 seq = 1;
168
169 mmp->mmp_seq = cpu_to_le32(seq);
170 mmp->mmp_time = cpu_to_le64(get_seconds());
171 last_update_time = jiffies;
172
173 retval = write_mmp_block(sb, bh);
174 /*
175 * Don't spew too many error messages. Print one every
176 * (s_mmp_update_interval * 60) seconds.
177 */
178 if (retval) {
179 if ((failed_writes % 60) == 0)
180 ext4_error(sb, "Error writing to MMP block");
181 failed_writes++;
182 }
183
184 if (!(le32_to_cpu(es->s_feature_incompat) &
185 EXT4_FEATURE_INCOMPAT_MMP)) {
186 ext4_warning(sb, "kmmpd being stopped since MMP feature"
187 " has been disabled.");
188 EXT4_SB(sb)->s_mmp_tsk = NULL;
189 goto failed;
190 }
191
192 if (sb->s_flags & MS_RDONLY) {
193 ext4_warning(sb, "kmmpd being stopped since filesystem "
194 "has been remounted as readonly.");
195 EXT4_SB(sb)->s_mmp_tsk = NULL;
196 goto failed;
197 }
198
199 diff = jiffies - last_update_time;
200 if (diff < mmp_update_interval * HZ)
201 schedule_timeout_interruptible(mmp_update_interval *
202 HZ - diff);
203
204 /*
205 * We need to make sure that more than mmp_check_interval
206 * seconds have not passed since writing. If that has happened
207 * we need to check if the MMP block is as we left it.
208 */
209 diff = jiffies - last_update_time;
210 if (diff > mmp_check_interval * HZ) {
211 struct buffer_head *bh_check = NULL;
212 struct mmp_struct *mmp_check;
213
214 retval = read_mmp_block(sb, &bh_check, mmp_block);
215 if (retval) {
216 ext4_error(sb, "error reading MMP data: %d",
217 retval);
218
219 EXT4_SB(sb)->s_mmp_tsk = NULL;
220 goto failed;
221 }
222
223 mmp_check = (struct mmp_struct *)(bh_check->b_data);
224 if (mmp->mmp_seq != mmp_check->mmp_seq ||
225 memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
226 sizeof(mmp->mmp_nodename))) {
227 dump_mmp_msg(sb, mmp_check,
228 "Error while updating MMP info. "
229 "The filesystem seems to have been"
230 " multiply mounted.");
231 ext4_error(sb, "abort");
232 goto failed;
233 }
234 put_bh(bh_check);
235 }
236
237 /*
238 * Adjust the mmp_check_interval depending on how much time
239 * it took for the MMP block to be written.
240 */
241 mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
242 EXT4_MMP_MAX_CHECK_INTERVAL),
243 EXT4_MMP_MIN_CHECK_INTERVAL);
244 mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
245 }
246
247 /*
248 * Unmount seems to be clean.
249 */
250 mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
251 mmp->mmp_time = cpu_to_le64(get_seconds());
252
253 retval = write_mmp_block(sb, bh);
254
255 failed:
256 kfree(data);
257 brelse(bh);
258 return retval;
259 }
260
261 /*
262 * Get a random new sequence number but make sure it is not greater than
263 * EXT4_MMP_SEQ_MAX.
264 */
265 static unsigned int mmp_new_seq(void)
266 {
267 u32 new_seq;
268
269 do {
270 get_random_bytes(&new_seq, sizeof(u32));
271 } while (new_seq > EXT4_MMP_SEQ_MAX);
272
273 return new_seq;
274 }
275
276 /*
277 * Protect the filesystem from being mounted more than once.
278 */
279 int ext4_multi_mount_protect(struct super_block *sb,
280 ext4_fsblk_t mmp_block)
281 {
282 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
283 struct buffer_head *bh = NULL;
284 struct mmp_struct *mmp = NULL;
285 struct mmpd_data *mmpd_data;
286 u32 seq;
287 unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
288 unsigned int wait_time = 0;
289 int retval;
290
291 if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
292 mmp_block >= ext4_blocks_count(es)) {
293 ext4_warning(sb, "Invalid MMP block in superblock");
294 goto failed;
295 }
296
297 retval = read_mmp_block(sb, &bh, mmp_block);
298 if (retval)
299 goto failed;
300
301 mmp = (struct mmp_struct *)(bh->b_data);
302
303 if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
304 mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
305
306 /*
307 * If check_interval in MMP block is larger, use that instead of
308 * update_interval from the superblock.
309 */
310 if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
311 mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
312
313 seq = le32_to_cpu(mmp->mmp_seq);
314 if (seq == EXT4_MMP_SEQ_CLEAN)
315 goto skip;
316
317 if (seq == EXT4_MMP_SEQ_FSCK) {
318 dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
319 goto failed;
320 }
321
322 wait_time = min(mmp_check_interval * 2 + 1,
323 mmp_check_interval + 60);
324
325 /* Print MMP interval if more than 20 secs. */
326 if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
327 ext4_warning(sb, "MMP interval %u higher than expected, please"
328 " wait.\n", wait_time * 2);
329
330 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
331 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
332 goto failed;
333 }
334
335 retval = read_mmp_block(sb, &bh, mmp_block);
336 if (retval)
337 goto failed;
338 mmp = (struct mmp_struct *)(bh->b_data);
339 if (seq != le32_to_cpu(mmp->mmp_seq)) {
340 dump_mmp_msg(sb, mmp,
341 "Device is already active on another node.");
342 goto failed;
343 }
344
345 skip:
346 /*
347 * write a new random sequence number.
348 */
349 seq = mmp_new_seq();
350 mmp->mmp_seq = cpu_to_le32(seq);
351
352 retval = write_mmp_block(sb, bh);
353 if (retval)
354 goto failed;
355
356 /*
357 * wait for MMP interval and check mmp_seq.
358 */
359 if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
360 ext4_warning(sb, "MMP startup interrupted, failing mount\n");
361 goto failed;
362 }
363
364 retval = read_mmp_block(sb, &bh, mmp_block);
365 if (retval)
366 goto failed;
367 mmp = (struct mmp_struct *)(bh->b_data);
368 if (seq != le32_to_cpu(mmp->mmp_seq)) {
369 dump_mmp_msg(sb, mmp,
370 "Device is already active on another node.");
371 goto failed;
372 }
373
374 mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
375 if (!mmpd_data) {
376 ext4_warning(sb, "not enough memory for mmpd_data");
377 goto failed;
378 }
379 mmpd_data->sb = sb;
380 mmpd_data->bh = bh;
381
382 /*
383 * Start a kernel thread to update the MMP block periodically.
384 */
385 EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s",
386 bdevname(bh->b_bdev,
387 mmp->mmp_bdevname));
388 if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
389 EXT4_SB(sb)->s_mmp_tsk = NULL;
390 kfree(mmpd_data);
391 ext4_warning(sb, "Unable to create kmmpd thread for %s.",
392 sb->s_id);
393 goto failed;
394 }
395
396 return 0;
397
398 failed:
399 brelse(bh);
400 return 1;
401 }
402
403