ocfs2: Free up some space in the lvb
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / ocfs2 / dlmglue.c
CommitLineData
ccd979bd
MF
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dlmglue.c
5 *
6 * Code which implements an OCFS2 specific interface to our DLM.
7 *
8 * Copyright (C) 2003, 2004 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26#include <linux/types.h>
27#include <linux/slab.h>
28#include <linux/highmem.h>
29#include <linux/mm.h>
30#include <linux/smp_lock.h>
31#include <linux/crc32.h>
32#include <linux/kthread.h>
33#include <linux/pagemap.h>
34#include <linux/debugfs.h>
35#include <linux/seq_file.h>
36
37#include <cluster/heartbeat.h>
38#include <cluster/nodemanager.h>
39#include <cluster/tcp.h>
40
41#include <dlm/dlmapi.h>
42
43#define MLOG_MASK_PREFIX ML_DLM_GLUE
44#include <cluster/masklog.h>
45
46#include "ocfs2.h"
47
48#include "alloc.h"
d680efe9 49#include "dcache.h"
ccd979bd
MF
50#include "dlmglue.h"
51#include "extent_map.h"
52#include "heartbeat.h"
53#include "inode.h"
54#include "journal.h"
55#include "slot_map.h"
56#include "super.h"
57#include "uptodate.h"
58#include "vote.h"
59
60#include "buffer_head_io.h"
61
62struct ocfs2_mask_waiter {
63 struct list_head mw_item;
64 int mw_status;
65 struct completion mw_complete;
66 unsigned long mw_mask;
67 unsigned long mw_goal;
68};
69
70static void ocfs2_inode_ast_func(void *opaque);
71static void ocfs2_inode_bast_func(void *opaque,
72 int level);
d680efe9
MF
73static void ocfs2_dentry_ast_func(void *opaque);
74static void ocfs2_dentry_bast_func(void *opaque,
75 int level);
ccd979bd
MF
76static void ocfs2_super_ast_func(void *opaque);
77static void ocfs2_super_bast_func(void *opaque,
78 int level);
79static void ocfs2_rename_ast_func(void *opaque);
80static void ocfs2_rename_bast_func(void *opaque,
81 int level);
82
d680efe9
MF
83/*
84 * Return value from ocfs2_convert_worker_t functions.
85 *
86 * These control the precise actions of ocfs2_generic_unblock_lock()
87 * and ocfs2_process_blocked_lock()
88 *
89 */
enum ocfs2_unblock_action {
	/* Continue downconvert */
	UNBLOCK_CONTINUE	= 0,
	/* Continue downconvert, fire ->post_unlock callback */
	UNBLOCK_CONTINUE_POST	= 1,
	/* Do not downconvert, fire ->post_unlock() callback. */
	UNBLOCK_STOP_POST	= 2,
};
97
98struct ocfs2_unblock_ctl {
99 int requeue;
100 enum ocfs2_unblock_action unblock_action;
101};
102
ccd979bd
MF
103/* so far, all locks have gotten along with the same unlock ast */
104static void ocfs2_unlock_ast_func(void *opaque,
105 enum dlm_status status);
ccd979bd 106static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
d680efe9 107 struct ocfs2_unblock_ctl *ctl);
ccd979bd 108static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
d680efe9 109 struct ocfs2_unblock_ctl *ctl);
ccd979bd 110static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
d680efe9
MF
111 struct ocfs2_unblock_ctl *ctl);
112static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
113 struct ocfs2_unblock_ctl *ctl);
ccd979bd 114static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
d680efe9
MF
115 struct ocfs2_unblock_ctl *ctl);
116
117static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
118 struct ocfs2_lock_res *lockres);
ccd979bd
MF
119
/*
 * Per-lock-type callback table.  ->ast and ->bast are handed straight to
 * dlmlock(); the remaining hooks are driven by the glue code itself.
 */
struct ocfs2_lock_res_ops {
	/* AST callback; receives the lockres as its opaque argument */
	void (*ast)(void *);
	/* blocking AST callback; second argument is the blocking level */
	void (*bast)(void *, int);
	/* unlock AST callback; receives the resulting dlm status */
	void (*unlock_ast)(void *, enum dlm_status);
	/* unblock handler; reports its decision through the ctl argument */
	int (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
	/* optional hook; left unset by most lock types in this file */
	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
};
127
d680efe9
MF
128typedef int (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
129static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
130 struct ocfs2_lock_res *lockres,
131 struct ocfs2_unblock_ctl *ctl,
132 ocfs2_convert_worker_t *worker);
133
ccd979bd
MF
134static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
135 .ast = ocfs2_inode_ast_func,
136 .bast = ocfs2_inode_bast_func,
137 .unlock_ast = ocfs2_unlock_ast_func,
138 .unblock = ocfs2_unblock_inode_lock,
139};
140
141static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
142 .ast = ocfs2_inode_ast_func,
143 .bast = ocfs2_inode_bast_func,
144 .unlock_ast = ocfs2_unlock_ast_func,
145 .unblock = ocfs2_unblock_meta,
146};
147
ccd979bd
MF
148static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
149 .ast = ocfs2_inode_ast_func,
150 .bast = ocfs2_inode_bast_func,
151 .unlock_ast = ocfs2_unlock_ast_func,
152 .unblock = ocfs2_unblock_data,
153};
154
155static struct ocfs2_lock_res_ops ocfs2_super_lops = {
156 .ast = ocfs2_super_ast_func,
157 .bast = ocfs2_super_bast_func,
158 .unlock_ast = ocfs2_unlock_ast_func,
159 .unblock = ocfs2_unblock_osb_lock,
160};
161
162static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
163 .ast = ocfs2_rename_ast_func,
164 .bast = ocfs2_rename_bast_func,
165 .unlock_ast = ocfs2_unlock_ast_func,
166 .unblock = ocfs2_unblock_osb_lock,
167};
168
d680efe9
MF
169static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
170 .ast = ocfs2_dentry_ast_func,
171 .bast = ocfs2_dentry_bast_func,
172 .unlock_ast = ocfs2_unlock_ast_func,
173 .unblock = ocfs2_unblock_dentry_lock,
174 .post_unlock = ocfs2_dentry_post_unlock,
175};
176
ccd979bd
MF
177static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
178{
179 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
180 lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
181 lockres->l_type == OCFS2_LOCK_TYPE_RW;
182}
183
184static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres)
185{
186 return lockres->l_type == OCFS2_LOCK_TYPE_SUPER;
187}
188
189static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres)
190{
191 return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
192}
193
194static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres)
195{
196 BUG_ON(!ocfs2_is_super_lock(lockres)
197 && !ocfs2_is_rename_lock(lockres));
198
199 return (struct ocfs2_super *) lockres->l_priv;
200}
201
202static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
203{
204 BUG_ON(!ocfs2_is_inode_lock(lockres));
205
206 return (struct inode *) lockres->l_priv;
207}
208
d680efe9
MF
209static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
210{
211 BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
212
213 return (struct ocfs2_dentry_lock *)lockres->l_priv;
214}
215
ccd979bd
MF
216static int ocfs2_lock_create(struct ocfs2_super *osb,
217 struct ocfs2_lock_res *lockres,
218 int level,
219 int dlm_flags);
220static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
221 int wanted);
222static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
223 struct ocfs2_lock_res *lockres,
224 int level);
225static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
226static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
227static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
228static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
229static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
230 struct ocfs2_lock_res *lockres);
231static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
232 int convert);
/*
 * Log a DLM failure at ML_ERROR level.
 *
 * _func:    name of the DLM call that failed (a string)
 * _stat:    the dlm status it returned (expanded twice - avoid side effects)
 * _lockres: pointer to the lock resource involved; parenthesized in the
 *           expansion so any pointer-valued expression can be passed.
 */
#define ocfs2_log_dlm_error(_func, _stat, _lockres) do {		\
	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
	     "resource %s: %s\n", dlm_errname(_stat), _func,		\
	     (_lockres)->l_name, dlm_errmsg(_stat));			\
} while (0)
238static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
239 struct ocfs2_lock_res *lockres);
240static int ocfs2_meta_lock_update(struct inode *inode,
241 struct buffer_head **bh);
242static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
243static inline int ocfs2_highest_compat_lock_level(int level);
244static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
245 struct ocfs2_lock_res *lockres,
246 int new_level);
247
ccd979bd
MF
248static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
249 u64 blkno,
250 u32 generation,
251 char *name)
252{
253 int len;
254
255 mlog_entry_void();
256
257 BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
258
b0697053
MF
259 len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
260 ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
261 (long long)blkno, generation);
ccd979bd
MF
262
263 BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
264
265 mlog(0, "built lock resource with name: %s\n", name);
266
267 mlog_exit_void();
268}
269
34af946a 270static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
ccd979bd
MF
271
272static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
273 struct ocfs2_dlm_debug *dlm_debug)
274{
275 mlog(0, "Add tracking for lockres %s\n", res->l_name);
276
277 spin_lock(&ocfs2_dlm_tracking_lock);
278 list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
279 spin_unlock(&ocfs2_dlm_tracking_lock);
280}
281
282static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
283{
284 spin_lock(&ocfs2_dlm_tracking_lock);
285 if (!list_empty(&res->l_debug_list))
286 list_del_init(&res->l_debug_list);
287 spin_unlock(&ocfs2_dlm_tracking_lock);
288}
289
290static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
291 struct ocfs2_lock_res *res,
292 enum ocfs2_lock_type type,
ccd979bd
MF
293 struct ocfs2_lock_res_ops *ops,
294 void *priv)
295{
ccd979bd
MF
296 res->l_type = type;
297 res->l_ops = ops;
298 res->l_priv = priv;
299
300 res->l_level = LKM_IVMODE;
301 res->l_requested = LKM_IVMODE;
302 res->l_blocking = LKM_IVMODE;
303 res->l_action = OCFS2_AST_INVALID;
304 res->l_unlock_action = OCFS2_UNLOCK_INVALID;
305
306 res->l_flags = OCFS2_LOCK_INITIALIZED;
307
308 ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
309}
310
311void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
312{
313 /* This also clears out the lock status block */
314 memset(res, 0, sizeof(struct ocfs2_lock_res));
315 spin_lock_init(&res->l_lock);
316 init_waitqueue_head(&res->l_event);
317 INIT_LIST_HEAD(&res->l_blocked_list);
318 INIT_LIST_HEAD(&res->l_mask_waiters);
319}
320
321void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
322 enum ocfs2_lock_type type,
323 struct inode *inode)
324{
325 struct ocfs2_lock_res_ops *ops;
326
327 switch(type) {
328 case OCFS2_LOCK_TYPE_RW:
329 ops = &ocfs2_inode_rw_lops;
330 break;
331 case OCFS2_LOCK_TYPE_META:
332 ops = &ocfs2_inode_meta_lops;
333 break;
334 case OCFS2_LOCK_TYPE_DATA:
335 ops = &ocfs2_inode_data_lops;
336 break;
337 default:
338 mlog_bug_on_msg(1, "type: %d\n", type);
339 ops = NULL; /* thanks, gcc */
340 break;
341 };
342
d680efe9
MF
343 ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
344 inode->i_generation, res->l_name);
345 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
346}
347
348static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
349{
350 __be64 inode_blkno_be;
351
352 memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
353 sizeof(__be64));
354
355 return be64_to_cpu(inode_blkno_be);
356}
357
358void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
359 u64 parent, struct inode *inode)
360{
361 int len;
362 u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
363 __be64 inode_blkno_be = cpu_to_be64(inode_blkno);
364 struct ocfs2_lock_res *lockres = &dl->dl_lockres;
365
366 ocfs2_lock_res_init_once(lockres);
367
368 /*
369 * Unfortunately, the standard lock naming scheme won't work
370 * here because we have two 16 byte values to use. Instead,
371 * we'll stuff the inode number as a binary value. We still
372 * want error prints to show something without garbling the
373 * display, so drop a null byte in there before the inode
374 * number. A future version of OCFS2 will likely use all
375 * binary lock names. The stringified names have been a
376 * tremendous aid in debugging, but now that the debugfs
377 * interface exists, we can mangle things there if need be.
378 *
379 * NOTE: We also drop the standard "pad" value (the total lock
380 * name size stays the same though - the last part is all
381 * zeros due to the memset in ocfs2_lock_res_init_once()
382 */
383 len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
384 "%c%016llx",
385 ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
386 (long long)parent);
387
388 BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
389
390 memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
391 sizeof(__be64));
392
393 ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
394 OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
395 dl);
ccd979bd
MF
396}
397
398static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
399 struct ocfs2_super *osb)
400{
401 /* Superblock lockres doesn't come from a slab so we call init
402 * once on it manually. */
403 ocfs2_lock_res_init_once(res);
d680efe9
MF
404 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
405 0, res->l_name);
ccd979bd 406 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
ccd979bd
MF
407 &ocfs2_super_lops, osb);
408}
409
410static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
411 struct ocfs2_super *osb)
412{
413 /* Rename lockres doesn't come from a slab so we call init
414 * once on it manually. */
415 ocfs2_lock_res_init_once(res);
d680efe9
MF
416 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
417 ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
ccd979bd
MF
418 &ocfs2_rename_lops, osb);
419}
420
421void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
422{
423 mlog_entry_void();
424
425 if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
426 return;
427
428 ocfs2_remove_lockres_tracking(res);
429
430 mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
431 "Lockres %s is on the blocked list\n",
432 res->l_name);
433 mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
434 "Lockres %s has mask waiters pending\n",
435 res->l_name);
436 mlog_bug_on_msg(spin_is_locked(&res->l_lock),
437 "Lockres %s is locked\n",
438 res->l_name);
439 mlog_bug_on_msg(res->l_ro_holders,
440 "Lockres %s has %u ro holders\n",
441 res->l_name, res->l_ro_holders);
442 mlog_bug_on_msg(res->l_ex_holders,
443 "Lockres %s has %u ex holders\n",
444 res->l_name, res->l_ex_holders);
445
446 /* Need to clear out the lock status block for the dlm */
447 memset(&res->l_lksb, 0, sizeof(res->l_lksb));
448
449 res->l_flags = 0UL;
450 mlog_exit_void();
451}
452
453static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
454 int level)
455{
456 mlog_entry_void();
457
458 BUG_ON(!lockres);
459
460 switch(level) {
461 case LKM_EXMODE:
462 lockres->l_ex_holders++;
463 break;
464 case LKM_PRMODE:
465 lockres->l_ro_holders++;
466 break;
467 default:
468 BUG();
469 }
470
471 mlog_exit_void();
472}
473
474static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
475 int level)
476{
477 mlog_entry_void();
478
479 BUG_ON(!lockres);
480
481 switch(level) {
482 case LKM_EXMODE:
483 BUG_ON(!lockres->l_ex_holders);
484 lockres->l_ex_holders--;
485 break;
486 case LKM_PRMODE:
487 BUG_ON(!lockres->l_ro_holders);
488 lockres->l_ro_holders--;
489 break;
490 default:
491 BUG();
492 }
493 mlog_exit_void();
494}
495
496/* WARNING: This function lives in a world where the only three lock
497 * levels are EX, PR, and NL. It *will* have to be adjusted when more
498 * lock types are added. */
499static inline int ocfs2_highest_compat_lock_level(int level)
500{
501 int new_level = LKM_EXMODE;
502
503 if (level == LKM_EXMODE)
504 new_level = LKM_NLMODE;
505 else if (level == LKM_PRMODE)
506 new_level = LKM_PRMODE;
507 return new_level;
508}
509
510static void lockres_set_flags(struct ocfs2_lock_res *lockres,
511 unsigned long newflags)
512{
513 struct list_head *pos, *tmp;
514 struct ocfs2_mask_waiter *mw;
515
516 assert_spin_locked(&lockres->l_lock);
517
518 lockres->l_flags = newflags;
519
520 list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
521 mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
522 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
523 continue;
524
525 list_del_init(&mw->mw_item);
526 mw->mw_status = 0;
527 complete(&mw->mw_complete);
528 }
529}
530static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
531{
532 lockres_set_flags(lockres, lockres->l_flags | or);
533}
534static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
535 unsigned long clear)
536{
537 lockres_set_flags(lockres, lockres->l_flags & ~clear);
538}
539
540static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
541{
542 mlog_entry_void();
543
544 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
545 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
546 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
547 BUG_ON(lockres->l_blocking <= LKM_NLMODE);
548
549 lockres->l_level = lockres->l_requested;
550 if (lockres->l_level <=
551 ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
552 lockres->l_blocking = LKM_NLMODE;
553 lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
554 }
555 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
556
557 mlog_exit_void();
558}
559
560static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
561{
562 mlog_entry_void();
563
564 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
565 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
566
567 /* Convert from RO to EX doesn't really need anything as our
568 * information is already up to data. Convert from NL to
569 * *anything* however should mark ourselves as needing an
570 * update */
571 if (lockres->l_level == LKM_NLMODE)
572 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
573
574 lockres->l_level = lockres->l_requested;
575 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
576
577 mlog_exit_void();
578}
579
580static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
581{
582 mlog_entry_void();
583
584 BUG_ON((!lockres->l_flags & OCFS2_LOCK_BUSY));
585 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
586
587 if (lockres->l_requested > LKM_NLMODE &&
588 !(lockres->l_flags & OCFS2_LOCK_LOCAL))
589 lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
590
591 lockres->l_level = lockres->l_requested;
592 lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
593 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
594
595 mlog_exit_void();
596}
597
598static void ocfs2_inode_ast_func(void *opaque)
599{
600 struct ocfs2_lock_res *lockres = opaque;
601 struct inode *inode;
602 struct dlm_lockstatus *lksb;
603 unsigned long flags;
604
605 mlog_entry_void();
606
607 inode = ocfs2_lock_res_inode(lockres);
608
b0697053
MF
609 mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
610 (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
ccd979bd
MF
611 ocfs2_lock_type_string(lockres->l_type));
612
613 BUG_ON(!ocfs2_is_inode_lock(lockres));
614
615 spin_lock_irqsave(&lockres->l_lock, flags);
616
617 lksb = &(lockres->l_lksb);
618 if (lksb->status != DLM_NORMAL) {
619 mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
b0697053
MF
620 "on inode %llu\n", lksb->status,
621 (unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd
MF
622 spin_unlock_irqrestore(&lockres->l_lock, flags);
623 mlog_exit_void();
624 return;
625 }
626
627 switch(lockres->l_action) {
628 case OCFS2_AST_ATTACH:
629 ocfs2_generic_handle_attach_action(lockres);
630 lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
631 break;
632 case OCFS2_AST_CONVERT:
633 ocfs2_generic_handle_convert_action(lockres);
634 break;
635 case OCFS2_AST_DOWNCONVERT:
636 ocfs2_generic_handle_downconvert_action(lockres);
637 break;
638 default:
639 mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
640 "lockres flags = 0x%lx, unlock action: %u\n",
641 lockres->l_name, lockres->l_action, lockres->l_flags,
642 lockres->l_unlock_action);
643
644 BUG();
645 }
646
647 /* data and rw locking ignores refresh flag for now. */
648 if (lockres->l_type != OCFS2_LOCK_TYPE_META)
649 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
650
651 /* set it to something invalid so if we get called again we
652 * can catch it. */
653 lockres->l_action = OCFS2_AST_INVALID;
654 spin_unlock_irqrestore(&lockres->l_lock, flags);
655 wake_up(&lockres->l_event);
656
657 mlog_exit_void();
658}
659
660static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
661 int level)
662{
663 int needs_downconvert = 0;
664 mlog_entry_void();
665
666 assert_spin_locked(&lockres->l_lock);
667
668 lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
669
670 if (level > lockres->l_blocking) {
671 /* only schedule a downconvert if we haven't already scheduled
672 * one that goes low enough to satisfy the level we're
673 * blocking. this also catches the case where we get
674 * duplicate BASTs */
675 if (ocfs2_highest_compat_lock_level(level) <
676 ocfs2_highest_compat_lock_level(lockres->l_blocking))
677 needs_downconvert = 1;
678
679 lockres->l_blocking = level;
680 }
681
682 mlog_exit(needs_downconvert);
683 return needs_downconvert;
684}
685
686static void ocfs2_generic_bast_func(struct ocfs2_super *osb,
687 struct ocfs2_lock_res *lockres,
688 int level)
689{
690 int needs_downconvert;
691 unsigned long flags;
692
693 mlog_entry_void();
694
695 BUG_ON(level <= LKM_NLMODE);
696
697 spin_lock_irqsave(&lockres->l_lock, flags);
698 needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
699 if (needs_downconvert)
700 ocfs2_schedule_blocked_lock(osb, lockres);
701 spin_unlock_irqrestore(&lockres->l_lock, flags);
702
d680efe9
MF
703 wake_up(&lockres->l_event);
704
ccd979bd
MF
705 ocfs2_kick_vote_thread(osb);
706
ccd979bd
MF
707 mlog_exit_void();
708}
709
710static void ocfs2_inode_bast_func(void *opaque, int level)
711{
712 struct ocfs2_lock_res *lockres = opaque;
713 struct inode *inode;
714 struct ocfs2_super *osb;
715
716 mlog_entry_void();
717
718 BUG_ON(!ocfs2_is_inode_lock(lockres));
719
720 inode = ocfs2_lock_res_inode(lockres);
721 osb = OCFS2_SB(inode->i_sb);
722
b0697053
MF
723 mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
724 (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
725 lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
ccd979bd
MF
726
727 ocfs2_generic_bast_func(osb, lockres, level);
728
729 mlog_exit_void();
730}
731
732static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
733 int ignore_refresh)
734{
735 struct dlm_lockstatus *lksb = &lockres->l_lksb;
736 unsigned long flags;
737
738 spin_lock_irqsave(&lockres->l_lock, flags);
739
740 if (lksb->status != DLM_NORMAL) {
741 mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n",
742 lockres->l_name, lksb->status);
743 spin_unlock_irqrestore(&lockres->l_lock, flags);
744 return;
745 }
746
747 switch(lockres->l_action) {
748 case OCFS2_AST_ATTACH:
749 ocfs2_generic_handle_attach_action(lockres);
750 break;
751 case OCFS2_AST_CONVERT:
752 ocfs2_generic_handle_convert_action(lockres);
753 break;
754 case OCFS2_AST_DOWNCONVERT:
755 ocfs2_generic_handle_downconvert_action(lockres);
756 break;
757 default:
758 BUG();
759 }
760
761 if (ignore_refresh)
762 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
763
764 /* set it to something invalid so if we get called again we
765 * can catch it. */
766 lockres->l_action = OCFS2_AST_INVALID;
ccd979bd
MF
767
768 wake_up(&lockres->l_event);
d680efe9 769 spin_unlock_irqrestore(&lockres->l_lock, flags);
ccd979bd
MF
770}
771
/* AST callback for the superblock lock; keeps the refresh flag intact. */
static void ocfs2_super_ast_func(void *opaque)
{
	struct ocfs2_lock_res *super_res = opaque;

	mlog_entry_void();
	mlog(0, "Superblock AST fired\n");

	BUG_ON(!ocfs2_is_super_lock(super_res));
	ocfs2_generic_ast_func(super_res, 0);

	mlog_exit_void();
}
784
/* Blocking-AST callback for the superblock lock. */
static void ocfs2_super_bast_func(void *opaque,
				  int level)
{
	struct ocfs2_lock_res *super_res = opaque;

	mlog_entry_void();
	mlog(0, "Superblock BAST fired\n");

	BUG_ON(!ocfs2_is_super_lock(super_res));
	ocfs2_generic_bast_func(ocfs2_lock_res_super(super_res), super_res,
				level);

	mlog_exit_void();
}
800
/* AST callback for the rename lock; the refresh flag is always dropped. */
static void ocfs2_rename_ast_func(void *opaque)
{
	struct ocfs2_lock_res *rename_res = opaque;

	mlog_entry_void();

	mlog(0, "Rename AST fired\n");

	BUG_ON(!ocfs2_is_rename_lock(rename_res));

	ocfs2_generic_ast_func(rename_res, 1);

	mlog_exit_void();
}
815
/* Blocking-AST callback for the rename lock. */
static void ocfs2_rename_bast_func(void *opaque,
				   int level)
{
	struct ocfs2_lock_res *rename_res = opaque;

	mlog_entry_void();

	mlog(0, "Rename BAST fired\n");

	BUG_ON(!ocfs2_is_rename_lock(rename_res));

	ocfs2_generic_bast_func(ocfs2_lock_res_super(rename_res), rename_res,
				level);

	mlog_exit_void();
}
833
d680efe9
MF
/* AST callback for dentry locks; the refresh flag is always dropped. */
static void ocfs2_dentry_ast_func(void *opaque)
{
	struct ocfs2_lock_res *dentry_res = opaque;

	BUG_ON(!dentry_res);

	ocfs2_generic_ast_func(dentry_res, 1);
}
842
843static void ocfs2_dentry_bast_func(void *opaque, int level)
844{
845 struct ocfs2_lock_res *lockres = opaque;
846 struct ocfs2_dentry_lock *dl = lockres->l_priv;
847 struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
848
849 mlog(0, "Dentry bast: level: %d, name: %s\n", level,
850 lockres->l_name);
851
852 ocfs2_generic_bast_func(osb, lockres, level);
853}
854
ccd979bd
MF
855static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
856 int convert)
857{
858 unsigned long flags;
859
860 mlog_entry_void();
861 spin_lock_irqsave(&lockres->l_lock, flags);
862 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
863 if (convert)
864 lockres->l_action = OCFS2_AST_INVALID;
865 else
866 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
867 spin_unlock_irqrestore(&lockres->l_lock, flags);
868
869 wake_up(&lockres->l_event);
870 mlog_exit_void();
871}
872
873/* Note: If we detect another process working on the lock (i.e.,
874 * OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
875 * to do the right thing in that case.
876 */
877static int ocfs2_lock_create(struct ocfs2_super *osb,
878 struct ocfs2_lock_res *lockres,
879 int level,
880 int dlm_flags)
881{
882 int ret = 0;
883 enum dlm_status status;
884 unsigned long flags;
885
886 mlog_entry_void();
887
888 mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level,
889 dlm_flags);
890
891 spin_lock_irqsave(&lockres->l_lock, flags);
892 if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
893 (lockres->l_flags & OCFS2_LOCK_BUSY)) {
894 spin_unlock_irqrestore(&lockres->l_lock, flags);
895 goto bail;
896 }
897
898 lockres->l_action = OCFS2_AST_ATTACH;
899 lockres->l_requested = level;
900 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
901 spin_unlock_irqrestore(&lockres->l_lock, flags);
902
903 status = dlmlock(osb->dlm,
904 level,
905 &lockres->l_lksb,
906 dlm_flags,
907 lockres->l_name,
f0681062 908 OCFS2_LOCK_ID_MAX_LEN - 1,
ccd979bd
MF
909 lockres->l_ops->ast,
910 lockres,
911 lockres->l_ops->bast);
912 if (status != DLM_NORMAL) {
913 ocfs2_log_dlm_error("dlmlock", status, lockres);
914 ret = -EINVAL;
915 ocfs2_recover_from_dlm_error(lockres, 1);
916 }
917
918 mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name);
919
920bail:
921 mlog_exit(ret);
922 return ret;
923}
924
925static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
926 int flag)
927{
928 unsigned long flags;
929 int ret;
930
931 spin_lock_irqsave(&lockres->l_lock, flags);
932 ret = lockres->l_flags & flag;
933 spin_unlock_irqrestore(&lockres->l_lock, flags);
934
935 return ret;
936}
937
938static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
939
940{
941 wait_event(lockres->l_event,
942 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
943}
944
945static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
946
947{
948 wait_event(lockres->l_event,
949 !ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
950}
951
952/* predict what lock level we'll be dropping down to on behalf
953 * of another node, and return true if the currently wanted
954 * level will be compatible with it. */
955static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
956 int wanted)
957{
958 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
959
960 return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
961}
962
963static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
964{
965 INIT_LIST_HEAD(&mw->mw_item);
966 init_completion(&mw->mw_complete);
967}
968
969static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
970{
971 wait_for_completion(&mw->mw_complete);
972 /* Re-arm the completion in case we want to wait on it again */
973 INIT_COMPLETION(mw->mw_complete);
974 return mw->mw_status;
975}
976
977static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
978 struct ocfs2_mask_waiter *mw,
979 unsigned long mask,
980 unsigned long goal)
981{
982 BUG_ON(!list_empty(&mw->mw_item));
983
984 assert_spin_locked(&lockres->l_lock);
985
986 list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
987 mw->mw_mask = mask;
988 mw->mw_goal = goal;
989}
990
991/* returns 0 if the mw that was removed was already satisfied, -EBUSY
992 * if the mask still hadn't reached its goal */
993static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
994 struct ocfs2_mask_waiter *mw)
995{
996 unsigned long flags;
997 int ret = 0;
998
999 spin_lock_irqsave(&lockres->l_lock, flags);
1000 if (!list_empty(&mw->mw_item)) {
1001 if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
1002 ret = -EBUSY;
1003
1004 list_del_init(&mw->mw_item);
1005 init_completion(&mw->mw_complete);
1006 }
1007 spin_unlock_irqrestore(&lockres->l_lock, flags);
1008
1009 return ret;
1010
1011}
1012
1013static int ocfs2_cluster_lock(struct ocfs2_super *osb,
1014 struct ocfs2_lock_res *lockres,
1015 int level,
1016 int lkm_flags,
1017 int arg_flags)
1018{
1019 struct ocfs2_mask_waiter mw;
1020 enum dlm_status status;
1021 int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
1022 int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
1023 unsigned long flags;
1024
1025 mlog_entry_void();
1026
1027 ocfs2_init_mask_waiter(&mw);
1028
1029again:
1030 wait = 0;
1031
1032 if (catch_signals && signal_pending(current)) {
1033 ret = -ERESTARTSYS;
1034 goto out;
1035 }
1036
1037 spin_lock_irqsave(&lockres->l_lock, flags);
1038
1039 mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
1040 "Cluster lock called on freeing lockres %s! flags "
1041 "0x%lx\n", lockres->l_name, lockres->l_flags);
1042
1043 /* We only compare against the currently granted level
1044 * here. If the lock is blocked waiting on a downconvert,
1045 * we'll get caught below. */
1046 if (lockres->l_flags & OCFS2_LOCK_BUSY &&
1047 level > lockres->l_level) {
1048 /* is someone sitting in dlm_lock? If so, wait on
1049 * them. */
1050 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
1051 wait = 1;
1052 goto unlock;
1053 }
1054
1055 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
1056 /* lock has not been created yet. */
1057 spin_unlock_irqrestore(&lockres->l_lock, flags);
1058
1059 ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0);
1060 if (ret < 0) {
1061 mlog_errno(ret);
1062 goto out;
1063 }
1064 goto again;
1065 }
1066
1067 if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
1068 !ocfs2_may_continue_on_blocked_lock(lockres, level)) {
1069 /* is the lock is currently blocked on behalf of
1070 * another node */
1071 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
1072 wait = 1;
1073 goto unlock;
1074 }
1075
1076 if (level > lockres->l_level) {
1077 if (lockres->l_action != OCFS2_AST_INVALID)
1078 mlog(ML_ERROR, "lockres %s has action %u pending\n",
1079 lockres->l_name, lockres->l_action);
1080
1081 lockres->l_action = OCFS2_AST_CONVERT;
1082 lockres->l_requested = level;
1083 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
1084 spin_unlock_irqrestore(&lockres->l_lock, flags);
1085
1086 BUG_ON(level == LKM_IVMODE);
1087 BUG_ON(level == LKM_NLMODE);
1088
1089 mlog(0, "lock %s, convert from %d to level = %d\n",
1090 lockres->l_name, lockres->l_level, level);
1091
1092 /* call dlm_lock to upgrade lock now */
1093 status = dlmlock(osb->dlm,
1094 level,
1095 &lockres->l_lksb,
1096 lkm_flags|LKM_CONVERT|LKM_VALBLK,
1097 lockres->l_name,
f0681062 1098 OCFS2_LOCK_ID_MAX_LEN - 1,
ccd979bd
MF
1099 lockres->l_ops->ast,
1100 lockres,
1101 lockres->l_ops->bast);
1102 if (status != DLM_NORMAL) {
1103 if ((lkm_flags & LKM_NOQUEUE) &&
1104 (status == DLM_NOTQUEUED))
1105 ret = -EAGAIN;
1106 else {
1107 ocfs2_log_dlm_error("dlmlock", status,
1108 lockres);
1109 ret = -EINVAL;
1110 }
1111 ocfs2_recover_from_dlm_error(lockres, 1);
1112 goto out;
1113 }
1114
1115 mlog(0, "lock %s, successfull return from dlmlock\n",
1116 lockres->l_name);
1117
1118 /* At this point we've gone inside the dlm and need to
1119 * complete our work regardless. */
1120 catch_signals = 0;
1121
1122 /* wait for busy to clear and carry on */
1123 goto again;
1124 }
1125
1126 /* Ok, if we get here then we're good to go. */
1127 ocfs2_inc_holders(lockres, level);
1128
1129 ret = 0;
1130unlock:
1131 spin_unlock_irqrestore(&lockres->l_lock, flags);
1132out:
1133 /*
1134 * This is helping work around a lock inversion between the page lock
1135 * and dlm locks. One path holds the page lock while calling aops
1136 * which block acquiring dlm locks. The voting thread holds dlm
1137 * locks while acquiring page locks while down converting data locks.
1138 * This block is helping an aop path notice the inversion and back
1139 * off to unlock its page lock before trying the dlm lock again.
1140 */
1141 if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
1142 mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
1143 wait = 0;
1144 if (lockres_remove_mask_waiter(lockres, &mw))
1145 ret = -EAGAIN;
1146 else
1147 goto again;
1148 }
1149 if (wait) {
1150 ret = ocfs2_wait_for_mask(&mw);
1151 if (ret == 0)
1152 goto again;
1153 mlog_errno(ret);
1154 }
1155
1156 mlog_exit(ret);
1157 return ret;
1158}
1159
1160static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
1161 struct ocfs2_lock_res *lockres,
1162 int level)
1163{
1164 unsigned long flags;
1165
1166 mlog_entry_void();
1167 spin_lock_irqsave(&lockres->l_lock, flags);
1168 ocfs2_dec_holders(lockres, level);
1169 ocfs2_vote_on_unlock(osb, lockres);
1170 spin_unlock_irqrestore(&lockres->l_lock, flags);
1171 mlog_exit_void();
1172}
1173
d680efe9
MF
1174int ocfs2_create_new_lock(struct ocfs2_super *osb,
1175 struct ocfs2_lock_res *lockres,
1176 int ex)
ccd979bd 1177{
d680efe9 1178 int level = ex ? LKM_EXMODE : LKM_PRMODE;
ccd979bd
MF
1179 unsigned long flags;
1180
1181 spin_lock_irqsave(&lockres->l_lock, flags);
1182 BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
1183 lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
1184 spin_unlock_irqrestore(&lockres->l_lock, flags);
1185
d680efe9 1186 return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL);
ccd979bd
MF
1187}
1188
1189/* Grants us an EX lock on the data and metadata resources, skipping
1190 * the normal cluster directory lookup. Use this ONLY on newly created
1191 * inodes which other nodes can't possibly see, and which haven't been
1192 * hashed in the inode hash yet. This can give us a good performance
1193 * increase as it'll skip the network broadcast normally associated
1194 * with creating a new lock resource. */
1195int ocfs2_create_new_inode_locks(struct inode *inode)
1196{
1197 int ret;
d680efe9 1198 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
ccd979bd
MF
1199
1200 BUG_ON(!inode);
1201 BUG_ON(!ocfs2_inode_is_new(inode));
1202
1203 mlog_entry_void();
1204
b0697053 1205 mlog(0, "Inode %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd
MF
1206
1207 /* NOTE: That we don't increment any of the holder counts, nor
1208 * do we add anything to a journal handle. Since this is
1209 * supposed to be a new inode which the cluster doesn't know
1210 * about yet, there is no need to. As far as the LVB handling
1211 * is concerned, this is basically like acquiring an EX lock
1212 * on a resource which has an invalid one -- we'll set it
1213 * valid when we release the EX. */
1214
d680efe9 1215 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1);
ccd979bd
MF
1216 if (ret) {
1217 mlog_errno(ret);
1218 goto bail;
1219 }
1220
d680efe9 1221 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1);
ccd979bd
MF
1222 if (ret) {
1223 mlog_errno(ret);
1224 goto bail;
1225 }
1226
d680efe9 1227 ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1);
ccd979bd
MF
1228 if (ret) {
1229 mlog_errno(ret);
1230 goto bail;
1231 }
1232
1233bail:
1234 mlog_exit(ret);
1235 return ret;
1236}
1237
1238int ocfs2_rw_lock(struct inode *inode, int write)
1239{
1240 int status, level;
1241 struct ocfs2_lock_res *lockres;
1242
1243 BUG_ON(!inode);
1244
1245 mlog_entry_void();
1246
b0697053
MF
1247 mlog(0, "inode %llu take %s RW lock\n",
1248 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1249 write ? "EXMODE" : "PRMODE");
1250
1251 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1252
1253 level = write ? LKM_EXMODE : LKM_PRMODE;
1254
1255 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, 0,
1256 0);
1257 if (status < 0)
1258 mlog_errno(status);
1259
1260 mlog_exit(status);
1261 return status;
1262}
1263
1264void ocfs2_rw_unlock(struct inode *inode, int write)
1265{
1266 int level = write ? LKM_EXMODE : LKM_PRMODE;
1267 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_rw_lockres;
1268
1269 mlog_entry_void();
1270
b0697053
MF
1271 mlog(0, "inode %llu drop %s RW lock\n",
1272 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1273 write ? "EXMODE" : "PRMODE");
1274
1275 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1276
1277 mlog_exit_void();
1278}
1279
1280int ocfs2_data_lock_full(struct inode *inode,
1281 int write,
1282 int arg_flags)
1283{
1284 int status = 0, level;
1285 struct ocfs2_lock_res *lockres;
1286
1287 BUG_ON(!inode);
1288
1289 mlog_entry_void();
1290
b0697053
MF
1291 mlog(0, "inode %llu take %s DATA lock\n",
1292 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1293 write ? "EXMODE" : "PRMODE");
1294
1295 /* We'll allow faking a readonly data lock for
1296 * rodevices. */
1297 if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) {
1298 if (write) {
1299 status = -EROFS;
1300 mlog_errno(status);
1301 }
1302 goto out;
1303 }
1304
1305 lockres = &OCFS2_I(inode)->ip_data_lockres;
1306
1307 level = write ? LKM_EXMODE : LKM_PRMODE;
1308
1309 status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level,
1310 0, arg_flags);
1311 if (status < 0 && status != -EAGAIN)
1312 mlog_errno(status);
1313
1314out:
1315 mlog_exit(status);
1316 return status;
1317}
1318
1319/* see ocfs2_meta_lock_with_page() */
1320int ocfs2_data_lock_with_page(struct inode *inode,
1321 int write,
1322 struct page *page)
1323{
1324 int ret;
1325
1326 ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK);
1327 if (ret == -EAGAIN) {
1328 unlock_page(page);
1329 if (ocfs2_data_lock(inode, write) == 0)
1330 ocfs2_data_unlock(inode, write);
1331 ret = AOP_TRUNCATED_PAGE;
1332 }
1333
1334 return ret;
1335}
1336
1337static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
1338 struct ocfs2_lock_res *lockres)
1339{
1340 int kick = 0;
1341
1342 mlog_entry_void();
1343
1344 /* If we know that another node is waiting on our lock, kick
1345 * the vote thread * pre-emptively when we reach a release
1346 * condition. */
1347 if (lockres->l_flags & OCFS2_LOCK_BLOCKED) {
1348 switch(lockres->l_blocking) {
1349 case LKM_EXMODE:
1350 if (!lockres->l_ex_holders && !lockres->l_ro_holders)
1351 kick = 1;
1352 break;
1353 case LKM_PRMODE:
1354 if (!lockres->l_ex_holders)
1355 kick = 1;
1356 break;
1357 default:
1358 BUG();
1359 }
1360 }
1361
1362 if (kick)
1363 ocfs2_kick_vote_thread(osb);
1364
1365 mlog_exit_void();
1366}
1367
1368void ocfs2_data_unlock(struct inode *inode,
1369 int write)
1370{
1371 int level = write ? LKM_EXMODE : LKM_PRMODE;
1372 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres;
1373
1374 mlog_entry_void();
1375
b0697053
MF
1376 mlog(0, "inode %llu drop %s DATA lock\n",
1377 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1378 write ? "EXMODE" : "PRMODE");
1379
1380 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
1381 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1382
1383 mlog_exit_void();
1384}
1385
1386#define OCFS2_SEC_BITS 34
1387#define OCFS2_SEC_SHIFT (64 - 34)
1388#define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
1389
1390/* LVB only has room for 64 bits of time here so we pack it for
1391 * now. */
1392static u64 ocfs2_pack_timespec(struct timespec *spec)
1393{
1394 u64 res;
1395 u64 sec = spec->tv_sec;
1396 u32 nsec = spec->tv_nsec;
1397
1398 res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
1399
1400 return res;
1401}
1402
1403/* Call this with the lockres locked. I am reasonably sure we don't
1404 * need ip_lock in this function as anyone who would be changing those
1405 * values is supposed to be blocked in ocfs2_meta_lock right now. */
1406static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1407{
1408 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1409 struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
1410 struct ocfs2_meta_lvb *lvb;
1411
1412 mlog_entry_void();
1413
1414 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1415
4d3b83f7 1416 lvb->lvb_version = OCFS2_LVB_VERSION;
ccd979bd
MF
1417 lvb->lvb_isize = cpu_to_be64(i_size_read(inode));
1418 lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
1419 lvb->lvb_iuid = cpu_to_be32(inode->i_uid);
1420 lvb->lvb_igid = cpu_to_be32(inode->i_gid);
1421 lvb->lvb_imode = cpu_to_be16(inode->i_mode);
1422 lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
1423 lvb->lvb_iatime_packed =
1424 cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
1425 lvb->lvb_ictime_packed =
1426 cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
1427 lvb->lvb_imtime_packed =
1428 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
ca4d147e 1429 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
ccd979bd
MF
1430
1431 mlog_meta_lvb(0, lockres);
1432
1433 mlog_exit_void();
1434}
1435
1436static void ocfs2_unpack_timespec(struct timespec *spec,
1437 u64 packed_time)
1438{
1439 spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
1440 spec->tv_nsec = packed_time & OCFS2_NSEC_MASK;
1441}
1442
1443static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1444{
1445 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1446 struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres;
1447 struct ocfs2_meta_lvb *lvb;
1448
1449 mlog_entry_void();
1450
1451 mlog_meta_lvb(0, lockres);
1452
1453 lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1454
1455 /* We're safe here without the lockres lock... */
1456 spin_lock(&oi->ip_lock);
1457 oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
1458 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1459
ca4d147e
HP
1460 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1461 ocfs2_set_inode_flags(inode);
1462
ccd979bd
MF
1463 /* fast-symlinks are a special case */
1464 if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
1465 inode->i_blocks = 0;
1466 else
1467 inode->i_blocks =
1468 ocfs2_align_bytes_to_sectors(i_size_read(inode));
1469
1470 inode->i_uid = be32_to_cpu(lvb->lvb_iuid);
1471 inode->i_gid = be32_to_cpu(lvb->lvb_igid);
1472 inode->i_mode = be16_to_cpu(lvb->lvb_imode);
1473 inode->i_nlink = be16_to_cpu(lvb->lvb_inlink);
1474 ocfs2_unpack_timespec(&inode->i_atime,
1475 be64_to_cpu(lvb->lvb_iatime_packed));
1476 ocfs2_unpack_timespec(&inode->i_mtime,
1477 be64_to_cpu(lvb->lvb_imtime_packed));
1478 ocfs2_unpack_timespec(&inode->i_ctime,
1479 be64_to_cpu(lvb->lvb_ictime_packed));
1480 spin_unlock(&oi->ip_lock);
1481
1482 mlog_exit_void();
1483}
1484
1485static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres)
1486{
1487 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
1488
4d3b83f7 1489 if (lvb->lvb_version == OCFS2_LVB_VERSION)
ccd979bd
MF
1490 return 1;
1491 return 0;
1492}
1493
1494/* Determine whether a lock resource needs to be refreshed, and
1495 * arbitrate who gets to refresh it.
1496 *
1497 * 0 means no refresh needed.
1498 *
1499 * > 0 means you need to refresh this and you MUST call
1500 * ocfs2_complete_lock_res_refresh afterwards. */
1501static int ocfs2_should_refresh_lock_res(struct ocfs2_lock_res *lockres)
1502{
1503 unsigned long flags;
1504 int status = 0;
1505
1506 mlog_entry_void();
1507
1508refresh_check:
1509 spin_lock_irqsave(&lockres->l_lock, flags);
1510 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
1511 spin_unlock_irqrestore(&lockres->l_lock, flags);
1512 goto bail;
1513 }
1514
1515 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
1516 spin_unlock_irqrestore(&lockres->l_lock, flags);
1517
1518 ocfs2_wait_on_refreshing_lock(lockres);
1519 goto refresh_check;
1520 }
1521
1522 /* Ok, I'll be the one to refresh this lock. */
1523 lockres_or_flags(lockres, OCFS2_LOCK_REFRESHING);
1524 spin_unlock_irqrestore(&lockres->l_lock, flags);
1525
1526 status = 1;
1527bail:
1528 mlog_exit(status);
1529 return status;
1530}
1531
1532/* If status is non zero, I'll mark it as not being in refresh
1533 * anymroe, but i won't clear the needs refresh flag. */
1534static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockres,
1535 int status)
1536{
1537 unsigned long flags;
1538 mlog_entry_void();
1539
1540 spin_lock_irqsave(&lockres->l_lock, flags);
1541 lockres_clear_flags(lockres, OCFS2_LOCK_REFRESHING);
1542 if (!status)
1543 lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
1544 spin_unlock_irqrestore(&lockres->l_lock, flags);
1545
1546 wake_up(&lockres->l_event);
1547
1548 mlog_exit_void();
1549}
1550
1551/* may or may not return a bh if it went to disk. */
1552static int ocfs2_meta_lock_update(struct inode *inode,
1553 struct buffer_head **bh)
1554{
1555 int status = 0;
1556 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1557 struct ocfs2_lock_res *lockres;
1558 struct ocfs2_dinode *fe;
1559
1560 mlog_entry_void();
1561
1562 spin_lock(&oi->ip_lock);
1563 if (oi->ip_flags & OCFS2_INODE_DELETED) {
b0697053 1564 mlog(0, "Orphaned inode %llu was deleted while we "
ccd979bd 1565 "were waiting on a lock. ip_flags = 0x%x\n",
b0697053 1566 (unsigned long long)oi->ip_blkno, oi->ip_flags);
ccd979bd
MF
1567 spin_unlock(&oi->ip_lock);
1568 status = -ENOENT;
1569 goto bail;
1570 }
1571 spin_unlock(&oi->ip_lock);
1572
1573 lockres = &oi->ip_meta_lockres;
1574
1575 if (!ocfs2_should_refresh_lock_res(lockres))
1576 goto bail;
1577
1578 /* This will discard any caching information we might have had
1579 * for the inode metadata. */
1580 ocfs2_metadata_cache_purge(inode);
1581
1582 /* will do nothing for inode types that don't use the extent
1583 * map (directories, bitmap files, etc) */
1584 ocfs2_extent_map_trunc(inode, 0);
1585
1586 if (ocfs2_meta_lvb_is_trustable(lockres)) {
b0697053
MF
1587 mlog(0, "Trusting LVB on inode %llu\n",
1588 (unsigned long long)oi->ip_blkno);
ccd979bd
MF
1589 ocfs2_refresh_inode_from_lvb(inode);
1590 } else {
1591 /* Boo, we have to go to disk. */
1592 /* read bh, cast, ocfs2_refresh_inode */
1593 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno,
1594 bh, OCFS2_BH_CACHED, inode);
1595 if (status < 0) {
1596 mlog_errno(status);
1597 goto bail_refresh;
1598 }
1599 fe = (struct ocfs2_dinode *) (*bh)->b_data;
1600
1601 /* This is a good chance to make sure we're not
1602 * locking an invalid object.
1603 *
1604 * We bug on a stale inode here because we checked
1605 * above whether it was wiped from disk. The wiping
1606 * node provides a guarantee that we receive that
1607 * message and can mark the inode before dropping any
1608 * locks associated with it. */
1609 if (!OCFS2_IS_VALID_DINODE(fe)) {
1610 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
1611 status = -EIO;
1612 goto bail_refresh;
1613 }
1614 mlog_bug_on_msg(inode->i_generation !=
1615 le32_to_cpu(fe->i_generation),
b0697053 1616 "Invalid dinode %llu disk generation: %u "
ccd979bd 1617 "inode->i_generation: %u\n",
b0697053
MF
1618 (unsigned long long)oi->ip_blkno,
1619 le32_to_cpu(fe->i_generation),
ccd979bd
MF
1620 inode->i_generation);
1621 mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) ||
1622 !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)),
b0697053
MF
1623 "Stale dinode %llu dtime: %llu flags: 0x%x\n",
1624 (unsigned long long)oi->ip_blkno,
1625 (unsigned long long)le64_to_cpu(fe->i_dtime),
ccd979bd
MF
1626 le32_to_cpu(fe->i_flags));
1627
1628 ocfs2_refresh_inode(inode, fe);
1629 }
1630
1631 status = 0;
1632bail_refresh:
1633 ocfs2_complete_lock_res_refresh(lockres, status);
1634bail:
1635 mlog_exit(status);
1636 return status;
1637}
1638
1639static int ocfs2_assign_bh(struct inode *inode,
1640 struct buffer_head **ret_bh,
1641 struct buffer_head *passed_bh)
1642{
1643 int status;
1644
1645 if (passed_bh) {
1646 /* Ok, the update went to disk for us, use the
1647 * returned bh. */
1648 *ret_bh = passed_bh;
1649 get_bh(*ret_bh);
1650
1651 return 0;
1652 }
1653
1654 status = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1655 OCFS2_I(inode)->ip_blkno,
1656 ret_bh,
1657 OCFS2_BH_CACHED,
1658 inode);
1659 if (status < 0)
1660 mlog_errno(status);
1661
1662 return status;
1663}
1664
1665/*
1666 * returns < 0 error if the callback will never be called, otherwise
1667 * the result of the lock will be communicated via the callback.
1668 */
1669int ocfs2_meta_lock_full(struct inode *inode,
1670 struct ocfs2_journal_handle *handle,
1671 struct buffer_head **ret_bh,
1672 int ex,
1673 int arg_flags)
1674{
1675 int status, level, dlm_flags, acquired;
1676 struct ocfs2_lock_res *lockres;
1677 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1678 struct buffer_head *local_bh = NULL;
1679
1680 BUG_ON(!inode);
1681
1682 mlog_entry_void();
1683
b0697053
MF
1684 mlog(0, "inode %llu, take %s META lock\n",
1685 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1686 ex ? "EXMODE" : "PRMODE");
1687
1688 status = 0;
1689 acquired = 0;
1690 /* We'll allow faking a readonly metadata lock for
1691 * rodevices. */
1692 if (ocfs2_is_hard_readonly(osb)) {
1693 if (ex)
1694 status = -EROFS;
1695 goto bail;
1696 }
1697
1698 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
1699 wait_event(osb->recovery_event,
1700 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1701
1702 acquired = 0;
1703 lockres = &OCFS2_I(inode)->ip_meta_lockres;
1704 level = ex ? LKM_EXMODE : LKM_PRMODE;
1705 dlm_flags = 0;
1706 if (arg_flags & OCFS2_META_LOCK_NOQUEUE)
1707 dlm_flags |= LKM_NOQUEUE;
1708
1709 status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags);
1710 if (status < 0) {
1711 if (status != -EAGAIN && status != -EIOCBRETRY)
1712 mlog_errno(status);
1713 goto bail;
1714 }
1715
1716 /* Notify the error cleanup path to drop the cluster lock. */
1717 acquired = 1;
1718
1719 /* We wait twice because a node may have died while we were in
1720 * the lower dlm layers. The second time though, we've
1721 * committed to owning this lock so we don't allow signals to
1722 * abort the operation. */
1723 if (!(arg_flags & OCFS2_META_LOCK_RECOVERY))
1724 wait_event(osb->recovery_event,
1725 ocfs2_node_map_is_empty(osb, &osb->recovery_map));
1726
1727 /* This is fun. The caller may want a bh back, or it may
1728 * not. ocfs2_meta_lock_update definitely wants one in, but
1729 * may or may not read one, depending on what's in the
1730 * LVB. The result of all of this is that we've *only* gone to
1731 * disk if we have to, so the complexity is worthwhile. */
1732 status = ocfs2_meta_lock_update(inode, &local_bh);
1733 if (status < 0) {
1734 if (status != -ENOENT)
1735 mlog_errno(status);
1736 goto bail;
1737 }
1738
1739 if (ret_bh) {
1740 status = ocfs2_assign_bh(inode, ret_bh, local_bh);
1741 if (status < 0) {
1742 mlog_errno(status);
1743 goto bail;
1744 }
1745 }
1746
1747 if (handle) {
1748 status = ocfs2_handle_add_lock(handle, inode);
1749 if (status < 0)
1750 mlog_errno(status);
1751 }
1752
1753bail:
1754 if (status < 0) {
1755 if (ret_bh && (*ret_bh)) {
1756 brelse(*ret_bh);
1757 *ret_bh = NULL;
1758 }
1759 if (acquired)
1760 ocfs2_meta_unlock(inode, ex);
1761 }
1762
1763 if (local_bh)
1764 brelse(local_bh);
1765
1766 mlog_exit(status);
1767 return status;
1768}
1769
1770/*
1771 * This is working around a lock inversion between tasks acquiring DLM locks
1772 * while holding a page lock and the vote thread which blocks dlm lock acquiry
1773 * while acquiring page locks.
1774 *
1775 * ** These _with_page variantes are only intended to be called from aop
1776 * methods that hold page locks and return a very specific *positive* error
1777 * code that aop methods pass up to the VFS -- test for errors with != 0. **
1778 *
1779 * The DLM is called such that it returns -EAGAIN if it would have blocked
1780 * waiting for the vote thread. In that case we unlock our page so the vote
1781 * thread can make progress. Once we've done this we have to return
1782 * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up
1783 * into the VFS who will then immediately retry the aop call.
1784 *
1785 * We do a blocking lock and immediate unlock before returning, though, so that
1786 * the lock has a great chance of being cached on this node by the time the VFS
1787 * calls back to retry the aop. This has a potential to livelock as nodes
1788 * ping locks back and forth, but that's a risk we're willing to take to avoid
1789 * the lock inversion simply.
1790 */
1791int ocfs2_meta_lock_with_page(struct inode *inode,
1792 struct ocfs2_journal_handle *handle,
1793 struct buffer_head **ret_bh,
1794 int ex,
1795 struct page *page)
1796{
1797 int ret;
1798
1799 ret = ocfs2_meta_lock_full(inode, handle, ret_bh, ex,
1800 OCFS2_LOCK_NONBLOCK);
1801 if (ret == -EAGAIN) {
1802 unlock_page(page);
1803 if (ocfs2_meta_lock(inode, handle, ret_bh, ex) == 0)
1804 ocfs2_meta_unlock(inode, ex);
1805 ret = AOP_TRUNCATED_PAGE;
1806 }
1807
1808 return ret;
1809}
1810
1811void ocfs2_meta_unlock(struct inode *inode,
1812 int ex)
1813{
1814 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1815 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
1816
1817 mlog_entry_void();
1818
b0697053
MF
1819 mlog(0, "inode %llu drop %s META lock\n",
1820 (unsigned long long)OCFS2_I(inode)->ip_blkno,
ccd979bd
MF
1821 ex ? "EXMODE" : "PRMODE");
1822
1823 if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)))
1824 ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level);
1825
1826 mlog_exit_void();
1827}
1828
1829int ocfs2_super_lock(struct ocfs2_super *osb,
1830 int ex)
1831{
1832 int status;
1833 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1834 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
1835 struct buffer_head *bh;
1836 struct ocfs2_slot_info *si = osb->slot_info;
1837
1838 mlog_entry_void();
1839
1840 if (ocfs2_is_hard_readonly(osb))
1841 return -EROFS;
1842
1843 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
1844 if (status < 0) {
1845 mlog_errno(status);
1846 goto bail;
1847 }
1848
1849 /* The super block lock path is really in the best position to
1850 * know when resources covered by the lock need to be
1851 * refreshed, so we do it here. Of course, making sense of
1852 * everything is up to the caller :) */
1853 status = ocfs2_should_refresh_lock_res(lockres);
1854 if (status < 0) {
1855 mlog_errno(status);
1856 goto bail;
1857 }
1858 if (status) {
1859 bh = si->si_bh;
1860 status = ocfs2_read_block(osb, bh->b_blocknr, &bh, 0,
1861 si->si_inode);
1862 if (status == 0)
1863 ocfs2_update_slot_info(si);
1864
1865 ocfs2_complete_lock_res_refresh(lockres, status);
1866
1867 if (status < 0)
1868 mlog_errno(status);
1869 }
1870bail:
1871 mlog_exit(status);
1872 return status;
1873}
1874
1875void ocfs2_super_unlock(struct ocfs2_super *osb,
1876 int ex)
1877{
1878 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1879 struct ocfs2_lock_res *lockres = &osb->osb_super_lockres;
1880
1881 ocfs2_cluster_unlock(osb, lockres, level);
1882}
1883
1884int ocfs2_rename_lock(struct ocfs2_super *osb)
1885{
1886 int status;
1887 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
1888
1889 if (ocfs2_is_hard_readonly(osb))
1890 return -EROFS;
1891
1892 status = ocfs2_cluster_lock(osb, lockres, LKM_EXMODE, 0, 0);
1893 if (status < 0)
1894 mlog_errno(status);
1895
1896 return status;
1897}
1898
1899void ocfs2_rename_unlock(struct ocfs2_super *osb)
1900{
1901 struct ocfs2_lock_res *lockres = &osb->osb_rename_lockres;
1902
1903 ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
1904}
1905
d680efe9
MF
1906int ocfs2_dentry_lock(struct dentry *dentry, int ex)
1907{
1908 int ret;
1909 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1910 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1911 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1912
1913 BUG_ON(!dl);
1914
1915 if (ocfs2_is_hard_readonly(osb))
1916 return -EROFS;
1917
1918 ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
1919 if (ret < 0)
1920 mlog_errno(ret);
1921
1922 return ret;
1923}
1924
1925void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
1926{
1927 int level = ex ? LKM_EXMODE : LKM_PRMODE;
1928 struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
1929 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
1930
1931 ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
1932}
1933
ccd979bd
MF
1934/* Reference counting of the dlm debug structure. We want this because
1935 * open references on the debug inodes can live on after a mount, so
1936 * we can't rely on the ocfs2_super to always exist. */
1937static void ocfs2_dlm_debug_free(struct kref *kref)
1938{
1939 struct ocfs2_dlm_debug *dlm_debug;
1940
1941 dlm_debug = container_of(kref, struct ocfs2_dlm_debug, d_refcnt);
1942
1943 kfree(dlm_debug);
1944}
1945
1946void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug)
1947{
1948 if (dlm_debug)
1949 kref_put(&dlm_debug->d_refcnt, ocfs2_dlm_debug_free);
1950}
1951
1952static void ocfs2_get_dlm_debug(struct ocfs2_dlm_debug *debug)
1953{
1954 kref_get(&debug->d_refcnt);
1955}
1956
1957struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
1958{
1959 struct ocfs2_dlm_debug *dlm_debug;
1960
1961 dlm_debug = kmalloc(sizeof(struct ocfs2_dlm_debug), GFP_KERNEL);
1962 if (!dlm_debug) {
1963 mlog_errno(-ENOMEM);
1964 goto out;
1965 }
1966
1967 kref_init(&dlm_debug->d_refcnt);
1968 INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
1969 dlm_debug->d_locking_state = NULL;
1970out:
1971 return dlm_debug;
1972}
1973
1974/* Access to this is arbitrated for us via seq_file->sem. */
1975struct ocfs2_dlm_seq_priv {
1976 struct ocfs2_dlm_debug *p_dlm_debug;
1977 struct ocfs2_lock_res p_iter_res;
1978 struct ocfs2_lock_res p_tmp_res;
1979};
1980
1981static struct ocfs2_lock_res *ocfs2_dlm_next_res(struct ocfs2_lock_res *start,
1982 struct ocfs2_dlm_seq_priv *priv)
1983{
1984 struct ocfs2_lock_res *iter, *ret = NULL;
1985 struct ocfs2_dlm_debug *dlm_debug = priv->p_dlm_debug;
1986
1987 assert_spin_locked(&ocfs2_dlm_tracking_lock);
1988
1989 list_for_each_entry(iter, &start->l_debug_list, l_debug_list) {
1990 /* discover the head of the list */
1991 if (&iter->l_debug_list == &dlm_debug->d_lockres_tracking) {
1992 mlog(0, "End of list found, %p\n", ret);
1993 break;
1994 }
1995
1996 /* We track our "dummy" iteration lockres' by a NULL
1997 * l_ops field. */
1998 if (iter->l_ops != NULL) {
1999 ret = iter;
2000 break;
2001 }
2002 }
2003
2004 return ret;
2005}
2006
2007static void *ocfs2_dlm_seq_start(struct seq_file *m, loff_t *pos)
2008{
2009 struct ocfs2_dlm_seq_priv *priv = m->private;
2010 struct ocfs2_lock_res *iter;
2011
2012 spin_lock(&ocfs2_dlm_tracking_lock);
2013 iter = ocfs2_dlm_next_res(&priv->p_iter_res, priv);
2014 if (iter) {
2015 /* Since lockres' have the lifetime of their container
2016 * (which can be inodes, ocfs2_supers, etc) we want to
2017 * copy this out to a temporary lockres while still
2018 * under the spinlock. Obviously after this we can't
2019 * trust any pointers on the copy returned, but that's
2020 * ok as the information we want isn't typically held
2021 * in them. */
2022 priv->p_tmp_res = *iter;
2023 iter = &priv->p_tmp_res;
2024 }
2025 spin_unlock(&ocfs2_dlm_tracking_lock);
2026
2027 return iter;
2028}
2029
/* seq_file stop callback: intentionally empty. */
static void ocfs2_dlm_seq_stop(struct seq_file *m, void *v)
{
}
2033
2034static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
2035{
2036 struct ocfs2_dlm_seq_priv *priv = m->private;
2037 struct ocfs2_lock_res *iter = v;
2038 struct ocfs2_lock_res *dummy = &priv->p_iter_res;
2039
2040 spin_lock(&ocfs2_dlm_tracking_lock);
2041 iter = ocfs2_dlm_next_res(iter, priv);
2042 list_del_init(&dummy->l_debug_list);
2043 if (iter) {
2044 list_add(&dummy->l_debug_list, &iter->l_debug_list);
2045 priv->p_tmp_res = *iter;
2046 iter = &priv->p_tmp_res;
2047 }
2048 spin_unlock(&ocfs2_dlm_tracking_lock);
2049
2050 return iter;
2051}
2052
2053/* So that debugfs.ocfs2 can determine which format is being used */
2054#define OCFS2_DLM_DEBUG_STR_VERSION 1
2055static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
2056{
2057 int i;
2058 char *lvb;
2059 struct ocfs2_lock_res *lockres = v;
2060
2061 if (!lockres)
2062 return -EINVAL;
2063
d680efe9
MF
2064 seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
2065
2066 if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
2067 seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
2068 lockres->l_name,
2069 (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
2070 else
2071 seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
2072
2073 seq_printf(m, "%d\t"
ccd979bd
MF
2074 "0x%lx\t"
2075 "0x%x\t"
2076 "0x%x\t"
2077 "%u\t"
2078 "%u\t"
2079 "%d\t"
2080 "%d\t",
ccd979bd
MF
2081 lockres->l_level,
2082 lockres->l_flags,
2083 lockres->l_action,
2084 lockres->l_unlock_action,
2085 lockres->l_ro_holders,
2086 lockres->l_ex_holders,
2087 lockres->l_requested,
2088 lockres->l_blocking);
2089
2090 /* Dump the raw LVB */
2091 lvb = lockres->l_lksb.lvb;
2092 for(i = 0; i < DLM_LVB_LEN; i++)
2093 seq_printf(m, "0x%x\t", lvb[i]);
2094
2095 /* End the line */
2096 seq_printf(m, "\n");
2097 return 0;
2098}
2099
2100static struct seq_operations ocfs2_dlm_seq_ops = {
2101 .start = ocfs2_dlm_seq_start,
2102 .stop = ocfs2_dlm_seq_stop,
2103 .next = ocfs2_dlm_seq_next,
2104 .show = ocfs2_dlm_seq_show,
2105};
2106
2107static int ocfs2_dlm_debug_release(struct inode *inode, struct file *file)
2108{
2109 struct seq_file *seq = (struct seq_file *) file->private_data;
2110 struct ocfs2_dlm_seq_priv *priv = seq->private;
2111 struct ocfs2_lock_res *res = &priv->p_iter_res;
2112
2113 ocfs2_remove_lockres_tracking(res);
2114 ocfs2_put_dlm_debug(priv->p_dlm_debug);
2115 return seq_release_private(inode, file);
2116}
2117
2118static int ocfs2_dlm_debug_open(struct inode *inode, struct file *file)
2119{
2120 int ret;
2121 struct ocfs2_dlm_seq_priv *priv;
2122 struct seq_file *seq;
2123 struct ocfs2_super *osb;
2124
2125 priv = kzalloc(sizeof(struct ocfs2_dlm_seq_priv), GFP_KERNEL);
2126 if (!priv) {
2127 ret = -ENOMEM;
2128 mlog_errno(ret);
2129 goto out;
2130 }
2131 osb = (struct ocfs2_super *) inode->u.generic_ip;
2132 ocfs2_get_dlm_debug(osb->osb_dlm_debug);
2133 priv->p_dlm_debug = osb->osb_dlm_debug;
2134 INIT_LIST_HEAD(&priv->p_iter_res.l_debug_list);
2135
2136 ret = seq_open(file, &ocfs2_dlm_seq_ops);
2137 if (ret) {
2138 kfree(priv);
2139 mlog_errno(ret);
2140 goto out;
2141 }
2142
2143 seq = (struct seq_file *) file->private_data;
2144 seq->private = priv;
2145
2146 ocfs2_add_lockres_tracking(&priv->p_iter_res,
2147 priv->p_dlm_debug);
2148
2149out:
2150 return ret;
2151}
2152
4b6f5d20 2153static const struct file_operations ocfs2_dlm_debug_fops = {
ccd979bd
MF
2154 .open = ocfs2_dlm_debug_open,
2155 .release = ocfs2_dlm_debug_release,
2156 .read = seq_read,
2157 .llseek = seq_lseek,
2158};
2159
2160static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
2161{
2162 int ret = 0;
2163 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2164
2165 dlm_debug->d_locking_state = debugfs_create_file("locking_state",
2166 S_IFREG|S_IRUSR,
2167 osb->osb_debug_root,
2168 osb,
2169 &ocfs2_dlm_debug_fops);
2170 if (!dlm_debug->d_locking_state) {
2171 ret = -EINVAL;
2172 mlog(ML_ERROR,
2173 "Unable to create locking state debugfs file.\n");
2174 goto out;
2175 }
2176
2177 ocfs2_get_dlm_debug(dlm_debug);
2178out:
2179 return ret;
2180}
2181
2182static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
2183{
2184 struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
2185
2186 if (dlm_debug) {
2187 debugfs_remove(dlm_debug->d_locking_state);
2188 ocfs2_put_dlm_debug(dlm_debug);
2189 }
2190}
2191
2192int ocfs2_dlm_init(struct ocfs2_super *osb)
2193{
2194 int status;
2195 u32 dlm_key;
2196 struct dlm_ctxt *dlm;
2197
2198 mlog_entry_void();
2199
2200 status = ocfs2_dlm_init_debug(osb);
2201 if (status < 0) {
2202 mlog_errno(status);
2203 goto bail;
2204 }
2205
2206 /* launch vote thread */
78427043 2207 osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote");
ccd979bd
MF
2208 if (IS_ERR(osb->vote_task)) {
2209 status = PTR_ERR(osb->vote_task);
2210 osb->vote_task = NULL;
2211 mlog_errno(status);
2212 goto bail;
2213 }
2214
2215 /* used by the dlm code to make message headers unique, each
2216 * node in this domain must agree on this. */
2217 dlm_key = crc32_le(0, osb->uuid_str, strlen(osb->uuid_str));
2218
2219 /* for now, uuid == domain */
2220 dlm = dlm_register_domain(osb->uuid_str, dlm_key);
2221 if (IS_ERR(dlm)) {
2222 status = PTR_ERR(dlm);
2223 mlog_errno(status);
2224 goto bail;
2225 }
2226
2227 ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
2228 ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
2229
2230 dlm_register_eviction_cb(dlm, &osb->osb_eviction_cb);
2231
2232 osb->dlm = dlm;
2233
2234 status = 0;
2235bail:
2236 if (status < 0) {
2237 ocfs2_dlm_shutdown_debug(osb);
2238 if (osb->vote_task)
2239 kthread_stop(osb->vote_task);
2240 }
2241
2242 mlog_exit(status);
2243 return status;
2244}
2245
2246void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
2247{
2248 mlog_entry_void();
2249
2250 dlm_unregister_eviction_cb(&osb->osb_eviction_cb);
2251
2252 ocfs2_drop_osb_locks(osb);
2253
2254 if (osb->vote_task) {
2255 kthread_stop(osb->vote_task);
2256 osb->vote_task = NULL;
2257 }
2258
2259 ocfs2_lock_res_free(&osb->osb_super_lockres);
2260 ocfs2_lock_res_free(&osb->osb_rename_lockres);
2261
2262 dlm_unregister_domain(osb->dlm);
2263 osb->dlm = NULL;
2264
2265 ocfs2_dlm_shutdown_debug(osb);
2266
2267 mlog_exit_void();
2268}
2269
/*
 * Handles the status the DLM reports for an unlock or cancel request on the
 * lock resource passed in @opaque.
 *
 *  - DLM_CANCELGRANT while l_unlock_action is OCFS2_UNLOCK_CANCEL_CONVERT:
 *    only l_unlock_action is reset; OCFS2_LOCK_BUSY is left alone.
 *  - any other status than DLM_NORMAL: logged as an error and nothing else
 *    is changed (no flag is cleared and nobody is woken).
 *  - DLM_NORMAL: a cancelled convert resets l_action, a dropped lock sets
 *    l_level to LKM_IVMODE; OCFS2_LOCK_BUSY is then cleared.
 *
 * On the first and last path waiters on l_event are woken afterwards.
 */
2270static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status)
2271{
2272 struct ocfs2_lock_res *lockres = opaque;
2273 unsigned long flags;
2274
2275 mlog_entry_void();
2276
2277 mlog(0, "UNLOCK AST called on lock %s, action = %d\n", lockres->l_name,
2278 lockres->l_unlock_action);
2279
2280 spin_lock_irqsave(&lockres->l_lock, flags);
2281 /* We tried to cancel a convert request, but it was already
2282 * granted. All we want to do here is clear our unlock
2283 * state. The wake_up call done at the bottom is redundant
2284 * (ocfs2_prepare_cancel_convert doesn't sleep on this) but doesn't
2285 * hurt anything anyway */
2286 if (status == DLM_CANCELGRANT &&
2287 lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
2288 mlog(0, "Got cancelgrant for %s\n", lockres->l_name);
2289
2290 /* We don't clear the busy flag in this case as it
2291 * should have been cleared by the ast which the dlm
2292 * has called. */
2293 goto complete_unlock;
2294 }
2295
 /* Any other failure: log it and return with the lockres state
  * (including OCFS2_LOCK_BUSY and l_unlock_action) untouched. This
  * path also skips mlog_exit_void(). */
2296 if (status != DLM_NORMAL) {
2297 mlog(ML_ERROR, "Dlm passes status %d for lock %s, "
2298 "unlock_action %d\n", status, lockres->l_name,
2299 lockres->l_unlock_action);
2300 spin_unlock_irqrestore(&lockres->l_lock, flags);
2301 return;
2302 }
2303
 /* DLM_NORMAL: finish whichever unlock action was in flight; any
  * other value of l_unlock_action is a BUG(). */
2304 switch(lockres->l_unlock_action) {
2305 case OCFS2_UNLOCK_CANCEL_CONVERT:
2306 mlog(0, "Cancel convert success for %s\n", lockres->l_name);
2307 lockres->l_action = OCFS2_AST_INVALID;
2308 break;
2309 case OCFS2_UNLOCK_DROP_LOCK:
2310 lockres->l_level = LKM_IVMODE;
2311 break;
2312 default:
2313 BUG();
2314 }
2315
2316 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
2317complete_unlock:
2318 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
2319 spin_unlock_irqrestore(&lockres->l_lock, flags);
2320
2321 wake_up(&lockres->l_event);
2322
2323 mlog_exit_void();
2324}
2325
/* Hook run by ocfs2_drop_lock() on a lockres just before it is unlocked. */
typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);

/* A pre-drop hook together with the opaque argument it is called with. */
struct drop_lock_cb {
	ocfs2_pre_drop_cb_t	*drop_func;	/* called as drop_func(lockres, drop_data) */
	void			*drop_data;
};
2332
/*
 * Release the DLM lock behind @lockres as part of tearing the lockres down.
 *
 * @osb:     superblock whose DLM domain (osb->dlm) the lock lives in
 * @lockres: lock resource; must already be flagged OCFS2_LOCK_FREEING
 *           (asserted below) unless it was never initialised
 * @dcb:     optional hook, called with l_lock held once the lockres is no
 *           longer busy; may be NULL
 *
 * Nothing is sent to the DLM when the lockres lacks OCFS2_LOCK_INITIALIZED
 * or OCFS2_LOCK_ATTACHED.  Otherwise dlmunlock() is issued with LKM_VALBLK
 * and the function waits on the busy lock again before returning.  A
 * dlmunlock() failure is treated as fatal (BUG).
 *
 * Always returns 0.
 */
2333static int ocfs2_drop_lock(struct ocfs2_super *osb,
2334 struct ocfs2_lock_res *lockres,
2335 struct drop_lock_cb *dcb)
2336{
2337 enum dlm_status status;
2338 unsigned long flags;
2339
2340 /* We didn't get anywhere near actually using this lockres. */
2341 if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
2342 goto out;
2343
2344 spin_lock_irqsave(&lockres->l_lock, flags);
2345
2346 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
2347 "lockres %s, flags 0x%lx\n",
2348 lockres->l_name, lockres->l_flags);
2349
 /* l_lock is dropped around every wait and retaken before the flag
  * is tested again. */
2350 while (lockres->l_flags & OCFS2_LOCK_BUSY) {
2351 mlog(0, "waiting on busy lock \"%s\": flags = %lx, action = "
2352 "%u, unlock_action = %u\n",
2353 lockres->l_name, lockres->l_flags, lockres->l_action,
2354 lockres->l_unlock_action);
2355
2356 spin_unlock_irqrestore(&lockres->l_lock, flags);
2357
2358 /* XXX: Today we just wait on any busy
2359 * locks... Perhaps we need to cancel converts in the
2360 * future? */
2361 ocfs2_wait_on_busy_lock(lockres);
2362
2363 spin_lock_irqsave(&lockres->l_lock, flags);
2364 }
2365
 /* Caller-supplied pre-drop hook; invoked with l_lock held. */
2366 if (dcb)
2367 dcb->drop_func(lockres, dcb->drop_data);
2368
2369 if (lockres->l_flags & OCFS2_LOCK_BUSY)
2370 mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
2371 lockres->l_name);
2372 if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
2373 mlog(0, "destroying blocked lock: \"%s\"\n", lockres->l_name);
2374
 /* Without OCFS2_LOCK_ATTACHED dlmunlock() is skipped entirely. */
2375 if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
2376 spin_unlock_irqrestore(&lockres->l_lock, flags);
2377 goto out;
2378 }
2379
2380 lockres_clear_flags(lockres, OCFS2_LOCK_ATTACHED);
2381
2382 /* make sure we never get here while waiting for an ast to
2383 * fire. */
2384 BUG_ON(lockres->l_action != OCFS2_AST_INVALID);
2385
2386 /* is this necessary? */
2387 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
2388 lockres->l_unlock_action = OCFS2_UNLOCK_DROP_LOCK;
2389 spin_unlock_irqrestore(&lockres->l_lock, flags);
2390
2391 mlog(0, "lock %s\n", lockres->l_name);
2392
2393 status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK,
2394 lockres->l_ops->unlock_ast, lockres);
2395 if (status != DLM_NORMAL) {
2396 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2397 mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
2398 dlm_print_one_lock(lockres->l_lksb.lockid);
2399 BUG();
2400 }
2401 mlog(0, "lock %s, successfull return from dlmunlock\n",
2402 lockres->l_name);
2403
 /* OCFS2_LOCK_BUSY was set above; presumably the unlock AST clears it
  * and ends this wait -- confirm against ocfs2_wait_on_busy_lock(). */
2404 ocfs2_wait_on_busy_lock(lockres);
2405out:
2406 mlog_exit(0);
2407 return 0;
2408}
2409
2410/* Mark the lockres as being dropped. It will no longer be
2411 * queued if blocking, but we still may have to wait on it
2412 * being dequeued from the vote thread before we can consider
2413 * it safe to drop.
2414 *
2415 * You can *not* attempt to call cluster_lock on this lockres anymore. */
2416void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
2417{
2418 int status;
2419 struct ocfs2_mask_waiter mw;
2420 unsigned long flags;
2421
2422 ocfs2_init_mask_waiter(&mw);
2423
2424 spin_lock_irqsave(&lockres->l_lock, flags);
2425 lockres->l_flags |= OCFS2_LOCK_FREEING;
2426 while (lockres->l_flags & OCFS2_LOCK_QUEUED) {
2427 lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0);
2428 spin_unlock_irqrestore(&lockres->l_lock, flags);
2429
2430 mlog(0, "Waiting on lockres %s\n", lockres->l_name);
2431
2432 status = ocfs2_wait_for_mask(&mw);
2433 if (status)
2434 mlog_errno(status);
2435
2436 spin_lock_irqsave(&lockres->l_lock, flags);
2437 }
2438 spin_unlock_irqrestore(&lockres->l_lock, flags);
2439}
2440
d680efe9
MF
2441void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
2442 struct ocfs2_lock_res *lockres)
ccd979bd 2443{
d680efe9 2444 int ret;
ccd979bd 2445
d680efe9
MF
2446 ocfs2_mark_lockres_freeing(lockres);
2447 ret = ocfs2_drop_lock(osb, lockres, NULL);
2448 if (ret)
2449 mlog_errno(ret);
2450}
ccd979bd 2451
d680efe9
MF
2452static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
2453{
2454 ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
2455 ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
ccd979bd
MF
2456}
2457
2458static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data)
2459{
2460 struct inode *inode = data;
2461
2462 /* the metadata lock requires a bit more work as we have an
2463 * LVB to worry about. */
2464 if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
2465 lockres->l_level == LKM_EXMODE &&
2466 !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
2467 __ocfs2_stuff_meta_lvb(inode);
2468}
2469
2470int ocfs2_drop_inode_locks(struct inode *inode)
2471{
2472 int status, err;
2473 struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
2474
2475 mlog_entry_void();
2476
2477 /* No need to call ocfs2_mark_lockres_freeing here -
2478 * ocfs2_clear_inode has done it for us. */
2479
2480 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2481 &OCFS2_I(inode)->ip_data_lockres,
2482 NULL);
2483 if (err < 0)
2484 mlog_errno(err);
2485
2486 status = err;
2487
2488 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2489 &OCFS2_I(inode)->ip_meta_lockres,
2490 &meta_dcb);
2491 if (err < 0)
2492 mlog_errno(err);
2493 if (err < 0 && !status)
2494 status = err;
2495
2496 err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
2497 &OCFS2_I(inode)->ip_rw_lockres,
2498 NULL);
2499 if (err < 0)
2500 mlog_errno(err);
2501 if (err < 0 && !status)
2502 status = err;
2503
2504 mlog_exit(status);
2505 return status;
2506}
2507
2508static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres,
2509 int new_level)
2510{
2511 assert_spin_locked(&lockres->l_lock);
2512
2513 BUG_ON(lockres->l_blocking <= LKM_NLMODE);
2514
2515 if (lockres->l_level <= new_level) {
2516 mlog(ML_ERROR, "lockres->l_level (%u) <= new_level (%u)\n",
2517 lockres->l_level, new_level);
2518 BUG();
2519 }
2520
2521 mlog(0, "lock %s, new_level = %d, l_blocking = %d\n",
2522 lockres->l_name, new_level, lockres->l_blocking);
2523
2524 lockres->l_action = OCFS2_AST_DOWNCONVERT;
2525 lockres->l_requested = new_level;
2526 lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
2527}
2528
2529static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
2530 struct ocfs2_lock_res *lockres,
2531 int new_level,
2532 int lvb)
2533{
2534 int ret, dlm_flags = LKM_CONVERT;
2535 enum dlm_status status;
2536
2537 mlog_entry_void();
2538
2539 if (lvb)
2540 dlm_flags |= LKM_VALBLK;
2541
2542 status = dlmlock(osb->dlm,
2543 new_level,
2544 &lockres->l_lksb,
2545 dlm_flags,
2546 lockres->l_name,
f0681062 2547 OCFS2_LOCK_ID_MAX_LEN - 1,
ccd979bd
MF
2548 lockres->l_ops->ast,
2549 lockres,
2550 lockres->l_ops->bast);
2551 if (status != DLM_NORMAL) {
2552 ocfs2_log_dlm_error("dlmlock", status, lockres);
2553 ret = -EINVAL;
2554 ocfs2_recover_from_dlm_error(lockres, 1);
2555 goto bail;
2556 }
2557
2558 ret = 0;
2559bail:
2560 mlog_exit(ret);
2561 return ret;
2562}
2563
2564/* returns 1 when the caller should unlock and call dlmunlock */
2565static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb,
2566 struct ocfs2_lock_res *lockres)
2567{
2568 assert_spin_locked(&lockres->l_lock);
2569
2570 mlog_entry_void();
2571 mlog(0, "lock %s\n", lockres->l_name);
2572
2573 if (lockres->l_unlock_action == OCFS2_UNLOCK_CANCEL_CONVERT) {
2574 /* If we're already trying to cancel a lock conversion
2575 * then just drop the spinlock and allow the caller to
2576 * requeue this lock. */
2577
2578 mlog(0, "Lockres %s, skip convert\n", lockres->l_name);
2579 return 0;
2580 }
2581
2582 /* were we in a convert when we got the bast fire? */
2583 BUG_ON(lockres->l_action != OCFS2_AST_CONVERT &&
2584 lockres->l_action != OCFS2_AST_DOWNCONVERT);
2585 /* set things up for the unlockast to know to just
2586 * clear out the ast_action and unset busy, etc. */
2587 lockres->l_unlock_action = OCFS2_UNLOCK_CANCEL_CONVERT;
2588
2589 mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_BUSY),
2590 "lock %s, invalid flags: 0x%lx\n",
2591 lockres->l_name, lockres->l_flags);
2592
2593 return 1;
2594}
2595
2596static int ocfs2_cancel_convert(struct ocfs2_super *osb,
2597 struct ocfs2_lock_res *lockres)
2598{
2599 int ret;
2600 enum dlm_status status;
2601
2602 mlog_entry_void();
2603 mlog(0, "lock %s\n", lockres->l_name);
2604
2605 ret = 0;
2606 status = dlmunlock(osb->dlm,
2607 &lockres->l_lksb,
2608 LKM_CANCEL,
2609 lockres->l_ops->unlock_ast,
2610 lockres);
2611 if (status != DLM_NORMAL) {
2612 ocfs2_log_dlm_error("dlmunlock", status, lockres);
2613 ret = -EINVAL;
2614 ocfs2_recover_from_dlm_error(lockres, 0);
2615 }
2616
2617 mlog(0, "lock %s return from dlmunlock\n", lockres->l_name);
2618
2619 mlog_exit(ret);
2620 return ret;
2621}
2622
2623static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
2624 struct ocfs2_lock_res *lockres,
2625 int new_level)
2626{
2627 int ret;
2628
2629 mlog_entry_void();
2630
2631 BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
2632
2633 if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
2634 ret = 0;
2635 mlog(0, "lockres %s currently being refreshed -- backing "
2636 "off!\n", lockres->l_name);
2637 } else if (new_level == LKM_PRMODE)
2638 ret = !lockres->l_ex_holders &&
2639 ocfs2_inode_fully_checkpointed(inode);
2640 else /* Must be NLMODE we're converting to. */
2641 ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
2642 ocfs2_inode_fully_checkpointed(inode);
2643
2644 mlog_exit(ret);
2645 return ret;
2646}
2647
/*
 * Try to downconvert the metadata lock of @inode, which must currently be
 * flagged OCFS2_LOCK_BLOCKED and held at EX or PR (both asserted).
 *
 * @requeue: set to 1 when the lock could not be downconverted yet (it is
 *           busy, or ocfs2_can_downconvert_meta_lock() said no).  It is NOT
 *           written on the downconvert path, so the caller must initialise
 *           it beforehand.
 *
 * Returns 0, or a negative error from ocfs2_cancel_convert() /
 * ocfs2_downconvert_lock().
 */
2648static int ocfs2_do_unblock_meta(struct inode *inode,
2649 int *requeue)
2650{
2651 int new_level;
2652 int set_lvb = 0;
2653 int ret = 0;
2654 struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
2655 unsigned long flags;
2656
2657 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2658
2659 mlog_entry_void();
2660
2661 spin_lock_irqsave(&lockres->l_lock, flags);
2662
2663 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
2664
2665 mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
2666 lockres->l_blocking);
2667
2668 BUG_ON(lockres->l_level != LKM_EXMODE &&
2669 lockres->l_level != LKM_PRMODE);
2670
 /* OCFS2_LOCK_BUSY is set: ask for a requeue and, unless a cancel is
  * already pending, cancel the outstanding convert. The cancel itself
  * is issued after l_lock has been dropped. */
2671 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
2672 *requeue = 1;
2673 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2674 spin_unlock_irqrestore(&lockres->l_lock, flags);
2675 if (ret) {
2676 ret = ocfs2_cancel_convert(osb, lockres);
2677 if (ret < 0)
2678 mlog_errno(ret);
2679 }
2680 goto leave;
2681 }
2682
2683 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2684
2685 mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
2686 lockres->l_level, lockres->l_blocking, new_level);
2687
2688 if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
 /* Only a lock held at EX sends its LVB with the downconvert. */
2689 if (lockres->l_level == LKM_EXMODE)
2690 set_lvb = 1;
2691
2692 /* If the lock hasn't been refreshed yet (rare), then
2693 * our memory inode values are old and we skip
2694 * stuffing the lvb. There's no need to actually clear
2695 * out the lvb here as it's value is still valid. */
2696 if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
2697 if (set_lvb)
2698 __ocfs2_stuff_meta_lvb(inode);
2699 } else
2700 mlog(0, "lockres %s: downconverting stale lock!\n",
2701 lockres->l_name);
2702
2703 mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
2704 "l_blocking=%d, new_level=%d\n",
2705 lockres->l_level, lockres->l_blocking, new_level);
2706
2707 ocfs2_prepare_downconvert(lockres, new_level);
2708 spin_unlock_irqrestore(&lockres->l_lock, flags);
2709 ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
2710 goto leave;
2711 }
 /* Cannot downconvert yet: start a checkpoint if the inode is not
  * fully checkpointed, then ask to be requeued. */
2712 if (!ocfs2_inode_fully_checkpointed(inode))
2713 ocfs2_start_checkpoint(osb);
2714
2715 *requeue = 1;
2716 spin_unlock_irqrestore(&lockres->l_lock, flags);
2717 ret = 0;
2718leave:
2719 mlog_exit(ret);
2720 return ret;
2721}
2722
/*
 * Common unblock logic: downconvert @lockres (which must be flagged
 * OCFS2_LOCK_BLOCKED) to the highest level compatible with what is being
 * blocked, once no incompatible local holders remain.
 *
 * @ctl:    ctl->requeue is set to 1 when the lock is busy or still has
 *          incompatible holders, and to 0 right before the downconvert is
 *          issued.  ctl->unblock_action receives the worker's return value
 *          when a worker is given.
 * @worker: optional callback run with l_lock dropped before the
 *          downconvert; may be NULL.  If it returns UNBLOCK_STOP_POST the
 *          function leaves without downconverting and without touching
 *          ctl->requeue.
 *
 * Returns 0, or a negative error from ocfs2_cancel_convert() /
 * ocfs2_downconvert_lock().
 */
2723static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
2724 struct ocfs2_lock_res *lockres,
d680efe9 2725 struct ocfs2_unblock_ctl *ctl,
ccd979bd
MF
2726 ocfs2_convert_worker_t *worker)
2727{
2728 unsigned long flags;
2729 int blocking;
2730 int new_level;
2731 int ret = 0;
2732
2733 mlog_entry_void();
2734
2735 spin_lock_irqsave(&lockres->l_lock, flags);
2736
2737 BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
2738
 /* Every jump back to recheck happens with l_lock held. */
2739recheck:
2740 if (lockres->l_flags & OCFS2_LOCK_BUSY) {
d680efe9 2741 ctl->requeue = 1;
ccd979bd
MF
2742 ret = ocfs2_prepare_cancel_convert(osb, lockres);
2743 spin_unlock_irqrestore(&lockres->l_lock, flags);
2744 if (ret) {
2745 ret = ocfs2_cancel_convert(osb, lockres);
2746 if (ret < 0)
2747 mlog_errno(ret);
2748 }
2749 goto leave;
2750 }
2751
2752 /* if we're blocking an exclusive and we have *any* holders,
2753 * then requeue. */
2754 if ((lockres->l_blocking == LKM_EXMODE)
2755 && (lockres->l_ex_holders || lockres->l_ro_holders)) {
2756 spin_unlock_irqrestore(&lockres->l_lock, flags);
d680efe9 2757 ctl->requeue = 1;
ccd979bd
MF
2758 ret = 0;
2759 goto leave;
2760 }
2761
2762 /* If it's a PR we're blocking, then only
2763 * requeue if we've got any EX holders */
2764 if (lockres->l_blocking == LKM_PRMODE &&
2765 lockres->l_ex_holders) {
2766 spin_unlock_irqrestore(&lockres->l_lock, flags);
d680efe9 2767 ctl->requeue = 1;
ccd979bd
MF
2768 ret = 0;
2769 goto leave;
2770 }
2771
2772 /* If we get here, then we know that there are no more
2773 * incompatible holders (and anyone asking for an incompatible
2774 * lock is blocked). We can now downconvert the lock */
2775 if (!worker)
2776 goto downconvert;
2777
2778 /* Some lockres types want to do a bit of work before
2779 * downconverting a lock. Allow that here. The worker function
2780 * may sleep, so we save off a copy of what we're blocking as
2781 * it may change while we're not holding the spin lock. */
2782 blocking = lockres->l_blocking;
2783 spin_unlock_irqrestore(&lockres->l_lock, flags);
2784
d680efe9
MF
2785 ctl->unblock_action = worker(lockres, blocking);
2786
 /* The worker asked us to stop: skip the downconvert. l_lock is not
  * held at this point. */
2787 if (ctl->unblock_action == UNBLOCK_STOP_POST)
2788 goto leave;
ccd979bd
MF
2789
2790 spin_lock_irqsave(&lockres->l_lock, flags);
2791 if (blocking != lockres->l_blocking) {
2792 /* If this changed underneath us, then we can't drop
2793 * it just yet. */
2794 goto recheck;
2795 }
2796
2797downconvert:
d680efe9 2798 ctl->requeue = 0;
ccd979bd
MF
2799 new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
2800
2801 ocfs2_prepare_downconvert(lockres, new_level);
2802 spin_unlock_irqrestore(&lockres->l_lock, flags);
 /* Last argument 0: this generic path never sends an LVB. */
2803 ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0);
2804leave:
2805 mlog_exit(ret);
2806 return ret;
2807}
2808
d680efe9
MF
2809static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
2810 int blocking)
ccd979bd
MF
2811{
2812 struct inode *inode;
2813 struct address_space *mapping;
2814
ccd979bd
MF
2815 inode = ocfs2_lock_res_inode(lockres);
2816 mapping = inode->i_mapping;
2817
2818 if (filemap_fdatawrite(mapping)) {
b0697053
MF
2819 mlog(ML_ERROR, "Could not sync inode %llu for downconvert!",
2820 (unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd
MF
2821 }
2822 sync_mapping_buffers(mapping);
2823 if (blocking == LKM_EXMODE) {
2824 truncate_inode_pages(mapping, 0);
2825 unmap_mapping_range(mapping, 0, 0, 0);
2826 } else {
2827 /* We only need to wait on the I/O if we're not also
2828 * truncating pages because truncate_inode_pages waits
2829 * for us above. We don't truncate pages if we're
2830 * blocking anything < EXMODE because we want to keep
2831 * them around in that case. */
2832 filemap_fdatawait(mapping);
2833 }
2834
d680efe9 2835 return UNBLOCK_CONTINUE;
ccd979bd
MF
2836}
2837
2838int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
d680efe9 2839 struct ocfs2_unblock_ctl *ctl)
ccd979bd
MF
2840{
2841 int status;
2842 struct inode *inode;
2843 struct ocfs2_super *osb;
2844
2845 mlog_entry_void();
2846
2847 inode = ocfs2_lock_res_inode(lockres);
2848 osb = OCFS2_SB(inode->i_sb);
2849
b0697053
MF
2850 mlog(0, "unblock inode %llu\n",
2851 (unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd 2852
d680efe9 2853 status = ocfs2_generic_unblock_lock(osb, lockres, ctl,
ccd979bd
MF
2854 ocfs2_data_convert_worker);
2855 if (status < 0)
2856 mlog_errno(status);
2857
b0697053 2858 mlog(0, "inode %llu, requeue = %d\n",
d680efe9 2859 (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
ccd979bd
MF
2860
2861 mlog_exit(status);
2862 return status;
2863}
2864
2865static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
d680efe9 2866 struct ocfs2_unblock_ctl *ctl)
ccd979bd
MF
2867{
2868 int status;
2869 struct inode *inode;
2870
2871 mlog_entry_void();
2872
2873 mlog(0, "Unblock lockres %s\n", lockres->l_name);
2874
2875 inode = ocfs2_lock_res_inode(lockres);
2876
2877 status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
d680efe9 2878 lockres, ctl, NULL);
ccd979bd
MF
2879 if (status < 0)
2880 mlog_errno(status);
2881
2882 mlog_exit(status);
2883 return status;
2884}
2885
d680efe9
MF
2886static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
2887 struct ocfs2_unblock_ctl *ctl)
ccd979bd
MF
2888{
2889 int status;
2890 struct inode *inode;
2891
2892 mlog_entry_void();
2893
2894 inode = ocfs2_lock_res_inode(lockres);
2895
b0697053
MF
2896 mlog(0, "unblock inode %llu\n",
2897 (unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd 2898
d680efe9 2899 status = ocfs2_do_unblock_meta(inode, &ctl->requeue);
ccd979bd
MF
2900 if (status < 0)
2901 mlog_errno(status);
2902
b0697053 2903 mlog(0, "inode %llu, requeue = %d\n",
d680efe9 2904 (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
ccd979bd
MF
2905
2906 mlog_exit(status);
2907 return status;
2908}
2909
d680efe9
MF
/*
 * Post-unlock hook for dentry locks: performs the final reference drop on
 * the dentry lock.  Today this runs in the vote thread; the dlmglue API
 * could be simplified by pushing these drops to the ocfs2_wq in the future.
 */
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
				     struct ocfs2_lock_res *lockres)
{
	ocfs2_dentry_lock_put(osb, ocfs2_lock_res_dl(lockres));
}
2921
2922/*
2923 * d_delete() matching dentries before the lock downconvert.
2924 *
2925 * At this point, any process waiting to destroy the
2926 * dentry_lock due to last ref count is stopped by the
2927 * OCFS2_LOCK_QUEUED flag.
2928 *
2929 * We have two potential problems
2930 *
2931 * 1) If we do the last reference drop on our dentry_lock (via dput)
2932 * we'll wind up in ocfs2_release_dentry_lock(), waiting on
2933 * the downconvert to finish. Instead we take an elevated
2934 * reference and push the drop until after we've completed our
2935 * unblock processing.
2936 *
2937 * 2) There might be another process with a final reference,
2938 * waiting on us to finish processing. If this is the case, we
2939 * detect it and exit out - there's no more dentries anyway.
2940 */
/*
 * Return value:
 *   UNBLOCK_CONTINUE      - blocking a PR request, or no extra reference
 *                           could be taken (lockres freeing or dl_count
 *                           already zero); nothing to undo afterwards.
 *   UNBLOCK_STOP_POST     - aliases pruned and dl_count is 1 afterwards.
 *   UNBLOCK_CONTINUE_POST - aliases pruned and dl_count is not 1.
 */
2941static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
2942 int blocking)
2943{
2944 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
2945 struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
2946 struct dentry *dentry;
2947 unsigned long flags;
2948 int extra_ref = 0;
2949
2950 /*
2951 * This node is blocking another node from getting a read
2952 * lock. This happens when we've renamed within a
2953 * directory. We've forced the other nodes to d_delete(), but
2954 * we never actually dropped our lock because it's still
2955 * valid. The downconvert code will retain a PR for this node,
2956 * so there's no further work to do.
2957 */
2958 if (blocking == LKM_PRMODE)
2959 return UNBLOCK_CONTINUE;
2960
2961 /*
2962 * Mark this inode as potentially orphaned. The code in
2963 * ocfs2_delete_inode() will figure out whether it actually
2964 * needs to be freed or not.
2965 */
2966 spin_lock(&oi->ip_lock);
2967 oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
2968 spin_unlock(&oi->ip_lock);
2969
2970 /*
2971 * Yuck. We need to make sure however that the check of
2972 * OCFS2_LOCK_FREEING and the extra reference are atomic with
2973 * respect to a reference decrement or the setting of that
2974 * flag.
2975 */
 /* Lock order: l_lock (irqs off) outside, dentry_attach_lock inside. */
2976 spin_lock_irqsave(&lockres->l_lock, flags);
2977 spin_lock(&dentry_attach_lock);
2978 if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
2979 && dl->dl_count) {
2980 dl->dl_count++;
2981 extra_ref = 1;
2982 }
2983 spin_unlock(&dentry_attach_lock);
2984 spin_unlock_irqrestore(&lockres->l_lock, flags);
2985
2986 mlog(0, "extra_ref = %d\n", extra_ref);
2987
2988 /*
2989 * We have a process waiting on us in ocfs2_dentry_iput(),
2990 * which means we can't have any more outstanding
2991 * aliases. There's no need to do any more work.
2992 */
2993 if (!extra_ref)
2994 return UNBLOCK_CONTINUE;
2995
 /* dentry_attach_lock is held for each alias lookup and dropped
  * around the d_delete()/dput() of the alias that was found. */
2996 spin_lock(&dentry_attach_lock);
2997 while (1) {
2998 dentry = ocfs2_find_local_alias(dl->dl_inode,
2999 dl->dl_parent_blkno, 1);
3000 if (!dentry)
3001 break;
3002 spin_unlock(&dentry_attach_lock);
3003
3004 mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
3005 dentry->d_name.name);
3006
3007 /*
3008 * The following dcache calls may do an
3009 * iput(). Normally we don't want that from the
3010 * downconverting thread, but in this case it's ok
3011 * because the requesting node already has an
3012 * exclusive lock on the inode, so it can't be queued
3013 * for a downconvert.
3014 */
3015 d_delete(dentry);
3016 dput(dentry);
3017
3018 spin_lock(&dentry_attach_lock);
3019 }
3020 spin_unlock(&dentry_attach_lock);
3021
3022 /*
3023 * If we are the last holder of this dentry lock, there is no
3024 * reason to downconvert so skip straight to the unlock.
3025 */
 /* NOTE(review): dl_count is read here without dentry_attach_lock,
  * unlike the increment above -- confirm this is safe. */
3026 if (dl->dl_count == 1)
3027 return UNBLOCK_STOP_POST;
3028
3029 return UNBLOCK_CONTINUE_POST;
3030}
3031
3032static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
3033 struct ocfs2_unblock_ctl *ctl)
3034{
3035 int ret;
3036 struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
3037 struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
3038
3039 mlog(0, "unblock dentry lock: %llu\n",
3040 (unsigned long long)OCFS2_I(dl->dl_inode)->ip_blkno);
3041
3042 ret = ocfs2_generic_unblock_lock(osb,
3043 lockres,
3044 ctl,
3045 ocfs2_dentry_convert_worker);
3046 if (ret < 0)
3047 mlog_errno(ret);
3048
3049 mlog(0, "requeue = %d, post = %d\n", ctl->requeue, ctl->unblock_action);
3050
3051 return ret;
3052}
3053
ccd979bd
MF
3054/* Generic unblock function for any lockres whose private data is an
3055 * ocfs2_super pointer. */
3056static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
d680efe9 3057 struct ocfs2_unblock_ctl *ctl)
ccd979bd
MF
3058{
3059 int status;
3060 struct ocfs2_super *osb;
3061
3062 mlog_entry_void();
3063
3064 mlog(0, "Unblock lockres %s\n", lockres->l_name);
3065
3066 osb = ocfs2_lock_res_super(lockres);
3067
3068 status = ocfs2_generic_unblock_lock(osb,
3069 lockres,
d680efe9 3070 ctl,
ccd979bd
MF
3071 NULL);
3072 if (status < 0)
3073 mlog_errno(status);
3074
3075 mlog_exit(status);
3076 return status;
3077}
3078
/*
 * Process one blocked lockres: call its ->unblock() handler, then either
 * clear OCFS2_LOCK_QUEUED or schedule the lockres again, and finally run
 * the optional ->post_unlock() hook.
 *
 * @lockres must have l_ops and l_ops->unblock set (asserted).  A lockres
 * already flagged OCFS2_LOCK_FREEING skips the unblock handler and is just
 * unqueued.  A negative status from ->unblock() is only logged.
 */
3079void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
3080 struct ocfs2_lock_res *lockres)
3081{
3082 int status;
 /* Zero-initialised: requeue = 0 and unblock_action = 0 unless the
  * unblock handler changes them. */
d680efe9 3083 struct ocfs2_unblock_ctl ctl = {0, 0,};
ccd979bd
MF
3084 unsigned long flags;
3085
3086 /* Our reference to the lockres in this function can be
3087 * considered valid until we remove the OCFS2_LOCK_QUEUED
3088 * flag. */
3089
3090 mlog_entry_void();
3091
3092 BUG_ON(!lockres);
3093 BUG_ON(!lockres->l_ops);
3094 BUG_ON(!lockres->l_ops->unblock);
3095
3096 mlog(0, "lockres %s blocked.\n", lockres->l_name);
3097
3098 /* Detect whether a lock has been marked as going away while
3099 * the vote thread was processing other things. A lock can
3100 * still be marked with OCFS2_LOCK_FREEING after this check,
3101 * but short circuiting here will still save us some
3102 * performance. */
3103 spin_lock_irqsave(&lockres->l_lock, flags);
3104 if (lockres->l_flags & OCFS2_LOCK_FREEING)
3105 goto unqueue;
3106 spin_unlock_irqrestore(&lockres->l_lock, flags);
3107
d680efe9 3108 status = lockres->l_ops->unblock(lockres, &ctl);
ccd979bd
MF
3109 if (status < 0)
3110 mlog_errno(status);
3111
3112 spin_lock_irqsave(&lockres->l_lock, flags);
 /* Both paths reach unqueue with l_lock held. */
3113unqueue:
d680efe9 3114 if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
ccd979bd
MF
3115 lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
3116 } else
3117 ocfs2_schedule_blocked_lock(osb, lockres);
3118
3119 mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
d680efe9 3120 ctl.requeue ? "yes" : "no");
ccd979bd
MF
3121 spin_unlock_irqrestore(&lockres->l_lock, flags);
3122
d680efe9
MF
 /* NOTE(review): lockres is dereferenced here after OCFS2_LOCK_QUEUED
  * may have been cleared, which the comment at the top says ends the
  * validity of our reference -- confirm the post_unlock path is safe. */
3123 if (ctl.unblock_action != UNBLOCK_CONTINUE
3124 && lockres->l_ops->post_unlock)
3125 lockres->l_ops->post_unlock(osb, lockres);
3126
ccd979bd
MF
3127 mlog_exit_void();
3128}
3129
3130static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
3131 struct ocfs2_lock_res *lockres)
3132{
3133 mlog_entry_void();
3134
3135 assert_spin_locked(&lockres->l_lock);
3136
3137 if (lockres->l_flags & OCFS2_LOCK_FREEING) {
3138 /* Do not schedule a lock for downconvert when it's on
3139 * the way to destruction - any nodes wanting access
3140 * to the resource will get it soon. */
3141 mlog(0, "Lockres %s won't be scheduled: flags 0x%lx\n",
3142 lockres->l_name, lockres->l_flags);
3143 return;
3144 }
3145
3146 lockres_or_flags(lockres, OCFS2_LOCK_QUEUED);
3147
3148 spin_lock(&osb->vote_task_lock);
3149 if (list_empty(&lockres->l_blocked_list)) {
3150 list_add_tail(&lockres->l_blocked_list,
3151 &osb->blocked_lock_list);
3152 osb->blocked_lock_count++;
3153 }
3154 spin_unlock(&osb->vote_task_lock);
3155
3156 mlog_exit_void();
3157}
3158
3159/* This aids in debugging situations where a bad LVB might be involved. */
3160void ocfs2_dump_meta_lvb_info(u64 level,
3161 const char *function,
3162 unsigned int line,
3163 struct ocfs2_lock_res *lockres)
3164{
3165 struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
3166
3167 mlog(level, "LVB information for %s (called from %s:%u):\n",
3168 lockres->l_name, function, line);
3169 mlog(level, "version: %u, clusters: %u\n",
4d3b83f7 3170 lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters));
b0697053
MF
3171 mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
3172 (unsigned long long)be64_to_cpu(lvb->lvb_isize),
3173 be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
3174 be16_to_cpu(lvb->lvb_imode));
3175 mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
ca4d147e 3176 "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
b0697053
MF
3177 (long long)be64_to_cpu(lvb->lvb_iatime_packed),
3178 (long long)be64_to_cpu(lvb->lvb_ictime_packed),
ca4d147e
HP
3179 (long long)be64_to_cpu(lvb->lvb_imtime_packed),
3180 be32_to_cpu(lvb->lvb_iattr));
ccd979bd 3181}