[XFS] implement generic xfs_btree_updkey
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / fs / xfs / xfs_ialloc.c
CommitLineData
1da177e4 1/*
7b718769
NS
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
1da177e4 4 *
7b718769
NS
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
1da177e4
LT
7 * published by the Free Software Foundation.
8 *
7b718769
NS
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
1da177e4 13 *
7b718769
NS
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1da177e4 17 */
1da177e4 18#include "xfs.h"
a844f451 19#include "xfs_fs.h"
1da177e4 20#include "xfs_types.h"
a844f451 21#include "xfs_bit.h"
1da177e4 22#include "xfs_log.h"
a844f451 23#include "xfs_inum.h"
1da177e4
LT
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
1da177e4
LT
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
1da177e4 30#include "xfs_bmap_btree.h"
a844f451 31#include "xfs_alloc_btree.h"
1da177e4 32#include "xfs_ialloc_btree.h"
1da177e4 33#include "xfs_dir2_sf.h"
a844f451 34#include "xfs_attr_sf.h"
1da177e4
LT
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
a844f451
NS
37#include "xfs_btree.h"
38#include "xfs_ialloc.h"
1da177e4 39#include "xfs_alloc.h"
1da177e4
LT
40#include "xfs_rtalloc.h"
41#include "xfs_error.h"
42#include "xfs_bmap.h"
43
44/*
45 * Log specified fields for the inode given by bp and off.
46 */
47STATIC void
48xfs_ialloc_log_di(
49 xfs_trans_t *tp, /* transaction pointer */
50 xfs_buf_t *bp, /* inode buffer */
51 int off, /* index of inode in buffer */
52 int fields) /* bitmask of fields to log */
53{
54 int first; /* first byte number */
55 int ioffset; /* off in bytes */
56 int last; /* last byte number */
57 xfs_mount_t *mp; /* mount point structure */
58 static const short offsets[] = { /* field offsets */
59 /* keep in sync with bits */
60 offsetof(xfs_dinode_core_t, di_magic),
61 offsetof(xfs_dinode_core_t, di_mode),
62 offsetof(xfs_dinode_core_t, di_version),
63 offsetof(xfs_dinode_core_t, di_format),
64 offsetof(xfs_dinode_core_t, di_onlink),
65 offsetof(xfs_dinode_core_t, di_uid),
66 offsetof(xfs_dinode_core_t, di_gid),
67 offsetof(xfs_dinode_core_t, di_nlink),
68 offsetof(xfs_dinode_core_t, di_projid),
69 offsetof(xfs_dinode_core_t, di_pad),
70 offsetof(xfs_dinode_core_t, di_atime),
71 offsetof(xfs_dinode_core_t, di_mtime),
72 offsetof(xfs_dinode_core_t, di_ctime),
73 offsetof(xfs_dinode_core_t, di_size),
74 offsetof(xfs_dinode_core_t, di_nblocks),
75 offsetof(xfs_dinode_core_t, di_extsize),
76 offsetof(xfs_dinode_core_t, di_nextents),
77 offsetof(xfs_dinode_core_t, di_anextents),
78 offsetof(xfs_dinode_core_t, di_forkoff),
79 offsetof(xfs_dinode_core_t, di_aformat),
80 offsetof(xfs_dinode_core_t, di_dmevmask),
81 offsetof(xfs_dinode_core_t, di_dmstate),
82 offsetof(xfs_dinode_core_t, di_flags),
83 offsetof(xfs_dinode_core_t, di_gen),
84 offsetof(xfs_dinode_t, di_next_unlinked),
85 offsetof(xfs_dinode_t, di_u),
86 offsetof(xfs_dinode_t, di_a),
87 sizeof(xfs_dinode_t)
88 };
89
90
91 ASSERT(offsetof(xfs_dinode_t, di_core) == 0);
92 ASSERT((fields & (XFS_DI_U|XFS_DI_A)) == 0);
93 mp = tp->t_mountp;
94 /*
95 * Get the inode-relative first and last bytes for these fields
96 */
97 xfs_btree_offsets(fields, offsets, XFS_DI_NUM_BITS, &first, &last);
98 /*
99 * Convert to buffer offsets and log it.
100 */
101 ioffset = off << mp->m_sb.sb_inodelog;
102 first += ioffset;
103 last += ioffset;
104 xfs_trans_log_buf(tp, bp, first, last);
105}
106
107/*
108 * Allocation group level functions.
109 */
75de2a91
DC
110static inline int
111xfs_ialloc_cluster_alignment(
112 xfs_alloc_arg_t *args)
113{
114 if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
115 args->mp->m_sb.sb_inoalignmt >=
116 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
117 return args->mp->m_sb.sb_inoalignmt;
118 return 1;
119}
1da177e4 120
fe033cc8
CH
121/*
122 * Lookup the record equal to ino in the btree given by cur.
123 */
124STATIC int /* error */
125xfs_inobt_lookup_eq(
126 struct xfs_btree_cur *cur, /* btree cursor */
127 xfs_agino_t ino, /* starting inode of chunk */
128 __int32_t fcnt, /* free inode count */
129 xfs_inofree_t free, /* free inode mask */
130 int *stat) /* success/failure */
131{
132 cur->bc_rec.i.ir_startino = ino;
133 cur->bc_rec.i.ir_freecount = fcnt;
134 cur->bc_rec.i.ir_free = free;
135 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
136}
137
138/*
139 * Lookup the first record greater than or equal to ino
140 * in the btree given by cur.
141 */
142int /* error */
143xfs_inobt_lookup_ge(
144 struct xfs_btree_cur *cur, /* btree cursor */
145 xfs_agino_t ino, /* starting inode of chunk */
146 __int32_t fcnt, /* free inode count */
147 xfs_inofree_t free, /* free inode mask */
148 int *stat) /* success/failure */
149{
150 cur->bc_rec.i.ir_startino = ino;
151 cur->bc_rec.i.ir_freecount = fcnt;
152 cur->bc_rec.i.ir_free = free;
153 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
154}
155
156/*
157 * Lookup the first record less than or equal to ino
158 * in the btree given by cur.
159 */
160int /* error */
161xfs_inobt_lookup_le(
162 struct xfs_btree_cur *cur, /* btree cursor */
163 xfs_agino_t ino, /* starting inode of chunk */
164 __int32_t fcnt, /* free inode count */
165 xfs_inofree_t free, /* free inode mask */
166 int *stat) /* success/failure */
167{
168 cur->bc_rec.i.ir_startino = ino;
169 cur->bc_rec.i.ir_freecount = fcnt;
170 cur->bc_rec.i.ir_free = free;
171 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
172}
173
1da177e4
LT
174/*
175 * Allocate new inodes in the allocation group specified by agbp.
176 * Return 0 for success, else error code.
177 */
178STATIC int /* error code or 0 */
179xfs_ialloc_ag_alloc(
180 xfs_trans_t *tp, /* transaction pointer */
181 xfs_buf_t *agbp, /* alloc group buffer */
182 int *alloc)
183{
184 xfs_agi_t *agi; /* allocation group header */
185 xfs_alloc_arg_t args; /* allocation argument structure */
186 int blks_per_cluster; /* fs blocks per inode cluster */
187 xfs_btree_cur_t *cur; /* inode btree cursor */
188 xfs_daddr_t d; /* disk addr of buffer */
92821e2b 189 xfs_agnumber_t agno;
1da177e4
LT
190 int error;
191 xfs_buf_t *fbuf; /* new free inodes' buffer */
192 xfs_dinode_t *free; /* new free inode structure */
193 int i; /* inode counter */
194 int j; /* block counter */
195 int nbufs; /* num bufs of new inodes */
196 xfs_agino_t newino; /* new first inode's number */
197 xfs_agino_t newlen; /* new number of inodes */
198 int ninodes; /* num inodes per buf */
199 xfs_agino_t thisino; /* current inode number, for loop */
200 int version; /* inode version number to use */
3ccb8b5f 201 int isaligned = 0; /* inode allocation at stripe unit */
1da177e4 202 /* boundary */
359346a9 203 unsigned int gen;
1da177e4
LT
204
205 args.tp = tp;
206 args.mp = tp->t_mountp;
207
208 /*
209 * Locking will ensure that we don't have two callers in here
210 * at one time.
211 */
212 newlen = XFS_IALLOC_INODES(args.mp);
213 if (args.mp->m_maxicount &&
214 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
215 return XFS_ERROR(ENOSPC);
216 args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
217 /*
3ccb8b5f
GO
218 * First try to allocate inodes contiguous with the last-allocated
219 * chunk of inodes. If the filesystem is striped, this will fill
220 * an entire stripe unit with inodes.
221 */
1da177e4 222 agi = XFS_BUF_TO_AGI(agbp);
3ccb8b5f 223 newino = be32_to_cpu(agi->agi_newino);
019ff2d5
NS
224 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
225 XFS_IALLOC_BLOCKS(args.mp);
226 if (likely(newino != NULLAGINO &&
227 (args.agbno < be32_to_cpu(agi->agi_length)))) {
3ccb8b5f
GO
228 args.fsbno = XFS_AGB_TO_FSB(args.mp,
229 be32_to_cpu(agi->agi_seqno), args.agbno);
230 args.type = XFS_ALLOCTYPE_THIS_BNO;
231 args.mod = args.total = args.wasdel = args.isfl =
232 args.userdata = args.minalignslop = 0;
233 args.prod = 1;
75de2a91 234
3ccb8b5f 235 /*
75de2a91
DC
236 * We need to take into account alignment here to ensure that
237 * we don't modify the free list if we fail to have an exact
238 * block. If we don't have an exact match, and every oher
239 * attempt allocation attempt fails, we'll end up cancelling
240 * a dirty transaction and shutting down.
241 *
242 * For an exact allocation, alignment must be 1,
243 * however we need to take cluster alignment into account when
244 * fixing up the freelist. Use the minalignslop field to
245 * indicate that extra blocks might be required for alignment,
246 * but not to use them in the actual exact allocation.
3ccb8b5f 247 */
75de2a91
DC
248 args.alignment = 1;
249 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
250
251 /* Allow space for the inode btree to split. */
3ccb8b5f
GO
252 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
253 if ((error = xfs_alloc_vextent(&args)))
254 return error;
255 } else
256 args.fsbno = NULLFSBLOCK;
1da177e4 257
3ccb8b5f
GO
258 if (unlikely(args.fsbno == NULLFSBLOCK)) {
259 /*
260 * Set the alignment for the allocation.
261 * If stripe alignment is turned on then align at stripe unit
262 * boundary.
019ff2d5
NS
263 * If the cluster size is smaller than a filesystem block
264 * then we're doing I/O for inodes in filesystem block size
3ccb8b5f
GO
265 * pieces, so don't need alignment anyway.
266 */
267 isaligned = 0;
268 if (args.mp->m_sinoalign) {
269 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
270 args.alignment = args.mp->m_dalign;
271 isaligned = 1;
75de2a91
DC
272 } else
273 args.alignment = xfs_ialloc_cluster_alignment(&args);
3ccb8b5f
GO
274 /*
275 * Need to figure out where to allocate the inode blocks.
276 * Ideally they should be spaced out through the a.g.
277 * For now, just allocate blocks up front.
278 */
279 args.agbno = be32_to_cpu(agi->agi_root);
280 args.fsbno = XFS_AGB_TO_FSB(args.mp,
281 be32_to_cpu(agi->agi_seqno), args.agbno);
282 /*
283 * Allocate a fixed-size extent of inodes.
284 */
285 args.type = XFS_ALLOCTYPE_NEAR_BNO;
286 args.mod = args.total = args.wasdel = args.isfl =
287 args.userdata = args.minalignslop = 0;
288 args.prod = 1;
289 /*
290 * Allow space for the inode btree to split.
291 */
292 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
293 if ((error = xfs_alloc_vextent(&args)))
294 return error;
295 }
019ff2d5 296
1da177e4
LT
297 /*
298 * If stripe alignment is turned on, then try again with cluster
299 * alignment.
300 */
301 if (isaligned && args.fsbno == NULLFSBLOCK) {
302 args.type = XFS_ALLOCTYPE_NEAR_BNO;
16259e7d 303 args.agbno = be32_to_cpu(agi->agi_root);
1da177e4 304 args.fsbno = XFS_AGB_TO_FSB(args.mp,
16259e7d 305 be32_to_cpu(agi->agi_seqno), args.agbno);
75de2a91 306 args.alignment = xfs_ialloc_cluster_alignment(&args);
1da177e4
LT
307 if ((error = xfs_alloc_vextent(&args)))
308 return error;
309 }
310
311 if (args.fsbno == NULLFSBLOCK) {
312 *alloc = 0;
313 return 0;
314 }
315 ASSERT(args.len == args.minlen);
316 /*
317 * Convert the results.
318 */
319 newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
320 /*
321 * Loop over the new block(s), filling in the inodes.
322 * For small block sizes, manipulate the inodes in buffers
323 * which are multiples of the blocks size.
324 */
325 if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
326 blks_per_cluster = 1;
327 nbufs = (int)args.len;
328 ninodes = args.mp->m_sb.sb_inopblock;
329 } else {
330 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
331 args.mp->m_sb.sb_blocksize;
332 nbufs = (int)args.len / blks_per_cluster;
333 ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
334 }
335 /*
336 * Figure out what version number to use in the inodes we create.
337 * If the superblock version has caught up to the one that supports
338 * the new inode format, then use the new inode version. Otherwise
339 * use the old version so that old kernels will continue to be
340 * able to use the file system.
341 */
62118709 342 if (xfs_sb_version_hasnlink(&args.mp->m_sb))
1da177e4
LT
343 version = XFS_DINODE_VERSION_2;
344 else
345 version = XFS_DINODE_VERSION_1;
346
359346a9
DC
347 /*
348 * Seed the new inode cluster with a random generation number. This
349 * prevents short-term reuse of generation numbers if a chunk is
350 * freed and then immediately reallocated. We use random numbers
351 * rather than a linear progression to prevent the next generation
352 * number from being easily guessable.
353 */
354 gen = random32();
1da177e4
LT
355 for (j = 0; j < nbufs; j++) {
356 /*
357 * Get the block.
358 */
16259e7d 359 d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
1da177e4
LT
360 args.agbno + (j * blks_per_cluster));
361 fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
362 args.mp->m_bsize * blks_per_cluster,
363 XFS_BUF_LOCK);
364 ASSERT(fbuf);
365 ASSERT(!XFS_BUF_GETERROR(fbuf));
366 /*
f30a1211 367 * Set initial values for the inodes in this buffer.
1da177e4 368 */
f30a1211 369 xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
1da177e4
LT
370 for (i = 0; i < ninodes; i++) {
371 free = XFS_MAKE_IPTR(args.mp, fbuf, i);
347d1c01
CH
372 free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
373 free->di_core.di_version = version;
359346a9 374 free->di_core.di_gen = cpu_to_be32(gen);
347d1c01 375 free->di_next_unlinked = cpu_to_be32(NULLAGINO);
1da177e4
LT
376 xfs_ialloc_log_di(tp, fbuf, i,
377 XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
378 }
379 xfs_trans_inode_alloc_buf(tp, fbuf);
380 }
413d57c9
MS
381 be32_add_cpu(&agi->agi_count, newlen);
382 be32_add_cpu(&agi->agi_freecount, newlen);
92821e2b 383 agno = be32_to_cpu(agi->agi_seqno);
1da177e4 384 down_read(&args.mp->m_peraglock);
92821e2b 385 args.mp->m_perag[agno].pagi_freecount += newlen;
1da177e4 386 up_read(&args.mp->m_peraglock);
16259e7d 387 agi->agi_newino = cpu_to_be32(newino);
1da177e4
LT
388 /*
389 * Insert records describing the new inode chunk into the btree.
390 */
561f7d17 391 cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno);
1da177e4
LT
392 for (thisino = newino;
393 thisino < newino + newlen;
394 thisino += XFS_INODES_PER_CHUNK) {
395 if ((error = xfs_inobt_lookup_eq(cur, thisino,
396 XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
397 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
398 return error;
399 }
400 ASSERT(i == 0);
401 if ((error = xfs_inobt_insert(cur, &i))) {
402 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
403 return error;
404 }
405 ASSERT(i == 1);
406 }
407 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
408 /*
409 * Log allocation group header fields
410 */
411 xfs_ialloc_log_agi(tp, agbp,
412 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
413 /*
414 * Modify/log superblock values for inode count and inode free count.
415 */
416 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
417 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
418 *alloc = 1;
419 return 0;
420}
421
7989cb8e 422STATIC_INLINE xfs_agnumber_t
1da177e4
LT
423xfs_ialloc_next_ag(
424 xfs_mount_t *mp)
425{
426 xfs_agnumber_t agno;
427
428 spin_lock(&mp->m_agirotor_lock);
429 agno = mp->m_agirotor;
430 if (++mp->m_agirotor == mp->m_maxagi)
431 mp->m_agirotor = 0;
432 spin_unlock(&mp->m_agirotor_lock);
433
434 return agno;
435}
436
437/*
438 * Select an allocation group to look for a free inode in, based on the parent
439 * inode and then mode. Return the allocation group buffer.
440 */
441STATIC xfs_buf_t * /* allocation group buffer */
442xfs_ialloc_ag_select(
443 xfs_trans_t *tp, /* transaction pointer */
444 xfs_ino_t parent, /* parent directory inode number */
445 mode_t mode, /* bits set to indicate file type */
446 int okalloc) /* ok to allocate more space */
447{
448 xfs_buf_t *agbp; /* allocation group header buffer */
449 xfs_agnumber_t agcount; /* number of ag's in the filesystem */
450 xfs_agnumber_t agno; /* current ag number */
451 int flags; /* alloc buffer locking flags */
452 xfs_extlen_t ineed; /* blocks needed for inode allocation */
453 xfs_extlen_t longest = 0; /* longest extent available */
454 xfs_mount_t *mp; /* mount point structure */
455 int needspace; /* file mode implies space allocated */
456 xfs_perag_t *pag; /* per allocation group data */
457 xfs_agnumber_t pagno; /* parent (starting) ag number */
458
459 /*
460 * Files of these types need at least one block if length > 0
461 * (and they won't fit in the inode, but that's hard to figure out).
462 */
463 needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
464 mp = tp->t_mountp;
465 agcount = mp->m_maxagi;
466 if (S_ISDIR(mode))
467 pagno = xfs_ialloc_next_ag(mp);
468 else {
469 pagno = XFS_INO_TO_AGNO(mp, parent);
470 if (pagno >= agcount)
471 pagno = 0;
472 }
473 ASSERT(pagno < agcount);
474 /*
475 * Loop through allocation groups, looking for one with a little
476 * free space in it. Note we don't look for free inodes, exactly.
477 * Instead, we include whether there is a need to allocate inodes
478 * to mean that blocks must be allocated for them,
479 * if none are currently free.
480 */
481 agno = pagno;
482 flags = XFS_ALLOC_FLAG_TRYLOCK;
483 down_read(&mp->m_peraglock);
484 for (;;) {
485 pag = &mp->m_perag[agno];
486 if (!pag->pagi_init) {
487 if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
488 agbp = NULL;
489 goto nextag;
490 }
491 } else
492 agbp = NULL;
493
494 if (!pag->pagi_inodeok) {
495 xfs_ialloc_next_ag(mp);
496 goto unlock_nextag;
497 }
498
499 /*
500 * Is there enough free space for the file plus a block
501 * of inodes (if we need to allocate some)?
502 */
503 ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
504 if (ineed && !pag->pagf_init) {
505 if (agbp == NULL &&
506 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
507 agbp = NULL;
508 goto nextag;
509 }
510 (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
511 }
512 if (!ineed || pag->pagf_init) {
513 if (ineed && !(longest = pag->pagf_longest))
514 longest = pag->pagf_flcount > 0;
515 if (!ineed ||
516 (pag->pagf_freeblks >= needspace + ineed &&
517 longest >= ineed &&
518 okalloc)) {
519 if (agbp == NULL &&
520 xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
521 agbp = NULL;
522 goto nextag;
523 }
524 up_read(&mp->m_peraglock);
525 return agbp;
526 }
527 }
528unlock_nextag:
529 if (agbp)
530 xfs_trans_brelse(tp, agbp);
531nextag:
532 /*
533 * No point in iterating over the rest, if we're shutting
534 * down.
535 */
536 if (XFS_FORCED_SHUTDOWN(mp)) {
537 up_read(&mp->m_peraglock);
1121b219 538 return NULL;
1da177e4
LT
539 }
540 agno++;
541 if (agno >= agcount)
542 agno = 0;
543 if (agno == pagno) {
544 if (flags == 0) {
545 up_read(&mp->m_peraglock);
1121b219 546 return NULL;
1da177e4
LT
547 }
548 flags = 0;
549 }
550 }
551}
552
553/*
554 * Visible inode allocation functions.
555 */
556
557/*
558 * Allocate an inode on disk.
559 * Mode is used to tell whether the new inode will need space, and whether
560 * it is a directory.
561 *
562 * The arguments IO_agbp and alloc_done are defined to work within
563 * the constraint of one allocation per transaction.
564 * xfs_dialloc() is designed to be called twice if it has to do an
565 * allocation to make more free inodes. On the first call,
566 * IO_agbp should be set to NULL. If an inode is available,
567 * i.e., xfs_dialloc() did not need to do an allocation, an inode
568 * number is returned. In this case, IO_agbp would be set to the
569 * current ag_buf and alloc_done set to false.
570 * If an allocation needed to be done, xfs_dialloc would return
571 * the current ag_buf in IO_agbp and set alloc_done to true.
572 * The caller should then commit the current transaction, allocate a new
573 * transaction, and call xfs_dialloc() again, passing in the previous
574 * value of IO_agbp. IO_agbp should be held across the transactions.
575 * Since the agbp is locked across the two calls, the second call is
576 * guaranteed to have a free inode available.
577 *
578 * Once we successfully pick an inode its number is returned and the
579 * on-disk data structures are updated. The inode itself is not read
580 * in, since doing so would break ordering constraints with xfs_reclaim.
581 */
582int
583xfs_dialloc(
584 xfs_trans_t *tp, /* transaction pointer */
585 xfs_ino_t parent, /* parent inode (directory) */
586 mode_t mode, /* mode bits for new inode */
587 int okalloc, /* ok to allocate more space */
588 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
589 boolean_t *alloc_done, /* true if we needed to replenish
590 inode freelist */
591 xfs_ino_t *inop) /* inode number allocated */
592{
593 xfs_agnumber_t agcount; /* number of allocation groups */
594 xfs_buf_t *agbp; /* allocation group header's buffer */
595 xfs_agnumber_t agno; /* allocation group number */
596 xfs_agi_t *agi; /* allocation group header structure */
597 xfs_btree_cur_t *cur; /* inode allocation btree cursor */
598 int error; /* error return value */
599 int i; /* result code */
600 int ialloced; /* inode allocation status */
601 int noroom = 0; /* no space for inode blk allocation */
602 xfs_ino_t ino; /* fs-relative inode to be returned */
603 /* REFERENCED */
604 int j; /* result code */
605 xfs_mount_t *mp; /* file system mount structure */
606 int offset; /* index of inode in chunk */
607 xfs_agino_t pagino; /* parent's a.g. relative inode # */
608 xfs_agnumber_t pagno; /* parent's allocation group number */
61a25848 609 xfs_inobt_rec_incore_t rec; /* inode allocation record */
1da177e4
LT
610 xfs_agnumber_t tagno; /* testing allocation group number */
611 xfs_btree_cur_t *tcur; /* temp cursor */
61a25848 612 xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
1da177e4
LT
613
614
615 if (*IO_agbp == NULL) {
616 /*
617 * We do not have an agbp, so select an initial allocation
618 * group for inode allocation.
619 */
620 agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
621 /*
622 * Couldn't find an allocation group satisfying the
623 * criteria, give up.
624 */
625 if (!agbp) {
626 *inop = NULLFSINO;
627 return 0;
628 }
629 agi = XFS_BUF_TO_AGI(agbp);
16259e7d 630 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1da177e4
LT
631 } else {
632 /*
633 * Continue where we left off before. In this case, we
634 * know that the allocation group has free inodes.
635 */
636 agbp = *IO_agbp;
637 agi = XFS_BUF_TO_AGI(agbp);
16259e7d
CH
638 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
639 ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
1da177e4
LT
640 }
641 mp = tp->t_mountp;
642 agcount = mp->m_sb.sb_agcount;
16259e7d 643 agno = be32_to_cpu(agi->agi_seqno);
1da177e4
LT
644 tagno = agno;
645 pagno = XFS_INO_TO_AGNO(mp, parent);
646 pagino = XFS_INO_TO_AGINO(mp, parent);
647
648 /*
649 * If we have already hit the ceiling of inode blocks then clear
650 * okalloc so we scan all available agi structures for a free
651 * inode.
652 */
653
654 if (mp->m_maxicount &&
655 mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
656 noroom = 1;
657 okalloc = 0;
658 }
659
660 /*
661 * Loop until we find an allocation group that either has free inodes
662 * or in which we can allocate some inodes. Iterate through the
663 * allocation groups upward, wrapping at the end.
664 */
665 *alloc_done = B_FALSE;
666 while (!agi->agi_freecount) {
667 /*
668 * Don't do anything if we're not supposed to allocate
669 * any blocks, just go on to the next ag.
670 */
671 if (okalloc) {
672 /*
673 * Try to allocate some new inodes in the allocation
674 * group.
675 */
676 if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
677 xfs_trans_brelse(tp, agbp);
678 if (error == ENOSPC) {
679 *inop = NULLFSINO;
680 return 0;
681 } else
682 return error;
683 }
684 if (ialloced) {
685 /*
686 * We successfully allocated some inodes, return
687 * the current context to the caller so that it
688 * can commit the current transaction and call
689 * us again where we left off.
690 */
16259e7d 691 ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
1da177e4
LT
692 *alloc_done = B_TRUE;
693 *IO_agbp = agbp;
694 *inop = NULLFSINO;
695 return 0;
696 }
697 }
698 /*
699 * If it failed, give up on this ag.
700 */
701 xfs_trans_brelse(tp, agbp);
702 /*
703 * Go on to the next ag: get its ag header.
704 */
705nextag:
706 if (++tagno == agcount)
707 tagno = 0;
708 if (tagno == agno) {
709 *inop = NULLFSINO;
710 return noroom ? ENOSPC : 0;
711 }
712 down_read(&mp->m_peraglock);
713 if (mp->m_perag[tagno].pagi_inodeok == 0) {
714 up_read(&mp->m_peraglock);
715 goto nextag;
716 }
717 error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
718 up_read(&mp->m_peraglock);
719 if (error)
720 goto nextag;
721 agi = XFS_BUF_TO_AGI(agbp);
16259e7d 722 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1da177e4
LT
723 }
724 /*
725 * Here with an allocation group that has a free inode.
726 * Reset agno since we may have chosen a new ag in the
727 * loop above.
728 */
729 agno = tagno;
730 *IO_agbp = NULL;
561f7d17 731 cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
1da177e4
LT
732 /*
733 * If pagino is 0 (this is the root inode allocation) use newino.
734 * This must work because we've just allocated some.
735 */
736 if (!pagino)
16259e7d 737 pagino = be32_to_cpu(agi->agi_newino);
1da177e4
LT
738#ifdef DEBUG
739 if (cur->bc_nlevels == 1) {
740 int freecount = 0;
741
742 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
743 goto error0;
744 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
745 do {
746 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
747 &rec.ir_freecount, &rec.ir_free, &i)))
748 goto error0;
749 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
750 freecount += rec.ir_freecount;
637aa50f 751 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
752 goto error0;
753 } while (i == 1);
754
16259e7d 755 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1da177e4
LT
756 XFS_FORCED_SHUTDOWN(mp));
757 }
758#endif
759 /*
760 * If in the same a.g. as the parent, try to get near the parent.
761 */
762 if (pagno == agno) {
763 if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
764 goto error0;
765 if (i != 0 &&
766 (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
767 &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
768 j == 1 &&
769 rec.ir_freecount > 0) {
770 /*
771 * Found a free inode in the same chunk
772 * as parent, done.
773 */
774 }
775 /*
776 * In the same a.g. as parent, but parent's chunk is full.
777 */
778 else {
779 int doneleft; /* done, to the left */
780 int doneright; /* done, to the right */
781
782 if (error)
783 goto error0;
784 ASSERT(i == 1);
785 ASSERT(j == 1);
786 /*
787 * Duplicate the cursor, search left & right
788 * simultaneously.
789 */
790 if ((error = xfs_btree_dup_cursor(cur, &tcur)))
791 goto error0;
792 /*
793 * Search left with tcur, back up 1 record.
794 */
8df4da4a 795 if ((error = xfs_btree_decrement(tcur, 0, &i)))
1da177e4
LT
796 goto error1;
797 doneleft = !i;
798 if (!doneleft) {
799 if ((error = xfs_inobt_get_rec(tcur,
800 &trec.ir_startino,
801 &trec.ir_freecount,
802 &trec.ir_free, &i)))
803 goto error1;
804 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
805 }
806 /*
807 * Search right with cur, go forward 1 record.
808 */
637aa50f 809 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
810 goto error1;
811 doneright = !i;
812 if (!doneright) {
813 if ((error = xfs_inobt_get_rec(cur,
814 &rec.ir_startino,
815 &rec.ir_freecount,
816 &rec.ir_free, &i)))
817 goto error1;
818 XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
819 }
820 /*
821 * Loop until we find the closest inode chunk
822 * with a free one.
823 */
824 while (!doneleft || !doneright) {
825 int useleft; /* using left inode
826 chunk this time */
827
828 /*
829 * Figure out which block is closer,
830 * if both are valid.
831 */
832 if (!doneleft && !doneright)
833 useleft =
834 pagino -
835 (trec.ir_startino +
836 XFS_INODES_PER_CHUNK - 1) <
837 rec.ir_startino - pagino;
838 else
839 useleft = !doneleft;
840 /*
841 * If checking the left, does it have
842 * free inodes?
843 */
844 if (useleft && trec.ir_freecount) {
845 /*
846 * Yes, set it up as the chunk to use.
847 */
848 rec = trec;
849 xfs_btree_del_cursor(cur,
850 XFS_BTREE_NOERROR);
851 cur = tcur;
852 break;
853 }
854 /*
855 * If checking the right, does it have
856 * free inodes?
857 */
858 if (!useleft && rec.ir_freecount) {
859 /*
860 * Yes, it's already set up.
861 */
862 xfs_btree_del_cursor(tcur,
863 XFS_BTREE_NOERROR);
864 break;
865 }
866 /*
867 * If used the left, get another one
868 * further left.
869 */
870 if (useleft) {
8df4da4a 871 if ((error = xfs_btree_decrement(tcur, 0,
1da177e4
LT
872 &i)))
873 goto error1;
874 doneleft = !i;
875 if (!doneleft) {
876 if ((error = xfs_inobt_get_rec(
877 tcur,
878 &trec.ir_startino,
879 &trec.ir_freecount,
880 &trec.ir_free, &i)))
881 goto error1;
882 XFS_WANT_CORRUPTED_GOTO(i == 1,
883 error1);
884 }
885 }
886 /*
887 * If used the right, get another one
888 * further right.
889 */
890 else {
637aa50f 891 if ((error = xfs_btree_increment(cur, 0,
1da177e4
LT
892 &i)))
893 goto error1;
894 doneright = !i;
895 if (!doneright) {
896 if ((error = xfs_inobt_get_rec(
897 cur,
898 &rec.ir_startino,
899 &rec.ir_freecount,
900 &rec.ir_free, &i)))
901 goto error1;
902 XFS_WANT_CORRUPTED_GOTO(i == 1,
903 error1);
904 }
905 }
906 }
907 ASSERT(!doneleft || !doneright);
908 }
909 }
910 /*
911 * In a different a.g. from the parent.
912 * See if the most recently allocated block has any free.
913 */
16259e7d 914 else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
1da177e4 915 if ((error = xfs_inobt_lookup_eq(cur,
16259e7d 916 be32_to_cpu(agi->agi_newino), 0, 0, &i)))
1da177e4
LT
917 goto error0;
918 if (i == 1 &&
919 (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
920 &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
921 j == 1 &&
922 rec.ir_freecount > 0) {
923 /*
924 * The last chunk allocated in the group still has
925 * a free inode.
926 */
927 }
928 /*
929 * None left in the last group, search the whole a.g.
930 */
931 else {
932 if (error)
933 goto error0;
934 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
935 goto error0;
936 ASSERT(i == 1);
937 for (;;) {
938 if ((error = xfs_inobt_get_rec(cur,
939 &rec.ir_startino,
940 &rec.ir_freecount, &rec.ir_free,
941 &i)))
942 goto error0;
943 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
944 if (rec.ir_freecount > 0)
945 break;
637aa50f 946 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
947 goto error0;
948 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
949 }
950 }
951 }
952 offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
953 ASSERT(offset >= 0);
954 ASSERT(offset < XFS_INODES_PER_CHUNK);
955 ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
956 XFS_INODES_PER_CHUNK) == 0);
957 ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
958 XFS_INOBT_CLR_FREE(&rec, offset);
959 rec.ir_freecount--;
960 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
961 rec.ir_free)))
962 goto error0;
413d57c9 963 be32_add_cpu(&agi->agi_freecount, -1);
1da177e4
LT
964 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
965 down_read(&mp->m_peraglock);
966 mp->m_perag[tagno].pagi_freecount--;
967 up_read(&mp->m_peraglock);
968#ifdef DEBUG
969 if (cur->bc_nlevels == 1) {
970 int freecount = 0;
971
972 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
973 goto error0;
974 do {
975 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
976 &rec.ir_freecount, &rec.ir_free, &i)))
977 goto error0;
978 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
979 freecount += rec.ir_freecount;
637aa50f 980 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
981 goto error0;
982 } while (i == 1);
16259e7d 983 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1da177e4
LT
984 XFS_FORCED_SHUTDOWN(mp));
985 }
986#endif
987 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
988 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
989 *inop = ino;
990 return 0;
991error1:
992 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
993error0:
994 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
995 return error;
996}
997
998/*
999 * Free disk inode. Carefully avoids touching the incore inode, all
1000 * manipulations incore are the caller's responsibility.
1001 * The on-disk inode is not changed by this operation, only the
1002 * btree (free inode mask) is changed.
1003 */
1004int
1005xfs_difree(
1006 xfs_trans_t *tp, /* transaction pointer */
1007 xfs_ino_t inode, /* inode to be freed */
1008 xfs_bmap_free_t *flist, /* extents to free */
1009 int *delete, /* set if inode cluster was deleted */
1010 xfs_ino_t *first_ino) /* first inode in deleted cluster */
1011{
1012 /* REFERENCED */
1013 xfs_agblock_t agbno; /* block number containing inode */
1014 xfs_buf_t *agbp; /* buffer containing allocation group header */
1015 xfs_agino_t agino; /* inode number relative to allocation group */
1016 xfs_agnumber_t agno; /* allocation group number */
1017 xfs_agi_t *agi; /* allocation group header */
1018 xfs_btree_cur_t *cur; /* inode btree cursor */
1019 int error; /* error return value */
1020 int i; /* result code */
1021 int ilen; /* inodes in an inode cluster */
1022 xfs_mount_t *mp; /* mount structure for filesystem */
1023 int off; /* offset of inode in inode chunk */
61a25848 1024 xfs_inobt_rec_incore_t rec; /* btree record */
1da177e4
LT
1025
1026 mp = tp->t_mountp;
1027
1028 /*
1029 * Break up inode number into its components.
1030 */
1031 agno = XFS_INO_TO_AGNO(mp, inode);
1032 if (agno >= mp->m_sb.sb_agcount) {
1033 cmn_err(CE_WARN,
1034 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.",
1035 agno, mp->m_sb.sb_agcount, mp->m_fsname);
1036 ASSERT(0);
1037 return XFS_ERROR(EINVAL);
1038 }
1039 agino = XFS_INO_TO_AGINO(mp, inode);
1040 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1041 cmn_err(CE_WARN,
da1650a5
CH
1042 "xfs_difree: inode != XFS_AGINO_TO_INO() "
1043 "(%llu != %llu) on %s. Returning EINVAL.",
1044 (unsigned long long)inode,
1045 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
1046 mp->m_fsname);
1da177e4
LT
1047 ASSERT(0);
1048 return XFS_ERROR(EINVAL);
1049 }
1050 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1051 if (agbno >= mp->m_sb.sb_agblocks) {
1052 cmn_err(CE_WARN,
1053 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.",
1054 agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
1055 ASSERT(0);
1056 return XFS_ERROR(EINVAL);
1057 }
1058 /*
1059 * Get the allocation group header.
1060 */
1061 down_read(&mp->m_peraglock);
1062 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1063 up_read(&mp->m_peraglock);
1064 if (error) {
1065 cmn_err(CE_WARN,
1066 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.",
1067 error, mp->m_fsname);
1068 return error;
1069 }
1070 agi = XFS_BUF_TO_AGI(agbp);
16259e7d
CH
1071 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1072 ASSERT(agbno < be32_to_cpu(agi->agi_length));
1da177e4
LT
1073 /*
1074 * Initialize the cursor.
1075 */
561f7d17 1076 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1da177e4
LT
1077#ifdef DEBUG
1078 if (cur->bc_nlevels == 1) {
1079 int freecount = 0;
1080
1081 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1082 goto error0;
1083 do {
1084 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
1085 &rec.ir_freecount, &rec.ir_free, &i)))
1086 goto error0;
1087 if (i) {
1088 freecount += rec.ir_freecount;
637aa50f 1089 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
1090 goto error0;
1091 }
1092 } while (i == 1);
16259e7d 1093 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1da177e4
LT
1094 XFS_FORCED_SHUTDOWN(mp));
1095 }
1096#endif
1097 /*
1098 * Look for the entry describing this inode.
1099 */
1100 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
1101 cmn_err(CE_WARN,
1102 "xfs_difree: xfs_inobt_lookup_le returned() an error %d on %s. Returning error.",
1103 error, mp->m_fsname);
1104 goto error0;
1105 }
1106 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1107 if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino, &rec.ir_freecount,
1108 &rec.ir_free, &i))) {
1109 cmn_err(CE_WARN,
1110 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.",
1111 error, mp->m_fsname);
1112 goto error0;
1113 }
1114 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1115 /*
1116 * Get the offset in the inode chunk.
1117 */
1118 off = agino - rec.ir_startino;
1119 ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
1120 ASSERT(!XFS_INOBT_IS_FREE(&rec, off));
1121 /*
1122 * Mark the inode free & increment the count.
1123 */
1124 XFS_INOBT_SET_FREE(&rec, off);
1125 rec.ir_freecount++;
1126
1127 /*
c41564b5 1128 * When an inode cluster is free, it becomes eligible for removal
1da177e4 1129 */
1bd960ee 1130 if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
1da177e4
LT
1131 (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
1132
1133 *delete = 1;
1134 *first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
1135
1136 /*
1137 * Remove the inode cluster from the AGI B+Tree, adjust the
1138 * AGI and Superblock inode counts, and mark the disk space
1139 * to be freed when the transaction is committed.
1140 */
1141 ilen = XFS_IALLOC_INODES(mp);
413d57c9
MS
1142 be32_add_cpu(&agi->agi_count, -ilen);
1143 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
1da177e4
LT
1144 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
1145 down_read(&mp->m_peraglock);
1146 mp->m_perag[agno].pagi_freecount -= ilen - 1;
1147 up_read(&mp->m_peraglock);
1148 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
1149 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1150
1151 if ((error = xfs_inobt_delete(cur, &i))) {
1152 cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
1153 error, mp->m_fsname);
1154 goto error0;
1155 }
1156
1157 xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
1158 agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
1159 XFS_IALLOC_BLOCKS(mp), flist, mp);
1160 } else {
1161 *delete = 0;
1162
1163 if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
1164 cmn_err(CE_WARN,
1165 "xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
1166 error, mp->m_fsname);
1167 goto error0;
1168 }
1169 /*
1170 * Change the inode free counts and log the ag/sb changes.
1171 */
413d57c9 1172 be32_add_cpu(&agi->agi_freecount, 1);
1da177e4
LT
1173 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1174 down_read(&mp->m_peraglock);
1175 mp->m_perag[agno].pagi_freecount++;
1176 up_read(&mp->m_peraglock);
1177 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
1178 }
1179
1180#ifdef DEBUG
1181 if (cur->bc_nlevels == 1) {
1182 int freecount = 0;
1183
1184 if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
1185 goto error0;
1186 do {
1187 if ((error = xfs_inobt_get_rec(cur,
1188 &rec.ir_startino,
1189 &rec.ir_freecount,
1190 &rec.ir_free, &i)))
1191 goto error0;
1192 if (i) {
1193 freecount += rec.ir_freecount;
637aa50f 1194 if ((error = xfs_btree_increment(cur, 0, &i)))
1da177e4
LT
1195 goto error0;
1196 }
1197 } while (i == 1);
16259e7d 1198 ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
1da177e4
LT
1199 XFS_FORCED_SHUTDOWN(mp));
1200 }
1201#endif
1202 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1203 return 0;
1204
1205error0:
1206 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1207 return error;
1208}
1209
1210/*
1211 * Return the location of the inode in bno/off, for mapping it into a buffer.
1212 */
1213/*ARGSUSED*/
1214int
1215xfs_dilocate(
1216 xfs_mount_t *mp, /* file system mount structure */
1217 xfs_trans_t *tp, /* transaction pointer */
1218 xfs_ino_t ino, /* inode to locate */
1219 xfs_fsblock_t *bno, /* output: block containing inode */
1220 int *len, /* output: num blocks in inode cluster */
1221 int *off, /* output: index in block of inode */
1222 uint flags) /* flags concerning inode lookup */
1223{
1224 xfs_agblock_t agbno; /* block number of inode in the alloc group */
1225 xfs_buf_t *agbp; /* agi buffer */
1226 xfs_agino_t agino; /* inode number within alloc group */
1227 xfs_agnumber_t agno; /* allocation group number */
1228 int blks_per_cluster; /* num blocks per inode cluster */
1229 xfs_agblock_t chunk_agbno; /* first block in inode chunk */
1230 xfs_agino_t chunk_agino; /* first agino in inode chunk */
1231 __int32_t chunk_cnt; /* count of free inodes in chunk */
1232 xfs_inofree_t chunk_free; /* mask of free inodes in chunk */
1233 xfs_agblock_t cluster_agbno; /* first block in inode cluster */
1234 xfs_btree_cur_t *cur; /* inode btree cursor */
1235 int error; /* error code */
1236 int i; /* temp state */
1237 int offset; /* index of inode in its buffer */
1238 int offset_agbno; /* blks from chunk start to inode */
1239
1240 ASSERT(ino != NULLFSINO);
1241 /*
1242 * Split up the inode number into its parts.
1243 */
1244 agno = XFS_INO_TO_AGNO(mp, ino);
1245 agino = XFS_INO_TO_AGINO(mp, ino);
1246 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1247 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1248 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1249#ifdef DEBUG
4d1a2ed3
NS
1250 /* no diagnostics for bulkstat, ino comes from userspace */
1251 if (flags & XFS_IMAP_BULKSTAT)
1252 return XFS_ERROR(EINVAL);
1da177e4
LT
1253 if (agno >= mp->m_sb.sb_agcount) {
1254 xfs_fs_cmn_err(CE_ALERT, mp,
1255 "xfs_dilocate: agno (%d) >= "
1256 "mp->m_sb.sb_agcount (%d)",
1257 agno, mp->m_sb.sb_agcount);
1258 }
1259 if (agbno >= mp->m_sb.sb_agblocks) {
1260 xfs_fs_cmn_err(CE_ALERT, mp,
1261 "xfs_dilocate: agbno (0x%llx) >= "
1262 "mp->m_sb.sb_agblocks (0x%lx)",
1263 (unsigned long long) agbno,
1264 (unsigned long) mp->m_sb.sb_agblocks);
1265 }
1266 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1267 xfs_fs_cmn_err(CE_ALERT, mp,
1268 "xfs_dilocate: ino (0x%llx) != "
1269 "XFS_AGINO_TO_INO(mp, agno, agino) "
1270 "(0x%llx)",
1271 ino, XFS_AGINO_TO_INO(mp, agno, agino));
1272 }
745b1f47 1273 xfs_stack_trace();
1da177e4
LT
1274#endif /* DEBUG */
1275 return XFS_ERROR(EINVAL);
1276 }
1277 if ((mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) ||
1278 !(flags & XFS_IMAP_LOOKUP)) {
1279 offset = XFS_INO_TO_OFFSET(mp, ino);
1280 ASSERT(offset < mp->m_sb.sb_inopblock);
1281 *bno = XFS_AGB_TO_FSB(mp, agno, agbno);
1282 *off = offset;
1283 *len = 1;
1284 return 0;
1285 }
1286 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1287 if (*bno != NULLFSBLOCK) {
1288 offset = XFS_INO_TO_OFFSET(mp, ino);
1289 ASSERT(offset < mp->m_sb.sb_inopblock);
1290 cluster_agbno = XFS_FSB_TO_AGBNO(mp, *bno);
1291 *off = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1292 offset;
1293 *len = blks_per_cluster;
1294 return 0;
1295 }
1296 if (mp->m_inoalign_mask) {
1297 offset_agbno = agbno & mp->m_inoalign_mask;
1298 chunk_agbno = agbno - offset_agbno;
1299 } else {
1300 down_read(&mp->m_peraglock);
1301 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1302 up_read(&mp->m_peraglock);
1303 if (error) {
1304#ifdef DEBUG
1305 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1306 "xfs_ialloc_read_agi() returned "
1307 "error %d, agno %d",
1308 error, agno);
1309#endif /* DEBUG */
1310 return error;
1311 }
561f7d17 1312 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1da177e4
LT
1313 if ((error = xfs_inobt_lookup_le(cur, agino, 0, 0, &i))) {
1314#ifdef DEBUG
1315 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1316 "xfs_inobt_lookup_le() failed");
1317#endif /* DEBUG */
1318 goto error0;
1319 }
1320 if ((error = xfs_inobt_get_rec(cur, &chunk_agino, &chunk_cnt,
1321 &chunk_free, &i))) {
1322#ifdef DEBUG
1323 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1324 "xfs_inobt_get_rec() failed");
1325#endif /* DEBUG */
1326 goto error0;
1327 }
1328 if (i == 0) {
1329#ifdef DEBUG
1330 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_dilocate: "
1331 "xfs_inobt_get_rec() failed");
1332#endif /* DEBUG */
1333 error = XFS_ERROR(EINVAL);
1334 }
1335 xfs_trans_brelse(tp, agbp);
1336 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1337 if (error)
1338 return error;
1339 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_agino);
1340 offset_agbno = agbno - chunk_agbno;
1341 }
1342 ASSERT(agbno >= chunk_agbno);
1343 cluster_agbno = chunk_agbno +
1344 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
1345 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
1346 XFS_INO_TO_OFFSET(mp, ino);
1347 *bno = XFS_AGB_TO_FSB(mp, agno, cluster_agbno);
1348 *off = offset;
1349 *len = blks_per_cluster;
1350 return 0;
1351error0:
1352 xfs_trans_brelse(tp, agbp);
1353 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1354 return error;
1355}
1356
1357/*
1358 * Compute and fill in value of m_in_maxlevels.
1359 */
1360void
1361xfs_ialloc_compute_maxlevels(
1362 xfs_mount_t *mp) /* file system mount structure */
1363{
1364 int level;
1365 uint maxblocks;
1366 uint maxleafents;
1367 int minleafrecs;
1368 int minnoderecs;
1369
1370 maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
1371 XFS_INODES_PER_CHUNK_LOG;
1372 minleafrecs = mp->m_alloc_mnr[0];
1373 minnoderecs = mp->m_alloc_mnr[1];
1374 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1375 for (level = 1; maxblocks > 1; level++)
1376 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1377 mp->m_in_maxlevels = level;
1378}
1379
1380/*
1381 * Log specified fields for the ag hdr (inode section)
1382 */
1383void
1384xfs_ialloc_log_agi(
1385 xfs_trans_t *tp, /* transaction pointer */
1386 xfs_buf_t *bp, /* allocation group header buffer */
1387 int fields) /* bitmask of fields to log */
1388{
1389 int first; /* first byte number */
1390 int last; /* last byte number */
1391 static const short offsets[] = { /* field starting offsets */
1392 /* keep in sync with bit definitions */
1393 offsetof(xfs_agi_t, agi_magicnum),
1394 offsetof(xfs_agi_t, agi_versionnum),
1395 offsetof(xfs_agi_t, agi_seqno),
1396 offsetof(xfs_agi_t, agi_length),
1397 offsetof(xfs_agi_t, agi_count),
1398 offsetof(xfs_agi_t, agi_root),
1399 offsetof(xfs_agi_t, agi_level),
1400 offsetof(xfs_agi_t, agi_freecount),
1401 offsetof(xfs_agi_t, agi_newino),
1402 offsetof(xfs_agi_t, agi_dirino),
1403 offsetof(xfs_agi_t, agi_unlinked),
1404 sizeof(xfs_agi_t)
1405 };
1406#ifdef DEBUG
1407 xfs_agi_t *agi; /* allocation group header */
1408
1409 agi = XFS_BUF_TO_AGI(bp);
16259e7d 1410 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
1da177e4
LT
1411#endif
1412 /*
1413 * Compute byte offsets for the first and last fields.
1414 */
1415 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last);
1416 /*
1417 * Log the allocation group inode header buffer.
1418 */
1419 xfs_trans_log_buf(tp, bp, first, last);
1420}
1421
1422/*
1423 * Read in the allocation group header (inode allocation section)
1424 */
1425int
1426xfs_ialloc_read_agi(
1427 xfs_mount_t *mp, /* file system mount structure */
1428 xfs_trans_t *tp, /* transaction pointer */
1429 xfs_agnumber_t agno, /* allocation group number */
1430 xfs_buf_t **bpp) /* allocation group hdr buf */
1431{
1432 xfs_agi_t *agi; /* allocation group header */
1433 int agi_ok; /* agi is consistent */
1434 xfs_buf_t *bp; /* allocation group hdr buf */
1435 xfs_perag_t *pag; /* per allocation group data */
1436 int error;
1437
1438 ASSERT(agno != NULLAGNUMBER);
1439 error = xfs_trans_read_buf(
1440 mp, tp, mp->m_ddev_targp,
1441 XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
1442 XFS_FSS_TO_BB(mp, 1), 0, &bp);
1443 if (error)
1444 return error;
1445 ASSERT(bp && !XFS_BUF_GETERROR(bp));
1446
1447 /*
1448 * Validate the magic number of the agi block.
1449 */
1450 agi = XFS_BUF_TO_AGI(bp);
1451 agi_ok =
16259e7d
CH
1452 be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
1453 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum));
1da177e4
LT
1454 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
1455 XFS_RANDOM_IALLOC_READ_AGI))) {
1456 XFS_CORRUPTION_ERROR("xfs_ialloc_read_agi", XFS_ERRLEVEL_LOW,
1457 mp, agi);
1458 xfs_trans_brelse(tp, bp);
1459 return XFS_ERROR(EFSCORRUPTED);
1460 }
1461 pag = &mp->m_perag[agno];
1462 if (!pag->pagi_init) {
16259e7d 1463 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
92821e2b 1464 pag->pagi_count = be32_to_cpu(agi->agi_count);
1da177e4
LT
1465 pag->pagi_init = 1;
1466 } else {
1467 /*
1468 * It's possible for these to be out of sync if
1469 * we are in the middle of a forced shutdown.
1470 */
16259e7d
CH
1471 ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
1472 XFS_FORCED_SHUTDOWN(mp));
1da177e4
LT
1473 }
1474
1475#ifdef DEBUG
1476 {
1477 int i;
1478
1479 for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++)
1480 ASSERT(agi->agi_unlinked[i]);
1481 }
1482#endif
1483
1484 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGI, XFS_AGI_REF);
1485 *bpp = bp;
1486 return 0;
1487}
92821e2b
DC
1488
1489/*
1490 * Read in the agi to initialise the per-ag data in the mount structure
1491 */
1492int
1493xfs_ialloc_pagi_init(
1494 xfs_mount_t *mp, /* file system mount structure */
1495 xfs_trans_t *tp, /* transaction pointer */
1496 xfs_agnumber_t agno) /* allocation group number */
1497{
1498 xfs_buf_t *bp = NULL;
1499 int error;
1500
1501 error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
1502 if (error)
1503 return error;
1504 if (bp)
1505 xfs_trans_brelse(tp, bp);
1506 return 0;
1507}