rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
}
+ if (pb->pb_flags & PBF_ORDERED) {
+ ASSERT(!(pb->pb_flags & PBF_READ));
+ rw = WRITE_BARRIER;
+ }
+
/* Special code path for reading a sub page size pagebuf in --
* we populate up the whole page, and hence the other metadata
* in the same page. This optimization is only valid when the
PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
- PBF_FLUSH = (1 << 11), /* flush disk write cache */
+ PBF_ORDERED = (1 << 11), /* use ordered writes */
PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
/* flags used only as arguments to access routines */
#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)
-#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH)
-#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH)
-#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH)
+#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
+#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
+#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)
#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
close_bdev_excl(bdev);
}
+/*
+ * Try to write out the superblock using barriers.
+ */
+STATIC int
+xfs_barrier_test(
+ xfs_mount_t *mp)
+{
+ xfs_buf_t *sbp = xfs_getsb(mp, 0);
+ int error;
+
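+	/* Set up a synchronous ordered write of the superblock buffer. */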
+ XFS_BUF_UNDONE(sbp);
+ XFS_BUF_UNREAD(sbp);
+ XFS_BUF_UNDELAYWRITE(sbp);
+ XFS_BUF_WRITE(sbp);
+ XFS_BUF_UNASYNC(sbp);
+ XFS_BUF_ORDERED(sbp);
+
+ xfsbdstrat(mp, sbp);
+ error = xfs_iowait(sbp);
+
+ /*
+ * Clear all the flags we set and possible error state in the
+ * buffer. We only did the write to try out whether barriers
+ * worked and shouldn't leave any traces in the superblock
+ * buffer.
+ */
+ XFS_BUF_DONE(sbp);
+ XFS_BUF_ERROR(sbp, 0);
+ XFS_BUF_UNORDERED(sbp);
+
+ xfs_buf_relse(sbp);
+ return error;
+}
+
+void
+xfs_mountfs_check_barriers(xfs_mount_t *mp)
+{
+ int error;
+
+ if (mp->m_logdev_targp != mp->m_ddev_targp) {
+ xfs_fs_cmn_err(CE_NOTE, mp,
+ "Disabling barriers, not supported with external log device");
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ }
+
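+	/* Barriers require ordered request support from the block device. */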
+ if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
+ QUEUE_ORDERED_NONE) {
+ xfs_fs_cmn_err(CE_NOTE, mp,
+ "Disabling barriers, not supported by the underlying device");
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ }
+
+ error = xfs_barrier_test(mp);
+ if (error) {
+ xfs_fs_cmn_err(CE_NOTE, mp,
+ "Disabling barriers, trial barrier write failed");
+ mp->m_flags &= ~XFS_MOUNT_BARRIER;
+ }
+}
+
+void
+xfs_blkdev_issue_flush(
+ xfs_buftarg_t *buftarg)
+{
+ blkdev_issue_flush(buftarg->pbr_bdev, NULL);
+}
STATIC struct inode *
linvfs_alloc_inode(
extern int xfs_blkdev_get(struct xfs_mount *, const char *,
struct block_device **);
extern void xfs_blkdev_put(struct block_device *);
+extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
extern struct export_operations linvfs_export_ops;
* enforcement */
#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
-#define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */
+#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
* allocation */
STATIC void xlog_state_switch_iclogs(xlog_t *log,
xlog_in_core_t *iclog,
int eventual_size);
-STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags);
-STATIC int xlog_state_sync_all(xlog_t *log, uint flags);
+STATIC int xlog_state_sync(xlog_t *log,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_flushed);
+STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
/* local functions to manipulate grant head */
* semaphore.
*/
int
-xfs_log_force(xfs_mount_t *mp,
- xfs_lsn_t lsn,
- uint flags)
+_xfs_log_force(
+ xfs_mount_t *mp,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_flushed)
{
- int rval;
- xlog_t *log = mp->m_log;
+ xlog_t *log = mp->m_log;
+ int dummy;
+
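+	/* Callers that don't care whether the log was flushed pass NULL. */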
+ if (!log_flushed)
+ log_flushed = &dummy;
#if defined(DEBUG) || defined(XLOG_NOLOG)
if (!xlog_debug && xlog_target == log->l_targ)
XFS_STATS_INC(xs_log_force);
- if ((log->l_flags & XLOG_IO_ERROR) == 0) {
- if (lsn == 0)
- rval = xlog_state_sync_all(log, flags);
- else
- rval = xlog_state_sync(log, lsn, flags);
- } else {
- rval = XFS_ERROR(EIO);
- }
-
- return rval;
-
+ if (log->l_flags & XLOG_IO_ERROR)
+ return XFS_ERROR(EIO);
+ if (lsn == 0)
+ return xlog_state_sync_all(log, flags, log_flushed);
+ else
+ return xlog_state_sync(log, lsn, flags, log_flushed);
} /* xfs_log_force */
/*
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
/*
- * Do a disk write cache flush for the log block.
- * This is a bit of a sledgehammer, it would be better
- * to use a tag barrier here that just prevents reordering.
+ * Do an ordered write for the log block.
+ *
* It may not be needed to flush the first split block in the log wrap
* case, but do it anyways to be safe -AK
*/
- if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
- XFS_BUF_FLUSH(bp);
+ if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+ XFS_BUF_ORDERED(bp);
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
XFS_BUF_SET_FSPRIVATE(bp, iclog);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
- if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
- XFS_BUF_FLUSH(bp);
+ if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
+ XFS_BUF_ORDERED(bp);
dptr = XFS_BUF_PTR(bp);
/*
* Bump the cycle numbers at the start of each block
* not in the active nor dirty state.
*/
STATIC int
-xlog_state_sync_all(xlog_t *log, uint flags)
+xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
{
xlog_in_core_t *iclog;
xfs_lsn_t lsn;
if (xlog_state_release_iclog(log, iclog))
return XFS_ERROR(EIO);
+ *log_flushed = 1;
s = LOG_LOCK(log);
if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn &&
iclog->ic_state != XLOG_STATE_DIRTY)
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return XFS_ERROR(EIO);
+ *log_flushed = 1;
} else {
int
xlog_state_sync(xlog_t *log,
xfs_lsn_t lsn,
- uint flags)
+ uint flags,
+ int *log_flushed)
{
xlog_in_core_t *iclog;
int already_slept = 0;
XFS_STATS_INC(xs_log_force_sleep);
sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
&log->l_icloglock, s);
+ *log_flushed = 1;
already_slept = 1;
goto try_again;
} else {
LOG_UNLOCK(log, s);
if (xlog_state_release_iclog(log, iclog))
return XFS_ERROR(EIO);
+ *log_flushed = 1;
s = LOG_LOCK(log);
}
}
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return XFS_ERROR(EIO);
+ *log_flushed = 1;
} else { /* just return */
LOG_UNLOCK(log, s);
}
xlog_ticket_t *tic;
xlog_t *log;
int retval;
+ int dummy;
SPLDECL(s);
SPLDECL(s2);
* Force the incore logs to disk before shutting the
* log down completely.
*/
- xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC);
+ xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
s2 = LOG_LOCK(log);
retval = xlog_state_ioerror(log);
LOG_UNLOCK(log, s2);
xfs_log_ticket_t ticket,
void **iclog,
uint flags);
-int xfs_log_force(struct xfs_mount *mp,
- xfs_lsn_t lsn,
- uint flags);
+int _xfs_log_force(struct xfs_mount *mp,
+ xfs_lsn_t lsn,
+ uint flags,
+ int *log_forced);
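+/* Old interface for callers that don't care whether the log was flushed. */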
+#define xfs_log_force(mp, lsn, flags) \
+	_xfs_log_force(mp, lsn, flags, NULL)
int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
xfs_daddr_t start_block,
* 32 bits in size */
#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */
#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */
-#define XFS_MOUNT_NOLOGFLUSH 0x00020000
+#define XFS_MOUNT_BARRIER 0x00020000
#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/
#define XFS_MOUNT_SWALLOC 0x00080000 /* turn on stripe width
* allocation */
extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
+extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
*/
/*ARGSUSED*/
int
-xfs_trans_commit(
+_xfs_trans_commit(
xfs_trans_t *tp,
uint flags,
- xfs_lsn_t *commit_lsn_p)
+ xfs_lsn_t *commit_lsn_p,
+ int *log_flushed)
{
xfs_log_iovec_t *log_vector;
int nvec;
* log out now and wait for it.
*/
if (sync) {
- if (!error)
- error = xfs_log_force(mp, commit_lsn,
- XFS_LOG_FORCE | XFS_LOG_SYNC);
+ if (!error) {
+ error = _xfs_log_force(mp, commit_lsn,
+ XFS_LOG_FORCE | XFS_LOG_SYNC,
+ log_flushed);
+ }
XFS_STATS_INC(xs_trans_sync);
} else {
XFS_STATS_INC(xs_trans_async);
struct xfs_efd_log_item *,
xfs_fsblock_t,
xfs_extlen_t);
-int xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
+int _xfs_trans_commit(xfs_trans_t *,
+ uint flags,
+ xfs_lsn_t *,
+ int *);
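+/* Wrapper for callers that don't need the log_flushed result. */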
+#define xfs_trans_commit(tp, flags, lsn) \
+ _xfs_trans_commit(tp, flags, lsn, NULL)
void xfs_trans_cancel(xfs_trans_t *, int);
void xfs_trans_ail_init(struct xfs_mount *);
xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
if (ap->flags & XFSMNT_NOUUID)
mp->m_flags |= XFS_MOUNT_NOUUID;
- if (ap->flags & XFSMNT_NOLOGFLUSH)
- mp->m_flags |= XFS_MOUNT_NOLOGFLUSH;
+ if (ap->flags & XFSMNT_BARRIER)
+ mp->m_flags |= XFS_MOUNT_BARRIER;
return 0;
}
goto error2;
error = XFS_IOINIT(vfsp, args, flags);
- if (!error)
- return 0;
+ if (error)
+ goto error2;
+
+ if ((args->flags & XFSMNT_BARRIER) &&
+ !(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY))
+ xfs_mountfs_check_barriers(mp);
+ return 0;
+
error2:
if (mp->m_sb_bp)
xfs_freesb(mp);
else
mp->m_flags &= ~XFS_MOUNT_NOATIME;
- if (!(vfsp->vfs_flag & VFS_RDONLY)) {
- VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
+ if ((vfsp->vfs_flag & VFS_RDONLY) &&
+ !(*flags & MS_RDONLY)) {
+ vfsp->vfs_flag &= ~VFS_RDONLY;
+
+ if (args->flags & XFSMNT_BARRIER)
+ xfs_mountfs_check_barriers(mp);
}
- if (*flags & MS_RDONLY) {
+ if (!(vfsp->vfs_flag & VFS_RDONLY) &&
+ (*flags & MS_RDONLY)) {
+ VFS_SYNC(vfsp, SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR, NULL, error);
+
xfs_quiesce_fs(mp);
/* Ok now write out an unmount record */
xfs_log_unmount_write(mp);
xfs_unmountfs_writesb(mp);
vfsp->vfs_flag |= VFS_RDONLY;
- } else {
- vfsp->vfs_flag &= ~VFS_RDONLY;
}
return 0;
#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
-#define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */
+#define MNTOPT_BARRIER	"barrier"	/* use write barriers for log writes and
+					   unwritten extent conversion */
#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#endif
} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
args->flags |= XFSMNT_NOUUID;
- } else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
- args->flags |= XFSMNT_NOLOGFLUSH;
+ } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+ args->flags |= XFSMNT_BARRIER;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
{ XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
{ XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
{ XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
- { XFS_MOUNT_NOLOGFLUSH, "," MNTOPT_NOLOGFLUSH },
+ { XFS_MOUNT_BARRIER, "," MNTOPT_BARRIER },
{ XFS_MOUNT_IDELETE, "," MNTOPT_NOIKEEP },
{ 0, NULL }
};
xfs_inode_t *ip;
xfs_trans_t *tp;
int error;
+ int log_flushed = 0, changed = 1;
vn_trace_entry(BHV_TO_VNODE(bdp),
__FUNCTION__, (inst_t *)__return_address);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
- xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
+ _xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
XFS_LOG_FORCE |
((flag & FSYNC_WAIT)
- ? XFS_LOG_SYNC : 0));
+ ? XFS_LOG_SYNC : 0),
+ &log_flushed);
+ } else {
+ /*
+ * If the inode is not pinned and nothing
+			 * has changed, we don't need to flush the
+ * cache.
+ */
+ changed = 0;
}
error = 0;
} else {
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (flag & FSYNC_WAIT)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0, NULL);
+ error = _xfs_trans_commit(tp, 0, NULL, &log_flushed);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
+
+ if ((ip->i_mount->m_flags & XFS_MOUNT_BARRIER) && changed) {
+ /*
+		 * If the log write didn't issue an ordered tag, we need
+ * to flush the disk cache for the data device now.
+ */
+ if (!log_flushed)
+ xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
+
+ /*
+		 * If this inode is on the RT dev, we need to flush that
+		 * cache as well.
+ */
+ if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
+ xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
+ }
+
return error;
}