/* flags for direct write completions */
#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
#define XFS_DIO_FLAG_APPEND (1 << 1)
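+/* direct write went to the CoW fork; remap the extents at I/O completion */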
+#define XFS_DIO_FLAG_COW (1 << 2)
/*
 * structure owned by writepages passed to individual writepage calls
 */

static void
xfs_map_direct(
struct inode *inode,
struct buffer_head *bh_result,
struct xfs_bmbt_irec *imap,
- xfs_off_t offset)
+ xfs_off_t offset,
+ bool is_cow)
{
uintptr_t *flags = (uintptr_t *)&bh_result->b_private;
xfs_off_t size = bh_result->b_size;
trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
- ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
+ ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
+ XFS_IO_OVERWRITE, imap);
if (ISUNWRITTEN(imap)) {
*flags |= XFS_DIO_FLAG_UNWRITTEN;
set_buffer_defer_completion(bh_result);
- } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+ } else if (is_cow) {
+ *flags |= XFS_DIO_FLAG_COW;
+ set_buffer_defer_completion(bh_result);
+ }
+ if (offset + size > i_size_read(inode) || offset + size < 0) {
*flags |= XFS_DIO_FLAG_APPEND;
set_buffer_defer_completion(bh_result);
}
}
+/* Bounce unaligned directio writes to the page cache. */
+static int
+xfs_bounce_unaligned_dio_write(
+ struct xfs_inode *ip,
+ xfs_fileoff_t offset_fsb,
+ struct xfs_bmbt_irec *imap)
+{
+ struct xfs_bmbt_irec irec;
+ xfs_fileoff_t delta;
+ bool shared;
+ bool x; /* trim flag from xfs_reflink_trim_around_shared, unused */
+ int error;
+
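+ /* Only look at the part of the mapping at or beyond the write's first block. */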
+ irec = *imap;
+ if (offset_fsb > irec.br_startoff) {
+ delta = offset_fsb - irec.br_startoff;
+ irec.br_blockcount -= delta;
+ irec.br_startblock += delta;
+ irec.br_startoff = offset_fsb;
+ }
+ error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
+ if (error)
+ return error;
+
+ /*
+ * We're here because we're trying to do a directio write to a
+ * region that isn't aligned to a filesystem block. If any part
+ * of the extent is shared, fall back to buffered mode to handle
+ * the RMW. This is done by returning -EREMCHG ("remote addr
+ * changed"), which is caught further up the call stack.
+ */
+ if (shared) {
+ trace_xfs_reflink_bounce_dio_write(ip, imap);
+ return -EREMCHG;
+ }
+ return 0;
+}
+
STATIC int
__xfs_get_blocks(
struct inode *inode,
xfs_off_t offset;
ssize_t size;
int new = 0;
+ bool is_cow = false;
+ bool need_alloc = false;
BUG_ON(create && !direct);
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
- &imap, &nimaps, XFS_BMAPI_ENTIRE);
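+ /* For a dio write to a reflink inode, check for an existing CoW fork mapping first. */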
+ if (create && direct && xfs_is_reflink_inode(ip))
+ is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
+ &need_alloc);
+ if (!is_cow) {
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap, &nimaps, XFS_BMAPI_ENTIRE);
+ /*
+ * Truncate an overwrite extent if there's a pending CoW
+ * reservation before the end of this extent. This
+ * forces us to come back to get_blocks to take care of
+ * the CoW.
+ */
+ if (create && direct && nimaps &&
+ imap.br_startblock != HOLESTARTBLOCK &&
+ imap.br_startblock != DELAYSTARTBLOCK &&
+ !ISUNWRITTEN(&imap))
+ xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
+ &imap);
+ }
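+ /* CoW fork blocks should have been allocated before the direct write was issued. */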
+ ASSERT(!need_alloc);
if (error)
goto out_unlock;
if (imap.br_startblock != HOLESTARTBLOCK &&
imap.br_startblock != DELAYSTARTBLOCK &&
(create || !ISUNWRITTEN(&imap))) {
+ if (create && direct && !is_cow) {
+ error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
+ &imap);
+ if (error)
+ return error;
+ }
+
xfs_map_buffer(inode, bh_result, &imap, offset);
if (ISUNWRITTEN(&imap))
set_buffer_unwritten(bh_result);
if (dax_fault)
ASSERT(!ISUNWRITTEN(&imap));
else
- xfs_map_direct(inode, bh_result, &imap, offset);
+ xfs_map_direct(inode, bh_result, &imap, offset,
+ is_cow);
}
}
if (flags & XFS_DIO_FLAG_UNWRITTEN) {
trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
error = xfs_iomap_write_unwritten(ip, offset, size);
- } else if (flags & XFS_DIO_FLAG_APPEND) {
+ }
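+ /* Remap the CoW fork blocks into the data fork now that the write has completed. */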
+ if (flags & XFS_DIO_FLAG_COW)
+ error = xfs_reflink_end_cow(ip, offset, size);
+ if (flags & XFS_DIO_FLAG_APPEND) {
trace_xfs_end_io_direct_write_append(ip, offset, size);
error = xfs_setfilesize(ip, offset, size);
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
+#include "xfs_reflink.h"
#include <linux/dcache.h>
#include <linux/falloc.h>
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
+ /* If this is a block-aligned directio CoW, remap immediately. */
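+ /* Unaligned CoW writes are bounced to buffered I/O via -EREMCHG instead. */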
+ if (xfs_is_reflink_inode(ip) && !unaligned_io) {
+ ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count);
+ if (ret)
+ goto out;
+ }
+
data = *from;
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
xfs_get_blocks_direct, xfs_end_io_direct_write,
if (IS_DAX(inode))
ret = xfs_file_dax_write(iocb, from);
- else if (iocb->ki_flags & IOCB_DIRECT)
+ else if (iocb->ki_flags & IOCB_DIRECT) {
+ /*
+ * Allow a directio write to fall back to a buffered
+ * write *only* in the case that we're doing a reflink
+ * CoW. In all other directio scenarios we do not
+ * allow an operation to fall back to buffered mode.
+ */
ret = xfs_file_dio_aio_write(iocb, from);
- else
+ if (ret == -EREMCHG)
+ goto buffered;
+ } else {
+buffered:
ret = xfs_file_buffered_aio_write(iocb, from);
+ }
if (ret > 0) {
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
__xfs_reflink_reserve_cow(
struct xfs_inode *ip,
xfs_fileoff_t *offset_fsb,
- xfs_fileoff_t end_fsb)
+ xfs_fileoff_t end_fsb,
+ bool *skipped)
{
struct xfs_bmbt_irec got, prev, imap;
xfs_fileoff_t orig_end_fsb;
end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
/* Not shared? Just report the (potentially capped) extent. */
- if (!shared)
+ if (!shared) {
+ *skipped = true;
goto done;
+ }
/*
* Fork all the shared blocks from our write offset until the end of
* the extent.
*/
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, end_fsb;
+ bool skipped = false;
int error;
trace_xfs_reflink_reserve_cow_range(ip, offset, count);
xfs_ilock(ip, XFS_ILOCK_EXCL);
while (offset_fsb < end_fsb) {
- error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb);
+ error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
+ &skipped);
if (error) {
trace_xfs_reflink_reserve_cow_range_error(ip, error,
_RET_IP_);
return error;
}
+/* Allocate blocks for a single CoW reservation, advancing *offset_fsb past it. */
+static int
+__xfs_reflink_allocate_cow(
+ struct xfs_inode *ip,
+ xfs_fileoff_t *offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec imap;
+ struct xfs_defer_ops dfops;
+ struct xfs_trans *tp;
+ xfs_fsblock_t first_block;
+ xfs_fileoff_t next_fsb;
+ int nimaps = 1, error;
+ bool skipped = false;
+
+ xfs_defer_init(&dfops, &first_block);
+
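+ /* Reserve a transaction in case we have to allocate real blocks for the reservation. */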
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0,
+ XFS_TRANS_RESERVE, &tp);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
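+ /* Make sure a delalloc reservation exists in the CoW fork for the next extent. */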
+ next_fsb = *offset_fsb;
+ error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+ if (error)
+ goto out_trans_cancel;
+
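+ /* Not shared, so there is no CoW reservation to allocate; skip past this extent. */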
+ if (skipped) {
+ *offset_fsb = next_fsb;
+ goto out_trans_cancel;
+ }
+
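+ /* Convert the delalloc reservation in the CoW fork to real blocks. */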
+ xfs_trans_ijoin(tp, ip, 0);
+ error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+ XFS_BMAPI_COWFORK, &first_block,
+ XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
+ &imap, &nimaps, &dfops);
+ if (error)
+ goto out_trans_cancel;
+
+ /* We might not have been able to map the whole delalloc extent */
+ *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
+
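+ /* Finish the deferred ops queued by the allocation, then commit. */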
+ error = xfs_defer_finish(&tp, &dfops, NULL);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_trans_commit(tp);
+
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+out_trans_cancel:
+ xfs_defer_cancel(&dfops);
+ xfs_trans_cancel(tp);
+ goto out_unlock;
+}
+
+/* Allocate all CoW reservations covering a part of a file. */
+int
+xfs_reflink_allocate_cow_range(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+ int error;
+
+ ASSERT(xfs_is_reflink_inode(ip));
+
+ trace_xfs_reflink_allocate_cow_range(ip, offset, count);
+
+ /*
+ * Make sure that the dquots are there.
+ */
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
+ return error;
+
+ while (offset_fsb < end_fsb) {
+ error = __xfs_reflink_allocate_cow(ip, &offset_fsb, end_fsb);
+ if (error) {
+ trace_xfs_reflink_allocate_cow_range_error(ip, error,
+ _RET_IP_);
+ break;
+ }
+ }
+
+ return error;
+}
+
/*
* Find the CoW reservation (and whether or not it needs block allocation)
* for a given byte offset of a file.
extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
+ xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
struct xfs_bmbt_irec *imap, bool *need_alloc);
extern int xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_allocate_cow_extent);
DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);