/* OBD_MD_FLRMTRGETFACL (0x0008000000000000ULL) lfs rgetfacl, obsolete */
#define OBD_MD_FLDATAVERSION (0x0010000000000000ULL) /* iversion sum */
-#define OBD_MD_FLRELEASED (0x0020000000000000ULL) /* file released */
+#define OBD_MD_CLOSE_INTENT_EXECED (0x0020000000000000ULL) /* close intent
+ * executed
+ */
#define OBD_MD_DEFAULT_MEA (0x0040000000000000ULL) /* default MEA */
MDS_OWNEROVERRIDE = 1 << 11,
MDS_HSM_RELEASE = 1 << 12,
MDS_RENAME_MIGRATE = BIT(13),
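+	/* close intent: swap layouts with another file */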
+ MDS_CLOSE_LAYOUT_SWAP = BIT(14),
};
/* instance of mdt_reint_rec */
#define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
#define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
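+/* Swap layouts as part of a close; the caller must hold a file lease,
+ * see ll_swap_layouts_close()
+ */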
+#define SWAP_LAYOUTS_CLOSE BIT(4)
/* Swap XATTR_NAME_HSM as well, only on the MDT so far */
#define SWAP_LAYOUTS_MDS_HSM (1 << 31)
*/
extern struct req_format RQF_MDS_GETATTR_NAME;
extern struct req_format RQF_MDS_CLOSE;
-extern struct req_format RQF_MDS_RELEASE_CLOSE;
+extern struct req_format RQF_MDS_INTENT_CLOSE;
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
extern struct req_format RQF_MDS_GET_INFO;
0, 0, LUSTRE_OPC_ANY, NULL);
}
+/**
+ * Perform a close, possibly with a bias.
+ * The meaning of \a data depends on the value of \a bias.
+ *
+ * If \a bias is MDS_HSM_RELEASE then \a data is a pointer to the data version.
+ * If \a bias is MDS_CLOSE_LAYOUT_SWAP then \a data is a pointer to the inode to
+ * swap layouts with.
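+ *
+ * With a zero \a bias and a NULL \a data this is a plain close; passing
+ * \a data without a matching \a bias (or vice versa) trips the LASSERTs
+ * below.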
+ */
static int ll_close_inode_openhandle(struct obd_export *md_exp,
- struct inode *inode,
struct obd_client_handle *och,
- const __u64 *data_version)
+ struct inode *inode,
+ enum mds_op_bias bias,
+ void *data)
{
struct obd_export *exp = ll_i2mdexp(inode);
struct md_op_data *op_data;
}
ll_prepare_close(inode, op_data, och);
- if (data_version) {
- /* Pass in data_version implies release. */
+ switch (bias) {
+ case MDS_CLOSE_LAYOUT_SWAP:
+ LASSERT(data);
+ op_data->op_bias |= MDS_CLOSE_LAYOUT_SWAP;
+ op_data->op_data_version = 0;
+ op_data->op_lease_handle = och->och_lease_handle;
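+		/* FID of the file to swap layouts with, sent as cd_fid */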
+ op_data->op_fid2 = *ll_inode2fid(data);
+ break;
+
+ case MDS_HSM_RELEASE:
+ LASSERT(data);
op_data->op_bias |= MDS_HSM_RELEASE;
- op_data->op_data_version = *data_version;
+ op_data->op_data_version = *(__u64 *)data;
op_data->op_lease_handle = och->och_lease_handle;
op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+ break;
+
+ default:
+ LASSERT(!data);
+ break;
}
rc = md_close(md_exp, op_data, och->och_mod, &req);
spin_unlock(&lli->lli_lock);
}
- if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
+ if (op_data->op_bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP) &&
+ !rc) {
struct mdt_body *body;
body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
- if (!(body->mbo_valid & OBD_MD_FLRELEASED))
+ if (!(body->mbo_valid & OBD_MD_CLOSE_INTENT_EXECED))
rc = -EBUSY;
}
* be closed.
*/
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
+ och, inode, 0, NULL);
}
return rc;
}
if (fd->fd_och) {
- rc = ll_close_inode_openhandle(md_exp, inode, fd->fd_och, NULL);
+ rc = ll_close_inode_openhandle(md_exp, fd->fd_och, inode, 0,
+ NULL);
fd->fd_och = NULL;
goto out;
}
it.it_lock_mode = 0;
och->och_lease_handle.cookie = 0ULL;
}
- rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, inode, och, NULL);
+ rc2 = ll_close_inode_openhandle(sbi->ll_md_exp, och, inode, 0, NULL);
if (rc2 < 0)
CERROR("%s: error closing file "DFID": %d\n",
ll_get_fsname(inode->i_sb, NULL, 0),
return ERR_PTR(rc);
}
+/**
+ * Check whether a layout swap can be done between two inodes.
+ *
+ * \param[in] inode1 First inode to check
+ * \param[in] inode2 Second inode to check
+ *
+ * \retval 0 on success; a layout swap can be performed between the two inodes
+ * \retval negative error code if requirements are not met
+ */
+static int ll_check_swap_layouts_validity(struct inode *inode1,
+ struct inode *inode2)
+{
+ if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
+ return -EINVAL;
+
+ if (inode_permission(inode1, MAY_WRITE) ||
+ inode_permission(inode2, MAY_WRITE))
+ return -EPERM;
+
+ if (inode1->i_sb != inode2->i_sb)
+ return -EXDEV;
+
+ return 0;
+}
+
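+/**
+ * Perform a biased close of \a och, swapping layouts between \a inode and
+ * \a inode2 as part of the close.
+ *
+ * Reached from the swap-layouts ioctl when SWAP_LAYOUTS_CLOSE is set and the
+ * caller holds a lease (fd->fd_lease_och). \a och is always consumed: it is
+ * freed either by ll_close_inode_openhandle() or by the kfree() below.
+ */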
+static int ll_swap_layouts_close(struct obd_client_handle *och,
+ struct inode *inode, struct inode *inode2)
+{
+ const struct lu_fid *fid1 = ll_inode2fid(inode);
+ const struct lu_fid *fid2;
+ int rc;
+
+ CDEBUG(D_INODE, "%s: biased close of file " DFID "\n",
+ ll_get_fsname(inode->i_sb, NULL, 0), PFID(fid1));
+
+ rc = ll_check_swap_layouts_validity(inode, inode2);
+ if (rc < 0)
+ goto out_free_och;
+
+ /* We now know that inode2 is a lustre inode */
+ fid2 = ll_inode2fid(inode2);
+
+ rc = lu_fid_cmp(fid1, fid2);
+ if (!rc) {
+ rc = -EINVAL;
+ goto out_free_och;
+ }
+
+ /*
+ * Close the file and swap layouts between inode & inode2.
+	 * NB: the lease lock handle is released in mdc_intent_close_pack()
+ * because we still need it to pack l_remote_handle to MDT.
+ */
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
+ MDS_CLOSE_LAYOUT_SWAP, inode2);
+
+ och = NULL; /* freed in ll_close_inode_openhandle() */
+
+out_free_och:
+ kfree(och);
+ return rc;
+}
+
/**
* Release lease and close the file.
* It will check if the lease has ever broken.
*lease_broken = cancelled;
return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
+ och, inode, 0, NULL);
}
int ll_merge_attr(const struct lu_env *env, struct inode *inode)
range_locked = true;
}
- down_read(&lli->lli_trunc_sem);
ll_cl_add(file, env, io);
rc = cl_io_loop(env, io);
ll_cl_remove(file, env);
- up_read(&lli->lli_trunc_sem);
if (range_locked) {
CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
range.rl_node.in_extent.start,
ll_och_fill(ll_i2sbi(inode)->ll_md_exp, it, och);
rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
- inode, och, NULL);
+ och, inode, 0, NULL);
out:
/* this one is in place of ll_file_open */
if (it_disposition(it, DISP_ENQ_OPEN_REF)) {
* NB: lease lock handle is released in mdc_hsm_release_pack() because
* we still need it to pack l_remote_handle to MDT.
*/
- rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
- &data_version);
+ rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, och, inode,
+ MDS_HSM_RELEASE, &data_version);
och = NULL;
out:
}
struct ll_swap_stack {
- struct iattr ia1, ia2;
- __u64 dv1, dv2;
- struct inode *inode1, *inode2;
- bool check_dv1, check_dv2;
+ u64 dv1;
+ u64 dv2;
+ struct inode *inode1;
+ struct inode *inode2;
+ bool check_dv1;
+ bool check_dv2;
};
static int ll_swap_layouts(struct file *file1, struct file *file2,
llss->inode1 = file_inode(file1);
llss->inode2 = file_inode(file2);
- if (!S_ISREG(llss->inode2->i_mode)) {
- rc = -EINVAL;
- goto free;
- }
-
- if (inode_permission(llss->inode1, MAY_WRITE) ||
- inode_permission(llss->inode2, MAY_WRITE)) {
- rc = -EPERM;
- goto free;
- }
-
- if (llss->inode2->i_sb != llss->inode1->i_sb) {
- rc = -EXDEV;
+ rc = ll_check_swap_layouts_validity(llss->inode1, llss->inode2);
+ if (rc < 0)
goto free;
- }
/* we use 2 bool because it is easier to swap than 2 bits */
if (lsl->sl_flags & SWAP_LAYOUTS_CHECK_DV1)
llss->dv2 = lsl->sl_dv2;
rc = lu_fid_cmp(ll_inode2fid(llss->inode1), ll_inode2fid(llss->inode2));
- if (rc == 0) /* same file, done! */ {
- rc = 0;
+ if (!rc) /* same file, done! */
goto free;
- }
if (rc < 0) { /* sequentialize it */
swap(llss->inode1, llss->inode2);
}
}
- /* to be able to restore mtime and atime after swap
- * we need to first save them
- */
- if (lsl->sl_flags &
- (SWAP_LAYOUTS_KEEP_MTIME | SWAP_LAYOUTS_KEEP_ATIME)) {
- llss->ia1.ia_mtime = llss->inode1->i_mtime;
- llss->ia1.ia_atime = llss->inode1->i_atime;
- llss->ia1.ia_valid = ATTR_MTIME | ATTR_ATIME;
- llss->ia2.ia_mtime = llss->inode2->i_mtime;
- llss->ia2.ia_atime = llss->inode2->i_atime;
- llss->ia2.ia_valid = ATTR_MTIME | ATTR_ATIME;
- }
-
/* ultimate check, before swapping the layouts we check if
* dataversion has changed (if requested)
*/
ll_put_grouplock(llss->inode1, file1, gid);
}
- /* rc can be set from obd_iocontrol() or from a GOTO(putgl, ...) */
- if (rc != 0)
- goto free;
-
- /* clear useless flags */
- if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_MTIME)) {
- llss->ia1.ia_valid &= ~ATTR_MTIME;
- llss->ia2.ia_valid &= ~ATTR_MTIME;
- }
-
- if (!(lsl->sl_flags & SWAP_LAYOUTS_KEEP_ATIME)) {
- llss->ia1.ia_valid &= ~ATTR_ATIME;
- llss->ia2.ia_valid &= ~ATTR_ATIME;
- }
-
- /* update time if requested */
- rc = 0;
- if (llss->ia2.ia_valid != 0) {
- inode_lock(llss->inode1);
- rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
- inode_unlock(llss->inode1);
- }
-
- if (llss->ia1.ia_valid != 0) {
- int rc1;
-
- inode_lock(llss->inode2);
- rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
- inode_unlock(llss->inode2);
- if (rc == 0)
- rc = rc1;
- }
-
free:
kfree(llss);
sizeof(struct lustre_swap_layouts)))
return -EFAULT;
- if ((file->f_flags & O_ACCMODE) == 0) /* O_RDONLY */
+ if ((file->f_flags & O_ACCMODE) == O_RDONLY)
return -EPERM;
file2 = fget(lsl.sl_fd);
if (!file2)
return -EBADF;
- rc = -EPERM;
- if ((file2->f_flags & O_ACCMODE) != 0) /* O_WRONLY or O_RDWR */
+	/* file2 must be opened for write (O_WRONLY or O_RDWR) */
+ if ((file2->f_flags & O_ACCMODE) == O_RDONLY) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ if (lsl.sl_flags & SWAP_LAYOUTS_CLOSE) {
+ struct obd_client_handle *och = NULL;
+ struct ll_inode_info *lli;
+ struct inode *inode2;
+
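+		/* SWAP_LAYOUTS_CLOSE must not be combined with other flags */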
+ if (lsl.sl_flags != SWAP_LAYOUTS_CLOSE) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+ lli = ll_i2info(inode);
+ mutex_lock(&lli->lli_och_mutex);
+ if (fd->fd_lease_och) {
+ och = fd->fd_lease_och;
+ fd->fd_lease_och = NULL;
+ }
+ mutex_unlock(&lli->lli_och_mutex);
+ if (!och) {
+ rc = -ENOLCK;
+ goto out;
+ }
+ inode2 = file_inode(file2);
+ rc = ll_swap_layouts_close(och, inode, inode2);
+ } else {
rc = ll_swap_layouts(file, file2, &lsl);
+ }
+out:
fput(file2);
return rc;
}
* setting times to past, but it is necessary due to possible
* time de-synchronization between MDT inode and OST objects
*/
- if (attr->ia_valid & ATTR_SIZE)
- down_write(&lli->lli_trunc_sem);
rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, attr, 0);
- if (attr->ia_valid & ATTR_SIZE)
- up_write(&lli->lli_trunc_sem);
}
out:
if (op_data)
return result;
}
-static int vvp_io_setattr_trunc(const struct lu_env *env,
- const struct cl_io_slice *ios,
- struct inode *inode, loff_t size)
-{
- inode_dio_wait(inode);
- return 0;
-}
-
static int vvp_io_setattr_time(const struct lu_env *env,
const struct cl_io_slice *ios)
{
{
struct cl_io *io = ios->cis_io;
struct inode *inode = vvp_object_inode(io->ci_obj);
- int result = 0;
+ struct ll_inode_info *lli = ll_i2info(inode);
- inode_lock(inode);
- if (cl_io_is_trunc(io))
- result = vvp_io_setattr_trunc(env, ios, inode,
- io->u.ci_setattr.sa_attr.lvb_size);
- if (!result && io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
- result = vvp_io_setattr_time(env, ios);
- return result;
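+	/*
+	 * lli_trunc_sem is taken for write so that buffered reads, writes and
+	 * page faults, which hold it for read, drain before the size changes;
+	 * it is dropped again in vvp_io_setattr_end().
+	 */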
+ if (cl_io_is_trunc(io)) {
+ down_write(&lli->lli_trunc_sem);
+ inode_lock(inode);
+ inode_dio_wait(inode);
+ } else {
+ inode_lock(inode);
+ }
+
+ if (io->u.ci_setattr.sa_valid & TIMES_SET_FLAGS)
+ return vvp_io_setattr_time(env, ios);
+
+ return 0;
}
static void vvp_io_setattr_end(const struct lu_env *env,
{
struct cl_io *io = ios->cis_io;
struct inode *inode = vvp_object_inode(io->ci_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
- if (cl_io_is_trunc(io))
+ if (cl_io_is_trunc(io)) {
/* Truncate in memory pages - they must be clean pages
* because osc has already notified to destroy osc_extents.
*/
vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
-
- inode_unlock(inode);
+ inode_unlock(inode);
+ up_write(&lli->lli_trunc_sem);
+ } else {
+ inode_unlock(inode);
+ }
}
static void vvp_io_setattr_fini(const struct lu_env *env,
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct file *file = vio->vui_fd->fd_file;
int result;
CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
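+	/* exclude a concurrent truncate; dropped in vvp_io_rw_end() */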
+ down_read(&lli->lli_trunc_sem);
+
if (!can_populate_pages(env, io, inode))
return 0;
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
ssize_t result = 0;
loff_t pos = io->u.ci_wr.wr.crw_pos;
size_t cnt = io->u.ci_wr.wr.crw_count;
+ down_read(&lli->lli_trunc_sem);
+
if (!can_populate_pages(env, io, inode))
return 0;
return result;
}
+static void vvp_io_rw_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ up_read(&lli->lli_trunc_sem);
+}
+
static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
{
struct vm_fault *vmf = cfio->ft_vmf;
struct cl_io *io = ios->cis_io;
struct cl_object *obj = io->ci_obj;
struct inode *inode = vvp_object_inode(obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
struct cl_fault_io *fio = &io->u.ci_fault;
struct vvp_fault_io *cfio = &vio->u.fault;
loff_t offset;
" changed while waiting for the page fault lock\n",
PFID(lu_object_fid(&obj->co_lu)));
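+	/* block a concurrent truncate; dropped in vvp_io_fault_end() */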
+ down_read(&lli->lli_trunc_sem);
+
/* offset of the last byte on the page */
offset = cl_offset(obj, fio->ft_index + 1) - 1;
LASSERT(cl_index(obj, offset) == fio->ft_index);
return result;
}
+static void vvp_io_fault_end(const struct lu_env *env,
+ const struct cl_io_slice *ios)
+{
+ struct inode *inode = vvp_object_inode(ios->cis_obj);
+ struct ll_inode_info *lli = ll_i2info(inode);
+
+ CLOBINVRNT(env, ios->cis_io->ci_obj,
+ vvp_object_invariant(ios->cis_io->ci_obj));
+ up_read(&lli->lli_trunc_sem);
+}
+
static int vvp_io_fsync_start(const struct lu_env *env,
const struct cl_io_slice *ios)
{
return result;
}
-static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
-{
- CLOBINVRNT(env, ios->cis_io->ci_obj,
- vvp_object_invariant(ios->cis_io->ci_obj));
-}
-
static const struct cl_io_operations vvp_io_ops = {
.op = {
[CIT_READ] = {
.cio_fini = vvp_io_fini,
.cio_lock = vvp_io_read_lock,
.cio_start = vvp_io_read_start,
+ .cio_end = vvp_io_rw_end,
.cio_advance = vvp_io_advance,
},
[CIT_WRITE] = {
.cio_iter_fini = vvp_io_write_iter_fini,
.cio_lock = vvp_io_write_lock,
.cio_start = vvp_io_write_start,
+ .cio_end = vvp_io_rw_end,
.cio_advance = vvp_io_advance,
},
[CIT_SETATTR] = {
.cio_iter_init = vvp_io_fault_iter_init,
.cio_lock = vvp_io_fault_lock,
.cio_start = vvp_io_fault_start,
- .cio_end = vvp_io_end,
+ .cio_end = vvp_io_fault_end,
},
[CIT_FSYNC] = {
.cio_start = vvp_io_fsync_start,
op_data->op_namelen);
}
-static void mdc_hsm_release_pack(struct ptlrpc_request *req,
- struct md_op_data *op_data)
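+/*
+ * Pack the close intent data (lease handle, data version, op_fid2) for
+ * HSM release, layout-swap and migrate closes, and cancel the local
+ * lease lock.
+ */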
+static void mdc_intent_close_pack(struct ptlrpc_request *req,
+ struct md_op_data *op_data)
{
- if (op_data->op_bias & MDS_HSM_RELEASE) {
- struct close_data *data;
- struct ldlm_lock *lock;
+ enum mds_op_bias bias = op_data->op_bias;
+ struct close_data *data;
+ struct ldlm_lock *lock;
- data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
+ if (!(bias & (MDS_HSM_RELEASE | MDS_CLOSE_LAYOUT_SWAP |
+ MDS_RENAME_MIGRATE)))
+ return;
- lock = ldlm_handle2lock(&op_data->op_lease_handle);
- if (lock) {
- data->cd_handle = lock->l_remote_handle;
- LDLM_LOCK_PUT(lock);
- }
- ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
+ data = req_capsule_client_get(&req->rq_pill, &RMF_CLOSE_DATA);
+ LASSERT(data);
- data->cd_data_version = op_data->op_data_version;
- data->cd_fid = op_data->op_fid2;
+ lock = ldlm_handle2lock(&op_data->op_lease_handle);
+ if (lock) {
+ data->cd_handle = lock->l_remote_handle;
+ LDLM_LOCK_PUT(lock);
}
+ ldlm_cli_cancel(&op_data->op_lease_handle, LCF_LOCAL);
+
+ data->cd_data_version = op_data->op_data_version;
+ data->cd_fid = op_data->op_fid2;
}
void mdc_close_pack(struct ptlrpc_request *req, struct md_op_data *op_data)
rec->sa_valid &= ~MDS_ATTR_ATIME;
mdc_ioepoch_pack(epoch, op_data);
- mdc_hsm_release_pack(req, op_data);
+ mdc_intent_close_pack(req, op_data);
}
int rc;
int saved_rc = 0;
- req_fmt = &RQF_MDS_CLOSE;
if (op_data->op_bias & MDS_HSM_RELEASE) {
- req_fmt = &RQF_MDS_RELEASE_CLOSE;
+ req_fmt = &RQF_MDS_INTENT_CLOSE;
/* allocate a FID for volatile file */
rc = mdc_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
/* save the errcode and proceed to close */
saved_rc = rc;
}
+ } else if (op_data->op_bias & MDS_CLOSE_LAYOUT_SWAP) {
+ req_fmt = &RQF_MDS_INTENT_CLOSE;
+ } else {
+ req_fmt = &RQF_MDS_CLOSE;
}
*request = NULL;
&RMF_CAPA1
};
-static const struct req_msg_field *mdt_release_close_client[] = {
+static const struct req_msg_field *mdt_intent_close_client[] = {
&RMF_PTLRPC_BODY,
&RMF_MDT_EPOCH,
&RMF_REC_REINT,
&RQF_MDS_GETXATTR,
&RQF_MDS_SYNC,
&RQF_MDS_CLOSE,
- &RQF_MDS_RELEASE_CLOSE,
+ &RQF_MDS_INTENT_CLOSE,
&RQF_MDS_READPAGE,
&RQF_MDS_WRITEPAGE,
&RQF_MDS_REINT,
mdt_close_client, mds_last_unlink_server);
EXPORT_SYMBOL(RQF_MDS_CLOSE);
-struct req_format RQF_MDS_RELEASE_CLOSE =
+struct req_format RQF_MDS_INTENT_CLOSE =
DEFINE_REQ_FMT0("MDS_CLOSE",
- mdt_release_close_client, mds_last_unlink_server);
-EXPORT_SYMBOL(RQF_MDS_RELEASE_CLOSE);
+ mdt_intent_close_client, mds_last_unlink_server);
+EXPORT_SYMBOL(RQF_MDS_INTENT_CLOSE);
struct req_format RQF_MDS_READPAGE =
DEFINE_REQ_FMT0("MDS_READPAGE",