From c1b66fccf9866fe5938be1fea12643a45bef7640 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Tue, 16 Aug 2016 16:19:16 -0400 Subject: [PATCH] staging: lustre: fid: do open-by-fid by default Currently client open-by-fid often packs name into the request, but the name may be invalid, eg. NFS export, and even if it's valid, it may cause inconsistency because this operation is done on this fid, which is globally unique, but name not. Since open-by-fid doesn't pack name, for striped dir we can't know parent stripe fid on client, so we set parent fid the same as child fid, and MDT has to find its parent fid from linkea (this is already supported by MDT). M_CHECK_STALE becomes obsolete. Unset MDS_OPEN_FL_INTERNAL from open syscall flags, because these flags are internally used, and should not be set from user space. It's not necessary to store parent fid in lli_pfid, because MDT can get it's parent fid from linkea, and now that DNE stripe directory stores master inode fid in lli_pfid, stop storing parent fid to avoid conflict. Signed-off-by: Lai Siyao Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3544 Reviewed-on: http://review.whamcloud.com/7476 Reviewed-on: http://review.whamcloud.com/10692 Reviewed-by: Fan Yong Reviewed-by: Nathaniel Clark Reviewed-by: wangdi Reviewed-by: John L. Hammond Reviewed-by: Oleg Drokin Signed-off-by: James Simmons Signed-off-by: Greg Kroah-Hartman --- .../lustre/lustre/include/lustre/lustre_idl.h | 5 ++ .../lustre/lustre/include/lustre_lite.h | 1 - .../lustre/lustre/include/lustre_mds.h | 3 - drivers/staging/lustre/lustre/llite/file.c | 71 ++++++++----------- .../lustre/lustre/llite/llite_internal.h | 4 +- .../staging/lustre/lustre/llite/llite_lib.c | 17 +---- .../staging/lustre/lustre/llite/llite_nfs.c | 14 ++-- drivers/staging/lustre/lustre/llite/namei.c | 1 + .../staging/lustre/lustre/lmv/lmv_intent.c | 41 +++++------ .../staging/lustre/lustre/mdc/mdc_internal.h | 1 - drivers/staging/lustre/lustre/mdc/mdc_lib.c | 5 +- drivers/staging/lustre/lustre/mdc/mdc_locks.c | 21 ------ .../lustre/lustre/obdclass/lprocfs_status.c | 2 +- 13 files changed, 71 insertions(+), 115 deletions(-) diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index 400ab3cd5877..a9661c0c6aa1 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -2252,6 +2252,11 @@ void lustre_swab_mdt_rec_setattr(struct mdt_rec_setattr *sa); */ #define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */ +#define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \ + MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \ + MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \ + MDS_OPEN_RELEASE) + enum mds_op_bias { MDS_CHECK_SPLIT = 1 << 0, MDS_CROSS_REF = 1 << 1, diff --git a/drivers/staging/lustre/lustre/include/lustre_lite.h b/drivers/staging/lustre/lustre/include/lustre_lite.h index b16897702559..a3d757348ec6 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lite.h +++ b/drivers/staging/lustre/lustre/include/lustre_lite.h @@ -42,7 +42,6 @@ #include "obd_class.h" #include "lustre_net.h" -#include "lustre_mds.h" #include "lustre_ha.h" /* 4UL * 1024 * 1024 */ diff --git a/drivers/staging/lustre/lustre/include/lustre_mds.h b/drivers/staging/lustre/lustre/include/lustre_mds.h index 4104bd9bd5c4..23a7e4f78e9a 100644 --- a/drivers/staging/lustre/lustre/include/lustre_mds.h +++ b/drivers/staging/lustre/lustre/include/lustre_mds.h @@ -58,9 +58,6 @@ struct mds_group_info { #define MDD_OBD_NAME "mdd_obd" #define MDD_OBD_UUID "mdd_obd_uuid" -/* these are local flags, used only on the client, private */ -#define M_CHECK_STALE 0200000000 - /** @} mds */ #endif diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 563cdf65324e..015b0ab23bbd 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -379,53 +379,35 @@ int ll_file_release(struct inode *inode, struct file *file) return rc; } -static int ll_intent_file_open(struct dentry *dentry, void *lmm, - int lmmsize, struct lookup_intent *itp) +static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize, + struct lookup_intent *itp) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(de); struct ll_sb_info *sbi = ll_i2sbi(inode); - struct dentry *parent = dentry->d_parent; - const char *name = dentry->d_name.name; - const int len = dentry->d_name.len; + struct dentry *parent = de->d_parent; + const char *name = NULL; struct md_op_data *op_data; struct ptlrpc_request *req; - __u32 opc = LUSTRE_OPC_ANY; - int rc; + int len = 0, rc; - /* Usually we come here only for NFSD, and we want open lock. */ - /* We can also get here if there was cached open handle in revalidate_it - * but it disappeared while we were getting from there to ll_file_open. - * But this means this file was closed and immediately opened which - * makes a good candidate for using OPEN lock - */ - /* If lmmsize & lmm are not 0, we are just setting stripe info - * parameters. No need for the open lock + LASSERT(parent); + LASSERT(itp->it_flags & MDS_OPEN_BY_FID); + + /* + * if server supports open-by-fid, or file name is invalid, don't pack + * name in open request */ - if (!lmm && lmmsize == 0) { - struct ll_dentry_data *ldd = ll_d2d(dentry); - /* - * If we came via ll_iget_for_nfs, then we need to request - * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry); - * - * NB: when ldd is NULL, it must have come via normal - * lookup path only, since ll_iget_for_nfs always calls - * ll_d_init(). - */ - if (ldd && ldd->lld_nfs_dentry) { - ldd->lld_nfs_dentry = 0; - itp->it_flags |= MDS_OPEN_LOCK; - } - if (itp->it_flags & FMODE_WRITE) - opc = LUSTRE_OPC_CREATE; + if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) && + lu_name_is_valid_2(de->d_name.name, de->d_name.len)) { + name = de->d_name.name; + len = de->d_name.len; } - op_data = ll_prep_md_op_data(NULL, d_inode(parent), - inode, name, len, - O_RDWR, opc, NULL); + op_data = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len, + O_RDWR, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) return PTR_ERR(op_data); - itp->it_flags |= MDS_OPEN_BY_FID; rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp, 0 /*unused */, &req, ll_md_blocking_ast, 0); ll_finish_md_op_data(op_data); @@ -655,9 +637,19 @@ restart: * result in a deadlock */ mutex_unlock(&lli->lli_och_mutex); - it->it_create_mode |= M_CHECK_STALE; + /* + * Normally called under two situations: + * 1. NFS export. + * 2. revalidate with IT_OPEN (revalidate doesn't + * execute this intent any more). + * + * Always fetch MDS_OPEN_LOCK if this is not setstripe. + * + * Always specify MDS_OPEN_BY_FID because we don't want + * to get file with different fid. + */ + it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID; rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it); - it->it_create_mode &= ~M_CHECK_STALE; if (rc) goto out_openerr; @@ -1399,6 +1391,7 @@ int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry, } ll_inode_size_lock(inode); + oit.it_flags |= MDS_OPEN_BY_FID; rc = ll_intent_file_open(dentry, lum, lum_size, &oit); if (rc) goto out_unlock; @@ -3066,7 +3059,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) if (IS_ERR(op_data)) return PTR_ERR(op_data); - oit.it_create_mode |= M_CHECK_STALE; rc = md_intent_lock(exp, op_data, NULL, 0, /* we are not interested in name * based lookup @@ -3074,7 +3066,6 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits) &oit, 0, &req, ll_md_blocking_ast, 0); ll_finish_md_op_data(op_data); - oit.it_create_mode &= ~M_CHECK_STALE; if (rc < 0) { rc = ll_inode_revalidate_fini(inode, rc); goto out; diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 43269aaa85fb..b4e843a941b1 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -118,9 +118,7 @@ struct ll_inode_info { /* identifying fields for both metadata and data stacks. */ struct lu_fid lli_fid; - /* Parent fid for accessing default stripe data on parent directory - * for allocating OST objects after a mknod() and later open-by-FID. - */ + /* master inode fid for stripe directory */ struct lu_fid lli_pfid; struct list_head lli_close_list; diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c index 5f6343acc227..da00fbd30721 100644 --- a/drivers/staging/lustre/lustre/llite/llite_lib.c +++ b/drivers/staging/lustre/lustre/llite/llite_lib.c @@ -189,7 +189,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE | OBD_CONNECT_FLOCK_DEAD | - OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK; + OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | + OBD_CONNECT_OPEN_BY_FID; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -2364,20 +2365,6 @@ struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_mds = 0; op_data->op_data = data; - /* If the file is being opened after mknod() (normally due to NFS) - * try to use the default stripe data from parent directory for - * allocating OST objects. Try to pass the parent FID to MDS. - */ - if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) && - !ll_i2info(i2)->lli_has_smd) { - struct ll_inode_info *lli = ll_i2info(i2); - - spin_lock(&lli->lli_lock); - if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid))) - op_data->op_fid1 = lli->lli_pfid; - spin_unlock(&lli->lli_lock); - } - /* When called by ll_setattr_raw, file is i1. */ if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED) op_data->op_bias |= MDS_DATA_MODIFIED; diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index ac96d897ab50..2b652407c239 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -148,12 +148,18 @@ ll_iget_for_nfs(struct super_block *sb, struct lu_fid *fid, struct lu_fid *paren return ERR_PTR(-ESTALE); } + result = d_obtain_alias(inode); + if (IS_ERR(result)) { + iput(inode); + return result; + } + /** - * It is an anonymous dentry without OST objects created yet. - * We have to find the parent to tell MDS how to init lov objects. + * In case d_obtain_alias() found a disconnected dentry, always update + * lli_pfid to allow later operation (normally open) have parent fid, + * which may be used by MDS to create data. */ - if (S_ISREG(inode->i_mode) && !ll_i2info(inode)->lli_has_smd && - parent && !fid_is_zero(parent)) { + if (parent) { struct ll_inode_info *lli = ll_i2info(inode); spin_lock(&lli->lli_lock); diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c index ac0f44229a7b..ee5a42e5e95d 100644 --- a/drivers/staging/lustre/lustre/llite/namei.c +++ b/drivers/staging/lustre/lustre/llite/namei.c @@ -650,6 +650,7 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry, } it->it_create_mode = (mode & S_IALLUGO) | S_IFREG; it->it_flags = (open_flags & ~O_ACCMODE) | OPEN_FMODE(open_flags); + it->it_flags &= ~MDS_OPEN_FL_INTERNAL; /* Dentry added to dcache tree in ll_lookup_it */ de = ll_lookup_it(dir, dentry, it, lookup_flags); diff --git a/drivers/staging/lustre/lustre/lmv/lmv_intent.c b/drivers/staging/lustre/lustre/lmv/lmv_intent.c index 761ab248f855..cde1d7b6bfc2 100644 --- a/drivers/staging/lustre/lustre/lmv/lmv_intent.c +++ b/drivers/staging/lustre/lustre/lmv/lmv_intent.c @@ -111,10 +111,6 @@ static int lmv_intent_remote(struct obd_export *exp, void *lmm, */ LASSERT(it->it_op & IT_OPEN); op_data->op_fid2 = *parent_fid; - /* Add object FID to op_fid3, in case it needs to check stale - * (M_CHECK_STALE), see mdc_finish_intent_lock - */ - op_data->op_fid3 = body->mbo_fid1; } op_data->op_bias = MDS_CROSS_REF; @@ -313,17 +309,16 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, struct mdt_body *body; int rc; - if (it->it_flags & MDS_OPEN_BY_FID && fid_is_sane(&op_data->op_fid2)) { - if (op_data->op_mea1) { - struct lmv_stripe_md *lsm = op_data->op_mea1; - const struct lmv_oinfo *oinfo; + if (it->it_flags & MDS_OPEN_BY_FID) { + LASSERT(fid_is_sane(&op_data->op_fid2)); - oinfo = lsm_name_to_stripe_info(lsm, op_data->op_name, - op_data->op_namelen); - if (IS_ERR(oinfo)) - return PTR_ERR(oinfo); - op_data->op_fid1 = oinfo->lmo_fid; - } + /* + * for striped directory, we can't know parent stripe fid + * without name, but we can set it to child fid, and MDT + * will obtain it from linkea in open in such case. + */ + if (op_data->op_mea1) + op_data->op_fid1 = op_data->op_fid2; tgt = lmv_find_target(lmv, &op_data->op_fid2); if (IS_ERR(tgt)) @@ -331,6 +326,10 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, op_data->op_mds = tgt->ltd_idx; } else { + LASSERT(fid_is_sane(&op_data->op_fid1)); + LASSERT(fid_is_zero(&op_data->op_fid2)); + LASSERT(op_data->op_name); + tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1); if (IS_ERR(tgt)) return PTR_ERR(tgt); @@ -339,13 +338,11 @@ static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data, /* If it is ready to open the file by FID, do not need * allocate FID at all, otherwise it will confuse MDT */ - if ((it->it_op & IT_CREAT) && - !(it->it_flags & MDS_OPEN_BY_FID)) { + if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) { /* - * For open with IT_CREATE and for IT_CREATE cases allocate new - * fid and setup FLD for it. + * For lookup(IT_CREATE) cases allocate new fid and setup FLD + * for it. */ - op_data->op_fid3 = op_data->op_fid2; rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data); if (rc != 0) return rc; @@ -494,9 +491,9 @@ int lmv_intent_lock(struct obd_export *exp, struct md_op_data *op_data, LASSERT(fid_is_sane(&op_data->op_fid1)); - CDEBUG(D_INODE, "INTENT LOCK '%s' for '%*s' on "DFID"\n", - LL_IT2STR(it), op_data->op_namelen, op_data->op_name, - PFID(&op_data->op_fid1)); + CDEBUG(D_INODE, "INTENT LOCK '%s' for "DFID" '%*s' on "DFID"\n", + LL_IT2STR(it), PFID(&op_data->op_fid2), op_data->op_namelen, + op_data->op_name, PFID(&op_data->op_fid1)); rc = lmv_check_connect(obd); if (rc) diff --git a/drivers/staging/lustre/lustre/mdc/mdc_internal.h b/drivers/staging/lustre/lustre/mdc/mdc_internal.h index 00e8435b938f..1901b9339526 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_internal.h +++ b/drivers/staging/lustre/lustre/mdc/mdc_internal.h @@ -34,7 +34,6 @@ #define _MDC_INTERNAL_H #include "../include/lustre_mdc.h" -#include "../include/lustre_mds.h" void lprocfs_mdc_init_vars(struct lprocfs_static_vars *lvars); diff --git a/drivers/staging/lustre/lustre/mdc/mdc_lib.c b/drivers/staging/lustre/lustre/mdc/mdc_lib.c index 813f923b5785..aa496f3381db 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_lib.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_lib.c @@ -171,10 +171,7 @@ void mdc_create_pack(struct ptlrpc_request *req, struct md_op_data *op_data, static __u64 mds_pack_open_flags(__u64 flags, __u32 mode) { __u64 cr_flags = (flags & (FMODE_READ | FMODE_WRITE | - MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | - MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | - MDS_OPEN_BY_FID | MDS_OPEN_LEASE | - MDS_OPEN_RELEASE)); + MDS_OPEN_FL_INTERNAL)); if (flags & O_CREAT) cr_flags |= MDS_OPEN_CREAT; if (flags & O_EXCL) diff --git a/drivers/staging/lustre/lustre/mdc/mdc_locks.c b/drivers/staging/lustre/lustre/mdc/mdc_locks.c index fab83ddeef65..1c3b78d4dd4c 100644 --- a/drivers/staging/lustre/lustre/mdc/mdc_locks.c +++ b/drivers/staging/lustre/lustre/mdc/mdc_locks.c @@ -922,27 +922,6 @@ static int mdc_finish_intent_lock(struct obd_export *exp, mdt_body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); LASSERT(mdt_body); /* mdc_enqueue checked */ - /* If we were revalidating a fid/name pair, mark the intent in - * case we fail and get called again from lookup - */ - if (fid_is_sane(&op_data->op_fid2) && - it->it_create_mode & M_CHECK_STALE && - it->it_op != IT_GETATTR) { - /* Also: did we find the same inode? */ - /* sever can return one of two fids: - * op_fid2 - new allocated fid - if file is created. - * op_fid3 - existent fid - if file only open. - * op_fid3 is saved in lmv_intent_open - */ - if ((!lu_fid_eq(&op_data->op_fid2, &mdt_body->mbo_fid1)) && - (!lu_fid_eq(&op_data->op_fid3, &mdt_body->mbo_fid1))) { - CDEBUG(D_DENTRY, "Found stale data "DFID"("DFID")/"DFID - "\n", PFID(&op_data->op_fid2), - PFID(&op_data->op_fid2), PFID(&mdt_body->mbo_fid1)); - return -ESTALE; - } - } - rc = it_open_error(DISP_LOOKUP_EXECD, it); if (rc) return rc; diff --git a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c index f42ed17996d7..fbb08517d57a 100644 --- a/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c +++ b/drivers/staging/lustre/lustre/obdclass/lprocfs_status.c @@ -96,7 +96,7 @@ static const char * const obd_connect_names[] = { "pingless", "flock_deadlock", "disp_stripe", - "unknown", + "open_by_fid", "lfsck", "unknown", NULL -- 2.20.1