/* * VBR: save parent version in reply and child version getting by its name. * Version of child is getting and checking during its lookup. If */ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; struct lu_fid *child_fid = &info->mti_tmp_fid1; struct mdt_object *mp; struct mdt_object *mc; struct mdt_lock_handle *parent_lh; struct mdt_lock_handle *child_lh; struct lu_name *lname; int rc; int no_name = 0; ENTRY; DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name); if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) RETURN(err_serious(-ENOENT)); if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1)) RETURN(-EPERM); /* * step 1: Found the parent. */ mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); if (IS_ERR(mp)) { rc = PTR_ERR(mp); GOTO(out, rc); } parent_lh = &info->mti_lh[MDT_LH_PARENT]; lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); if (mdt_object_remote(mp)) { mdt_lock_reg_init(parent_lh, LCK_EX); rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh, parent_lh->mlh_rreg_mode, MDS_INODELOCK_UPDATE); if (rc != ELDLM_OK) GOTO(put_parent, rc); } else { mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, rr->rr_namelen); rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE, MDT_LOCAL_LOCK); if (rc) GOTO(put_parent, rc); rc = mdt_version_get_check_save(info, mp, 0); if (rc) GOTO(unlock_parent, rc); } /* step 2: find & lock the child */ /* lookup child object along with version checking */ fid_zero(child_fid); rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1); if (rc != 0) { /* Name might not be able to find during resend of * remote unlink, considering following case. * dir_A is a remote directory, the name entry of * dir_A is on MDT0, the directory is on MDT1, * * 1. client sends unlink req to MDT1. * 2. MDT1 sends name delete update to MDT0. * 3. name entry is being deleted in MDT0 synchronously. * 4. MDT1 is restarted. * 5. client resends unlink req to MDT1. So it can not * find the name entry on MDT0 anymore. * In this case, MDT1 only needs to destory the local * directory. * */ if (mdt_object_remote(mp) && rc == -ENOENT && !fid_is_zero(rr->rr_fid2) && lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { no_name = 1; *child_fid = *rr->rr_fid2; } else { GOTO(unlock_parent, rc); } } if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid)) GOTO(unlock_parent, rc = -EPERM); mdt_reint_init_ma(info, ma); /* We will lock the child regardless it is local or remote. No harm. */ mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); if (IS_ERR(mc)) GOTO(unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(child_lh, LCK_EX); if (mdt_object_remote(mc)) { struct mdt_body *repbody; if (!fid_is_zero(rr->rr_fid2)) { CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n", mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, PFID(mdt_object_fid(mc))); GOTO(put_child, rc = -ENOENT); } CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n", mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, PFID(mdt_object_fid(mc))); if (!mdt_is_dne_client(req->rq_export)) /* Return -EIO for old client */ GOTO(put_child, rc = -EIO); if (info->mti_spec.sp_rm_entry) { struct lu_ucred *uc = mdt_ucred(info); if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) { CERROR("%s: unlink remote entry is only " "permitted for administrator: rc = %d\n", mdt_obd_name(info->mti_mdt), -EPERM); GOTO(put_child, rc = -EPERM); } ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), NULL, lname, ma, no_name); GOTO(put_child, rc); } /* Revoke the LOOKUP lock of the remote object granted by * this MDT. Since the unlink will happen on another MDT, * it will release the LOOKUP lock right away. Then What * would happen if another client try to grab the LOOKUP * lock at the same time with unlink XXX */ mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP, MDT_CROSS_LOCK); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); LASSERT(repbody != NULL); repbody->fid1 = *mdt_object_fid(mc); repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); GOTO(unlock_child, rc = -EREMOTE); } else if (info->mti_spec.sp_rm_entry) { rc = -EPERM; CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n", mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc); GOTO(put_child, rc); } /* We used to acquire MDS_INODELOCK_FULL here but we can't do * this now because a running HSM restore on the child (unlink * victim) will hold the layout lock. See LU-4002. */ rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE, MDT_CROSS_LOCK); if (rc != 0) GOTO(put_child, rc); mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); /* * Now we can only make sure we need MA_INODE, in mdd layer, will check * whether need MA_LOV and MA_COOKIE. */ ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); mutex_lock(&mc->mot_lov_mutex); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma, no_name); mutex_unlock(&mc->mot_lov_mutex); if (rc == 0 && !lu_object_is_dying(&mc->mot_header)) rc = mdt_attr_get_complex(info, mc, ma); if (rc == 0) mdt_handle_last_unlink(info, mc, ma); if (ma->ma_valid & MA_INODE) { switch (ma->ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req, LPROC_MDT_RMDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: mdt_counter_incr(req, LPROC_MDT_UNLINK); break; default: LASSERTF(0, "bad file type %o unlinking\n", ma->ma_attr.la_mode); } } EXIT; unlock_child: mdt_object_unlock(info, mc, child_lh, rc); put_child: mdt_object_put(info->mti_env, mc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); put_parent: mdt_object_put(info->mti_env, mp); out: return rc; }
/* * VBR: save parent version in reply and child version getting by its name. * Version of child is getting and checking during its lookup. If */ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; struct lu_fid *child_fid = &info->mti_tmp_fid1; struct mdt_object *mp; struct mdt_object *mc; struct mdt_lock_handle *parent_lh; struct mdt_lock_handle *child_lh; struct lu_name *lname; int rc; ENTRY; DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name); if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) RETURN(err_serious(-ENOENT)); /* * step 1: Found the parent. */ mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); if (IS_ERR(mp)) { rc = PTR_ERR(mp); GOTO(out, rc); } if (mdt_object_obf(mp)) GOTO(put_parent, rc = -EPERM); parent_lh = &info->mti_lh[MDT_LH_PARENT]; lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); if (mdt_object_remote(mp)) { mdt_lock_reg_init(parent_lh, LCK_EX); rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh, parent_lh->mlh_rreg_mode, MDS_INODELOCK_UPDATE); if (rc != ELDLM_OK) GOTO(put_parent, rc); } else { mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, rr->rr_namelen); rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE, MDT_LOCAL_LOCK); if (rc) GOTO(put_parent, rc); rc = mdt_version_get_check_save(info, mp, 0); if (rc) GOTO(unlock_parent, rc); } /* step 2: find & lock the child */ /* lookup child object along with version checking */ fid_zero(child_fid); rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1); if (rc != 0) GOTO(unlock_parent, rc); mdt_reint_init_ma(info, ma); /* We will lock the child regardless it is local or remote. No harm. */ mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); if (IS_ERR(mc)) GOTO(unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(child_lh, LCK_EX); if (mdt_object_remote(mc)) { struct mdt_body *repbody; if (!fid_is_zero(rr->rr_fid2)) { CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); GOTO(unlock_parent, rc = -ENOENT); } CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); if (info->mti_spec.sp_rm_entry) { struct lu_ucred *uc = mdt_ucred(info); if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) { CERROR("%s: unlink remote entry is only " "permitted for administrator: rc = %d\n", mdt2obd_dev(info->mti_mdt)->obd_name, -EPERM); GOTO(unlock_parent, rc = -EPERM); } ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), NULL, lname, ma); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } /* Revoke the LOOKUP lock of the remote object granted by * this MDT. Since the unlink will happen on another MDT, * it will release the LOOKUP lock right away. Then What * would happen if another client try to grab the LOOKUP * lock at the same time with unlink XXX */ mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP, MDT_CROSS_LOCK); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); LASSERT(repbody != NULL); repbody->fid1 = *mdt_object_fid(mc); repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); mdt_object_unlock_put(info, mc, child_lh, rc); GOTO(unlock_parent, rc = -EREMOTE); } else if (info->mti_spec.sp_rm_entry) { CERROR("%s: lfs rmdir should not be used on local dir %s\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc = -EPERM); } rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); /* * Now we can only make sure we need MA_INODE, in mdd layer, will check * whether need MA_LOV and MA_COOKIE. */ ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma); if (rc == 0 && !lu_object_is_dying(&mc->mot_header)) rc = mdt_attr_get_complex(info, mc, ma); if (rc == 0) mdt_handle_last_unlink(info, mc, ma); if (ma->ma_valid & MA_INODE) { switch (ma->ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req, LPROC_MDT_RMDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: mdt_counter_incr(req, LPROC_MDT_UNLINK); break; default: LASSERTF(0, "bad file type %o unlinking\n", ma->ma_attr.la_mode); } } EXIT; mdt_object_unlock_put(info, mc, child_lh, rc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); put_parent: mdt_object_put(info->mti_env, mp); out: return rc; }
/* * VBR: save parent version in reply and child version getting by its name. * Version of child is getting and checking during its lookup. If */ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; struct lu_fid *child_fid = &info->mti_tmp_fid1; struct mdt_object *mp; struct mdt_object *mc; struct mdt_lock_handle *parent_lh; struct mdt_lock_handle *child_lh; struct lu_name *lname; int rc; ENTRY; DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name); if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) RETURN(err_serious(-ENOENT)); /* * step 1: lock the parent. Note, this may be child in case of * remote operation denoted by ->mti_cross_ref flag. */ parent_lh = &info->mti_lh[MDT_LH_PARENT]; if (info->mti_cross_ref) { /* * Init reg lock for cross ref case when we need to do only * ref del locally. */ mdt_lock_reg_init(parent_lh, LCK_PW); } else { mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, rr->rr_namelen); } mp = mdt_object_find_lock(info, rr->rr_fid1, parent_lh, MDS_INODELOCK_UPDATE, MDT_OBJ_MUST_EXIST); if (IS_ERR(mp)) { rc = PTR_ERR(mp); /* errors are possible here in cross-ref cases, see below */ if (info->mti_cross_ref) rc = 0; GOTO(out, rc); } rc = mdt_version_get_check_save(info, mp, 0); if (rc) GOTO(out_unlock_parent, rc); mdt_reint_init_ma(info, ma); if (!ma->ma_lmm || !ma->ma_cookie) GOTO(out_unlock_parent, rc = -EINVAL); if (info->mti_cross_ref) { /* * Remote partial operation. It is possible that replay may * happen on parent MDT and this operation will be repeated. * Therefore the object absense is allowed case and nothing * should be done here. */ if (mdt_object_exists(mp) > 0) { mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA); rc = mo_ref_del(info->mti_env, mdt_object_child(mp), ma); if (rc == 0) mdt_handle_last_unlink(info, mp, ma); } else rc = 0; GOTO(out_unlock_parent, rc); } /* step 2: find & lock the child */ lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); /* lookup child object along with version checking */ rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1); if (rc != 0) GOTO(out_unlock_parent, rc); /* We will lock the child regardless it is local or remote. No harm. */ mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid, MDT_OBJ_MUST_EXIST); if (IS_ERR(mc)) GOTO(out_unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(child_lh, LCK_EX); rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mc); GOTO(out_unlock_parent, rc); } mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); /* * Now we can only make sure we need MA_INODE, in mdd layer, will check * whether need MA_LOV and MA_COOKIE. */ ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma); if (rc == 0) mdt_handle_last_unlink(info, mc, ma); if (ma->ma_valid & MA_INODE) { switch (ma->ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req->rq_export, LPROC_MDT_RMDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: mdt_counter_incr(req->rq_export, LPROC_MDT_UNLINK); break; default: LASSERTF(0, "bad file type %o unlinking\n", ma->ma_attr.la_mode); } } EXIT; mdt_object_unlock_put(info, mc, child_lh, rc); out_unlock_parent: mdt_object_unlock_put(info, mp, parent_lh, rc); out: return rc; }