/**
 * Find or create a local file/directory @name under the parent @pfid.
 *
 * When @fid is zero on entry the object is looked up or created through
 * mdd->mdd_los; otherwise @fid is handed to
 * local_file_find_or_create_with_fid() together with mdd->mdd_bottom.
 * On success the FID of the resulting object is stored back into @fid.
 *
 * \param[in]     env   execution environment
 * \param[in]     mdd   MDD device providing mdd_bottom and mdd_los
 * \param[in]     pfid  FID of the parent object, must not be zero
 * \param[in]     name  name of the file/directory under the parent
 * \param[in]     mode  mode passed on to the find-or-create helper
 * \param[in,out] fid   requested FID (or zero) on entry, FID of the
 *                      resulting object on success
 *
 * \retval 0            on success
 * \retval negative     error from dt_locate() or from the find-or-create
 *                      helper; @fid is not updated in that case
 */
int mdd_local_file_create(const struct lu_env *env, struct mdd_device *mdd,
			  const struct lu_fid *pfid, const char *name,
			  __u32 mode, struct lu_fid *fid)
{
	struct dt_object *parent, *dto;
	int rc = 0;
	ENTRY;

	LASSERT(!fid_is_zero(pfid));
	parent = dt_locate(env, mdd->mdd_bottom, pfid);
	if (unlikely(IS_ERR(parent)))
		RETURN(PTR_ERR(parent));

	/* create local file/dir, if @fid is passed then try to use it */
	if (fid_is_zero(fid))
		dto = local_file_find_or_create(env, mdd->mdd_los, parent,
						name, mode);
	else
		dto = local_file_find_or_create_with_fid(env, mdd->mdd_bottom,
							 fid, parent, name,
							 mode);
	if (IS_ERR(dto))
		GOTO(out_put, rc = PTR_ERR(dto));

	*fid = *lu_object_fid(&dto->do_lu);
	/* since stack is not fully set up the local_storage uses own stack
	 * and we should drop its object from cache */
	lu_object_put_nocache(env, &dto->do_lu);
	EXIT;
out_put:
	lu_object_put(env, &parent->do_lu);
	/* propagate the failure recorded above instead of always reporting 0 */
	return rc;
}
/*
 * Allocate new fid on passed client @seq and save it to @fid.
 *
 * Returns 0 when the next FID was taken from the current sequence, 1 when
 * a new sequence was allocated first (so the caller can set up FLD for
 * it), or the non-zero result of seq_client_alloc_seq() on failure, in
 * which case @fid is not written.
 */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	cfs_waitlink_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	cfs_waitlink_init(&link);
	cfs_mutex_lock(&seq->lcs_mutex);

	while (1) {
		seqno_t seqnr;

		/* A sequence is already set and its oid is below the width. */
		if (!fid_is_zero(&seq->lcs_fid) &&
		    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid += 1;
			rc = 0;
			break;
		}

		/*
		 * A non-zero result restarts the loop, so the check above is
		 * evaluated again before anything else is tried.
		 * NOTE(review): presumably this waits for a sequence update
		 * already in progress in another thread - confirm against
		 * seq_fid_alloc_prep().
		 */
		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		if (rc) {
			CERROR("%s: Can't allocate new sequence, "
			       "rc %d\n", seq->lcs_name, rc);
			/* undo the prep step before leaving with the error */
			seq_fid_alloc_fini(seq);
			cfs_mutex_unlock(&seq->lcs_mutex);
			RETURN(rc);
		}

		CDEBUG(D_INFO, "%s: Switch to sequence "
		       "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);

		/* restart the oid counter inside the freshly obtained sequence */
		seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
		seq->lcs_fid.f_seq = seqnr;
		seq->lcs_fid.f_ver = 0;

		/*
		 * Inform caller that sequence switch is performed to allow it
		 * to setup FLD for it.
		 */
		rc = 1;

		seq_fid_alloc_fini(seq);
		break;
	}

	/* copy the result out while the mutex is still held */
	*fid = seq->lcs_fid;
	cfs_mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid));
	RETURN(rc);
}
static int osp_declare_attr_set(const struct lu_env *env, struct dt_object *dt, const struct lu_attr *attr, struct thandle *th) { struct osp_device *d = lu2osp_dev(dt->do_lu.lo_dev); struct osp_object *o = dt2osp_obj(dt); int rc = 0; ENTRY; /* * Usually we don't allow server stack to manipulate size * but there is a special case when striping is created * late, after stripless file got truncated to non-zero. * * In this case we do the following: * * 1) grab id in declare - this can lead to leaked OST objects * but we don't currently have proper mechanism and the only * options we have are to do truncate RPC holding transaction * open (very bad) or to grab id in declare at cost of leaked * OST object in same very rare unfortunate case (just bad) * notice 1.6-2.0 do assignment outside of running transaction * all the time, meaning many more chances for leaked objects. * * 2) send synchronous truncate RPC with just assigned id */ /* there are few places in MDD code still passing NULL * XXX: to be fixed soon */ if (attr == NULL) RETURN(0); if (attr->la_valid & LA_SIZE && attr->la_size > 0 && fid_is_zero(lu_object_fid(&o->opo_obj.do_lu))) { LASSERT(!dt_object_exists(dt)); osp_object_assign_fid(env, d, o); rc = osp_object_truncate(env, dt, attr->la_size); if (rc) RETURN(rc); } if (o->opo_new) { /* no need in logging for new objects being created */ RETURN(0); } if (!(attr->la_valid & (LA_UID | LA_GID))) RETURN(0); /* * track all UID/GID changes via llog */ rc = osp_sync_declare_add(env, o, MDS_SETATTR64_REC, th); RETURN(rc); }
/**
 * Allocate new fid on passed client @seq and save it to @fid.
 *
 * The FID is taken from the current sequence while its oid is below
 * seq->lcs_width; otherwise a new sequence is requested through
 * seq_client_alloc_seq() first.
 *
 * \param[in] env	pointer to the thread context
 * \param[in,out] seq	pointer to the client sequence manager
 * \param[out] fid	to hold the new allocated fid
 *
 * \retval		1 for notify the caller that sequence switch
 *			is performed to allow it to setup FLD for it.
 * \retval		0 for new FID allocated in current sequence.
 * \retval		Negative error number on failure; @fid is not
 *			written in that case.
 */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	/*
	 * Fault injection: push the oid up to the width so the in-sequence
	 * check below fails and a new sequence is requested.
	 */
	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		u64 seqnr;

		/*
		 * NOTE(review): this branch looks like the common case, yet
		 * it is hinted unlikely() - confirm the hint is intended.
		 */
		if (unlikely(!fid_is_zero(&seq->lcs_fid) &&
			     fid_oid(&seq->lcs_fid) < seq->lcs_width)) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid++;
			rc = 0;
			break;
		}

		/* Release seq::lcs_mutex via seq_fid_alloc_prep() to avoid
		 * deadlock during seq_client_alloc_seq(). A non-zero result
		 * restarts the loop so the check above is repeated. */
		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		/* Re-take seq::lcs_mutex via seq_fid_alloc_fini(); on failure
		 * 0 is passed in place of a sequence number. */
		seq_fid_alloc_fini(seq, rc ? 0 : seqnr, false);
		if (rc) {
			CERROR("%s: Can't allocate new sequence: rc = %d\n",
			       seq->lcs_name, rc);
			mutex_unlock(&seq->lcs_mutex);
			RETURN(rc);
		}

		/* tell the caller that a sequence switch happened */
		rc = 1;
		break;
	}

	/* copy the result out while the mutex is still held */
	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name, PFID(fid));
	RETURN(rc);
}
int osp_init_pre_fid(struct osp_device *osp) { struct lu_env env; struct osp_thread_info *osi; struct lu_client_seq *cli_seq; struct lu_fid *last_fid; int rc; ENTRY; LASSERT(osp->opd_pre != NULL); /* Return if last_used fid has been initialized */ if (!fid_is_zero(&osp->opd_last_used_fid)) RETURN(0); rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags); if (rc) { CERROR("%s: init env error: rc = %d\n", osp->opd_obd->obd_name, rc); RETURN(rc); } osi = osp_env_info(&env); last_fid = &osi->osi_fid; fid_zero(last_fid); /* For a freshed fs, it will allocate a new sequence first */ if (osp_is_fid_client(osp) && osp->opd_group != 0) { cli_seq = osp->opd_obd->u.cli.cl_seq; rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq); if (rc != 0) { CERROR("%s: alloc fid error: rc = %d\n", osp->opd_obd->obd_name, rc); GOTO(out, rc); } } else { last_fid->f_seq = fid_idif_seq(0, osp->opd_index); } last_fid->f_oid = 1; last_fid->f_ver = 0; spin_lock(&osp->opd_pre_lock); osp->opd_last_used_fid = *last_fid; osp->opd_pre_used_fid = *last_fid; osp->opd_pre_last_created_fid = *last_fid; spin_unlock(&osp->opd_pre_lock); rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1); if (rc != 0) { CERROR("%s: write fid error: rc = %d\n", osp->opd_obd->obd_name, rc); GOTO(out, rc); } out: lu_env_fini(&env); RETURN(rc); }
static void osp_object_assign_fid(const struct lu_env *env, struct osp_device *d, struct osp_object *o) { struct osp_thread_info *osi = osp_env_info(env); LASSERT(fid_is_zero(lu_object_fid(&o->opo_obj.do_lu))); LASSERT(o->opo_reserved); o->opo_reserved = 0; osp_precreate_get_fid(env, d, &osi->osi_fid); lu_object_assign_fid(env, &o->opo_obj.do_lu, &osi->osi_fid); }
/**
 * asks OST to clean precreate orphans
 * and gets next id for new objects
 *
 * Sends an OST_CREATE request flagged OBD_FL_DELORPHAN carrying the last
 * used FID, then resets the pre-creation window (opd_pre_last_created_fid,
 * opd_pre_used_fid, opd_pre_grow_count) from the FID in the reply.
 *
 * \retval 0		orphans cleaned up, pre-creation window reset
 * \retval -EAGAIN	precreate is no longer running or the device got
 *			disconnected while waiting
 * \retval -ENOMEM	the request could not be allocated
 * \retval -EPROTO	the request or reply carries no OST body
 * \retval other	error from osp_get_lastfid_from_ost(),
 *			ptlrpc_request_pack() or ptlrpc_queue_wait()
 */
static int osp_precreate_cleanup_orphans(struct lu_env *env,
					 struct osp_device *d)
{
	struct osp_thread_info *osi = osp_env_info(env);
	struct lu_fid *last_fid = &osi->osi_fid;
	struct ptlrpc_request *req = NULL;
	struct obd_import *imp;
	struct ost_body *body;
	struct l_wait_info lwi = { 0 };
	int update_status = 0;
	int rc;
	int diff;

	ENTRY;

	/*
	 * wait for local recovery to finish, so we can cleanup orphans
	 * orphans are all objects since "last used" (assigned), but
	 * there might be objects reserved and in some cases they won't
	 * be used. we can't cleanup them till we're sure they won't be
	 * used. also can't we allow new reservations because they may
	 * end up getting orphans being cleaned up below. so we block
	 * new reservations and wait till all reserved objects either
	 * used or released.
	 */
	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 1;
	spin_unlock(&d->opd_pre_lock);
	/*
	 * The locking above makes sure the opd_pre_reserved check below will
	 * catch all osp_precreate_reserve() calls who find
	 * "!opd_pre_recovering".
	 */
	l_wait_event(d->opd_pre_waitq,
		     (!d->opd_pre_reserved && d->opd_recovery_completed) ||
		     !osp_precreate_running(d) || d->opd_got_disconnected,
		     &lwi);
	/* woken up for shutdown or disconnect rather than recovery completion */
	if (!osp_precreate_running(d) || d->opd_got_disconnected)
		GOTO(out, rc = -EAGAIN);

	CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
	       d->opd_obd->obd_name, PFID(&d->opd_last_used_fid));

	*last_fid = d->opd_last_used_fid;
	/* The OSP should already get the valid seq now */
	LASSERT(!fid_is_zero(last_fid));
	if (fid_oid(&d->opd_last_used_fid) < 2) {
		/* lastfid looks strange... ask OST */
		rc = osp_get_lastfid_from_ost(env, d);
		if (rc)
			GOTO(out, rc);
	}

	imp = d->opd_obd->u.cli.cl_import;
	LASSERT(imp);

	req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE);
	if (req == NULL)
		GOTO(out, rc = -ENOMEM);

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
	if (rc) {
		/* freed here, so clear req to keep the out: path from
		 * finishing it a second time */
		ptlrpc_request_free(req);
		req = NULL;
		GOTO(out, rc);
	}

	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	body->oa.o_flags = OBD_FL_DELORPHAN;
	body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;

	/* the request carries the last used FID in its object id */
	fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi);

	ptlrpc_request_set_replen(req);

	/* Don't resend the delorphan req */
	req->rq_no_resend = req->rq_no_delay = 1;

	rc = ptlrpc_queue_wait(req);
	if (rc) {
		/* only an RPC failure is reported through
		 * osp_pre_update_status() at out: */
		update_status = 1;
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	/*
	 * OST provides us with id new pool starts from in body->oa.o_id
	 */
	ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index);

	spin_lock(&d->opd_pre_lock);
	diff = lu_fid_diff(&d->opd_last_used_fid, last_fid);
	if (diff > 0) {
		/* our last used FID is ahead of what the OST reported:
		 * grow by the difference and keep our own value */
		d->opd_pre_grow_count = OST_MIN_PRECREATE + diff;
		d->opd_pre_last_created_fid = d->opd_last_used_fid;
	} else {
		d->opd_pre_grow_count = OST_MIN_PRECREATE;
		d->opd_pre_last_created_fid = *last_fid;
	}
	/*
	 * This empties the pre-creation pool and effectively blocks any new
	 * reservations.
	 */
	LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
		LUSTRE_DATA_SEQ_MAX_WIDTH);
	d->opd_pre_used_fid = d->opd_pre_last_created_fid;
	d->opd_pre_grow_slow = 0;
	spin_unlock(&d->opd_pre_lock);

	CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
	       "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid),
	       PFID(&d->opd_pre_last_created_fid),
	       PFID(&d->opd_last_used_fid));
out:
	if (req)
		ptlrpc_req_finished(req);

	/* leave recovery mode on every exit path */
	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 0;
	spin_unlock(&d->opd_pre_lock);

	/*
	 * If rc is zero, the pre-creation window should have been emptied.
	 * Since waking up the herd would be useless without pre-created
	 * objects, we defer the signal to osp_precreate_send() in that case.
	 */
	if (rc != 0) {
		if (update_status) {
			CERROR("%s: cannot cleanup orphans: rc = %d\n",
			       d->opd_obd->obd_name, rc);
			/* we can't proceed from here, OST seem to
			 * be in a bad shape, better to wait for
			 * a new instance of the server and repeat
			 * from the beginning. notify possible waiters
			 * this OSP isn't quite functional yet */
			osp_pre_update_status(d, rc);
		} else {
			wake_up(&d->opd_pre_user_waitq);
		}
	}

	RETURN(rc);
}
/*
 * IT_OPEN is intended to open (and create, possible) an object. Parent (pid)
 * may be split dir.
 *
 * The target MDT is chosen from op_fid2 for open-by-FID requests and via
 * lmv_locate_mds() otherwise. A new FID is allocated for op_fid2 when the
 * intent includes IT_CREAT and the open is not by FID. The intent is then
 * sent with md_intent_lock(); if the reply body has OBD_MD_MDS set, it is
 * followed up with lmv_intent_remote().
 *
 * Returns 0 on success, the PTR_ERR() of a failed target lookup, -EPROTO
 * when a reply carries no MDT body, or the error from lmv_fid_alloc(),
 * md_intent_lock() or lmv_intent_remote().
 */
static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
			   struct lookup_intent *it,
			   struct ptlrpc_request **reqp,
			   ldlm_blocking_callback cb_blocking,
			   __u64 extra_lock_flags)
{
	struct obd_device *obd = exp->exp_obd;
	struct lmv_obd *lmv = &obd->u.lmv;
	struct lmv_tgt_desc *tgt;
	struct mdt_body *body;
	int rc;
	ENTRY;

	if (it->it_flags & MDS_OPEN_BY_FID) {
		LASSERT(fid_is_sane(&op_data->op_fid2));

		/* for striped directory, we can't know parent stripe fid
		 * without name, but we can set it to child fid, and MDT
		 * will obtain it from linkea in open in such case. */
		if (op_data->op_mea1 != NULL)
			op_data->op_fid1 = op_data->op_fid2;

		tgt = lmv_find_target(lmv, &op_data->op_fid2);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));

		op_data->op_mds = tgt->ltd_idx;
	} else {
		/* open by name: parent FID and name are set, child FID is not */
		LASSERT(fid_is_sane(&op_data->op_fid1));
		LASSERT(fid_is_zero(&op_data->op_fid2));
		LASSERT(op_data->op_name != NULL);

		tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));
	}

	/* If it is ready to open the file by FID, do not need
	 * allocate FID at all, otherwise it will confuse MDT */
	if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
		/*
		 * For lookup(IT_CREATE) cases allocate new fid and setup FLD
		 * for it.
		 */
		rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
		if (rc != 0)
			RETURN(rc);
	}

	CDEBUG(D_INODE, "OPEN_INTENT with fid1="DFID", fid2="DFID","
	       " name='%s' -> mds #%u\n", PFID(&op_data->op_fid1),
	       PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);

	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
			    extra_lock_flags);
	if (rc != 0)
		RETURN(rc);
	/*
	 * Nothing is found, do not access body->fid1 as it is zero and thus
	 * pointless.
	 */
	if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
	    !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
	    !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
		RETURN(rc);

	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/*
	 * Cross-ref case (reply flagged OBD_MD_MDS): follow up with
	 * lmv_intent_remote(). Otherwise just get out of here.
	 */
	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
				       cb_blocking, extra_lock_flags);
		if (rc != 0)
			RETURN(rc);

		/* *reqp is read again; only the presence of a body is
		 * verified here */
		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}
/*
 * Allocate new fid on passed client @seq and save it to @fid.
 *
 * Returns 0 when the next FID was taken from the current sequence, 1 when
 * a new sequence was allocated first (so the caller can set up FLD for
 * it), or the non-zero result of seq_client_alloc_seq() on failure, in
 * which case @fid is not written.
 */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_entry_t link;
	int rc;

	LASSERT(seq);
	LASSERT(fid);

	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	/*
	 * Fault injection: push the oid up to the width so the in-sequence
	 * check below fails and a new sequence is requested.
	 */
	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		u64 seqnr;

		/* A sequence is already set and its oid is below the width. */
		if (!fid_is_zero(&seq->lcs_fid) &&
		    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid += 1;
			rc = 0;
			break;
		}

		/*
		 * A non-zero result restarts the loop, so the check above is
		 * evaluated again before anything else is tried.
		 * NOTE(review): presumably this waits for a sequence update
		 * already in progress in another thread - confirm against
		 * seq_fid_alloc_prep().
		 */
		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		if (rc) {
			CERROR("%s: Can't allocate new sequence, rc %d\n",
			       seq->lcs_name, rc);
			/* undo the prep step before leaving with the error */
			seq_fid_alloc_fini(seq);
			mutex_unlock(&seq->lcs_mutex);
			return rc;
		}

		CDEBUG(D_INFO, "%s: Switch to sequence [0x%16.16llx]\n",
		       seq->lcs_name, seqnr);

		/* restart the oid counter inside the freshly obtained sequence */
		seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
		seq->lcs_fid.f_seq = seqnr;
		seq->lcs_fid.f_ver = 0;

		/*
		 * Inform caller that sequence switch is performed to allow it
		 * to setup FLD for it.
		 */
		rc = 1;

		seq_fid_alloc_fini(seq);
		break;
	}

	/* copy the result out while the mutex is still held */
	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID " DFID "\n", seq->lcs_name, PFID(fid));
	return rc;
}
/* * VBR: save parent version in reply and child version getting by its name. * Version of child is getting and checking during its lookup. If */ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; struct lu_fid *child_fid = &info->mti_tmp_fid1; struct mdt_object *mp; struct mdt_object *mc; struct mdt_lock_handle *parent_lh; struct mdt_lock_handle *child_lh; struct lu_name *lname; int rc; ENTRY; DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name); if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) RETURN(err_serious(-ENOENT)); /* * step 1: Found the parent. */ mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); if (IS_ERR(mp)) { rc = PTR_ERR(mp); GOTO(out, rc); } if (mdt_object_obf(mp)) GOTO(put_parent, rc = -EPERM); parent_lh = &info->mti_lh[MDT_LH_PARENT]; lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); if (mdt_object_remote(mp)) { mdt_lock_reg_init(parent_lh, LCK_EX); rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh, parent_lh->mlh_rreg_mode, MDS_INODELOCK_UPDATE); if (rc != ELDLM_OK) GOTO(put_parent, rc); } else { mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, rr->rr_namelen); rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE, MDT_LOCAL_LOCK); if (rc) GOTO(put_parent, rc); rc = mdt_version_get_check_save(info, mp, 0); if (rc) GOTO(unlock_parent, rc); } /* step 2: find & lock the child */ /* lookup child object along with version checking */ fid_zero(child_fid); rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1); if (rc != 0) GOTO(unlock_parent, rc); mdt_reint_init_ma(info, ma); /* We will lock the child regardless it is local or remote. No harm. 
*/ mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); if (IS_ERR(mc)) GOTO(unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(child_lh, LCK_EX); if (mdt_object_remote(mc)) { struct mdt_body *repbody; if (!fid_is_zero(rr->rr_fid2)) { CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); GOTO(unlock_parent, rc = -ENOENT); } CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); if (info->mti_spec.sp_rm_entry) { struct lu_ucred *uc = mdt_ucred(info); if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) { CERROR("%s: unlink remote entry is only " "permitted for administrator: rc = %d\n", mdt2obd_dev(info->mti_mdt)->obd_name, -EPERM); GOTO(unlock_parent, rc = -EPERM); } ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), NULL, lname, ma); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } /* Revoke the LOOKUP lock of the remote object granted by * this MDT. Since the unlink will happen on another MDT, * it will release the LOOKUP lock right away. 
Then What * would happen if another client try to grab the LOOKUP * lock at the same time with unlink XXX */ mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP, MDT_CROSS_LOCK); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); LASSERT(repbody != NULL); repbody->fid1 = *mdt_object_fid(mc); repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); mdt_object_unlock_put(info, mc, child_lh, rc); GOTO(unlock_parent, rc = -EREMOTE); } else if (info->mti_spec.sp_rm_entry) { CERROR("%s: lfs rmdir should not be used on local dir %s\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc = -EPERM); } rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); /* * Now we can only make sure we need MA_INODE, in mdd layer, will check * whether need MA_LOV and MA_COOKIE. */ ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma); if (rc == 0 && !lu_object_is_dying(&mc->mot_header)) rc = mdt_attr_get_complex(info, mc, ma); if (rc == 0) mdt_handle_last_unlink(info, mc, ma); if (ma->ma_valid & MA_INODE) { switch (ma->ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req, LPROC_MDT_RMDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: mdt_counter_incr(req, LPROC_MDT_UNLINK); break; default: LASSERTF(0, "bad file type %o unlinking\n", ma->ma_attr.la_mode); } } EXIT; mdt_object_unlock_put(info, mc, child_lh, rc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); put_parent: mdt_object_put(info->mti_env, mp); out: return rc; }
/*
 * Handle an unlink reintegration request.
 *
 * VBR: save parent version in reply and child version getting by its name.
 * Version of child is getting and checking during its lookup.
 *
 * The parent (rr_fid1) is found and locked, the child is looked up by
 * name under it and then:
 *  - a child living on another MDT with sp_rm_entry set has only its name
 *    entry removed (requires CFS_CAP_SYS_ADMIN);
 *  - any other remote child gets its FID written into the reply body and
 *    -EREMOTE is returned;
 *  - a local child is locked and unlinked through mdo_unlink().
 *
 * Cleanup runs through the label ladder at the end: unlock_child,
 * put_child, unlock_parent, put_parent, out.
 *
 * Returns 0 on success or a negative errno: -EPERM, -ENOENT, -EIO,
 * -EREMOTE as set below, or whatever the lookup/lock/unlink helpers
 * returned.
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
			    struct mdt_lock_handle *lhc)
{
	struct mdt_reint_record *rr = &info->mti_rr;
	struct ptlrpc_request *req = mdt_info_req(info);
	struct md_attr *ma = &info->mti_attr;
	struct lu_fid *child_fid = &info->mti_tmp_fid1;
	struct mdt_object *mp;
	struct mdt_object *mc;
	struct mdt_lock_handle *parent_lh;
	struct mdt_lock_handle *child_lh;
	struct lu_name *lname;
	int rc;
	/* set when a resent remote unlink no longer finds the name entry */
	int no_name = 0;
	ENTRY;

	DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
		  rr->rr_name);

	if (info->mti_dlm_req)
		ldlm_request_cancel(req, info->mti_dlm_req, 0);

	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
		RETURN(err_serious(-ENOENT));

	/* refuse these special parent FIDs before any object is looked up */
	if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1))
		RETURN(-EPERM);
	/*
	 * step 1: Found the parent.
	 */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);
	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0) {
		/* Name might not be able to find during resend of
		 * remote unlink, considering following case.
		 * dir_A is a remote directory, the name entry of
		 * dir_A is on MDT0, the directory is on MDT1,
		 *
		 * 1. client sends unlink req to MDT1.
		 * 2. MDT1 sends name delete update to MDT0.
		 * 3. name entry is being deleted in MDT0 synchronously.
		 * 4. MDT1 is restarted.
		 * 5. client resends unlink req to MDT1. So it can not
		 *    find the name entry on MDT0 anymore.
		 * In this case, MDT1 only needs to destroy the local
		 * directory.
		 * */
		if (mdt_object_remote(mp) && rc == -ENOENT &&
		    !fid_is_zero(rr->rr_fid2) &&
		    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
			no_name = 1;
			/* fall back to the child FID sent in the request */
			*child_fid = *rr->rr_fid2;
		} else {
			GOTO(unlock_parent, rc);
		}
	}

	if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid))
		GOTO(unlock_parent, rc = -EPERM);

	mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

	child_lh = &info->mti_lh[MDT_LH_CHILD];
	mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt_obd_name(info->mti_mdt),
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(put_child, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt_obd_name(info->mti_mdt),
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (!mdt_is_dne_client(req->rq_export))
			/* Return -EIO for old client */
			GOTO(put_child, rc = -EIO);

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
				       mdt_obd_name(info->mti_mdt),
				       -EPERM);
				GOTO(put_child, rc = -EPERM);
			}

			/* only the name entry is removed: no child object is
			 * passed to mdo_unlink() */
			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma, no_name);
			GOTO(put_child, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		/* NOTE(review): the result of this lock call is not checked
		 * - confirm that is intended. */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		GOTO(unlock_child, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		rc = -EPERM;
		CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n",
		       mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc);
		GOTO(put_child, rc);
	}

	/* We used to acquire MDS_INODELOCK_FULL here but we can't do
	 * this now because a running HSM restore on the child (unlink
	 * victim) will hold the layout lock. See LU-4002. */
	rc = mdt_object_lock(info, mc, child_lh,
			     MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE,
			     MDT_CROSS_LOCK);
	if (rc != 0)
		GOTO(put_child, rc);

	mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
		       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
	/* save version when object is locked */
	mdt_version_get_save(info, mc, 1);
	/*
	 * Now we can only make sure we need MA_INODE, in mdd layer, will check
	 * whether need MA_LOV and MA_COOKIE.
	 */
	ma->ma_need = MA_INODE;
	ma->ma_valid = 0;
	mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);

	/* the unlink itself runs with the child's mot_lov_mutex held */
	mutex_lock(&mc->mot_lov_mutex);

	rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
			mdt_object_child(mc), lname, ma, no_name);

	mutex_unlock(&mc->mot_lov_mutex);

	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
	if (rc == 0)
		mdt_handle_last_unlink(info, mc, ma);

	/* account the operation as rmdir or unlink by file type */
	if (ma->ma_valid & MA_INODE) {
		switch (ma->ma_attr.la_mode & S_IFMT) {
		case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
			break;
		case S_IFREG:
		case S_IFLNK:
		case S_IFCHR:
		case S_IFBLK:
		case S_IFIFO:
		case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
			break;
		default:
			LASSERTF(0, "bad file type %o unlinking\n",
				 ma->ma_attr.la_mode);
		}
	}

	EXIT;
unlock_child:
	mdt_object_unlock(info, mc, child_lh, rc);
put_child:
	mdt_object_put(info->mti_env, mc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
	return rc;
}
static int osp_declare_object_create(const struct lu_env *env, struct dt_object *dt, struct lu_attr *attr, struct dt_allocation_hint *hint, struct dt_object_format *dof, struct thandle *th) { struct osp_thread_info *osi = osp_env_info(env); struct osp_device *d = lu2osp_dev(dt->do_lu.lo_dev); struct osp_object *o = dt2osp_obj(dt); const struct lu_fid *fid; int rc = 0; ENTRY; /* should happen to non-0 OSP only so that at least one object * has been already declared in the scenario and LOD should * cleanup that */ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1) RETURN(-ENOSPC); LASSERT(d->opd_last_used_oid_file); fid = lu_object_fid(&dt->do_lu); /* * There can be gaps in precreated ids and record to unlink llog * XXX: we do not handle gaps yet, implemented before solution * was found to be racy, so we disabled that. there is no * point in making useless but expensive llog declaration. */ /* rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); */ if (unlikely(!fid_is_zero(fid))) { /* replay case: caller knows fid */ osi->osi_off = sizeof(osi->osi_id) * d->opd_index; rc = dt_declare_record_write(env, d->opd_last_used_oid_file, sizeof(osi->osi_id), osi->osi_off, th); RETURN(rc); } /* * in declaration we need to reserve object so that we don't block * awaiting precreation RPC to complete */ rc = osp_precreate_reserve(env, d); /* * we also need to declare update to local "last used id" file for * recovery if object isn't used for a reason, we need to release * reservation, this can be made in osd_object_release() */ if (rc == 0) { /* mark id is reserved: in create we don't want to talk * to OST */ LASSERT(o->opo_reserved == 0); o->opo_reserved = 1; /* common for all OSPs file hystorically */ osi->osi_off = sizeof(osi->osi_id) * d->opd_index; rc = dt_declare_record_write(env, d->opd_last_used_oid_file, sizeof(osi->osi_id), osi->osi_off, th); } else { /* not needed in the cache anymore */ set_bit(LU_OBJECT_HEARD_BANSHEE, 
&dt->do_lu.lo_header->loh_flags); } RETURN(rc); }