int osp_init_precreate(struct osp_device *d) { struct l_wait_info lwi = { 0 }; struct task_struct *task; ENTRY; OBD_ALLOC_PTR(d->opd_pre); if (d->opd_pre == NULL) RETURN(-ENOMEM); /* initially precreation isn't ready */ d->opd_pre_status = -EAGAIN; fid_zero(&d->opd_pre_used_fid); d->opd_pre_used_fid.f_oid = 1; fid_zero(&d->opd_pre_last_created_fid); d->opd_pre_last_created_fid.f_oid = 1; d->opd_pre_reserved = 0; d->opd_got_disconnected = 1; d->opd_pre_grow_slow = 0; d->opd_pre_grow_count = OST_MIN_PRECREATE; d->opd_pre_min_grow_count = OST_MIN_PRECREATE; d->opd_pre_max_grow_count = OST_MAX_PRECREATE; spin_lock_init(&d->opd_pre_lock); init_waitqueue_head(&d->opd_pre_waitq); init_waitqueue_head(&d->opd_pre_user_waitq); init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq); /* * Initialize statfs-related things */ d->opd_statfs_maxage = 5; /* default update interval */ d->opd_statfs_fresh_till = cfs_time_shift(-1000); CDEBUG(D_OTHER, "current %llu, fresh till %llu\n", (unsigned long long)cfs_time_current(), (unsigned long long)d->opd_statfs_fresh_till); cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d); /* * start thread handling precreation and statfs updates */ task = kthread_run(osp_precreate_thread, d, "osp-pre-%u-%u", d->opd_index, d->opd_group); if (IS_ERR(task)) { CERROR("can't start precreate thread %ld\n", PTR_ERR(task)); RETURN(PTR_ERR(task)); } l_wait_event(d->opd_pre_thread.t_ctl_waitq, osp_precreate_running(d) || osp_precreate_stopped(d), &lwi); RETURN(0); }
int osp_init_precreate(struct osp_device *d) { struct l_wait_info lwi = { 0 }; int rc; ENTRY; /* initially precreation isn't ready */ d->opd_pre_status = -EAGAIN; fid_zero(&d->opd_pre_used_fid); d->opd_pre_used_fid.f_oid = 1; fid_zero(&d->opd_pre_last_created_fid); d->opd_pre_last_created_fid.f_oid = 1; d->opd_pre_reserved = 0; d->opd_got_disconnected = 1; d->opd_pre_grow_slow = 0; d->opd_pre_grow_count = OST_MIN_PRECREATE; d->opd_pre_min_grow_count = OST_MIN_PRECREATE; d->opd_pre_max_grow_count = OST_MAX_PRECREATE; spin_lock_init(&d->opd_pre_lock); cfs_waitq_init(&d->opd_pre_waitq); cfs_waitq_init(&d->opd_pre_user_waitq); cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq); /* * Initialize statfs-related things */ d->opd_statfs_maxage = 5; /* default update interval */ d->opd_statfs_fresh_till = cfs_time_shift(-1000); CDEBUG(D_OTHER, "current %llu, fresh till %llu\n", (unsigned long long)cfs_time_current(), (unsigned long long)d->opd_statfs_fresh_till); cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d); /* * start thread handling precreation and statfs updates */ rc = cfs_create_thread(osp_precreate_thread, d, 0); if (rc < 0) { CERROR("can't start precreate thread %d\n", rc); RETURN(rc); } l_wait_event(d->opd_pre_thread.t_ctl_waitq, osp_precreate_running(d) || osp_precreate_stopped(d), &lwi); RETURN(0); }
/* * Finish the current sequence due to disconnect. * See mdc_import_event() */ void seq_client_flush(struct lu_client_seq *seq) { wait_queue_t link; LASSERT(seq != NULL); init_waitqueue_entry(&link, current); mutex_lock(&seq->lcs_mutex); while (seq->lcs_update) { add_wait_queue(&seq->lcs_waitq, &link); set_current_state(TASK_UNINTERRUPTIBLE); mutex_unlock(&seq->lcs_mutex); schedule(); mutex_lock(&seq->lcs_mutex); remove_wait_queue(&seq->lcs_waitq, &link); set_current_state(TASK_RUNNING); } fid_zero(&seq->lcs_fid); /** * this id shld not be used for seq range allocation. * set to -1 for dgb check. */ seq->lcs_space.lsr_index = -1; lu_seq_range_init(&seq->lcs_space); mutex_unlock(&seq->lcs_mutex); }
int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo) { struct ofd_thread_info *info = ofd_info(env); struct filter_fid_old *ff = &info->fti_mds_fid_old; struct lu_buf *buf = &info->fti_buf; struct lu_fid *pfid = &fo->ofo_pfid; int rc = 0; if (fid_is_sane(pfid)) return 0; buf->lb_buf = ff; buf->lb_len = sizeof(*ff); rc = dt_xattr_get(env, ofd_object_child(fo), buf, XATTR_NAME_FID, BYPASS_CAPA); if (rc < 0) return rc; if (rc < sizeof(struct lu_fid)) { fid_zero(pfid); return -ENODATA; } pfid->f_seq = le64_to_cpu(ff->ff_parent.f_seq); pfid->f_oid = le32_to_cpu(ff->ff_parent.f_oid); /* Currently, the filter_fid::ff_parent::f_ver is not the real parent * MDT-object's FID::f_ver, instead it is the OST-object index in its * parent MDT-object's layout EA. */ pfid->f_stripe_idx = le32_to_cpu(ff->ff_parent.f_stripe_idx); return 0; }
/** * open the PENDING directory for device \a mdd * * The PENDING directory persistently tracks files and directories that were * unlinked from the namespace (nlink == 0) but are still held open by clients. * Those inodes shouldn't be deleted if the MDS crashes, because the clients * would not be able to recover and reopen those files. Instead, these inodes * are linked into the PENDING directory on disk, and only deleted if all * clients close them, or the MDS finishes client recovery without any client * reopening them (i.e. former clients didn't join recovery). * \param d mdd device being started. * * \retval 0 success * \retval -ve index operation error. * */ int orph_index_init(const struct lu_env *env, struct mdd_device *mdd) { struct lu_fid fid; struct dt_object *d; int rc = 0; ENTRY; /* create PENDING dir */ fid_zero(&fid); rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid, orph_index_name, S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO, &fid); if (rc < 0) RETURN(rc); d = dt_locate(env, mdd->mdd_child, &fid); if (IS_ERR(d)) RETURN(PTR_ERR(d)); LASSERT(lu_object_exists(&d->do_lu)); if (!dt_try_as_dir(env, d)) { CERROR("%s: \"%s\" is not an index: rc = %d\n", mdd2obd_dev(mdd)->obd_name, orph_index_name, rc); lu_object_put(env, &d->do_lu); RETURN(-ENOTDIR); } mdd->mdd_orphans = d; RETURN(0); }
/**
 * Fill \a op_data for a metadata operation on up to two inodes.
 *
 * op_fid1 and the supplementary group IDs are taken from \a i1 when it is
 * given, otherwise from \a i2.  op_fid2 is the FID of \a i2, or a zeroed
 * FID when \a i2 is NULL.  The remaining fields are copied from the
 * arguments; op_mod_time is set to the current time and op_data->op_data
 * is cleared.
 *
 * \param[out] op_data	operation descriptor to fill, must not be NULL
 * \param[in] i1	first inode, may be NULL if \a i2 is not
 * \param[in] i2	second inode, may be NULL if \a i1 is not
 * \param[in] name	name stored in op_name
 * \param[in] namelen	value stored in op_namelen
 * \param[in] mode	value stored in op_mode
 * \param[in] opc	value stored in op_opc
 */
void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1,
			 struct inode *i2, const char *name, int namelen,
			 int mode, __u32 opc)
{
	struct inode *primary;
	struct inode *secondary;

	LASSERT(i1 != NULL || i2 != NULL);
	LASSERT(op_data);

	/* the first available inode provides fid1 and leads the gid lookup */
	primary = i1 ? i1 : i2;
	secondary = i1 ? i2 : NULL;

	ll_i2gids(op_data->op_suppgids, primary, secondary);
	op_data->op_fid1 = *ll_inode2fid(primary);

	if (i2 == NULL)
		fid_zero(&op_data->op_fid2);
	else
		op_data->op_fid2 = *ll_inode2fid(i2);

	op_data->op_name = name;
	op_data->op_namelen = namelen;
	op_data->op_mode = mode;
	op_data->op_opc = opc;
	op_data->op_mod_time = CFS_CURRENT_TIME;
	op_data->op_data = NULL;
}
/* * Finish the current sequence due to disconnect. * See mdc_import_event() */ void seq_client_flush(struct lu_client_seq *seq) { cfs_waitlink_t link; LASSERT(seq != NULL); cfs_waitlink_init(&link); mutex_lock(&seq->lcs_mutex); while (seq->lcs_update) { cfs_waitq_add(&seq->lcs_waitq, &link); cfs_set_current_state(CFS_TASK_UNINT); mutex_unlock(&seq->lcs_mutex); cfs_waitq_wait(&link, CFS_TASK_UNINT); mutex_lock(&seq->lcs_mutex); cfs_waitq_del(&seq->lcs_waitq, &link); cfs_set_current_state(CFS_TASK_RUNNING); } fid_zero(&seq->lcs_fid); /** * this id shld not be used for seq range allocation. * set to -1 for dgb check. */ seq->lcs_space.lsr_index = -1; range_init(&seq->lcs_space); mutex_unlock(&seq->lcs_mutex); }
int osp_init_pre_fid(struct osp_device *osp) { struct lu_env env; struct osp_thread_info *osi; struct lu_client_seq *cli_seq; struct lu_fid *last_fid; int rc; ENTRY; LASSERT(osp->opd_pre != NULL); /* Return if last_used fid has been initialized */ if (!fid_is_zero(&osp->opd_last_used_fid)) RETURN(0); rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags); if (rc) { CERROR("%s: init env error: rc = %d\n", osp->opd_obd->obd_name, rc); RETURN(rc); } osi = osp_env_info(&env); last_fid = &osi->osi_fid; fid_zero(last_fid); /* For a freshed fs, it will allocate a new sequence first */ if (osp_is_fid_client(osp) && osp->opd_group != 0) { cli_seq = osp->opd_obd->u.cli.cl_seq; rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq); if (rc != 0) { CERROR("%s: alloc fid error: rc = %d\n", osp->opd_obd->obd_name, rc); GOTO(out, rc); } } else { last_fid->f_seq = fid_idif_seq(0, osp->opd_index); } last_fid->f_oid = 1; last_fid->f_ver = 0; spin_lock(&osp->opd_pre_lock); osp->opd_last_used_fid = *last_fid; osp->opd_pre_used_fid = *last_fid; osp->opd_pre_last_created_fid = *last_fid; spin_unlock(&osp->opd_pre_lock); rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1); if (rc != 0) { CERROR("%s: write fid error: rc = %d\n", osp->opd_obd->obd_name, rc); GOTO(out, rc); } out: lu_env_fini(&env); RETURN(rc); }
/*
 * Handler for: getattr, lookup and revalidate cases.
 *
 * Locates the target for op_data->op_fid1, zeroes op_fid2 when it is not
 * sane, clears MDS_CROSS_REF from op_bias and issues the intent lock on
 * that target.  When the reply body has OBD_MD_MDS set, the request is
 * continued through lmv_intent_remote().
 *
 * Returns the target lookup error, the md_intent_lock() result when it is
 * negative or no request was produced, -EPROTO when the reply has no MDT
 * body, 0 for the non-cross-ref case, or the lmv_intent_remote() result.
 */
int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
		      void *lmm, int lmmsize, struct lookup_intent *it,
		      int flags, struct ptlrpc_request **reqp,
		      ldlm_blocking_callback cb_blocking,
		      __u64 extra_lock_flags)
{
	struct lmv_obd		*lmv = &exp->exp_obd->u.lmv;
	struct lmv_tgt_desc	*tgt;
	struct mdt_body		*body;
	int			 rc;

	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt))
		return PTR_ERR(tgt);

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
	       PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
			    flags, reqp, cb_blocking, extra_lock_flags);
	if (rc < 0)
		return rc;

	/* no request was produced, so there is no reply to inspect */
	if (*reqp == NULL)
		return rc;

	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		return -EPROTO;

	/* Not cross-ref case, just get out of here. */
	if (likely(!(body->valid & OBD_MD_MDS)))
		return 0;

	return lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags, reqp,
				 cb_blocking, extra_lock_flags);
}
/**
 * Remove the object-index mapping for \a fid.
 *
 * The per-thread idmap cache entry is invalidated first when it refers to
 * \a fid.  Last-ID FIDs are then ignored, OST-object and llog FIDs are
 * removed through osd_obj_map_delete(), and every other FID is removed
 * from the IAM-based OI using its big-endian form as the key.
 *
 * \param[in] info	per-thread OSD state (cache and scratch FID)
 * \param[in] osd	OSD device
 * \param[in] fid	FID whose mapping is to be deleted
 * \param[in] th	journal handle passed to the delete helpers
 * \param[in] flags	OI check flags passed to fid_is_on_ost()
 *
 * \retval 0		\a fid is a last-ID FID, nothing to delete
 * \retval other	result of osd_obj_map_delete() or osd_oi_iam_delete()
 */
int osd_oi_delete(struct osd_thread_info *info,
		  struct osd_device *osd, const struct lu_fid *fid,
		  handle_t *th, enum oi_check_flags flags)
{
	struct lu_fid *be_key = &info->oti_fid2;

	/* clear idmap cache */
	if (lu_fid_eq(fid, &info->oti_cache.oic_fid))
		fid_zero(&info->oti_cache.oic_fid);

	if (fid_is_last_id(fid))
		return 0;

	/* OST objects and llogs are handled by the object map */
	if (fid_is_on_ost(info, osd, fid, flags))
		return osd_obj_map_delete(info, osd, fid, th);
	if (fid_is_llog(fid))
		return osd_obj_map_delete(info, osd, fid, th);

	/* the IAM index is keyed by the big-endian FID */
	fid_cpu_to_be(be_key, fid);
	return osd_oi_iam_delete(info, osd_fid2oi(osd, fid),
				 (const struct dt_key *)be_key, th);
}
static int lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, char *buf, int len) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_namespace *ns = com->lc_file_ram; int save = len; int ret = -ENOSPC; int rc; down_read(&com->lc_sem); rc = snprintf(buf, len, "name: lfsck_namespace\n" "magic: %#x\n" "version: %d\n" "status: %s\n", ns->ln_magic, bk->lb_version, lfsck_status2names(ns->ln_status)); if (rc <= 0) goto out; buf += rc; len -= rc; rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names, "flags"); if (rc < 0) goto out; rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names, "param"); if (rc < 0) goto out; rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete, "time_since_last_completed"); if (rc < 0) goto out; rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start, "time_since_latest_start"); if (rc < 0) goto out; rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint, "time_since_last_checkpoint"); if (rc < 0) goto out; rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start, "latest_start_position"); if (rc < 0) goto out; rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint, "last_checkpoint_position"); if (rc < 0) goto out; rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent, "first_failure_position"); if (rc < 0) goto out; if (ns->ln_status == LS_SCANNING_PHASE1) { struct lfsck_position pos; const struct dt_it_ops *iops; cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; __u64 checked = ns->ln_items_checked + com->lc_new_checked; __u64 speed = checked; __u64 new_checked = com->lc_new_checked * HZ; __u32 rtime = ns->ln_run_time_phase1 + cfs_duration_sec(duration + HALF_SEC); if (duration != 0) do_div(new_checked, duration); if (rtime != 0) do_div(speed, rtime); rc = snprintf(buf, len, "checked_phase1: "LPU64"\n" "checked_phase2: "LPU64"\n" "updated_phase1: "LPU64"\n" "updated_phase2: 
"LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" "dirs: "LPU64"\n" "M-linked: "LPU64"\n" "nlinks_repaired: "LPU64"\n" "lost_found: "LPU64"\n" "success_count: %u\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: N/A\n" "real-time_speed_phase1: "LPU64" items/sec\n" "real-time_speed_phase2: N/A\n", checked, ns->ln_objs_checked_phase2, ns->ln_items_repaired, ns->ln_objs_repaired_phase2, ns->ln_items_failed, ns->ln_objs_failed_phase2, ns->ln_dirs_checked, ns->ln_mlinked_checked, ns->ln_objs_nlink_repaired, ns->ln_objs_lost_found, ns->ln_success_count, rtime, ns->ln_run_time_phase2, speed, new_checked); if (rc <= 0) goto out; buf += rc; len -= rc; LASSERT(lfsck->li_di_oit != NULL); iops = &lfsck->li_obj_oit->do_index_ops->dio_it; /* The low layer otable-based iteration position may NOT * exactly match the namespace-based directory traversal * cookie. Generally, it is not a serious issue. But the * caller should NOT make assumption on that. 
*/ pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit); if (!lfsck->li_current_oit_processed) pos.lp_oit_cookie--; spin_lock(&lfsck->li_lock); if (lfsck->li_di_dir != NULL) { pos.lp_dir_cookie = lfsck->li_cookie_dir; if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) { fid_zero(&pos.lp_dir_parent); pos.lp_dir_cookie = 0; } else { pos.lp_dir_parent = *lfsck_dto2fid(lfsck->li_obj_dir); } } else { fid_zero(&pos.lp_dir_parent); pos.lp_dir_cookie = 0; } spin_unlock(&lfsck->li_lock); rc = lfsck_pos_dump(&buf, &len, &pos, "current_position"); if (rc <= 0) goto out; } else if (ns->ln_status == LS_SCANNING_PHASE2) { cfs_duration_t duration = cfs_time_current() - lfsck->li_time_last_checkpoint; __u64 checked = ns->ln_objs_checked_phase2 + com->lc_new_checked; __u64 speed1 = ns->ln_items_checked; __u64 speed2 = checked; __u64 new_checked = com->lc_new_checked * HZ; __u32 rtime = ns->ln_run_time_phase2 + cfs_duration_sec(duration + HALF_SEC); if (duration != 0) do_div(new_checked, duration); if (ns->ln_run_time_phase1 != 0) do_div(speed1, ns->ln_run_time_phase1); if (rtime != 0) do_div(speed2, rtime); rc = snprintf(buf, len, "checked_phase1: "LPU64"\n" "checked_phase2: "LPU64"\n" "updated_phase1: "LPU64"\n" "updated_phase2: "LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" "dirs: "LPU64"\n" "M-linked: "LPU64"\n" "nlinks_repaired: "LPU64"\n" "lost_found: "LPU64"\n" "success_count: %u\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: "LPU64" objs/sec\n" "real-time_speed_phase1: N/A\n" "real-time_speed_phase2: "LPU64" objs/sec\n" "current_position: "DFID"\n", ns->ln_items_checked, checked, ns->ln_items_repaired, ns->ln_objs_repaired_phase2, ns->ln_items_failed, ns->ln_objs_failed_phase2, ns->ln_dirs_checked, ns->ln_mlinked_checked, ns->ln_objs_nlink_repaired, ns->ln_objs_lost_found, ns->ln_success_count, ns->ln_run_time_phase1, rtime, speed1, speed2, new_checked, 
PFID(&ns->ln_fid_latest_scanned_phase2)); if (rc <= 0) goto out; buf += rc; len -= rc; } else { __u64 speed1 = ns->ln_items_checked; __u64 speed2 = ns->ln_objs_checked_phase2; if (ns->ln_run_time_phase1 != 0) do_div(speed1, ns->ln_run_time_phase1); if (ns->ln_run_time_phase2 != 0) do_div(speed2, ns->ln_run_time_phase2); rc = snprintf(buf, len, "checked_phase1: "LPU64"\n" "checked_phase2: "LPU64"\n" "updated_phase1: "LPU64"\n" "updated_phase2: "LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" "dirs: "LPU64"\n" "M-linked: "LPU64"\n" "nlinks_repaired: "LPU64"\n" "lost_found: "LPU64"\n" "success_count: %u\n" "run_time_phase1: %u seconds\n" "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: "LPU64" objs/sec\n" "real-time_speed_phase1: N/A\n" "real-time_speed_phase2: N/A\n" "current_position: N/A\n", ns->ln_items_checked, ns->ln_objs_checked_phase2, ns->ln_items_repaired, ns->ln_objs_repaired_phase2, ns->ln_items_failed, ns->ln_objs_failed_phase2, ns->ln_dirs_checked, ns->ln_mlinked_checked, ns->ln_objs_nlink_repaired, ns->ln_objs_lost_found, ns->ln_success_count, ns->ln_run_time_phase1, ns->ln_run_time_phase2, speed1, speed2); if (rc <= 0) goto out; buf += rc; len -= rc; } ret = save - len; out: up_read(&com->lc_sem); return ret; }
/*
 * Handler for: getattr, lookup and revalidate cases.
 *
 * Locates the target MDS for op_data->op_fid1 and issues the intent lock
 * there.  For a striped directory with an unknown hash type the first
 * stripe is tried first; if that lookup is negative and
 * lmv_need_try_all_stripes() says so, the remaining stripes are tried one
 * by one until the name is found or the stripes are exhausted.  When no
 * request was sent and op_data->op_mea2 is set, the slave stripes are
 * revalidated instead.  A reply flagged OBD_MD_MDS is continued through
 * lmv_intent_remote().
 *
 * Returns 0 on success or a negative error code; -EPROTO when a reply
 * carries no MDT body.
 */
static int lmv_intent_lookup(struct obd_export *exp,
			     struct md_op_data *op_data,
			     struct lookup_intent *it,
			     struct ptlrpc_request **reqp,
			     ldlm_blocking_callback cb_blocking,
			     __u64 extra_lock_flags)
{
	struct obd_device	*obd = exp->exp_obd;
	struct lmv_obd		*lmv = &obd->u.lmv;
	struct lmv_tgt_desc	*tgt = NULL;
	struct mdt_body		*body;
	struct lmv_stripe_md	*lsm = op_data->op_mea1;
	int			 rc = 0;
	ENTRY;

	/* If it returns ERR_PTR(-EBADFD) then it is an unknown hash type
	 * it will try all stripes to locate the object */
	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
		RETURN(PTR_ERR(tgt));

	/* Both migrating dir and unknown hash dir need to try
	 * all of sub-stripes */
	if (lsm != NULL && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
		struct lmv_oinfo *oinfo;

		oinfo = &lsm->lsm_md_oinfo[0];

		op_data->op_fid1 = oinfo->lmo_fid;
		op_data->op_mds = oinfo->lmo_mds;
		tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));
	}

	/*
	 * The -EBADFD result tolerated above is only usable when the block
	 * above replaced it with a real target.  Never dereference an error
	 * pointer below.
	 */
	if (IS_ERR(tgt))
		RETURN(PTR_ERR(tgt));

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n",
	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx, lsm, lsm == NULL ? -1 : lsm->lsm_md_magic);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
			    extra_lock_flags);
	if (rc < 0)
		RETURN(rc);

	if (*reqp == NULL) {
		/* If RPC happens, lsm information will be revalidated
		 * during update_inode process (see ll_update_lsm_md) */
		if (op_data->op_mea2 != NULL)
			rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
						   cb_blocking,
						   extra_lock_flags);
		RETURN(rc);
	} else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm != NULL &&
		   lmv_need_try_all_stripes(lsm)) {
		/* For migrating and unknown hash type directory, it will
		 * try to target the entry on other stripes */
		int stripe_index;

		for (stripe_index = 1;
		     stripe_index < lsm->lsm_md_stripe_count &&
		     it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
			struct lmv_oinfo *oinfo;

			/* release the previous request */
			ptlrpc_req_finished(*reqp);
			it->d.lustre.it_data = NULL;
			*reqp = NULL;

			oinfo = &lsm->lsm_md_oinfo[stripe_index];
			tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
			if (IS_ERR(tgt))
				RETURN(PTR_ERR(tgt));

			CDEBUG(D_INODE, "Try other stripes " DFID"\n",
			       PFID(&oinfo->lmo_fid));

			op_data->op_fid1 = oinfo->lmo_fid;
			/* allow the intent to be enqueued again */
			it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
			rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
					    cb_blocking, extra_lock_flags);
			if (rc != 0)
				RETURN(rc);
		}
	}

	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/* Cross-ref case: continue the intent on the remote MDS. */
	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
				       extra_lock_flags);
		if (rc != 0)
			RETURN(rc);
		body = req_capsule_server_get(&(*reqp)->rq_pill,
					      &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}
static int lfsck_namespace_prep(const struct lu_env *env, struct lfsck_component *com, struct lfsck_start_param *lsp) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_namespace *ns = com->lc_file_ram; struct lfsck_position *pos = &com->lc_pos_start; if (ns->ln_status == LS_COMPLETED) { int rc; rc = lfsck_namespace_reset(env, com, false); if (rc != 0) return rc; } down_write(&com->lc_sem); ns->ln_time_latest_start = cfs_time_current_sec(); spin_lock(&lfsck->li_lock); if (ns->ln_flags & LF_SCANNED_ONCE) { if (!lfsck->li_drop_dryrun || lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) { ns->ln_status = LS_SCANNING_PHASE2; cfs_list_del_init(&com->lc_link); cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan); if (!cfs_list_empty(&com->lc_link_dir)) cfs_list_del_init(&com->lc_link_dir); lfsck_pos_set_zero(pos); } else { ns->ln_status = LS_SCANNING_PHASE1; ns->ln_run_time_phase1 = 0; ns->ln_run_time_phase2 = 0; ns->ln_items_checked = 0; ns->ln_items_repaired = 0; ns->ln_items_failed = 0; ns->ln_dirs_checked = 0; ns->ln_mlinked_checked = 0; ns->ln_objs_checked_phase2 = 0; ns->ln_objs_repaired_phase2 = 0; ns->ln_objs_failed_phase2 = 0; ns->ln_objs_nlink_repaired = 0; ns->ln_objs_lost_found = 0; fid_zero(&ns->ln_fid_latest_scanned_phase2); if (cfs_list_empty(&com->lc_link_dir)) cfs_list_add_tail(&com->lc_link_dir, &lfsck->li_list_dir); *pos = ns->ln_pos_first_inconsistent; } } else { ns->ln_status = LS_SCANNING_PHASE1; if (cfs_list_empty(&com->lc_link_dir)) cfs_list_add_tail(&com->lc_link_dir, &lfsck->li_list_dir); if (!lfsck->li_drop_dryrun || lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) { *pos = ns->ln_pos_last_checkpoint; pos->lp_oit_cookie++; } else { *pos = ns->ln_pos_first_inconsistent; } } spin_unlock(&lfsck->li_lock); up_write(&com->lc_sem); return 0; }
/* * VBR: save parent version in reply and child version getting by its name. * Version of child is getting and checking during its lookup. If */ static int mdt_reint_unlink(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct ptlrpc_request *req = mdt_info_req(info); struct md_attr *ma = &info->mti_attr; struct lu_fid *child_fid = &info->mti_tmp_fid1; struct mdt_object *mp; struct mdt_object *mc; struct mdt_lock_handle *parent_lh; struct mdt_lock_handle *child_lh; struct lu_name *lname; int rc; ENTRY; DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name); if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) RETURN(err_serious(-ENOENT)); /* * step 1: Found the parent. */ mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1); if (IS_ERR(mp)) { rc = PTR_ERR(mp); GOTO(out, rc); } if (mdt_object_obf(mp)) GOTO(put_parent, rc = -EPERM); parent_lh = &info->mti_lh[MDT_LH_PARENT]; lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); if (mdt_object_remote(mp)) { mdt_lock_reg_init(parent_lh, LCK_EX); rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh, parent_lh->mlh_rreg_mode, MDS_INODELOCK_UPDATE); if (rc != ELDLM_OK) GOTO(put_parent, rc); } else { mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name, rr->rr_namelen); rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE, MDT_LOCAL_LOCK); if (rc) GOTO(put_parent, rc); rc = mdt_version_get_check_save(info, mp, 0); if (rc) GOTO(unlock_parent, rc); } /* step 2: find & lock the child */ /* lookup child object along with version checking */ fid_zero(child_fid); rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1); if (rc != 0) GOTO(unlock_parent, rc); mdt_reint_init_ma(info, ma); /* We will lock the child regardless it is local or remote. No harm. 
*/ mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid); if (IS_ERR(mc)) GOTO(unlock_parent, rc = PTR_ERR(mc)); child_lh = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_reg_init(child_lh, LCK_EX); if (mdt_object_remote(mc)) { struct mdt_body *repbody; if (!fid_is_zero(rr->rr_fid2)) { CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); GOTO(unlock_parent, rc = -ENOENT); } CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name, PFID(mdt_object_fid(mc))); if (info->mti_spec.sp_rm_entry) { struct lu_ucred *uc = mdt_ucred(info); if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) { CERROR("%s: unlink remote entry is only " "permitted for administrator: rc = %d\n", mdt2obd_dev(info->mti_mdt)->obd_name, -EPERM); GOTO(unlock_parent, rc = -EPERM); } ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), NULL, lname, ma); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } /* Revoke the LOOKUP lock of the remote object granted by * this MDT. Since the unlink will happen on another MDT, * it will release the LOOKUP lock right away. 
Then What * would happen if another client try to grab the LOOKUP * lock at the same time with unlink XXX */ mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP, MDT_CROSS_LOCK); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); LASSERT(repbody != NULL); repbody->fid1 = *mdt_object_fid(mc); repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS); mdt_object_unlock_put(info, mc, child_lh, rc); GOTO(unlock_parent, rc = -EREMOTE); } else if (info->mti_spec.sp_rm_entry) { CERROR("%s: lfs rmdir should not be used on local dir %s\n", mdt2obd_dev(info->mti_mdt)->obd_name, (char *)rr->rr_name); mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc = -EPERM); } rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mc); GOTO(unlock_parent, rc); } mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_UNLINK_WRITE); /* save version when object is locked */ mdt_version_get_save(info, mc, 1); /* * Now we can only make sure we need MA_INODE, in mdd layer, will check * whether need MA_LOV and MA_COOKIE. */ ma->ma_need = MA_INODE; ma->ma_valid = 0; mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA); rc = mdo_unlink(info->mti_env, mdt_object_child(mp), mdt_object_child(mc), lname, ma); if (rc == 0 && !lu_object_is_dying(&mc->mot_header)) rc = mdt_attr_get_complex(info, mc, ma); if (rc == 0) mdt_handle_last_unlink(info, mc, ma); if (ma->ma_valid & MA_INODE) { switch (ma->ma_attr.la_mode & S_IFMT) { case S_IFDIR: mdt_counter_incr(req, LPROC_MDT_RMDIR); break; case S_IFREG: case S_IFLNK: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: mdt_counter_incr(req, LPROC_MDT_UNLINK); break; default: LASSERTF(0, "bad file type %o unlinking\n", ma->ma_attr.la_mode); } } EXIT; mdt_object_unlock_put(info, mc, child_lh, rc); unlock_parent: mdt_object_unlock(info, mp, parent_lh, rc); put_parent: mdt_object_put(info->mti_env, mp); out: return rc; }
/* * VBR: rename versions in reply: 0 - src parent; 1 - tgt parent; * 2 - src child; 3 - tgt child. * Update on disk version of src child. */ static int mdt_reint_rename(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { struct mdt_reint_record *rr = &info->mti_rr; struct md_attr *ma = &info->mti_attr; struct ptlrpc_request *req = mdt_info_req(info); struct mdt_object *msrcdir; struct mdt_object *mtgtdir; struct mdt_object *mold; struct mdt_object *mnew = NULL; struct mdt_lock_handle *lh_srcdirp; struct mdt_lock_handle *lh_tgtdirp; struct mdt_lock_handle *lh_oldp; struct mdt_lock_handle *lh_newp; struct lu_fid *old_fid = &info->mti_tmp_fid1; struct lu_fid *new_fid = &info->mti_tmp_fid2; struct lustre_handle rename_lh = { 0 }; struct lu_name slname = { 0 }; struct lu_name *lname; int rc; ENTRY; if (info->mti_dlm_req) ldlm_request_cancel(req, info->mti_dlm_req, 0); DEBUG_REQ(D_INODE, req, "rename "DFID"/%s to "DFID"/%s", PFID(rr->rr_fid1), rr->rr_name, PFID(rr->rr_fid2), rr->rr_tgt); rc = mdt_rename_lock(info, &rename_lh); if (rc) { CERROR("Can't lock FS for rename, rc %d\n", rc); RETURN(rc); } lh_newp = &info->mti_lh[MDT_LH_NEW]; /* step 1: lock the source dir. */ lh_srcdirp = &info->mti_lh[MDT_LH_PARENT]; mdt_lock_pdo_init(lh_srcdirp, LCK_PW, rr->rr_name, rr->rr_namelen); msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp, MDS_INODELOCK_UPDATE); if (IS_ERR(msrcdir)) GOTO(out_rename_lock, rc = PTR_ERR(msrcdir)); if (mdt_object_obf(msrcdir)) GOTO(out_unlock_source, rc = -EPERM); rc = mdt_version_get_check_save(info, msrcdir, 0); if (rc) GOTO(out_unlock_source, rc); /* step 2: find & lock the target dir. 
*/ lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD]; mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, rr->rr_tgt, rr->rr_tgtlen); if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) { mdt_object_get(info->mti_env, msrcdir); mtgtdir = msrcdir; if (lh_tgtdirp->mlh_pdo_hash != lh_srcdirp->mlh_pdo_hash) { rc = mdt_pdir_hash_lock(info, lh_tgtdirp, mtgtdir, MDS_INODELOCK_UPDATE); if (rc) GOTO(out_unlock_source, rc); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10); } } else { mtgtdir = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid2); if (IS_ERR(mtgtdir)) GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir)); if (mdt_object_obf(mtgtdir)) GOTO(out_put_target, rc = -EPERM); /* check early, the real version will be saved after locking */ rc = mdt_version_get_check(info, mtgtdir, 1); if (rc) GOTO(out_put_target, rc); if (unlikely(mdt_object_remote(mtgtdir))) { CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID "on different MDTs\n", PFID(rr->rr_fid1), PFID(rr->rr_fid2)); GOTO(out_put_target, rc = -EXDEV); } else { if (likely(mdt_object_exists(mtgtdir))) { /* we lock the target dir if it is local */ rc = mdt_object_lock(info, mtgtdir, lh_tgtdirp, MDS_INODELOCK_UPDATE, MDT_LOCAL_LOCK); if (rc != 0) GOTO(out_put_target, rc); /* get and save correct version after locking */ mdt_version_get_save(info, mtgtdir, 1); } else { GOTO(out_put_target, rc = -ESTALE); } } } /* step 3: find & lock the old object. 
*/ lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen); mdt_name_copy(&slname, lname); fid_zero(old_fid); rc = mdt_lookup_version_check(info, msrcdir, &slname, old_fid, 2); if (rc != 0) GOTO(out_unlock_target, rc); if (lu_fid_eq(old_fid, rr->rr_fid1) || lu_fid_eq(old_fid, rr->rr_fid2)) GOTO(out_unlock_target, rc = -EINVAL); mold = mdt_object_find(info->mti_env, info->mti_mdt, old_fid); if (IS_ERR(mold)) GOTO(out_unlock_target, rc = PTR_ERR(mold)); if (mdt_object_remote(mold)) { mdt_object_put(info->mti_env, mold); CDEBUG(D_INFO, "Source child "DFID" is on another MDT\n", PFID(old_fid)); GOTO(out_unlock_target, rc = -EXDEV); } if (mdt_object_obf(mold)) { mdt_object_put(info->mti_env, mold); GOTO(out_unlock_target, rc = -EPERM); } lh_oldp = &info->mti_lh[MDT_LH_OLD]; mdt_lock_reg_init(lh_oldp, LCK_EX); rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mold); GOTO(out_unlock_target, rc); } info->mti_mos = mold; /* save version after locking */ mdt_version_get_save(info, mold, 2); mdt_set_capainfo(info, 2, old_fid, BYPASS_CAPA); /* step 4: find & lock the new object. 
*/ /* new target object may not exist now */ lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen); /* lookup with version checking */ fid_zero(new_fid); rc = mdt_lookup_version_check(info, mtgtdir, lname, new_fid, 3); if (rc == 0) { /* the new_fid should have been filled at this moment */ if (lu_fid_eq(old_fid, new_fid)) GOTO(out_unlock_old, rc); if (lu_fid_eq(new_fid, rr->rr_fid1) || lu_fid_eq(new_fid, rr->rr_fid2)) GOTO(out_unlock_old, rc = -EINVAL); mdt_lock_reg_init(lh_newp, LCK_EX); mnew = mdt_object_find(info->mti_env, info->mti_mdt, new_fid); if (IS_ERR(mnew)) GOTO(out_unlock_old, rc = PTR_ERR(mnew)); if (mdt_object_obf(mnew)) { mdt_object_put(info->mti_env, mnew); GOTO(out_unlock_old, rc = -EPERM); } if (mdt_object_remote(mnew)) { mdt_object_put(info->mti_env, mnew); CDEBUG(D_INFO, "src child "DFID" is on another MDT\n", PFID(new_fid)); GOTO(out_unlock_old, rc = -EXDEV); } rc = mdt_object_lock(info, mnew, lh_newp, MDS_INODELOCK_FULL, MDT_CROSS_LOCK); if (rc != 0) { mdt_object_put(info->mti_env, mnew); GOTO(out_unlock_old, rc); } /* get and save version after locking */ mdt_version_get_save(info, mnew, 3); mdt_set_capainfo(info, 3, new_fid, BYPASS_CAPA); } else if (rc != -EREMOTE && rc != -ENOENT) { GOTO(out_unlock_old, rc); } else { mdt_enoent_version_save(info, 3); } /* step 5: rename it */ mdt_reint_init_ma(info, ma); mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom, OBD_FAIL_MDS_REINT_RENAME_WRITE); /* Check if @dst is subdir of @src. */ rc = mdt_rename_sanity(info, old_fid); if (rc) GOTO(out_unlock_new, rc); rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir), mdt_object_child(mtgtdir), old_fid, &slname, (mnew ? 
mdt_object_child(mnew) : NULL), lname, ma); /* handle last link of tgt object */ if (rc == 0) { mdt_counter_incr(req, LPROC_MDT_RENAME); if (mnew) mdt_handle_last_unlink(info, mnew, ma); mdt_rename_counter_tally(info, info->mti_mdt, req, msrcdir, mtgtdir); } EXIT; out_unlock_new: if (mnew) mdt_object_unlock_put(info, mnew, lh_newp, rc); out_unlock_old: mdt_object_unlock_put(info, mold, lh_oldp, rc); out_unlock_target: mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc); out_put_target: mdt_object_put(info->mti_env, mtgtdir); out_unlock_source: mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc); out_rename_lock: if (lustre_handle_is_used(&rename_lh)) mdt_rename_unlock(&rename_lh); return rc; }
/*
 * Unlink the name rr->rr_name from the parent directory rr->rr_fid1.
 *
 * VBR: the parent version is saved in the reply, and the child version is
 * obtained and checked while the child is looked up by name.
 *
 * The function proceeds in two visible steps:
 *  1. find and lock the parent (remote lock if the parent is on another MDT,
 *     PDO lock otherwise);
 *  2. look up, find and lock the child, then call mdo_unlink().
 *
 * \param[in] info	per-thread request state; the reint record, attribute
 *			buffer, lock handles and reply capsule are taken from it
 * \param[in] lhc	not referenced anywhere in this function body
 *
 * \retval 0		the name was unlinked
 * \retval -EREMOTE	the child lives on another MDT; the reply body carries
 *			the child FID with OBD_MD_FLID | OBD_MD_MDS set
 * \retval negative	other errno-style failure (see the individual GOTOs)
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
			    struct mdt_lock_handle *lhc)
{
	struct mdt_reint_record *rr = &info->mti_rr;
	struct ptlrpc_request *req = mdt_info_req(info);
	struct md_attr *ma = &info->mti_attr;
	struct lu_fid *child_fid = &info->mti_tmp_fid1;
	struct mdt_object *mp;
	struct mdt_object *mc;
	struct mdt_lock_handle *parent_lh;
	struct mdt_lock_handle *child_lh;
	struct lu_name *lname;
	int rc;
	/* set to 1 when the name entry is already gone (resent remote unlink) */
	int no_name = 0;
	ENTRY;

	DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
		  rr->rr_name);

	if (info->mti_dlm_req)
		ldlm_request_cancel(req, info->mti_dlm_req, 0);

	/* fault-injection hook */
	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
		RETURN(err_serious(-ENOENT));

	/* refuse to unlink from a parent that is obf or .lustre */
	if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1))
		RETURN(-EPERM);

	/*
	 * step 1: Find the parent.
	 */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		/* remote parent: take an EX UPDATE lock through the remote path */
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		/* local parent: PW PDO lock keyed on the name being removed */
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		/* the version check is only done for a local parent */
		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0) {
		/* The name may be missing during the resend of a
		 * remote unlink. Consider the following case:
		 * dir_A is a remote directory, the name entry of
		 * dir_A is on MDT0, the directory is on MDT1,
		 *
		 * 1. client sends unlink req to MDT1.
		 * 2. MDT1 sends name delete update to MDT0.
		 * 3. name entry is being deleted in MDT0 synchronously.
		 * 4. MDT1 is restarted.
		 * 5. client resends unlink req to MDT1. So it can not
		 *    find the name entry on MDT0 anymore.
		 * In this case, MDT1 only needs to destroy the local
		 * directory.
		 * */
		if (mdt_object_remote(mp) && rc == -ENOENT &&
		    !fid_is_zero(rr->rr_fid2) &&
		    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
			/* fall back to the child FID supplied by the client */
			no_name = 1;
			*child_fid = *rr->rr_fid2;
		 } else {
			GOTO(unlock_parent, rc);
		 }
	}

	/* the child itself must not be obf or .lustre either */
	if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid))
		GOTO(unlock_parent, rc = -EPERM);

	mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

	child_lh = &info->mti_lh[MDT_LH_CHILD];
	mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		/* a client-supplied child FID that resolves to a remote
		 * object is reported as "not found" */
		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt_obd_name(info->mti_mdt),
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(put_child, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt_obd_name(info->mti_mdt),
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (!mdt_is_dne_client(req->rq_export))
			/* Return -EIO for old client */
			GOTO(put_child, rc = -EIO);

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt_obd_name(info->mti_mdt),
					-EPERM);
				GOTO(put_child, rc = -EPERM);
			}

			/* remove only the name entry: no child object is
			 * passed to mdo_unlink() and the child is not locked */
			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma, no_name);
			GOTO(put_child, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		/* NOTE(review): the return value of this mdt_object_lock()
		 * call is not checked; -EREMOTE is returned either way.
		 * Confirm that this best-effort behaviour is intended. */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		/* tell the client which FID to retry against */
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		GOTO(unlock_child, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		/* rm_entry is rejected when the child is local */
		rc = -EPERM;
		CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n",
		       mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc);
		GOTO(put_child, rc);
	}

	/* We used to acquire MDS_INODELOCK_FULL here but we can't do
	 * this now because a running HSM restore on the child (unlink
	 * victim) will hold the layout lock. See LU-4002. */
	rc = mdt_object_lock(info, mc, child_lh,
			     MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE,
			     MDT_CROSS_LOCK);
	if (rc != 0)
		GOTO(put_child, rc);

	/* fault-injection hook */
	mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
		       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
	/* save version when object is locked */
	mdt_version_get_save(info, mc, 1);
	/*
	 * Now we can only make sure we need MA_INODE, in mdd layer, will check
	 * whether need MA_LOV and MA_COOKIE.
	 */
	ma->ma_need = MA_INODE;
	ma->ma_valid = 0;
	mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);

	/* the unlink itself runs with the child's mot_lov_mutex held */
	mutex_lock(&mc->mot_lov_mutex);

	rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
			mdt_object_child(mc), lname, ma, no_name);

	mutex_unlock(&mc->mot_lov_mutex);

	/* re-read attributes only if the object is not already dying */
	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
	if (rc == 0)
		mdt_handle_last_unlink(info, mc, ma);

	/* account the operation as rmdir or unlink based on the file type;
	 * this runs whenever MA_INODE is valid, regardless of rc */
	if (ma->ma_valid & MA_INODE) {
		switch (ma->ma_attr.la_mode & S_IFMT) {
		case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
			break;
		case S_IFREG:
		case S_IFLNK:
		case S_IFCHR:
		case S_IFBLK:
		case S_IFIFO:
		case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
			break;
		default:
			LASSERTF(0, "bad file type %o unlinking\n",
				 ma->ma_attr.la_mode);
		}
	}

	EXIT;
	/* cleanup ladder: each label undoes one acquisition, in reverse order */
unlock_child:
	mdt_object_unlock(info, mc, child_lh, rc);
put_child:
	mdt_object_put(info->mti_env, mc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
	return rc;
}
/*
 * Handler for: getattr, lookup and revalidate cases.
 *
 * Locates the MDS target for op_data->op_fid1 and issues the intent lock
 * there. Two follow-ups are handled after the first md_intent_lock():
 *  - no RPC was sent (*reqp == NULL): the slave stripes described by
 *    op_data->op_mea2, if any, are revalidated;
 *  - a negative lookup on a directory whose layout magic is
 *    LMV_MAGIC_MIGRATE: the lookup is retried once against stripe 1.
 * Finally, if the reply body carries OBD_MD_MDS, the request is followed via
 * lmv_intent_remote().
 *
 * \param[in]     exp		LMV export
 * \param[in,out] op_data	operation data; op_fid2 is zeroed when it is
 *				not sane, MDS_CROSS_REF is cleared from
 *				op_bias, and op_fid1 is replaced by the
 *				stripe-1 FID on a migrate retry
 * \param[in]     lmm		passed through to md_intent_lock()
 * \param[in]     lmmsize	passed through to md_intent_lock()
 * \param[in,out] it		lookup intent
 * \param[in]     flags		passed through to md_intent_lock()
 * \param[in,out] reqp		request/reply of the (last) RPC, or NULL when
 *				no RPC was sent
 * \param[in]     cb_blocking	blocking callback, passed through
 * \param[in]     extra_lock_flags passed through
 *
 * \retval 0		on success
 * \retval -EPROTO	the reply has no MDT body
 * \retval negative	other errno-style failure from the callees
 */
int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
		      void *lmm, int lmmsize, struct lookup_intent *it,
		      int flags, struct ptlrpc_request **reqp,
		      ldlm_blocking_callback cb_blocking,
		      __u64 extra_lock_flags)
{
	struct obd_device	*obd = exp->exp_obd;
	struct lmv_obd		*lmv = &obd->u.lmv;
	struct lmv_tgt_desc	*tgt = NULL;
	struct mdt_body		*body;
	struct lmv_stripe_md	*lsm = op_data->op_mea1;
	int			rc = 0;
	ENTRY;

	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt))
		RETURN(PTR_ERR(tgt));

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%d lsm=%p lsm_magic=%x\n",
	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx, lsm, lsm == NULL ? -1 : lsm->lsm_md_magic);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
			    flags, reqp, cb_blocking, extra_lock_flags);
	if (rc < 0)
		RETURN(rc);

	if (*reqp == NULL) {
		/* If RPC happens, lsm information will be revalidated
		 * during update_inode process (see ll_update_lsm_md) */
		if (op_data->op_mea2 != NULL)
			rc = lmv_revalidate_slaves(exp, NULL, op_data->op_mea2,
						   cb_blocking,
						   extra_lock_flags);
		RETURN(rc);
	} else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm != NULL &&
		   lsm->lsm_md_magic == LMV_MAGIC_MIGRATE) {
		/* For migrating directory, if it can not find the child in
		 * the source directory(master stripe), try the targeting
		 * directory(stripe 1) */
		tgt = lmv_find_target(lmv, &lsm->lsm_md_oinfo[1].lmo_fid);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));

		/* drop the negative reply before re-issuing the intent */
		ptlrpc_req_finished(*reqp);
		it->d.lustre.it_data = NULL;
		*reqp = NULL;

		CDEBUG(D_INODE, "For migrating dir, try target dir "DFID"\n",
		       PFID(&lsm->lsm_md_oinfo[1].lmo_fid));

		op_data->op_fid1 = lsm->lsm_md_oinfo[1].lmo_fid;
		it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
		rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
				    flags, reqp, cb_blocking, extra_lock_flags);
		/* The first reply was released and *reqp reset to NULL above,
		 * so if the retry failed or produced no request there is no
		 * reply to parse below; bail out instead of dereferencing a
		 * NULL request. */
		if (rc < 0 || *reqp == NULL)
			RETURN(rc);
	}

	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/* Cross-ref case: the reply is flagged OBD_MD_MDS, so follow it
	 * with lmv_intent_remote(). Otherwise just get out of here. */
	if (unlikely((body->valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags,
				       reqp, cb_blocking, extra_lock_flags);
		if (rc != 0)
			RETURN(rc);
		/* lmv_intent_remote() received reqp, so make sure the
		 * request it left there still carries an MDT body */
		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}