Пример #1
0
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	struct task_struct		*task;

	ENTRY;

	OBD_ALLOC_PTR(d->opd_pre);
	if (d->opd_pre == NULL)
		RETURN(-ENOMEM);

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	init_waitqueue_head(&d->opd_pre_waitq);
	init_waitqueue_head(&d->opd_pre_user_waitq);
	init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	task = kthread_run(osp_precreate_thread, d,
			   "osp-pre-%u-%u", d->opd_index, d->opd_group);
	if (IS_ERR(task)) {
		CERROR("can't start precreate thread %ld\n", PTR_ERR(task));
		RETURN(PTR_ERR(task));
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
int osp_init_precreate(struct osp_device *d)
{
	struct l_wait_info	 lwi = { 0 };
	int			 rc;

	ENTRY;

	/* initially precreation isn't ready */
	d->opd_pre_status = -EAGAIN;
	fid_zero(&d->opd_pre_used_fid);
	d->opd_pre_used_fid.f_oid = 1;
	fid_zero(&d->opd_pre_last_created_fid);
	d->opd_pre_last_created_fid.f_oid = 1;
	d->opd_pre_reserved = 0;
	d->opd_got_disconnected = 1;
	d->opd_pre_grow_slow = 0;
	d->opd_pre_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_min_grow_count = OST_MIN_PRECREATE;
	d->opd_pre_max_grow_count = OST_MAX_PRECREATE;

	spin_lock_init(&d->opd_pre_lock);
	cfs_waitq_init(&d->opd_pre_waitq);
	cfs_waitq_init(&d->opd_pre_user_waitq);
	cfs_waitq_init(&d->opd_pre_thread.t_ctl_waitq);

	/*
	 * Initialize statfs-related things
	 */
	d->opd_statfs_maxage = 5; /* default update interval */
	d->opd_statfs_fresh_till = cfs_time_shift(-1000);
	CDEBUG(D_OTHER, "current %llu, fresh till %llu\n",
	       (unsigned long long)cfs_time_current(),
	       (unsigned long long)d->opd_statfs_fresh_till);
	cfs_timer_init(&d->opd_statfs_timer, osp_statfs_timer_cb, d);

	/*
	 * start thread handling precreation and statfs updates
	 */
	rc = cfs_create_thread(osp_precreate_thread, d, 0);
	if (rc < 0) {
		CERROR("can't start precreate thread %d\n", rc);
		RETURN(rc);
	}

	l_wait_event(d->opd_pre_thread.t_ctl_waitq,
		     osp_precreate_running(d) || osp_precreate_stopped(d),
		     &lwi);

	RETURN(0);
}
Пример #3
0
/*
 * Finish the current sequence due to disconnect.
 * See mdc_import_event()
 */
void seq_client_flush(struct lu_client_seq *seq)
{
	wait_queue_t link;

	LASSERT(seq != NULL);
	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	while (seq->lcs_update) {
		add_wait_queue(&seq->lcs_waitq, &link);
		set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&seq->lcs_mutex);

		schedule();

		mutex_lock(&seq->lcs_mutex);
		remove_wait_queue(&seq->lcs_waitq, &link);
		set_current_state(TASK_RUNNING);
	}

        fid_zero(&seq->lcs_fid);
        /**
         * this id shld not be used for seq range allocation.
         * set to -1 for dgb check.
         */

        seq->lcs_space.lsr_index = -1;

	lu_seq_range_init(&seq->lcs_space);
	mutex_unlock(&seq->lcs_mutex);
}
Пример #4
0
int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo)
{
	struct ofd_thread_info	*info = ofd_info(env);
	struct filter_fid_old	*ff   = &info->fti_mds_fid_old;
	struct lu_buf		*buf  = &info->fti_buf;
	struct lu_fid		*pfid = &fo->ofo_pfid;
	int			 rc   = 0;

	if (fid_is_sane(pfid))
		return 0;

	buf->lb_buf = ff;
	buf->lb_len = sizeof(*ff);
	rc = dt_xattr_get(env, ofd_object_child(fo), buf, XATTR_NAME_FID,
			  BYPASS_CAPA);
	if (rc < 0)
		return rc;

	if (rc < sizeof(struct lu_fid)) {
		fid_zero(pfid);

		return -ENODATA;
	}

	pfid->f_seq = le64_to_cpu(ff->ff_parent.f_seq);
	pfid->f_oid = le32_to_cpu(ff->ff_parent.f_oid);
	/* Currently, the filter_fid::ff_parent::f_ver is not the real parent
	 * MDT-object's FID::f_ver, instead it is the OST-object index in its
	 * parent MDT-object's layout EA. */
	pfid->f_stripe_idx = le32_to_cpu(ff->ff_parent.f_stripe_idx);

	return 0;
}
Пример #5
0
/**
 * open the PENDING directory for device \a mdd
 *
 * The PENDING directory persistently tracks files and directories that were
 * unlinked from the namespace (nlink == 0) but are still held open by clients.
 * Those inodes shouldn't be deleted if the MDS crashes, because the clients
 * would not be able to recover and reopen those files.  Instead, these inodes
 * are linked into the PENDING directory on disk, and only deleted if all
 * clients close them, or the MDS finishes client recovery without any client
 * reopening them (i.e. former clients didn't join recovery).
 *  \param d   mdd device being started.
 *
 *  \retval 0  success
 *  \retval  -ve index operation error.
 *
 */
int orph_index_init(const struct lu_env *env, struct mdd_device *mdd)
{
	struct lu_fid		 fid;
	struct dt_object	*d;
	int			 rc = 0;

	ENTRY;

	/* create PENDING dir */
	fid_zero(&fid);
	rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid,
				   orph_index_name, S_IFDIR | S_IRUGO |
				   S_IWUSR | S_IXUGO, &fid);
	if (rc < 0)
		RETURN(rc);

	d = dt_locate(env, mdd->mdd_child, &fid);
	if (IS_ERR(d))
		RETURN(PTR_ERR(d));
	LASSERT(lu_object_exists(&d->do_lu));
	if (!dt_try_as_dir(env, d)) {
		CERROR("%s: \"%s\" is not an index: rc = %d\n",
		       mdd2obd_dev(mdd)->obd_name, orph_index_name, rc);
		lu_object_put(env, &d->do_lu);
		RETURN(-ENOTDIR);
	}
	mdd->mdd_orphans = d;
	RETURN(0);
}
Пример #6
0
void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1,
                         struct inode *i2, const char *name, int namelen,
                         int mode, __u32 opc)
{
        LASSERT(i1 != NULL || i2 != NULL);
        LASSERT(op_data);

        if (i1) {
                ll_i2gids(op_data->op_suppgids, i1, i2);
                op_data->op_fid1 = *ll_inode2fid(i1);
        }else {
                ll_i2gids(op_data->op_suppgids, i2, i1);
                op_data->op_fid1 = *ll_inode2fid(i2);
        }

        if (i2)
                op_data->op_fid2 = *ll_inode2fid(i2);
        else
                fid_zero(&op_data->op_fid2);

        op_data->op_opc = opc;
        op_data->op_name = name;
        op_data->op_mode = mode;
        op_data->op_namelen = namelen;
        op_data->op_mod_time = CFS_CURRENT_TIME;
        op_data->op_data = NULL;
}
Пример #7
0
/*
 * Finish the current sequence due to disconnect.
 * See mdc_import_event()
 */
void seq_client_flush(struct lu_client_seq *seq)
{
        cfs_waitlink_t link;

        LASSERT(seq != NULL);
        cfs_waitlink_init(&link);
	mutex_lock(&seq->lcs_mutex);

        while (seq->lcs_update) {
                cfs_waitq_add(&seq->lcs_waitq, &link);
                cfs_set_current_state(CFS_TASK_UNINT);
		mutex_unlock(&seq->lcs_mutex);

                cfs_waitq_wait(&link, CFS_TASK_UNINT);

		mutex_lock(&seq->lcs_mutex);
                cfs_waitq_del(&seq->lcs_waitq, &link);
                cfs_set_current_state(CFS_TASK_RUNNING);
        }

        fid_zero(&seq->lcs_fid);
        /**
         * this id shld not be used for seq range allocation.
         * set to -1 for dgb check.
         */

        seq->lcs_space.lsr_index = -1;

        range_init(&seq->lcs_space);
	mutex_unlock(&seq->lcs_mutex);
}
Пример #8
0
int osp_init_pre_fid(struct osp_device *osp)
{
	struct lu_env		env;
	struct osp_thread_info	*osi;
	struct lu_client_seq	*cli_seq;
	struct lu_fid		*last_fid;
	int			rc;
	ENTRY;

	LASSERT(osp->opd_pre != NULL);

	/* Return if last_used fid has been initialized */
	if (!fid_is_zero(&osp->opd_last_used_fid))
		RETURN(0);

	rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
	if (rc) {
		CERROR("%s: init env error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		RETURN(rc);
	}

	osi = osp_env_info(&env);
	last_fid = &osi->osi_fid;
	fid_zero(last_fid);
	/* For a freshed fs, it will allocate a new sequence first */
	if (osp_is_fid_client(osp) && osp->opd_group != 0) {
		cli_seq = osp->opd_obd->u.cli.cl_seq;
		rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq);
		if (rc != 0) {
			CERROR("%s: alloc fid error: rc = %d\n",
			       osp->opd_obd->obd_name, rc);
			GOTO(out, rc);
		}
	} else {
		last_fid->f_seq = fid_idif_seq(0, osp->opd_index);
	}
	last_fid->f_oid = 1;
	last_fid->f_ver = 0;

	spin_lock(&osp->opd_pre_lock);
	osp->opd_last_used_fid = *last_fid;
	osp->opd_pre_used_fid = *last_fid;
	osp->opd_pre_last_created_fid = *last_fid;
	spin_unlock(&osp->opd_pre_lock);
	rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1);
	if (rc != 0) {
		CERROR("%s: write fid error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		GOTO(out, rc);
	}
out:
	lu_env_fini(&env);
	RETURN(rc);
}
Пример #9
0
/*
 * Handler for: getattr, lookup and revalidate cases.
 */
int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
		      void *lmm, int lmmsize, struct lookup_intent *it,
		      int flags, struct ptlrpc_request **reqp,
		      ldlm_blocking_callback cb_blocking,
		      __u64 extra_lock_flags)
{
	struct obd_device      *obd = exp->exp_obd;
	struct lmv_obd	 *lmv = &obd->u.lmv;
	struct lmv_tgt_desc    *tgt = NULL;
	struct mdt_body	*body;
	int		     rc = 0;

	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt))
		return PTR_ERR(tgt);

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%d\n", PFID(&op_data->op_fid1),
	       PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
			     flags, reqp, cb_blocking, extra_lock_flags);

	if (rc < 0 || *reqp == NULL)
		return rc;

	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		return -EPROTO;
	/* Not cross-ref case, just get out of here. */
	if (likely(!(body->valid & OBD_MD_MDS)))
		return 0;

	rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags, reqp,
			       cb_blocking, extra_lock_flags);

	return rc;
}
Пример #10
0
int osd_oi_delete(struct osd_thread_info *info,
		  struct osd_device *osd, const struct lu_fid *fid,
		  handle_t *th, enum oi_check_flags flags)
{
	struct lu_fid *oi_fid = &info->oti_fid2;

	/* clear idmap cache */
	if (lu_fid_eq(fid, &info->oti_cache.oic_fid))
		fid_zero(&info->oti_cache.oic_fid);

	if (fid_is_last_id(fid))
		return 0;

	if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
		return osd_obj_map_delete(info, osd, fid, th);

	fid_cpu_to_be(oi_fid, fid);
	return osd_oi_iam_delete(info, osd_fid2oi(osd, fid),
				 (const struct dt_key *)oi_fid, th);
}
Пример #11
0
static int
lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
		     char *buf, int len)
{
	struct lfsck_instance	*lfsck = com->lc_lfsck;
	struct lfsck_bookmark	*bk    = &lfsck->li_bookmark_ram;
	struct lfsck_namespace	*ns    = com->lc_file_ram;
	int			 save  = len;
	int			 ret   = -ENOSPC;
	int			 rc;

	down_read(&com->lc_sem);
	rc = snprintf(buf, len,
		      "name: lfsck_namespace\n"
		      "magic: %#x\n"
		      "version: %d\n"
		      "status: %s\n",
		      ns->ln_magic,
		      bk->lb_version,
		      lfsck_status2names(ns->ln_status));
	if (rc <= 0)
		goto out;

	buf += rc;
	len -= rc;
	rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
			     "flags");
	if (rc < 0)
		goto out;

	rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
			     "param");
	if (rc < 0)
		goto out;

	rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
			     "time_since_last_completed");
	if (rc < 0)
		goto out;

	rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
			     "time_since_latest_start");
	if (rc < 0)
		goto out;

	rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
			     "time_since_last_checkpoint");
	if (rc < 0)
		goto out;

	rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
			    "latest_start_position");
	if (rc < 0)
		goto out;

	rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
			    "last_checkpoint_position");
	if (rc < 0)
		goto out;

	rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
			    "first_failure_position");
	if (rc < 0)
		goto out;

	if (ns->ln_status == LS_SCANNING_PHASE1) {
		struct lfsck_position pos;
		const struct dt_it_ops *iops;
		cfs_duration_t duration = cfs_time_current() -
					  lfsck->li_time_last_checkpoint;
		__u64 checked = ns->ln_items_checked + com->lc_new_checked;
		__u64 speed = checked;
		__u64 new_checked = com->lc_new_checked * HZ;
		__u32 rtime = ns->ln_run_time_phase1 +
			      cfs_duration_sec(duration + HALF_SEC);

		if (duration != 0)
			do_div(new_checked, duration);
		if (rtime != 0)
			do_div(speed, rtime);
		rc = snprintf(buf, len,
			      "checked_phase1: "LPU64"\n"
			      "checked_phase2: "LPU64"\n"
			      "updated_phase1: "LPU64"\n"
			      "updated_phase2: "LPU64"\n"
			      "failed_phase1: "LPU64"\n"
			      "failed_phase2: "LPU64"\n"
			      "dirs: "LPU64"\n"
			      "M-linked: "LPU64"\n"
			      "nlinks_repaired: "LPU64"\n"
			      "lost_found: "LPU64"\n"
			      "success_count: %u\n"
			      "run_time_phase1: %u seconds\n"
			      "run_time_phase2: %u seconds\n"
			      "average_speed_phase1: "LPU64" items/sec\n"
			      "average_speed_phase2: N/A\n"
			      "real-time_speed_phase1: "LPU64" items/sec\n"
			      "real-time_speed_phase2: N/A\n",
			      checked,
			      ns->ln_objs_checked_phase2,
			      ns->ln_items_repaired,
			      ns->ln_objs_repaired_phase2,
			      ns->ln_items_failed,
			      ns->ln_objs_failed_phase2,
			      ns->ln_dirs_checked,
			      ns->ln_mlinked_checked,
			      ns->ln_objs_nlink_repaired,
			      ns->ln_objs_lost_found,
			      ns->ln_success_count,
			      rtime,
			      ns->ln_run_time_phase2,
			      speed,
			      new_checked);
		if (rc <= 0)
			goto out;

		buf += rc;
		len -= rc;

		LASSERT(lfsck->li_di_oit != NULL);

		iops = &lfsck->li_obj_oit->do_index_ops->dio_it;

		/* The low layer otable-based iteration position may NOT
		 * exactly match the namespace-based directory traversal
		 * cookie. Generally, it is not a serious issue. But the
		 * caller should NOT make assumption on that. */
		pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
		if (!lfsck->li_current_oit_processed)
			pos.lp_oit_cookie--;

		spin_lock(&lfsck->li_lock);
		if (lfsck->li_di_dir != NULL) {
			pos.lp_dir_cookie = lfsck->li_cookie_dir;
			if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) {
				fid_zero(&pos.lp_dir_parent);
				pos.lp_dir_cookie = 0;
			} else {
				pos.lp_dir_parent =
					*lfsck_dto2fid(lfsck->li_obj_dir);
			}
		} else {
			fid_zero(&pos.lp_dir_parent);
			pos.lp_dir_cookie = 0;
		}
		spin_unlock(&lfsck->li_lock);
		rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
		if (rc <= 0)
			goto out;
	} else if (ns->ln_status == LS_SCANNING_PHASE2) {
		cfs_duration_t duration = cfs_time_current() -
					  lfsck->li_time_last_checkpoint;
		__u64 checked = ns->ln_objs_checked_phase2 +
				com->lc_new_checked;
		__u64 speed1 = ns->ln_items_checked;
		__u64 speed2 = checked;
		__u64 new_checked = com->lc_new_checked * HZ;
		__u32 rtime = ns->ln_run_time_phase2 +
			      cfs_duration_sec(duration + HALF_SEC);

		if (duration != 0)
			do_div(new_checked, duration);
		if (ns->ln_run_time_phase1 != 0)
			do_div(speed1, ns->ln_run_time_phase1);
		if (rtime != 0)
			do_div(speed2, rtime);
		rc = snprintf(buf, len,
			      "checked_phase1: "LPU64"\n"
			      "checked_phase2: "LPU64"\n"
			      "updated_phase1: "LPU64"\n"
			      "updated_phase2: "LPU64"\n"
			      "failed_phase1: "LPU64"\n"
			      "failed_phase2: "LPU64"\n"
			      "dirs: "LPU64"\n"
			      "M-linked: "LPU64"\n"
			      "nlinks_repaired: "LPU64"\n"
			      "lost_found: "LPU64"\n"
			      "success_count: %u\n"
			      "run_time_phase1: %u seconds\n"
			      "run_time_phase2: %u seconds\n"
			      "average_speed_phase1: "LPU64" items/sec\n"
			      "average_speed_phase2: "LPU64" objs/sec\n"
			      "real-time_speed_phase1: N/A\n"
			      "real-time_speed_phase2: "LPU64" objs/sec\n"
			      "current_position: "DFID"\n",
			      ns->ln_items_checked,
			      checked,
			      ns->ln_items_repaired,
			      ns->ln_objs_repaired_phase2,
			      ns->ln_items_failed,
			      ns->ln_objs_failed_phase2,
			      ns->ln_dirs_checked,
			      ns->ln_mlinked_checked,
			      ns->ln_objs_nlink_repaired,
			      ns->ln_objs_lost_found,
			      ns->ln_success_count,
			      ns->ln_run_time_phase1,
			      rtime,
			      speed1,
			      speed2,
			      new_checked,
			      PFID(&ns->ln_fid_latest_scanned_phase2));
		if (rc <= 0)
			goto out;

		buf += rc;
		len -= rc;
	} else {
		__u64 speed1 = ns->ln_items_checked;
		__u64 speed2 = ns->ln_objs_checked_phase2;

		if (ns->ln_run_time_phase1 != 0)
			do_div(speed1, ns->ln_run_time_phase1);
		if (ns->ln_run_time_phase2 != 0)
			do_div(speed2, ns->ln_run_time_phase2);
		rc = snprintf(buf, len,
			      "checked_phase1: "LPU64"\n"
			      "checked_phase2: "LPU64"\n"
			      "updated_phase1: "LPU64"\n"
			      "updated_phase2: "LPU64"\n"
			      "failed_phase1: "LPU64"\n"
			      "failed_phase2: "LPU64"\n"
			      "dirs: "LPU64"\n"
			      "M-linked: "LPU64"\n"
			      "nlinks_repaired: "LPU64"\n"
			      "lost_found: "LPU64"\n"
			      "success_count: %u\n"
			      "run_time_phase1: %u seconds\n"
			      "run_time_phase2: %u seconds\n"
			      "average_speed_phase1: "LPU64" items/sec\n"
			      "average_speed_phase2: "LPU64" objs/sec\n"
			      "real-time_speed_phase1: N/A\n"
			      "real-time_speed_phase2: N/A\n"
			      "current_position: N/A\n",
			      ns->ln_items_checked,
			      ns->ln_objs_checked_phase2,
			      ns->ln_items_repaired,
			      ns->ln_objs_repaired_phase2,
			      ns->ln_items_failed,
			      ns->ln_objs_failed_phase2,
			      ns->ln_dirs_checked,
			      ns->ln_mlinked_checked,
			      ns->ln_objs_nlink_repaired,
			      ns->ln_objs_lost_found,
			      ns->ln_success_count,
			      ns->ln_run_time_phase1,
			      ns->ln_run_time_phase2,
			      speed1,
			      speed2);
		if (rc <= 0)
			goto out;

		buf += rc;
		len -= rc;
	}
	ret = save - len;

out:
	up_read(&com->lc_sem);
	return ret;
}
Пример #12
0
/*
 * Handler for: getattr, lookup and revalidate cases.
 */
static int
lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
		  struct lookup_intent *it, struct ptlrpc_request **reqp,
		  ldlm_blocking_callback cb_blocking,
		  __u64 extra_lock_flags)
{
	struct obd_device	*obd = exp->exp_obd;
	struct lmv_obd		*lmv = &obd->u.lmv;
	struct lmv_tgt_desc	*tgt = NULL;
	struct mdt_body		*body;
	struct lmv_stripe_md	*lsm = op_data->op_mea1;
	int			rc = 0;
	ENTRY;

	/* If it returns ERR_PTR(-EBADFD) then it is an unknown hash type
	 * it will try all stripes to locate the object */
	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt) && (PTR_ERR(tgt) != -EBADFD))
		RETURN(PTR_ERR(tgt));

	/* Both migrating dir and unknown hash dir need to try
	 * all of sub-stripes */
	if (lsm != NULL && !lmv_is_known_hash_type(lsm->lsm_md_hash_type)) {
		struct lmv_oinfo *oinfo;

		oinfo = &lsm->lsm_md_oinfo[0];

		op_data->op_fid1 = oinfo->lmo_fid;
		op_data->op_mds = oinfo->lmo_mds;
		tgt = lmv_get_target(lmv, oinfo->lmo_mds, NULL);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));
	}

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%u lsm=%p lsm_magic=%x\n",
	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx, lsm, lsm == NULL ? -1 : lsm->lsm_md_magic);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
			    extra_lock_flags);
	if (rc < 0)
		RETURN(rc);

	if (*reqp == NULL) {
		/* If RPC happens, lsm information will be revalidated
		 * during update_inode process (see ll_update_lsm_md) */
		if (op_data->op_mea2 != NULL) {
			rc = lmv_revalidate_slaves(exp, op_data->op_mea2,
						   cb_blocking,
						   extra_lock_flags);
			if (rc != 0)
				RETURN(rc);
		}
		RETURN(rc);
	} else if (it_disposition(it, DISP_LOOKUP_NEG) && lsm != NULL &&
		   lmv_need_try_all_stripes(lsm)) {
		/* For migrating and unknown hash type directory, it will
		 * try to target the entry on other stripes */
		int stripe_index;

		for (stripe_index = 1;
		     stripe_index < lsm->lsm_md_stripe_count &&
		     it_disposition(it, DISP_LOOKUP_NEG); stripe_index++) {
			struct lmv_oinfo *oinfo;

			/* release the previous request */
			ptlrpc_req_finished(*reqp);
			it->d.lustre.it_data = NULL;
			*reqp = NULL;

			oinfo = &lsm->lsm_md_oinfo[stripe_index];
			tgt = lmv_find_target(lmv, &oinfo->lmo_fid);
			if (IS_ERR(tgt))
				RETURN(PTR_ERR(tgt));

			CDEBUG(D_INODE, "Try other stripes " DFID"\n",
			       PFID(&oinfo->lmo_fid));

			op_data->op_fid1 = oinfo->lmo_fid;
			it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
			rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp,
					    cb_blocking, extra_lock_flags);
			if (rc != 0)
				RETURN(rc);
		}
	}

	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/* Not cross-ref case, just get out of here. */
	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, it, NULL, reqp, cb_blocking,
				       extra_lock_flags);
		if (rc != 0)
			RETURN(rc);
		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}
Пример #13
0
static int lfsck_namespace_prep(const struct lu_env *env,
				struct lfsck_component *com,
				struct lfsck_start_param *lsp)
{
	struct lfsck_instance	*lfsck	= com->lc_lfsck;
	struct lfsck_namespace	*ns	= com->lc_file_ram;
	struct lfsck_position	*pos	= &com->lc_pos_start;

	if (ns->ln_status == LS_COMPLETED) {
		int rc;

		rc = lfsck_namespace_reset(env, com, false);
		if (rc != 0)
			return rc;
	}

	down_write(&com->lc_sem);

	ns->ln_time_latest_start = cfs_time_current_sec();

	spin_lock(&lfsck->li_lock);
	if (ns->ln_flags & LF_SCANNED_ONCE) {
		if (!lfsck->li_drop_dryrun ||
		    lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
			ns->ln_status = LS_SCANNING_PHASE2;
			cfs_list_del_init(&com->lc_link);
			cfs_list_add_tail(&com->lc_link,
					  &lfsck->li_list_double_scan);
			if (!cfs_list_empty(&com->lc_link_dir))
				cfs_list_del_init(&com->lc_link_dir);
			lfsck_pos_set_zero(pos);
		} else {
			ns->ln_status = LS_SCANNING_PHASE1;
			ns->ln_run_time_phase1 = 0;
			ns->ln_run_time_phase2 = 0;
			ns->ln_items_checked = 0;
			ns->ln_items_repaired = 0;
			ns->ln_items_failed = 0;
			ns->ln_dirs_checked = 0;
			ns->ln_mlinked_checked = 0;
			ns->ln_objs_checked_phase2 = 0;
			ns->ln_objs_repaired_phase2 = 0;
			ns->ln_objs_failed_phase2 = 0;
			ns->ln_objs_nlink_repaired = 0;
			ns->ln_objs_lost_found = 0;
			fid_zero(&ns->ln_fid_latest_scanned_phase2);
			if (cfs_list_empty(&com->lc_link_dir))
				cfs_list_add_tail(&com->lc_link_dir,
						  &lfsck->li_list_dir);
			*pos = ns->ln_pos_first_inconsistent;
		}
	} else {
		ns->ln_status = LS_SCANNING_PHASE1;
		if (cfs_list_empty(&com->lc_link_dir))
			cfs_list_add_tail(&com->lc_link_dir,
					  &lfsck->li_list_dir);
		if (!lfsck->li_drop_dryrun ||
		    lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
			*pos = ns->ln_pos_last_checkpoint;
			pos->lp_oit_cookie++;
		} else {
			*pos = ns->ln_pos_first_inconsistent;
		}
	}
	spin_unlock(&lfsck->li_lock);

	up_write(&com->lc_sem);
	return 0;
}
Пример #14
0
/*
 * VBR: save parent version in reply and child version getting by its name.
 * Version of child is getting and checking during its lookup. If
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

        /*
	 * step 1: Found the parent.
         */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	if (mdt_object_obf(mp))
		GOTO(put_parent, rc = -EPERM);

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0)
		GOTO(unlock_parent, rc);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt2obd_dev(info->mti_mdt)->obd_name,
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(unlock_parent, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt2obd_dev(info->mti_mdt)->obd_name,
					-EPERM);
				GOTO(unlock_parent, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma);
			mdt_object_put(info->mti_env, mc);
			GOTO(unlock_parent, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		mdt_object_unlock_put(info, mc, child_lh, rc);
		GOTO(unlock_parent, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		CERROR("%s: lfs rmdir should not be used on local dir %s\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name);
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc = -EPERM);
	}

        rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL,
                             MDT_CROSS_LOCK);
	if (rc != 0) {
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc);
	}

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
        /*
         * Now we can only make sure we need MA_INODE, in mdd layer, will check
         * whether need MA_LOV and MA_COOKIE.
         */
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
        rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
                        mdt_object_child(mc), lname, ma);
	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
        if (rc == 0)
                mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;

        mdt_object_unlock_put(info, mc, child_lh, rc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Пример #15
0
/*
 * VBR: rename versions in reply: 0 - src parent; 1 - tgt parent;
 * 2 - src child; 3 - tgt child.
 * Update on disk version of src child.
 */
static int mdt_reint_rename(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct md_attr          *ma = &info->mti_attr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct mdt_object       *msrcdir;
        struct mdt_object       *mtgtdir;
        struct mdt_object       *mold;
        struct mdt_object       *mnew = NULL;
        struct mdt_lock_handle  *lh_srcdirp;
        struct mdt_lock_handle  *lh_tgtdirp;
        struct mdt_lock_handle  *lh_oldp;
        struct mdt_lock_handle  *lh_newp;
        struct lu_fid           *old_fid = &info->mti_tmp_fid1;
        struct lu_fid           *new_fid = &info->mti_tmp_fid2;
        struct lustre_handle     rename_lh = { 0 };
        struct lu_name           slname = { 0 };
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        DEBUG_REQ(D_INODE, req, "rename "DFID"/%s to "DFID"/%s",
                  PFID(rr->rr_fid1), rr->rr_name,
                  PFID(rr->rr_fid2), rr->rr_tgt);

	rc = mdt_rename_lock(info, &rename_lh);
	if (rc) {
		CERROR("Can't lock FS for rename, rc %d\n", rc);
		RETURN(rc);
	}

        lh_newp = &info->mti_lh[MDT_LH_NEW];

        /* step 1: lock the source dir. */
        lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh_srcdirp, LCK_PW, rr->rr_name,
                          rr->rr_namelen);
        msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp,
                                       MDS_INODELOCK_UPDATE);
        if (IS_ERR(msrcdir))
                GOTO(out_rename_lock, rc = PTR_ERR(msrcdir));

        if (mdt_object_obf(msrcdir))
                GOTO(out_unlock_source, rc = -EPERM);

        rc = mdt_version_get_check_save(info, msrcdir, 0);
        if (rc)
                GOTO(out_unlock_source, rc);

        /* step 2: find & lock the target dir. */
        lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, rr->rr_tgt,
                          rr->rr_tgtlen);
        if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) {
                mdt_object_get(info->mti_env, msrcdir);
                mtgtdir = msrcdir;
                if (lh_tgtdirp->mlh_pdo_hash != lh_srcdirp->mlh_pdo_hash) {
                         rc = mdt_pdir_hash_lock(info, lh_tgtdirp, mtgtdir,
                                                 MDS_INODELOCK_UPDATE);
                         if (rc)
                                 GOTO(out_unlock_source, rc);
                         OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10);
                }
        } else {
                mtgtdir = mdt_object_find(info->mti_env, info->mti_mdt,
                                          rr->rr_fid2);
                if (IS_ERR(mtgtdir))
                        GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir));

                if (mdt_object_obf(mtgtdir))
                        GOTO(out_put_target, rc = -EPERM);

                /* check early, the real version will be saved after locking */
                rc = mdt_version_get_check(info, mtgtdir, 1);
                if (rc)
                        GOTO(out_put_target, rc);

		if (unlikely(mdt_object_remote(mtgtdir))) {
			CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID
			       "on different MDTs\n", PFID(rr->rr_fid1),
			       PFID(rr->rr_fid2));
			GOTO(out_put_target, rc = -EXDEV);
		} else {
			if (likely(mdt_object_exists(mtgtdir))) {
				/* we lock the target dir if it is local */
				rc = mdt_object_lock(info, mtgtdir, lh_tgtdirp,
						     MDS_INODELOCK_UPDATE,
						     MDT_LOCAL_LOCK);
				if (rc != 0)
					GOTO(out_put_target, rc);
				/* get and save correct version after locking */
				mdt_version_get_save(info, mtgtdir, 1);
			} else {
				GOTO(out_put_target, rc = -ESTALE);
			}
		}
	}

        /* step 3: find & lock the old object. */
        lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
        mdt_name_copy(&slname, lname);
        fid_zero(old_fid);
        rc = mdt_lookup_version_check(info, msrcdir, &slname, old_fid, 2);
        if (rc != 0)
                GOTO(out_unlock_target, rc);

        if (lu_fid_eq(old_fid, rr->rr_fid1) || lu_fid_eq(old_fid, rr->rr_fid2))
                GOTO(out_unlock_target, rc = -EINVAL);

	mold = mdt_object_find(info->mti_env, info->mti_mdt, old_fid);
	if (IS_ERR(mold))
		GOTO(out_unlock_target, rc = PTR_ERR(mold));
	if (mdt_object_remote(mold)) {
		mdt_object_put(info->mti_env, mold);
		CDEBUG(D_INFO, "Source child "DFID" is on another MDT\n",
		       PFID(old_fid));
		GOTO(out_unlock_target, rc = -EXDEV);
	}

	if (mdt_object_obf(mold)) {
		mdt_object_put(info->mti_env, mold);
		GOTO(out_unlock_target, rc = -EPERM);
	}

        lh_oldp = &info->mti_lh[MDT_LH_OLD];
        mdt_lock_reg_init(lh_oldp, LCK_EX);
        rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP,
                             MDT_CROSS_LOCK);
        if (rc != 0) {
                mdt_object_put(info->mti_env, mold);
                GOTO(out_unlock_target, rc);
        }

        info->mti_mos = mold;
        /* save version after locking */
        mdt_version_get_save(info, mold, 2);
        mdt_set_capainfo(info, 2, old_fid, BYPASS_CAPA);

        /* step 4: find & lock the new object. */
        /* new target object may not exist now */
        lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen);
        /* lookup with version checking */
        fid_zero(new_fid);
        rc = mdt_lookup_version_check(info, mtgtdir, lname, new_fid, 3);
        if (rc == 0) {
                /* the new_fid should have been filled at this moment */
                if (lu_fid_eq(old_fid, new_fid))
                       GOTO(out_unlock_old, rc);

                if (lu_fid_eq(new_fid, rr->rr_fid1) ||
                    lu_fid_eq(new_fid, rr->rr_fid2))
                        GOTO(out_unlock_old, rc = -EINVAL);

                mdt_lock_reg_init(lh_newp, LCK_EX);
                mnew = mdt_object_find(info->mti_env, info->mti_mdt, new_fid);
                if (IS_ERR(mnew))
                        GOTO(out_unlock_old, rc = PTR_ERR(mnew));

		if (mdt_object_obf(mnew)) {
			mdt_object_put(info->mti_env, mnew);
			GOTO(out_unlock_old, rc = -EPERM);
		}

		if (mdt_object_remote(mnew)) {
			mdt_object_put(info->mti_env, mnew);
			CDEBUG(D_INFO, "src child "DFID" is on another MDT\n",
			       PFID(new_fid));
			GOTO(out_unlock_old, rc = -EXDEV);
		}

                rc = mdt_object_lock(info, mnew, lh_newp,
                                     MDS_INODELOCK_FULL, MDT_CROSS_LOCK);
                if (rc != 0) {
                        mdt_object_put(info->mti_env, mnew);
                        GOTO(out_unlock_old, rc);
                }
                /* get and save version after locking */
                mdt_version_get_save(info, mnew, 3);
                mdt_set_capainfo(info, 3, new_fid, BYPASS_CAPA);
        } else if (rc != -EREMOTE && rc != -ENOENT) {
                GOTO(out_unlock_old, rc);
        } else {
                mdt_enoent_version_save(info, 3);
        }

        /* step 5: rename it */
        mdt_reint_init_ma(info, ma);

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_RENAME_WRITE);


        /* Check if @dst is subdir of @src. */
        rc = mdt_rename_sanity(info, old_fid);
        if (rc)
                GOTO(out_unlock_new, rc);

        rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir),
                        mdt_object_child(mtgtdir), old_fid, &slname,
                        (mnew ? mdt_object_child(mnew) : NULL),
                        lname, ma);

        /* handle last link of tgt object */
        if (rc == 0) {
		mdt_counter_incr(req, LPROC_MDT_RENAME);
                if (mnew)
                        mdt_handle_last_unlink(info, mnew, ma);

		mdt_rename_counter_tally(info, info->mti_mdt, req,
                                         msrcdir, mtgtdir);
        }

        EXIT;
out_unlock_new:
        if (mnew)
                mdt_object_unlock_put(info, mnew, lh_newp, rc);
out_unlock_old:
        mdt_object_unlock_put(info, mold, lh_oldp, rc);
out_unlock_target:
        mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc);
out_put_target:
        mdt_object_put(info->mti_env, mtgtdir);
out_unlock_source:
        mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc);
out_rename_lock:
	if (lustre_handle_is_used(&rename_lh))
		mdt_rename_unlock(&rename_lh);
	return rc;
}
Пример #16
0
/*
 * VBR: save parent version in reply and child version getting by its name.
 * Version of child is getting and checking during its lookup. If
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
	int			 no_name = 0;
	ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

	if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1))
		RETURN(-EPERM);
        /*
	 * step 1: Found the parent.
         */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0) {
		/* Name might not be able to find during resend of
		 * remote unlink, considering following case.
		 * dir_A is a remote directory, the name entry of
		 * dir_A is on MDT0, the directory is on MDT1,
		 *
		 * 1. client sends unlink req to MDT1.
		 * 2. MDT1 sends name delete update to MDT0.
		 * 3. name entry is being deleted in MDT0 synchronously.
		 * 4. MDT1 is restarted.
		 * 5. client resends unlink req to MDT1. So it can not
		 *    find the name entry on MDT0 anymore.
		 * In this case, MDT1 only needs to destory the local
		 * directory.
		 * */
		if (mdt_object_remote(mp) && rc == -ENOENT &&
		    !fid_is_zero(rr->rr_fid2) &&
		    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
			no_name = 1;
			*child_fid = *rr->rr_fid2;
		 } else {
			GOTO(unlock_parent, rc);
		 }
	}

	if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid))
		GOTO(unlock_parent, rc = -EPERM);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt_obd_name(info->mti_mdt),
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(put_child, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt_obd_name(info->mti_mdt),
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (!mdt_is_dne_client(req->rq_export))
			/* Return -EIO for old client */
			GOTO(put_child, rc = -EIO);

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt_obd_name(info->mti_mdt),
					-EPERM);
				GOTO(put_child, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma, no_name);
			GOTO(put_child, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		GOTO(unlock_child, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		rc = -EPERM;
		CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n",
		       mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc);
		GOTO(put_child, rc);
	}

	/* We used to acquire MDS_INODELOCK_FULL here but we can't do
	 * this now because a running HSM restore on the child (unlink
	 * victim) will hold the layout lock. See LU-4002. */
	rc = mdt_object_lock(info, mc, child_lh,
			     MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE,
			     MDT_CROSS_LOCK);
	if (rc != 0)
		GOTO(put_child, rc);

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
	/*
	 * Now we can only make sure we need MA_INODE, in mdd layer, will check
	 * whether need MA_LOV and MA_COOKIE.
	 */
	ma->ma_need = MA_INODE;
	ma->ma_valid = 0;
	mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);

	mutex_lock(&mc->mot_lov_mutex);

	rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
			mdt_object_child(mc), lname, ma, no_name);

	mutex_unlock(&mc->mot_lov_mutex);

	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
	if (rc == 0)
		mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;
unlock_child:
	mdt_object_unlock(info, mc, child_lh, rc);
put_child:
	mdt_object_put(info->mti_env, mc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Пример #17
0
/*
 * Handler for: getattr, lookup and revalidate cases.
 */
int lmv_intent_lookup(struct obd_export *exp, struct md_op_data *op_data,
                      void *lmm, int lmmsize, struct lookup_intent *it,
                      int flags, struct ptlrpc_request **reqp,
                      ldlm_blocking_callback cb_blocking,
		      __u64 extra_lock_flags)
{
	struct obd_device	*obd = exp->exp_obd;
	struct lmv_obd		*lmv = &obd->u.lmv;
	struct lmv_tgt_desc	*tgt = NULL;
	struct mdt_body		*body;
	struct lmv_stripe_md	*lsm = op_data->op_mea1;
	int			rc = 0;
	ENTRY;

	tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
	if (IS_ERR(tgt))
		RETURN(PTR_ERR(tgt));

	if (!fid_is_sane(&op_data->op_fid2))
		fid_zero(&op_data->op_fid2);

	CDEBUG(D_INODE, "LOOKUP_INTENT with fid1="DFID", fid2="DFID
	       ", name='%s' -> mds #%d lsm=%p lsm_magic=%x\n",
	       PFID(&op_data->op_fid1), PFID(&op_data->op_fid2),
	       op_data->op_name ? op_data->op_name : "<NULL>",
	       tgt->ltd_idx, lsm, lsm == NULL ? -1 : lsm->lsm_md_magic);

	op_data->op_bias &= ~MDS_CROSS_REF;

	rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
			     flags, reqp, cb_blocking, extra_lock_flags);
	if (rc < 0)
		RETURN(rc);

	if (*reqp == NULL) {
		/* If RPC happens, lsm information will be revalidated
		 * during update_inode process (see ll_update_lsm_md) */
		if (op_data->op_mea2 != NULL) {
			rc = lmv_revalidate_slaves(exp, NULL, op_data->op_mea2,
						   cb_blocking,
						   extra_lock_flags);
			if (rc != 0)
				RETURN(rc);
		}
		RETURN(rc);
	} else if (it_disposition(it, DISP_LOOKUP_NEG) &&
		   lsm != NULL && lsm->lsm_md_magic == LMV_MAGIC_MIGRATE) {
		/* For migrating directory, if it can not find the child in
		 * the source directory(master stripe), try the targeting
		 * directory(stripe 1) */
		tgt = lmv_find_target(lmv, &lsm->lsm_md_oinfo[1].lmo_fid);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));

		ptlrpc_req_finished(*reqp);
		it->d.lustre.it_data = NULL;
		*reqp = NULL;

		CDEBUG(D_INODE, "For migrating dir, try target dir "DFID"\n",
		       PFID(&lsm->lsm_md_oinfo[1].lmo_fid));

		op_data->op_fid1 = lsm->lsm_md_oinfo[1].lmo_fid;
		it->d.lustre.it_disposition &= ~DISP_ENQ_COMPLETE;
		rc = md_intent_lock(tgt->ltd_exp, op_data, lmm, lmmsize, it,
				    flags, reqp, cb_blocking, extra_lock_flags);
	}
	/*
	 * MDS has returned success. Probably name has been resolved in
	 * remote inode. Let's check this.
	 */
	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/* Not cross-ref case, just get out of here. */
	if (unlikely((body->valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, lmm, lmmsize, it, NULL, flags,
				       reqp, cb_blocking, extra_lock_flags);
		if (rc != 0)
			RETURN(rc);
		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}