Example #1
int mdd_local_file_create(const struct lu_env *env, struct mdd_device *mdd,
			  const struct lu_fid *pfid, const char *name, __u32 mode,
			  struct lu_fid *fid)
{
	struct dt_object	*parent, *dto;
	int			 rc = 0;

	ENTRY;

	LASSERT(!fid_is_zero(pfid));
	parent = dt_locate(env, mdd->mdd_bottom, pfid);
	if (unlikely(IS_ERR(parent)))
		RETURN(PTR_ERR(parent));

	/* create a local file/dir; if a non-zero @fid is passed then try to use it */
	if (fid_is_zero(fid))
		dto = local_file_find_or_create(env, mdd->mdd_los, parent,
						name, mode);
	else
		dto = local_file_find_or_create_with_fid(env, mdd->mdd_bottom,
							 fid, parent, name,
							 mode);
	if (IS_ERR(dto))
		GOTO(out_put, rc = PTR_ERR(dto));
	*fid = *lu_object_fid(&dto->do_lu);
	/* the local storage uses its own stack since the device stack is not
	 * fully set up yet, so drop its object from the cache */
	lu_object_put_nocache(env, &dto->do_lu);
	EXIT;
out_put:
	lu_object_put(env, &parent->do_lu);
	return rc;
}
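
The function above treats a zero FID as "pick a FID for me" and a non-zero FID as "create with exactly this FID", and writes the resulting FID back through the same pointer. Below is a minimal, self-contained sketch of that in/out sentinel convention; all names (sketch_fid, sketch_local_create, the sequence value) are hypothetical stand-ins, not the Lustre API.

/* Simplified sketch of the "zero FID as sentinel" convention used above;
 * hypothetical types and helpers, not Lustre code. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct sketch_fid {
	unsigned long long f_seq;
	unsigned int f_oid;
	unsigned int f_ver;
};

static bool sketch_fid_is_zero(const struct sketch_fid *fid)
{
	return fid->f_seq == 0 && fid->f_oid == 0;
}

/* Pretend "create": allocates a FID only when the caller passed a zero one. */
static int sketch_local_create(const char *name, struct sketch_fid *fid)
{
	static unsigned int next_oid = 1;

	if (sketch_fid_is_zero(fid)) {
		/* caller asked us to pick the FID */
		fid->f_seq = 0x200000001ULL;	/* hypothetical sequence value */
		fid->f_oid = next_oid++;
	}
	/* else: caller supplied the FID explicitly, reuse it as-is */
	printf("created %s as [%#llx:%#x]\n", name, fid->f_seq, fid->f_oid);
	return 0;
}

int main(void)
{
	struct sketch_fid fid;

	memset(&fid, 0, sizeof(fid));	/* zero FID: "allocate for me" */
	sketch_local_create("seq_srv", &fid);

	/* non-zero FID: "use exactly this one" (e.g. replay) */
	sketch_local_create("seq_srv_replay", &fid);
	return 0;
}
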
Example #2
/* Allocate new fid on passed client @seq and save it to @fid. */
int seq_client_alloc_fid(const struct lu_env *env,
                         struct lu_client_seq *seq, struct lu_fid *fid)
{
        cfs_waitlink_t link;
        int rc;
        ENTRY;

        LASSERT(seq != NULL);
        LASSERT(fid != NULL);

        cfs_waitlink_init(&link);
        cfs_mutex_lock(&seq->lcs_mutex);

        while (1) {
                seqno_t seqnr;

                if (!fid_is_zero(&seq->lcs_fid) &&
                    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
                        /* Just bump last allocated fid and return to caller. */
                        seq->lcs_fid.f_oid += 1;
                        rc = 0;
                        break;
                }

                rc = seq_fid_alloc_prep(seq, &link);
                if (rc)
                        continue;

                rc = seq_client_alloc_seq(env, seq, &seqnr);
                if (rc) {
                        CERROR("%s: Can't allocate new sequence, "
                               "rc %d\n", seq->lcs_name, rc);
                        seq_fid_alloc_fini(seq);
                        cfs_mutex_unlock(&seq->lcs_mutex);
                        RETURN(rc);
                }

                CDEBUG(D_INFO, "%s: Switch to sequence "
                       "[0x%16.16"LPF64"x]\n", seq->lcs_name, seqnr);

                seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
                seq->lcs_fid.f_seq = seqnr;
                seq->lcs_fid.f_ver = 0;

                /*
                 * Inform caller that sequence switch is performed to allow it
                 * to setup FLD for it.
                 */
                rc = 1;

                seq_fid_alloc_fini(seq);
                break;
        }

        *fid = seq->lcs_fid;
        cfs_mutex_unlock(&seq->lcs_mutex);

        CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name,  PFID(fid));
        RETURN(rc);
}
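
The return-value contract here is easy to miss: 0 means the FID was taken from the current sequence, 1 means a sequence switch happened and the caller is expected to set up the FLD for the new sequence, and a negative value is an error. The following standalone sketch models only that contract with a hypothetical fake_alloc_fid(); it is not the Lustre client code, and the sequence width and values are made up.

/* Hypothetical caller-side model of the 0 / 1 / negative return contract of
 * seq_client_alloc_fid(); not Lustre code. */
#include <stdio.h>

static int fake_alloc_fid(unsigned long long *seq, unsigned int *oid,
			  unsigned int width)
{
	static unsigned long long cur_seq = 0x400000000ULL; /* made-up */
	static unsigned int cur_oid;

	if (cur_oid + 1 < width) {
		cur_oid++;		/* still room in the current sequence */
		*seq = cur_seq;
		*oid = cur_oid;
		return 0;
	}
	cur_seq++;			/* switch to a fresh sequence */
	cur_oid = 1;
	*seq = cur_seq;
	*oid = cur_oid;
	return 1;			/* tell the caller a switch happened */
}

int main(void)
{
	unsigned long long seq;
	unsigned int oid;
	int rc = fake_alloc_fid(&seq, &oid, 4);

	if (rc < 0)
		fprintf(stderr, "allocation failed: %d\n", rc);
	else if (rc == 1)
		printf("switched to sequence %#llx, register its location first\n", seq);
	else
		printf("allocated [%#llx:%#x] in the current sequence\n", seq, oid);
	return 0;
}
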
Example #3
static int osp_declare_attr_set(const struct lu_env *env, struct dt_object *dt,
				const struct lu_attr *attr, struct thandle *th)
{
	struct osp_device	*d = lu2osp_dev(dt->do_lu.lo_dev);
	struct osp_object	*o = dt2osp_obj(dt);
	int			 rc = 0;

	ENTRY;

	/*
	 * Usually we do not allow the server stack to manipulate the size,
	 * but there is a special case when striping is created late, after
	 * a stripeless file was truncated to a non-zero size.
	 *
	 * In this case we do the following:
	 *
	 * 1) grab the id in declare - this can lead to leaked OST objects,
	 *    but we do not currently have a proper mechanism and the only
	 *    options we have are to do the truncate RPC while holding the
	 *    transaction open (very bad) or to grab the id in declare at the
	 *    cost of a leaked OST object in the same very rare unfortunate
	 *    case (just bad). Note that 1.6-2.0 did the assignment outside
	 *    of a running transaction all the time, meaning many more
	 *    chances for leaked objects.
	 *
	 * 2) send a synchronous truncate RPC with the just-assigned id
	 */

	/* there are a few places in the MDD code still passing NULL
	 * XXX: to be fixed soon */
	if (attr == NULL)
		RETURN(0);

	if (attr->la_valid & LA_SIZE && attr->la_size > 0 &&
	    fid_is_zero(lu_object_fid(&o->opo_obj.do_lu))) {
		LASSERT(!dt_object_exists(dt));
		osp_object_assign_fid(env, d, o);
		rc = osp_object_truncate(env, dt, attr->la_size);
		if (rc)
			RETURN(rc);
	}

	if (o->opo_new) {
		/* no need to log new objects being created */
		RETURN(0);
	}

	if (!(attr->la_valid & (LA_UID | LA_GID)))
		RETURN(0);

	/*
	 * track all UID/GID changes via llog
	 */
	rc = osp_sync_declare_add(env, o, MDS_SETATTR64_REC, th);

	RETURN(rc);
}
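
To make the branching above easier to follow, here is a simplified decision-flow model: a size change on an object that has no FID yet triggers FID assignment plus a synchronous truncate, new objects and attribute sets without UID/GID changes are not logged, and everything else declares a llog record. All types and flag names below are hypothetical stand-ins, not the Lustre API.

/* Standalone model of the declare_attr_set decision flow; hypothetical names. */
#include <stdbool.h>
#include <stdio.h>

#define SK_LA_SIZE 0x1u
#define SK_LA_UID  0x2u
#define SK_LA_GID  0x4u

struct sk_obj  { bool has_fid; bool is_new; };
struct sk_attr { unsigned int la_valid; long long la_size; };

static int sk_declare_attr_set(struct sk_obj *o, const struct sk_attr *attr)
{
	if (attr == NULL)
		return 0;

	/* late striping: size set before the OST object exists */
	if ((attr->la_valid & SK_LA_SIZE) && attr->la_size > 0 && !o->has_fid) {
		o->has_fid = true;			/* "assign fid" */
		printf("synchronous truncate to %lld\n", attr->la_size);
	}

	if (o->is_new || !(attr->la_valid & (SK_LA_UID | SK_LA_GID)))
		return 0;				/* nothing to log */

	printf("declare a llog record for the uid/gid change\n");
	return 0;
}

int main(void)
{
	struct sk_obj o = { .has_fid = false, .is_new = false };
	struct sk_attr a = { .la_valid = SK_LA_SIZE | SK_LA_UID,
			     .la_size = 4096 };

	return sk_declare_attr_set(&o, &a);
}
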
Example #4
/**
 * Allocate a new FID on the passed client @seq and save it to @fid.
 *
 * \param[in] env	pointer to the thread context
 * \param[in,out] seq	pointer to the client sequence manager
 * \param[out] fid	to hold the newly allocated fid
 *
 * \retval		1 to notify the caller that a sequence switch was
 *			performed, so that it can set up the FLD for it.
 * \retval		0 for a new FID allocated in the current sequence.
 * \retval		Negative error number on failure.
 */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_t link;
	int rc;
	ENTRY;

	LASSERT(seq != NULL);
	LASSERT(fid != NULL);

	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		u64 seqnr;

		if (unlikely(!fid_is_zero(&seq->lcs_fid) &&
			     fid_oid(&seq->lcs_fid) < seq->lcs_width)) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid++;
			rc = 0;
			break;
		}

		/* Release seq::lcs_mutex via seq_fid_alloc_prep() to avoid
		 * deadlock during seq_client_alloc_seq(). */
		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		/* Re-take seq::lcs_mutex via seq_fid_alloc_fini(). */
		seq_fid_alloc_fini(seq, rc ? 0 : seqnr, false);
		if (rc) {
			CERROR("%s: Can't allocate new sequence: rc = %d\n",
			       seq->lcs_name, rc);
			mutex_unlock(&seq->lcs_mutex);

			RETURN(rc);
		}

		rc = 1;
		break;
	}

	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO, "%s: Allocated FID "DFID"\n", seq->lcs_name,  PFID(fid));

	RETURN(rc);
}
Example #5
int osp_init_pre_fid(struct osp_device *osp)
{
	struct lu_env		env;
	struct osp_thread_info	*osi;
	struct lu_client_seq	*cli_seq;
	struct lu_fid		*last_fid;
	int			rc;
	ENTRY;

	LASSERT(osp->opd_pre != NULL);

	/* Return if last_used fid has been initialized */
	if (!fid_is_zero(&osp->opd_last_used_fid))
		RETURN(0);

	rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
	if (rc) {
		CERROR("%s: init env error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		RETURN(rc);
	}

	osi = osp_env_info(&env);
	last_fid = &osi->osi_fid;
	fid_zero(last_fid);
	/* For a freshly formatted fs, a new sequence is allocated first */
	if (osp_is_fid_client(osp) && osp->opd_group != 0) {
		cli_seq = osp->opd_obd->u.cli.cl_seq;
		rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq);
		if (rc != 0) {
			CERROR("%s: alloc fid error: rc = %d\n",
			       osp->opd_obd->obd_name, rc);
			GOTO(out, rc);
		}
	} else {
		last_fid->f_seq = fid_idif_seq(0, osp->opd_index);
	}
	last_fid->f_oid = 1;
	last_fid->f_ver = 0;

	spin_lock(&osp->opd_pre_lock);
	osp->opd_last_used_fid = *last_fid;
	osp->opd_pre_used_fid = *last_fid;
	osp->opd_pre_last_created_fid = *last_fid;
	spin_unlock(&osp->opd_pre_lock);
	rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1);
	if (rc != 0) {
		CERROR("%s: write fid error: rc = %d\n",
		       osp->opd_obd->obd_name, rc);
		GOTO(out, rc);
	}
out:
	lu_env_fini(&env);
	RETURN(rc);
}
Example #6
static void osp_object_assign_fid(const struct lu_env *env,
				 struct osp_device *d, struct osp_object *o)
{
	struct osp_thread_info *osi = osp_env_info(env);

	LASSERT(fid_is_zero(lu_object_fid(&o->opo_obj.do_lu)));
	LASSERT(o->opo_reserved);
	o->opo_reserved = 0;

	osp_precreate_get_fid(env, d, &osi->osi_fid);

	lu_object_assign_fid(env, &o->opo_obj.do_lu, &osi->osi_fid);
}
Example #7
/**
 * Asks the OST to clean up precreate orphans
 * and gets the next id for new objects
 */
static int osp_precreate_cleanup_orphans(struct lu_env *env,
					 struct osp_device *d)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct lu_fid		*last_fid = &osi->osi_fid;
	struct ptlrpc_request	*req = NULL;
	struct obd_import	*imp;
	struct ost_body		*body;
	struct l_wait_info	 lwi = { 0 };
	int			 update_status = 0;
	int			 rc;
	int			 diff;

	ENTRY;

	/*
	 * Wait for local recovery to finish, so we can clean up orphans.
	 * Orphans are all objects since "last used" (assigned), but there
	 * might be objects reserved, and in some cases they won't be used.
	 * We can't clean them up till we're sure they won't be used. Nor can
	 * we allow new reservations, because they may end up among the
	 * orphans being cleaned up below. So we block new reservations and
	 * wait till all reserved objects are either used or released.
	 */
	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 1;
	spin_unlock(&d->opd_pre_lock);
	/*
	 * The locking above makes sure the opd_pre_reserved check below will
	 * catch all osp_precreate_reserve() calls that see
	 * "!opd_pre_recovering".
	 */
	l_wait_event(d->opd_pre_waitq,
		     (!d->opd_pre_reserved && d->opd_recovery_completed) ||
		     !osp_precreate_running(d) || d->opd_got_disconnected,
		     &lwi);
	if (!osp_precreate_running(d) || d->opd_got_disconnected)
		GOTO(out, rc = -EAGAIN);

	CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
	       d->opd_obd->obd_name, PFID(&d->opd_last_used_fid));

	*last_fid = d->opd_last_used_fid;
	/* The OSP should already have a valid seq now */
	LASSERT(!fid_is_zero(last_fid));
	if (fid_oid(&d->opd_last_used_fid) < 2) {
		/* lastfid looks strange... ask OST */
		rc = osp_get_lastfid_from_ost(env, d);
		if (rc)
			GOTO(out, rc);
	}

	imp = d->opd_obd->u.cli.cl_import;
	LASSERT(imp);

	req = ptlrpc_request_alloc(imp, &RQF_OST_CREATE);
	if (req == NULL)
		GOTO(out, rc = -ENOMEM);

	rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_CREATE);
	if (rc) {
		ptlrpc_request_free(req);
		req = NULL;
		GOTO(out, rc);
	}

	body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	body->oa.o_flags = OBD_FL_DELORPHAN;
	body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP;

	fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi);

	ptlrpc_request_set_replen(req);

	/* Don't resend the delorphan req */
	req->rq_no_resend = req->rq_no_delay = 1;

	rc = ptlrpc_queue_wait(req);
	if (rc) {
		update_status = 1;
		GOTO(out, rc);
	}

	body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY);
	if (body == NULL)
		GOTO(out, rc = -EPROTO);

	/*
	 * The OST provides us with the id the new pool starts from in
	 * body->oa.o_id
	 */
	ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index);

	spin_lock(&d->opd_pre_lock);
	diff = lu_fid_diff(&d->opd_last_used_fid, last_fid);
	if (diff > 0) {
		d->opd_pre_grow_count = OST_MIN_PRECREATE + diff;
		d->opd_pre_last_created_fid = d->opd_last_used_fid;
	} else {
		d->opd_pre_grow_count = OST_MIN_PRECREATE;
		d->opd_pre_last_created_fid = *last_fid;
	}
	/*
	 * This empties the pre-creation pool and effectively blocks any new
	 * reservations.
	 */
	LASSERT(fid_oid(&d->opd_pre_last_created_fid) <=
		LUSTRE_DATA_SEQ_MAX_WIDTH);
	d->opd_pre_used_fid = d->opd_pre_last_created_fid;
	d->opd_pre_grow_slow = 0;
	spin_unlock(&d->opd_pre_lock);

	CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID
	       "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid),
	       PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_last_used_fid));
out:
	if (req)
		ptlrpc_req_finished(req);

	spin_lock(&d->opd_pre_lock);
	d->opd_pre_recovering = 0;
	spin_unlock(&d->opd_pre_lock);

	/*
	 * If rc is zero, the pre-creation window should have been emptied.
	 * Since waking up the herd would be useless without pre-created
	 * objects, we defer the signal to osp_precreate_send() in that case.
	 */
	if (rc != 0) {
		if (update_status) {
			CERROR("%s: cannot cleanup orphans: rc = %d\n",
			       d->opd_obd->obd_name, rc);
			/* we can't proceed from here, the OST seems to be in
			 * bad shape; better to wait for a new instance of the
			 * server and repeat from the beginning. Notify
			 * possible waiters that this OSP isn't quite
			 * functional yet */
			osp_pre_update_status(d, rc);
		} else {
			wake_up(&d->opd_pre_user_waitq);
		}
	}

	RETURN(rc);
}
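
The window adjustment at the end of this function (lu_fid_diff() against the OST-reported last id) boils down to: if the MDT has already used more objects than the OST reports as created, enlarge the next precreate request by that gap. The sketch below models just that arithmetic; the names and the minimum window of 32 are placeholders under that assumption, not the Lustre implementation.

/* Self-contained sketch of the precreate window adjustment; not Lustre code. */
#include <stdio.h>

#define SK_MIN_PRECREATE 32	/* assumed minimum window size */

static void sk_adjust_window(long long last_used_oid, long long ost_last_oid,
			     long long *grow_count, long long *last_created)
{
	long long diff = last_used_oid - ost_last_oid;

	if (diff > 0) {
		/* MDT is ahead of the OST: ask for the gap plus the minimum */
		*grow_count = SK_MIN_PRECREATE + diff;
		*last_created = last_used_oid;
	} else {
		*grow_count = SK_MIN_PRECREATE;
		*last_created = ost_last_oid;
	}
}

int main(void)
{
	long long grow, created;

	sk_adjust_window(150, 120, &grow, &created);
	printf("grow_count=%lld last_created=%lld\n", grow, created);
	return 0;
}
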
Example #8
/*
 * IT_OPEN is intended to open (and possibly create) an object. The parent
 * (pid) may be a split (striped) directory.
 */
static int lmv_intent_open(struct obd_export *exp, struct md_op_data *op_data,
			   struct lookup_intent *it,
			   struct ptlrpc_request **reqp,
			   ldlm_blocking_callback cb_blocking,
			   __u64 extra_lock_flags)
{
	struct obd_device	*obd = exp->exp_obd;
	struct lmv_obd		*lmv = &obd->u.lmv;
	struct lmv_tgt_desc	*tgt;
	struct mdt_body		*body;
	int			rc;
	ENTRY;

	if (it->it_flags & MDS_OPEN_BY_FID) {
		LASSERT(fid_is_sane(&op_data->op_fid2));

		/* for a striped directory we can't know the parent stripe fid
		 * without the name, but we can set it to the child fid, and
		 * the MDT will obtain it from the linkea during open in that
		 * case. */
		if (op_data->op_mea1 != NULL)
			op_data->op_fid1 = op_data->op_fid2;

		tgt = lmv_find_target(lmv, &op_data->op_fid2);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));

		op_data->op_mds = tgt->ltd_idx;
	} else {
		LASSERT(fid_is_sane(&op_data->op_fid1));
		LASSERT(fid_is_zero(&op_data->op_fid2));
		LASSERT(op_data->op_name != NULL);

		tgt = lmv_locate_mds(lmv, op_data, &op_data->op_fid1);
		if (IS_ERR(tgt))
			RETURN(PTR_ERR(tgt));
	}

	/* If the file is opened by FID, there is no need to allocate a FID
	 * at all, otherwise it will confuse the MDT */
	if ((it->it_op & IT_CREAT) && !(it->it_flags & MDS_OPEN_BY_FID)) {
		/*
		 * For lookup(IT_CREATE) cases allocate new fid and setup FLD
		 * for it.
		 */
		rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
		if (rc != 0)
			RETURN(rc);
	}

	CDEBUG(D_INODE, "OPEN_INTENT with fid1="DFID", fid2="DFID","
	       " name='%s' -> mds #%u\n", PFID(&op_data->op_fid1),
	       PFID(&op_data->op_fid2), op_data->op_name, tgt->ltd_idx);

	rc = md_intent_lock(tgt->ltd_exp, op_data, it, reqp, cb_blocking,
			    extra_lock_flags);
	if (rc != 0)
		RETURN(rc);
	/*
	 * Nothing was found; do not access body->fid1 as it is zero and thus
	 * pointless.
	 */
	if ((it->d.lustre.it_disposition & DISP_LOOKUP_NEG) &&
	    !(it->d.lustre.it_disposition & DISP_OPEN_CREATE) &&
	    !(it->d.lustre.it_disposition & DISP_OPEN_OPEN))
		RETURN(rc);

	body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
	if (body == NULL)
		RETURN(-EPROTO);

	/* Cross-ref case: the object resides on another MDT, follow it there. */
	if (unlikely((body->mbo_valid & OBD_MD_MDS))) {
		rc = lmv_intent_remote(exp, it, &op_data->op_fid1, reqp,
				       cb_blocking, extra_lock_flags);
		if (rc != 0)
			RETURN(rc);

		body = req_capsule_server_get(&(*reqp)->rq_pill, &RMF_MDT_BODY);
		if (body == NULL)
			RETURN(-EPROTO);
	}

	RETURN(rc);
}
Example #9
/* Allocate new fid on passed client @seq and save it to @fid. */
int seq_client_alloc_fid(const struct lu_env *env,
			 struct lu_client_seq *seq, struct lu_fid *fid)
{
	wait_queue_entry_t link;
	int rc;

	LASSERT(seq);
	LASSERT(fid);

	init_waitqueue_entry(&link, current);
	mutex_lock(&seq->lcs_mutex);

	if (OBD_FAIL_CHECK(OBD_FAIL_SEQ_EXHAUST))
		seq->lcs_fid.f_oid = seq->lcs_width;

	while (1) {
		u64 seqnr;

		if (!fid_is_zero(&seq->lcs_fid) &&
		    fid_oid(&seq->lcs_fid) < seq->lcs_width) {
			/* Just bump last allocated fid and return to caller. */
			seq->lcs_fid.f_oid += 1;
			rc = 0;
			break;
		}

		rc = seq_fid_alloc_prep(seq, &link);
		if (rc)
			continue;

		rc = seq_client_alloc_seq(env, seq, &seqnr);
		if (rc) {
			CERROR("%s: Can't allocate new sequence, rc %d\n",
			       seq->lcs_name, rc);
			seq_fid_alloc_fini(seq);
			mutex_unlock(&seq->lcs_mutex);
			return rc;
		}

		CDEBUG(D_INFO, "%s: Switch to sequence [0x%16.16llx]\n",
		       seq->lcs_name, seqnr);

		seq->lcs_fid.f_oid = LUSTRE_FID_INIT_OID;
		seq->lcs_fid.f_seq = seqnr;
		seq->lcs_fid.f_ver = 0;

		/*
		 * Inform caller that sequence switch is performed to allow it
		 * to setup FLD for it.
		 */
		rc = 1;

		seq_fid_alloc_fini(seq);
		break;
	}

	*fid = seq->lcs_fid;
	mutex_unlock(&seq->lcs_mutex);

	CDEBUG(D_INFO,
	       "%s: Allocated FID " DFID "\n", seq->lcs_name,  PFID(fid));
	return rc;
}
Example #10
/*
 * VBR: save the parent version in the reply and the child version found by
 * its name. The child version is fetched and checked during its lookup.
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

	/*
	 * step 1: Find the parent.
	 */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	if (mdt_object_obf(mp))
		GOTO(put_parent, rc = -EPERM);

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0)
		GOTO(unlock_parent, rc);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless of whether it is local or remote.
	 * No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt2obd_dev(info->mti_mdt)->obd_name,
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(unlock_parent, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt2obd_dev(info->mti_mdt)->obd_name,
					-EPERM);
				GOTO(unlock_parent, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma);
			mdt_object_put(info->mti_env, mc);
			GOTO(unlock_parent, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then what
		 * would happen if another client tried to grab the LOOKUP
		 * lock at the same time as the unlink? XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		mdt_object_unlock_put(info, mc, child_lh, rc);
		GOTO(unlock_parent, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		CERROR("%s: lfs rmdir should not be used on local dir %s\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name);
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc = -EPERM);
	}

        rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL,
                             MDT_CROSS_LOCK);
	if (rc != 0) {
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc);
	}

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
        /*
         * For now we only know we need MA_INODE; the mdd layer will check
         * whether MA_LOV and MA_COOKIE are needed.
         */
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
        rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
                        mdt_object_child(mc), lname, ma);
	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
        if (rc == 0)
                mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;

        mdt_object_unlock_put(info, mc, child_lh, rc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Example #11
/*
 * VBR: save the parent version in the reply and the child version found by
 * its name. The child version is fetched and checked during its lookup.
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
	int			 no_name = 0;
	ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

	if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1))
		RETURN(-EPERM);
	/*
	 * step 1: Find the parent.
	 */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0) {
		/* The name might not be found during a resend of a remote
		 * unlink. Consider the following case:
		 * dir_A is a remote directory, the name entry of
		 * dir_A is on MDT0, the directory itself is on MDT1.
		 *
		 * 1. The client sends an unlink req to MDT1.
		 * 2. MDT1 sends a name delete update to MDT0.
		 * 3. The name entry is deleted on MDT0 synchronously.
		 * 4. MDT1 is restarted.
		 * 5. The client resends the unlink req to MDT1, so it can
		 *    not find the name entry on MDT0 anymore.
		 * In this case, MDT1 only needs to destroy the local
		 * directory.
		 * */
		if (mdt_object_remote(mp) && rc == -ENOENT &&
		    !fid_is_zero(rr->rr_fid2) &&
		    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
			no_name = 1;
			*child_fid = *rr->rr_fid2;
		 } else {
			GOTO(unlock_parent, rc);
		 }
	}

	if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid))
		GOTO(unlock_parent, rc = -EPERM);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless of whether it is local or remote.
	 * No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt_obd_name(info->mti_mdt),
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(put_child, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt_obd_name(info->mti_mdt),
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (!mdt_is_dne_client(req->rq_export))
			/* Return -EIO for old client */
			GOTO(put_child, rc = -EIO);

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt_obd_name(info->mti_mdt),
					-EPERM);
				GOTO(put_child, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma, no_name);
			GOTO(put_child, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then what
		 * would happen if another client tried to grab the LOOKUP
		 * lock at the same time as the unlink? XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		GOTO(unlock_child, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		rc = -EPERM;
		CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n",
		       mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc);
		GOTO(put_child, rc);
	}

	/* We used to acquire MDS_INODELOCK_FULL here but we can't do
	 * this now because a running HSM restore on the child (unlink
	 * victim) will hold the layout lock. See LU-4002. */
	rc = mdt_object_lock(info, mc, child_lh,
			     MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE,
			     MDT_CROSS_LOCK);
	if (rc != 0)
		GOTO(put_child, rc);

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
	/*
	 * For now we only know we need MA_INODE; the mdd layer will check
	 * whether MA_LOV and MA_COOKIE are needed.
	 */
	ma->ma_need = MA_INODE;
	ma->ma_valid = 0;
	mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);

	mutex_lock(&mc->mot_lov_mutex);

	rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
			mdt_object_child(mc), lname, ma, no_name);

	mutex_unlock(&mc->mot_lov_mutex);

	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
	if (rc == 0)
		mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;
unlock_child:
	mdt_object_unlock(info, mc, child_lh, rc);
put_child:
	mdt_object_put(info->mti_env, mc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Example #12
static int osp_declare_object_create(const struct lu_env *env,
				     struct dt_object *dt,
				     struct lu_attr *attr,
				     struct dt_allocation_hint *hint,
				     struct dt_object_format *dof,
				     struct thandle *th)
{
	struct osp_thread_info	*osi = osp_env_info(env);
	struct osp_device	*d = lu2osp_dev(dt->do_lu.lo_dev);
	struct osp_object	*o = dt2osp_obj(dt);
	const struct lu_fid	*fid;
	int			 rc = 0;

	ENTRY;

	/* this should happen on a non-0 OSP only, so that at least one
	 * object has already been declared in the scenario and LOD should
	 * clean that up */
	if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_CREATE_FAIL) && d->opd_index == 1)
		RETURN(-ENOSPC);

	LASSERT(d->opd_last_used_oid_file);
	fid = lu_object_fid(&dt->do_lu);

	/*
	 * There can be gaps in precreated ids, which would be recorded in the
	 * unlink llog.
	 * XXX: we do not handle gaps yet; the implementation done before was
	 *	found to be racy, so we disabled it. There is no point in
	 *	making a useless but expensive llog declaration.
	 */
	/* rc = osp_sync_declare_add(env, o, MDS_UNLINK64_REC, th); */

	if (unlikely(!fid_is_zero(fid))) {
		/* replay case: caller knows fid */
		osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
		rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
					     sizeof(osi->osi_id), osi->osi_off,
					     th);
		RETURN(rc);
	}

	/*
	 * in the declaration we need to reserve an object so that we don't
	 * block waiting for the precreation RPC to complete
	 */
	rc = osp_precreate_reserve(env, d);
	/*
	 * we also need to declare an update to the local "last used id" file
	 * for recovery; if the object isn't used for some reason, we need to
	 * release the reservation, which can be done in osd_object_release()
	 */
	if (rc == 0) {
		/* mark the id as reserved: in create we don't want to talk
		 * to the OST */
		LASSERT(o->opo_reserved == 0);
		o->opo_reserved = 1;

		/* historically the file is common to all OSPs */
		osi->osi_off = sizeof(osi->osi_id) * d->opd_index;
		rc = dt_declare_record_write(env, d->opd_last_used_oid_file,
					     sizeof(osi->osi_id), osi->osi_off,
					     th);
	} else {
		/* not needed in the cache anymore */
		set_bit(LU_OBJECT_HEARD_BANSHEE,
			    &dt->do_lu.lo_header->loh_flags);
	}
	RETURN(rc);
}
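
The key idea in this declaration path is to reserve a precreated object up front so the actual create never blocks on a precreation RPC, and to release that reservation if the object ends up unused. The sketch below is a plain C model of that reserve/release accounting under those assumptions; the names, error value, and pool sizes are placeholders, not the Lustre implementation.

/* Hypothetical model of the reserve-in-declare pattern; not Lustre code. */
#include <stdio.h>

struct sk_pool { int precreated; int reserved; };

static int sk_reserve(struct sk_pool *p)
{
	if (p->reserved >= p->precreated)
		return -11;	/* -EAGAIN: pool exhausted, must precreate more */
	p->reserved++;
	return 0;
}

static void sk_release(struct sk_pool *p)
{
	/* called when a declared-but-unused object gives its slot back */
	if (p->reserved > 0)
		p->reserved--;
}

int main(void)
{
	struct sk_pool pool = { .precreated = 2, .reserved = 0 };

	printf("reserve #1: %d\n", sk_reserve(&pool));
	printf("reserve #2: %d\n", sk_reserve(&pool));
	printf("reserve #3: %d\n", sk_reserve(&pool));	/* pool exhausted */
	sk_release(&pool);
	printf("after release, reserve: %d\n", sk_reserve(&pool));
	return 0;
}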