Exemple #1
0
/**
 * lock the part of the directory according to the hash of the name
 * (lh->mlh_pdo_hash) in parallel directory lock.
 */
static int mdt_pdir_hash_lock(struct mdt_thread_info *info,
                       struct mdt_lock_handle *lh,
                       struct mdt_object *obj, __u64 ibits)
{
        struct ldlm_res_id *res_id = &info->mti_res_id;
        struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace;
        ldlm_policy_data_t *policy = &info->mti_policy;
        int rc;

        /*
         * Finish res_id initializing by name hash marking part of
         * directory which is taking modification.
         */
        LASSERT(lh->mlh_pdo_hash != 0);
        fid_build_pdo_res_name(mdt_object_fid(obj), lh->mlh_pdo_hash, res_id);
        memset(policy, 0, sizeof(*policy));
        policy->l_inodebits.bits = ibits;
        /*
         * Use LDLM_FL_LOCAL_ONLY for this lock. We do not know yet if it is
         * going to be sent to client. If it is - mdt_intent_policy() path will
         * fix it up and turn FL_LOCAL flag off.
         */
        rc = mdt_fid_lock(ns, &lh->mlh_reg_lh, lh->mlh_reg_mode, policy,
                          res_id, LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB,
                          &info->mti_exp->exp_handle.h_cookie);
        return rc;
}
Exemple #2
0
/* if object is dying, pack the lov/llog data,
 * parameter info->mti_attr should be valid at this point! */
int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
                           const struct md_attr *ma)
{
        struct mdt_body       *repbody;
        const struct lu_attr *la = &ma->ma_attr;
        int rc;
        ENTRY;

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
        LASSERT(repbody != NULL);

        if (ma->ma_valid & MA_INODE)
                mdt_pack_attr2body(info, repbody, la, mdt_object_fid(mo));

        if (ma->ma_valid & MA_LOV) {
                __u32 mode;

                if (mdt_object_exists(mo) < 0)
                        /* If it is a remote object, and we do not retrieve
                         * EA back unlink reg file*/
                        mode = S_IFREG;
                else
                        mode = lu_object_attr(&mo->mot_obj.mo_lu);

                LASSERT(ma->ma_lmm_size);
                mdt_dump_lmm(D_INFO, ma->ma_lmm);
                repbody->eadatasize = ma->ma_lmm_size;
                if (S_ISREG(mode))
                        repbody->valid |= OBD_MD_FLEASIZE;
                else if (S_ISDIR(mode))
                        repbody->valid |= OBD_MD_FLDIREA;
                else
                        LBUG();
        }

        if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) {
                repbody->aclsize = ma->ma_cookie_size;
                repbody->valid |= OBD_MD_FLCOOKIE;
        }

        if (info->mti_mdt->mdt_opts.mo_oss_capa &&
            info->mti_exp->exp_connect_flags & OBD_CONNECT_OSS_CAPA &&
            repbody->valid & OBD_MD_FLEASIZE) {
                struct lustre_capa *capa;

                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
                LASSERT(capa);
                capa->lc_opc = CAPA_OPC_OSS_DESTROY;
                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
                if (rc)
                        RETURN(rc);

                repbody->valid |= OBD_MD_FLOSSCAPA;
        }

        RETURN(0);
}
Exemple #3
0
/**
 * Get version of object by fid.
 *
 * Return real version or ENOENT_VERSION if object doesn't exist
 */
static void mdt_obj_version_get(struct mdt_thread_info *info,
                                struct mdt_object *o, __u64 *version)
{
        LASSERT(o);
	if (mdt_object_exists(o) && !mdt_object_remote(o) &&
	    !mdt_object_obf(o))
                *version = dt_version_get(info->mti_env, mdt_obj2dt(o));
        else
                *version = ENOENT_VERSION;
        CDEBUG(D_INODE, "FID "DFID" version is "LPX64"\n",
               PFID(mdt_object_fid(o)), *version);
}
Exemple #4
0
/**
 * Check HSM flags and add HS_DIRTY flag if relevant.
 *
 * A file could be set dirty only if it has a copy in the backend (HS_EXISTS)
 * and is not RELEASED.
 */
int mdt_add_dirty_flag(struct mdt_thread_info *info, struct mdt_object *mo,
			struct md_attr *ma)
{
	int rc;
	ENTRY;

	/* If the file was modified, add the dirty flag */
	ma->ma_need = MA_HSM;
	rc = mdt_attr_get_complex(info, mo, ma);
	if (rc) {
		CERROR("file attribute read error for "DFID": %d.\n",
			PFID(mdt_object_fid(mo)), rc);
		RETURN(rc);
	}

	/* If an up2date copy exists in the backend, add dirty flag */
	if ((ma->ma_valid & MA_HSM) && (ma->ma_hsm.mh_flags & HS_EXISTS)
	    && !(ma->ma_hsm.mh_flags & (HS_DIRTY|HS_RELEASED))) {
		struct mdt_lock_handle  *lh = &info->mti_lh[MDT_LH_CHILD];

		ma->ma_hsm.mh_flags |= HS_DIRTY;

		mdt_lock_reg_init(lh, LCK_PW);
		rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR,
				     MDT_LOCAL_LOCK);
		if (rc != 0)
			RETURN(rc);

		rc = mdt_hsm_attr_set(info, mo, &ma->ma_hsm);
		if (rc)
			CERROR("file attribute change error for "DFID": %d\n",
				PFID(mdt_object_fid(mo)), rc);
		mdt_object_unlock(info, mo, lh, rc);
	}

	RETURN(rc);
}
Exemple #5
0
/* Partial request to create object only */
static int mdt_md_mkobj(struct mdt_thread_info *info)
{
        struct mdt_device      *mdt = info->mti_mdt;
        struct mdt_object      *o;
        struct mdt_body        *repbody;
        struct md_attr         *ma = &info->mti_attr;
        int rc;
        ENTRY;

        DEBUG_REQ(D_INODE, mdt_info_req(info), "Partial create "DFID"",
                  PFID(info->mti_rr.rr_fid2));

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);

        o = mdt_object_find(info->mti_env, mdt, info->mti_rr.rr_fid2,
                            MDT_OBJ_MAY_NOT_EXIST);
        if (!IS_ERR(o)) {
                struct md_object *next = mdt_object_child(o);

                ma->ma_need = MA_INODE;
                ma->ma_valid = 0;

                /*
                 * Cross-ref create can encounter already created obj in case of
                 * recovery, just get attr in that case.
                 */
                if (mdt_object_exists(o) == 1) {
                        rc = mo_attr_get(info->mti_env, next, ma);
                } else {
                        /*
                         * Here, NO permission check for object_create,
                         * such check has been done on the original MDS.
                         */
                        rc = mo_object_create(info->mti_env, next,
                                              &info->mti_spec, ma);
                }
                if (rc == 0) {
                        /* Return fid & attr to client. */
                        if (ma->ma_valid & MA_INODE)
                                mdt_pack_attr2body(info, repbody, &ma->ma_attr,
                                                   mdt_object_fid(o));
                }
                mdt_object_put(info->mti_env, o);
        } else
                rc = PTR_ERR(o);

        mdt_create_pack_capa(info, rc, o, repbody);
        RETURN(rc);
}
/* if object is dying, pack the lov/llog data,
 * parameter info->mti_attr should be valid at this point! */
int mdt_handle_last_unlink(struct mdt_thread_info *info, struct mdt_object *mo,
                           const struct md_attr *ma)
{
        struct mdt_body       *repbody;
        const struct lu_attr *la = &ma->ma_attr;
        int rc;
        ENTRY;

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
        LASSERT(repbody != NULL);

        if (ma->ma_valid & MA_INODE)
                mdt_pack_attr2body(info, repbody, la, mdt_object_fid(mo));

        if (ma->ma_valid & MA_LOV) {
		CERROR("No need in LOV EA upon unlink\n");
		dump_stack();
        }
	repbody->eadatasize = 0;

        if (ma->ma_cookie_size && (ma->ma_valid & MA_COOKIE)) {
                repbody->aclsize = ma->ma_cookie_size;
                repbody->valid |= OBD_MD_FLCOOKIE;
        }

	if (info->mti_mdt->mdt_opts.mo_oss_capa &&
	    exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA &&
	    repbody->valid & OBD_MD_FLEASIZE) {
                struct lustre_capa *capa;

                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
                LASSERT(capa);
                capa->lc_opc = CAPA_OPC_OSS_DESTROY;
                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
                if (rc)
                        RETURN(rc);

                repbody->valid |= OBD_MD_FLOSSCAPA;
        }

        RETURN(0);
}
Exemple #7
0
/*
 * VBR: save parent version in reply and child version getting by its name.
 * Version of child is getting and checking during its lookup. If
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

        /*
	 * step 1: Found the parent.
         */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	if (mdt_object_obf(mp))
		GOTO(put_parent, rc = -EPERM);

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0)
		GOTO(unlock_parent, rc);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt2obd_dev(info->mti_mdt)->obd_name,
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(unlock_parent, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt2obd_dev(info->mti_mdt)->obd_name,
					-EPERM);
				GOTO(unlock_parent, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma);
			mdt_object_put(info->mti_env, mc);
			GOTO(unlock_parent, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		mdt_object_unlock_put(info, mc, child_lh, rc);
		GOTO(unlock_parent, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		CERROR("%s: lfs rmdir should not be used on local dir %s\n",
		       mdt2obd_dev(info->mti_mdt)->obd_name,
		       (char *)rr->rr_name);
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc = -EPERM);
	}

        rc = mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_FULL,
                             MDT_CROSS_LOCK);
	if (rc != 0) {
		mdt_object_put(info->mti_env, mc);
		GOTO(unlock_parent, rc);
	}

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
        /*
         * Now we can only make sure we need MA_INODE, in mdd layer, will check
         * whether need MA_LOV and MA_COOKIE.
         */
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
        rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
                        mdt_object_child(mc), lname, ma);
	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
        if (rc == 0)
                mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;

        mdt_object_unlock_put(info, mc, child_lh, rc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Exemple #8
0
static int mdt_reint_setattr(struct mdt_thread_info *info,
                             struct mdt_lock_handle *lhc)
{
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct mdt_export_data  *med = &req->rq_export->exp_mdt_data;
        struct mdt_file_data    *mfd;
        struct mdt_object       *mo;
        struct mdt_body         *repbody;
        int                      som_au, rc, rc2;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "setattr "DFID" %x", PFID(rr->rr_fid1),
                  (unsigned int)ma->ma_attr.la_valid);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
        mo = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
        if (IS_ERR(mo))
                GOTO(out, rc = PTR_ERR(mo));

	if (mdt_object_obf(mo))
		GOTO(out_put, rc = -EPERM);

        /* start a log jounal handle if needed */
        if (!(mdt_conn_flags(info) & OBD_CONNECT_SOM)) {
                if ((ma->ma_attr.la_valid & LA_SIZE) ||
                    (rr->rr_flags & MRF_OPEN_TRUNC)) {
                        /* Check write access for the O_TRUNC case */
                        if (mdt_write_read(mo) < 0)
                                GOTO(out_put, rc = -ETXTBSY);
                }
        } else if (info->mti_ioepoch &&
                   (info->mti_ioepoch->flags & MF_EPOCH_OPEN)) {
                /* Truncate case. IOEpoch is opened. */
                rc = mdt_write_get(mo);
                if (rc)
                        GOTO(out_put, rc);

                mfd = mdt_mfd_new();
                if (mfd == NULL) {
                        mdt_write_put(mo);
                        GOTO(out_put, rc = -ENOMEM);
                }

                mdt_ioepoch_open(info, mo, 0);
                repbody->ioepoch = mo->mot_ioepoch;

                mdt_object_get(info->mti_env, mo);
                mdt_mfd_set_mode(mfd, MDS_FMODE_TRUNC);
                mfd->mfd_object = mo;
                mfd->mfd_xid = req->rq_xid;

		spin_lock(&med->med_open_lock);
		cfs_list_add(&mfd->mfd_list, &med->med_open_head);
		spin_unlock(&med->med_open_lock);
                repbody->handle.cookie = mfd->mfd_handle.h_cookie;
        }

        som_au = info->mti_ioepoch && info->mti_ioepoch->flags & MF_SOM_CHANGE;
        if (som_au) {
                /* SOM Attribute update case. Find the proper mfd and update
                 * SOM attributes on the proper object. */
                LASSERT(mdt_conn_flags(info) & OBD_CONNECT_SOM);
                LASSERT(info->mti_ioepoch);

		spin_lock(&med->med_open_lock);
		mfd = mdt_handle2mfd(info, &info->mti_ioepoch->handle);
		if (mfd == NULL) {
			spin_unlock(&med->med_open_lock);
                        CDEBUG(D_INODE, "no handle for file close: "
                               "fid = "DFID": cookie = "LPX64"\n",
                               PFID(info->mti_rr.rr_fid1),
                               info->mti_ioepoch->handle.cookie);
                        GOTO(out_put, rc = -ESTALE);
                }
                LASSERT(mfd->mfd_mode == MDS_FMODE_SOM);
                LASSERT(!(info->mti_ioepoch->flags & MF_EPOCH_CLOSE));

                class_handle_unhash(&mfd->mfd_handle);
                cfs_list_del_init(&mfd->mfd_list);
		spin_unlock(&med->med_open_lock);

                mdt_mfd_close(info, mfd);
	} else if ((ma->ma_valid & MA_INODE) && ma->ma_attr.la_valid) {
		LASSERT((ma->ma_valid & MA_LOV) == 0);
                rc = mdt_attr_set(info, mo, ma, rr->rr_flags);
                if (rc)
                        GOTO(out_put, rc);
	} else if ((ma->ma_valid & MA_LOV) && (ma->ma_valid & MA_INODE)) {
		struct lu_buf *buf  = &info->mti_buf;
		LASSERT(ma->ma_attr.la_valid == 0);
		buf->lb_buf = ma->ma_lmm;
		buf->lb_len = ma->ma_lmm_size;
		rc = mo_xattr_set(info->mti_env, mdt_object_child(mo),
				  buf, XATTR_NAME_LOV, 0);
		if (rc)
			GOTO(out_put, rc);
	} else
		LBUG();

	/* If file data is modified, add the dirty flag */
	if (ma->ma_attr_flags & MDS_DATA_MODIFIED)
		rc = mdt_add_dirty_flag(info, mo, ma);

        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
	rc = mdt_attr_get_complex(info, mo, ma);
        if (rc != 0)
                GOTO(out_put, rc);

        mdt_pack_attr2body(info, repbody, &ma->ma_attr, mdt_object_fid(mo));

	if (info->mti_mdt->mdt_opts.mo_oss_capa &&
	    exp_connect_flags(info->mti_exp) & OBD_CONNECT_OSS_CAPA &&
	    S_ISREG(lu_object_attr(&mo->mot_obj.mo_lu)) &&
	    (ma->ma_attr.la_valid & LA_SIZE) && !som_au) {
                struct lustre_capa *capa;

                capa = req_capsule_server_get(info->mti_pill, &RMF_CAPA2);
                LASSERT(capa);
                capa->lc_opc = CAPA_OPC_OSS_DEFAULT | CAPA_OPC_OSS_TRUNC;
                rc = mo_capa_get(info->mti_env, mdt_object_child(mo), capa, 0);
                if (rc)
                        GOTO(out_put, rc);
                repbody->valid |= OBD_MD_FLOSSCAPA;
        }

        EXIT;
out_put:
        mdt_object_put(info->mti_env, mo);
out:
        if (rc == 0)
		mdt_counter_incr(req, LPROC_MDT_SETATTR);

        mdt_client_compatibility(info);
        rc2 = mdt_fix_reply(info);
        if (rc == 0)
                rc = rc2;
        return rc;
}
Exemple #9
0
/*
 * VBR: we save three versions in reply:
 * 0 - parent. Check that parent version is the same during replay.
 * 1 - name. Version of 'name' if file exists with the same name or
 * ENOENT_VERSION, it is needed because file may appear due to missed replays.
 * 2 - child. Version of child by FID. Must be ENOENT. It is mostly sanity
 * check.
 */
static int mdt_md_create(struct mdt_thread_info *info)
{
        struct mdt_device       *mdt = info->mti_mdt;
        struct mdt_object       *parent;
        struct mdt_object       *child;
        struct mdt_lock_handle  *lh;
        struct mdt_body         *repbody;
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_reint_record *rr = &info->mti_rr;
        struct lu_name          *lname;
        int rc;
        ENTRY;

        DEBUG_REQ(D_INODE, mdt_info_req(info), "Create  (%s->"DFID") in "DFID,
                  rr->rr_name, PFID(rr->rr_fid2), PFID(rr->rr_fid1));

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);

        lh = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh, LCK_PW, rr->rr_name, rr->rr_namelen);

        parent = mdt_object_find_lock(info, rr->rr_fid1, lh,
                                      MDS_INODELOCK_UPDATE);
        if (IS_ERR(parent))
                RETURN(PTR_ERR(parent));

        if (mdt_object_obf(parent))
                GOTO(out_put_parent, rc = -EPERM);

        rc = mdt_version_get_check_save(info, parent, 0);
        if (rc)
                GOTO(out_put_parent, rc);

        /*
         * Check child name version during replay.
         * During create replay a file may exist with same name.
         */
        lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
        rc = mdt_lookup_version_check(info, parent, lname,
                                      &info->mti_tmp_fid1, 1);
	if (rc == 0)
		GOTO(out_put_parent, rc = -EEXIST);

	/* -ENOENT is expected here */
	if (rc != -ENOENT)
		GOTO(out_put_parent, rc);

	/* save version of file name for replay, it must be ENOENT here */
	mdt_enoent_version_save(info, 1);

	child = mdt_object_new(info->mti_env, mdt, rr->rr_fid2);
        if (likely(!IS_ERR(child))) {
                struct md_object *next = mdt_object_child(parent);

		if (mdt_object_remote(child)) {
			struct seq_server_site *ss;
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: Creating remote dir is only "
				       "permitted for administrator: rc = %d\n",
					mdt2obd_dev(mdt)->obd_name, -EPERM);
				GOTO(out_put_child, rc = -EPERM);
			}

			ss = mdt_seq_site(mdt);
			if (ss->ss_node_id != 0 &&
			    mdt->mdt_enable_remote_dir == 0) {
				CERROR("%s: remote dir is only permitted on"
				       " MDT0 or set_param"
				       " mdt.*.enable_remote_dir=1\n",
				       mdt2obd_dev(mdt)->obd_name);
				GOTO(out_put_child, rc = -EPERM);
			}
		}
                ma->ma_need = MA_INODE;
                ma->ma_valid = 0;
                /* capa for cross-ref will be stored here */
                ma->ma_capa = req_capsule_server_get(info->mti_pill,
                                                     &RMF_CAPA1);
                LASSERT(ma->ma_capa);

                mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                               OBD_FAIL_MDS_REINT_CREATE_WRITE);

                /* Version of child will be updated on disk. */
                info->mti_mos = child;
                rc = mdt_version_get_check_save(info, child, 2);
                if (rc)
                        GOTO(out_put_child, rc);

                /* Let lower layer know current lock mode. */
                info->mti_spec.sp_cr_mode =
                        mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode);

		/*
		 * Do not perform lookup sanity check. We know that name does
		 * not exist.
		 */
		info->mti_spec.sp_cr_lookup = 0;
                info->mti_spec.sp_feat = &dt_directory_features;

                rc = mdo_create(info->mti_env, next, lname,
                                mdt_object_child(child),
                                &info->mti_spec, ma);
		if (rc == 0)
			rc = mdt_attr_get_complex(info, child, ma);

                if (rc == 0) {
                        /* Return fid & attr to client. */
                        if (ma->ma_valid & MA_INODE)
                                mdt_pack_attr2body(info, repbody, &ma->ma_attr,
                                                   mdt_object_fid(child));
                }
out_put_child:
                mdt_object_put(info->mti_env, child);
        } else {
                rc = PTR_ERR(child);
        }
        mdt_create_pack_capa(info, rc, child, repbody);
out_put_parent:
        mdt_object_unlock_put(info, parent, lh, rc);
        RETURN(rc);
}
Exemple #10
0
/*
 * VBR: save parent version in reply and child version getting by its name.
 * Version of child is getting and checking during its lookup. If
 */
static int mdt_reint_unlink(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct lu_fid           *child_fid = &info->mti_tmp_fid1;
        struct mdt_object       *mp;
        struct mdt_object       *mc;
        struct mdt_lock_handle  *parent_lh;
        struct mdt_lock_handle  *child_lh;
        struct lu_name          *lname;
        int                      rc;
	int			 no_name = 0;
	ENTRY;

        DEBUG_REQ(D_INODE, req, "unlink "DFID"/%s", PFID(rr->rr_fid1),
                  rr->rr_name);

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK))
                RETURN(err_serious(-ENOENT));

	if (fid_is_obf(rr->rr_fid1) || fid_is_dot_lustre(rr->rr_fid1))
		RETURN(-EPERM);
        /*
	 * step 1: Found the parent.
         */
	mp = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
	if (IS_ERR(mp)) {
		rc = PTR_ERR(mp);
		GOTO(out, rc);
	}

	parent_lh = &info->mti_lh[MDT_LH_PARENT];
	lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
	if (mdt_object_remote(mp)) {
		mdt_lock_reg_init(parent_lh, LCK_EX);
		rc = mdt_remote_object_lock(info, mp, &parent_lh->mlh_rreg_lh,
					    parent_lh->mlh_rreg_mode,
					    MDS_INODELOCK_UPDATE);
		if (rc != ELDLM_OK)
			GOTO(put_parent, rc);

	} else {
		mdt_lock_pdo_init(parent_lh, LCK_PW, rr->rr_name,
				  rr->rr_namelen);
		rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
				     MDT_LOCAL_LOCK);
		if (rc)
			GOTO(put_parent, rc);

		rc = mdt_version_get_check_save(info, mp, 0);
		if (rc)
			GOTO(unlock_parent, rc);
	}

	/* step 2: find & lock the child */
	/* lookup child object along with version checking */
	fid_zero(child_fid);
	rc = mdt_lookup_version_check(info, mp, lname, child_fid, 1);
	if (rc != 0) {
		/* Name might not be able to find during resend of
		 * remote unlink, considering following case.
		 * dir_A is a remote directory, the name entry of
		 * dir_A is on MDT0, the directory is on MDT1,
		 *
		 * 1. client sends unlink req to MDT1.
		 * 2. MDT1 sends name delete update to MDT0.
		 * 3. name entry is being deleted in MDT0 synchronously.
		 * 4. MDT1 is restarted.
		 * 5. client resends unlink req to MDT1. So it can not
		 *    find the name entry on MDT0 anymore.
		 * In this case, MDT1 only needs to destory the local
		 * directory.
		 * */
		if (mdt_object_remote(mp) && rc == -ENOENT &&
		    !fid_is_zero(rr->rr_fid2) &&
		    lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) {
			no_name = 1;
			*child_fid = *rr->rr_fid2;
		 } else {
			GOTO(unlock_parent, rc);
		 }
	}

	if (fid_is_obf(child_fid) || fid_is_dot_lustre(child_fid))
		GOTO(unlock_parent, rc = -EPERM);

        mdt_reint_init_ma(info, ma);

	/* We will lock the child regardless it is local or remote. No harm. */
	mc = mdt_object_find(info->mti_env, info->mti_mdt, child_fid);
	if (IS_ERR(mc))
		GOTO(unlock_parent, rc = PTR_ERR(mc));

        child_lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(child_lh, LCK_EX);
	if (mdt_object_remote(mc)) {
		struct mdt_body	 *repbody;

		if (!fid_is_zero(rr->rr_fid2)) {
			CDEBUG(D_INFO, "%s: name %s can not find "DFID"\n",
			       mdt_obd_name(info->mti_mdt),
			       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));
			GOTO(put_child, rc = -ENOENT);
		}
		CDEBUG(D_INFO, "%s: name %s: "DFID" is another MDT\n",
		       mdt_obd_name(info->mti_mdt),
		       (char *)rr->rr_name, PFID(mdt_object_fid(mc)));

		if (!mdt_is_dne_client(req->rq_export))
			/* Return -EIO for old client */
			GOTO(put_child, rc = -EIO);

		if (info->mti_spec.sp_rm_entry) {
			struct lu_ucred *uc  = mdt_ucred(info);

			if (!md_capable(uc, CFS_CAP_SYS_ADMIN)) {
				CERROR("%s: unlink remote entry is only "
				       "permitted for administrator: rc = %d\n",
					mdt_obd_name(info->mti_mdt),
					-EPERM);
				GOTO(put_child, rc = -EPERM);
			}

			ma->ma_need = MA_INODE;
			ma->ma_valid = 0;
			mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);
			rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
					NULL, lname, ma, no_name);
			GOTO(put_child, rc);
		}
		/* Revoke the LOOKUP lock of the remote object granted by
		 * this MDT. Since the unlink will happen on another MDT,
		 * it will release the LOOKUP lock right away. Then What
		 * would happen if another client try to grab the LOOKUP
		 * lock at the same time with unlink XXX */
		mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
				MDT_CROSS_LOCK);
		repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
		LASSERT(repbody != NULL);
		repbody->fid1 = *mdt_object_fid(mc);
		repbody->valid |= (OBD_MD_FLID | OBD_MD_MDS);
		GOTO(unlock_child, rc = -EREMOTE);
	} else if (info->mti_spec.sp_rm_entry) {
		rc = -EPERM;
		CDEBUG(D_INFO, "%s: no rm_entry on local dir '%s': rc = %d\n",
		       mdt_obd_name(info->mti_mdt), (char *)rr->rr_name, rc);
		GOTO(put_child, rc);
	}

	/* We used to acquire MDS_INODELOCK_FULL here but we can't do
	 * this now because a running HSM restore on the child (unlink
	 * victim) will hold the layout lock. See LU-4002. */
	rc = mdt_object_lock(info, mc, child_lh,
			     MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE,
			     MDT_CROSS_LOCK);
	if (rc != 0)
		GOTO(put_child, rc);

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_UNLINK_WRITE);
        /* save version when object is locked */
        mdt_version_get_save(info, mc, 1);
	/*
	 * Now we can only make sure we need MA_INODE, in mdd layer, will check
	 * whether need MA_LOV and MA_COOKIE.
	 */
	ma->ma_need = MA_INODE;
	ma->ma_valid = 0;
	mdt_set_capainfo(info, 1, child_fid, BYPASS_CAPA);

	mutex_lock(&mc->mot_lov_mutex);

	rc = mdo_unlink(info->mti_env, mdt_object_child(mp),
			mdt_object_child(mc), lname, ma, no_name);

	mutex_unlock(&mc->mot_lov_mutex);

	if (rc == 0 && !lu_object_is_dying(&mc->mot_header))
		rc = mdt_attr_get_complex(info, mc, ma);
	if (rc == 0)
		mdt_handle_last_unlink(info, mc, ma);

        if (ma->ma_valid & MA_INODE) {
                switch (ma->ma_attr.la_mode & S_IFMT) {
                case S_IFDIR:
			mdt_counter_incr(req, LPROC_MDT_RMDIR);
                        break;
                case S_IFREG:
                case S_IFLNK:
                case S_IFCHR:
                case S_IFBLK:
                case S_IFIFO:
                case S_IFSOCK:
			mdt_counter_incr(req, LPROC_MDT_UNLINK);
                        break;
                default:
                        LASSERTF(0, "bad file type %o unlinking\n",
                                 ma->ma_attr.la_mode);
                }
        }

        EXIT;
unlock_child:
	mdt_object_unlock(info, mc, child_lh, rc);
put_child:
	mdt_object_put(info->mti_env, mc);
unlock_parent:
	mdt_object_unlock(info, mp, parent_lh, rc);
put_parent:
	mdt_object_put(info->mti_env, mp);
out:
        return rc;
}
Exemple #11
0
/*
 * VBR: we save three versions in reply:
 * 0 - parent. Check that parent version is the same during replay.
 * 1 - name. Version of 'name' if file exists with the same name or
 * ENOENT_VERSION, it is needed because file may appear due to missed replays.
 * 2 - child. Version of child by FID. Must be ENOENT. It is mostly sanity
 * check.
 */
static int mdt_md_create(struct mdt_thread_info *info)
{
        struct mdt_device       *mdt = info->mti_mdt;
        struct mdt_object       *parent;
        struct mdt_object       *child;
        struct mdt_lock_handle  *lh;
        struct mdt_body         *repbody;
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_reint_record *rr = &info->mti_rr;
        struct lu_name          *lname;
        int rc;
        ENTRY;

        DEBUG_REQ(D_INODE, mdt_info_req(info), "Create  (%s->"DFID") in "DFID,
                  rr->rr_name, PFID(rr->rr_fid2), PFID(rr->rr_fid1));

        repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);

        lh = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh, LCK_PW, rr->rr_name, rr->rr_namelen);

        parent = mdt_object_find_lock(info, rr->rr_fid1, lh,
                                      MDS_INODELOCK_UPDATE, MDT_OBJ_MUST_EXIST);
        if (IS_ERR(parent))
                RETURN(PTR_ERR(parent));

        rc = mdt_version_get_check_save(info, parent, 0);
        if (rc)
                GOTO(out_put_parent, rc);

        /*
         * Check child name version during replay.
         * During create replay a file may exist with same name.
         */
        lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
        rc = mdt_lookup_version_check(info, parent, lname,
                                      &info->mti_tmp_fid1, 1);
        /* -ENOENT is expected here */
        if (rc != 0 && rc != -ENOENT)
                GOTO(out_put_parent, rc);

        /* save version of file name for replay, it must be ENOENT here */
        mdt_enoent_version_save(info, 1);

        child = mdt_object_find(info->mti_env, mdt, rr->rr_fid2,
                                MDT_OBJ_MAY_NOT_EXIST);
        if (likely(!IS_ERR(child))) {
                struct md_object *next = mdt_object_child(parent);

                ma->ma_need = MA_INODE;
                ma->ma_valid = 0;
                /* capa for cross-ref will be stored here */
                ma->ma_capa = req_capsule_server_get(info->mti_pill,
                                                     &RMF_CAPA1);
                LASSERT(ma->ma_capa);

                mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                               OBD_FAIL_MDS_REINT_CREATE_WRITE);

                /* Version of child will be updated on disk. */
                info->mti_mos = child;
                rc = mdt_version_get_check_save(info, child, 2);
                if (rc)
                        GOTO(out_put_child, rc);

                /* Let lower layer know current lock mode. */
                info->mti_spec.sp_cr_mode =
                        mdt_dlm_mode2mdl_mode(lh->mlh_pdo_mode);

                /*
                 * Do perform lookup sanity check. We do not know if name exists
                 * or not.
                 */
                info->mti_spec.sp_cr_lookup = 1;
                info->mti_spec.sp_feat = &dt_directory_features;

                rc = mdo_create(info->mti_env, next, lname,
                                mdt_object_child(child),
                                &info->mti_spec, ma);
                if (rc == 0) {
                        /* Return fid & attr to client. */
                        if (ma->ma_valid & MA_INODE)
                                mdt_pack_attr2body(info, repbody, &ma->ma_attr,
                                                   mdt_object_fid(child));
                }
out_put_child:
                mdt_object_put(info->mti_env, child);
        } else {
                rc = PTR_ERR(child);
        }
        mdt_create_pack_capa(info, rc, child, repbody);
out_put_parent:
        mdt_object_unlock_put(info, parent, lh, rc);
        RETURN(rc);
}