Example #1
0
static int lfsck_namespace_check_exist(const struct lu_env *env,
				       struct lfsck_instance *lfsck,
				       struct dt_object *obj, const char *name)
{
	struct dt_object *dir = lfsck->li_obj_dir;
	struct lu_fid	 *fid = &lfsck_env_info(env)->lti_fid;
	int		  rc;
	ENTRY;

	if (unlikely(lfsck_is_dead_obj(obj)))
		RETURN(LFSCK_NAMEENTRY_DEAD);

	rc = dt_lookup(env, dir, (struct dt_rec *)fid,
		       (const struct dt_key *)name, BYPASS_CAPA);
	if (rc == -ENOENT)
		RETURN(LFSCK_NAMEENTRY_REMOVED);

	if (rc < 0)
		RETURN(rc);

	if (!lu_fid_eq(fid, lfsck_dto2fid(obj)))
		RETURN(LFSCK_NAMEENTRY_RECREATED);

	RETURN(0);
}
Example #2
0
/*
 * This is is_subdir() variant, it is CMD if cmm forwards it to correct
 * target. Source should not be ancestor of target dir. May be other rename
 * checks can be moved here later.
 */
static int mdt_rename_sanity(struct mdt_thread_info *info, struct lu_fid *fid)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct lu_fid dst_fid = *rr->rr_fid2;
        struct mdt_object *dst;
        int rc = 0;
        ENTRY;

        do {
                LASSERT(fid_is_sane(&dst_fid));
                dst = mdt_object_find(info->mti_env, info->mti_mdt, &dst_fid);
                if (!IS_ERR(dst)) {
                        rc = mdo_is_subdir(info->mti_env,
                                           mdt_object_child(dst), fid,
                                           &dst_fid);
                        mdt_object_put(info->mti_env, dst);
                        if (rc != -EREMOTE && rc < 0) {
                                CERROR("Failed mdo_is_subdir(), rc %d\n", rc);
                        } else {
                                /* check the found fid */
                                if (lu_fid_eq(&dst_fid, fid))
                                        rc = -EINVAL;
                        }
                } else {
                        rc = PTR_ERR(dst);
                }
        } while (rc == -EREMOTE);

        RETURN(rc);
}
Example #3
0
static int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
			   struct lustre_capa *capa, __u64 opc)
{
	const struct lu_fid	*fid = lu_object_fid(&dt->do_lu);
	struct osd_device	*dev = osd_dev(dt->do_lu.lo_dev);
	int			 rc;

	if (!dev->od_fl_capa)
		return 0;

	if (capa == BYPASS_CAPA)
		return 0;

	if (!capa) {
		CERROR("no capability is provided for fid "DFID"\n", PFID(fid));
		return -EACCES;
	}

	if (!lu_fid_eq(fid, &capa->lc_fid)) {
		DEBUG_CAPA(D_ERROR, capa, "fid "DFID" mismatch with",PFID(fid));
		return -EACCES;
	}

	if (!capa_opc_supported(capa, opc)) {
		DEBUG_CAPA(D_ERROR, capa, "opc "LPX64" not supported by", opc);
		return -EACCES;
	}

	if ((rc = capa_is_sane(env, dev, capa, dev->od_capa_keys))) {
		DEBUG_CAPA(D_ERROR, capa, "insane (rc %d)", rc);
		return -EACCES;
	}

	return 0;
}
Example #4
0
/**
 * Check if such a link exists in linkEA.
 *
 * \param ldata link data the search to be done on
 * \param lname name in the parent's directory entry pointing to this object
 * \param pfid parent fid the link to be found for
 *
 * \retval   0 success
 * \retval -ENOENT link does not exist
 * \retval -ve on error
 */
int linkea_links_find(struct linkea_data *ldata, const struct lu_name *lname,
		      const struct lu_fid  *pfid)
{
	struct lu_name tmpname;
	struct lu_fid  tmpfid;
	int count;

	LASSERT(ldata->ld_leh != NULL);

	/* link #0, if leh_reccount == 0 we skip the loop and return -ENOENT */
	if (likely(ldata->ld_leh->leh_reccount > 0))
		ldata->ld_lee = (struct link_ea_entry *)(ldata->ld_leh + 1);

	for (count = 0; count < ldata->ld_leh->leh_reccount; count++) {
		linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen,
				    &tmpname, &tmpfid);
		if (tmpname.ln_namelen == lname->ln_namelen &&
		    lu_fid_eq(&tmpfid, pfid) &&
		    (strncmp(tmpname.ln_name, lname->ln_name,
			     tmpname.ln_namelen) == 0))
			break;
		ldata->ld_lee = (struct link_ea_entry *)((char *)ldata->ld_lee +
							 ldata->ld_reclen);
	}

	if (count == ldata->ld_leh->leh_reccount) {
		CDEBUG(D_INODE, "Old link_ea name '%.*s' not found\n",
		       lname->ln_namelen, lname->ln_name);
		ldata->ld_lee = NULL;
		ldata->ld_reclen = 0;
		return -ENOENT;
	}
	return 0;
}
Example #5
0
/* Return true if a FID is present in an action list. */
static bool is_fid_in_hal(struct hsm_action_list *hal, const lustre_fid *fid)
{
	struct hsm_action_item *hai;
	int i;

	for (hai = hai_first(hal), i = 0;
	     i < hal->hal_count;
	     i++, hai = hai_next(hai)) {
		if (lu_fid_eq(&hai->hai_fid, fid))
			return true;
	}

	return false;
}
Example #6
0
static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen,
				   loff_t hash, u64 ino, unsigned type)
{
	/* It is hack to access lde_fid for comparison with lgd_fid.
	 * So the input 'name' must be part of the 'lu_dirent'. */
	struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name);
	struct ll_getname_data *lgd = cookie;
	struct lu_fid fid;

	fid_le_to_cpu(&fid, &lde->lde_fid);
	if (lu_fid_eq(&fid, &lgd->lgd_fid)) {
		memcpy(lgd->lgd_name, name, namelen);
		lgd->lgd_name[namelen] = 0;
		lgd->lgd_found = 1;
	}
	return lgd->lgd_found;
}
Example #7
0
static struct eacl_entry *__et_search_del(struct eacl_table *et, pid_t key,
					struct lu_fid *fid, int type)
{
	struct eacl_entry *ee;
	struct list_head *head = &et->et_entries[ee_hashfunc(key)];

	LASSERT(fid != NULL);
	list_for_each_entry(ee, head, ee_list)
		if (ee->ee_key == key) {
			if (lu_fid_eq(&ee->ee_fid, fid) &&
			    ee->ee_type == type) {
				list_del_init(&ee->ee_list);
				return ee;
			}
		}

	return NULL;
}
Example #8
0
int osd_oi_delete(struct osd_thread_info *info,
		  struct osd_device *osd, const struct lu_fid *fid,
		  handle_t *th, enum oi_check_flags flags)
{
	struct lu_fid *oi_fid = &info->oti_fid2;

	/* clear idmap cache */
	if (lu_fid_eq(fid, &info->oti_cache.oic_fid))
		fid_zero(&info->oti_cache.oic_fid);

	if (fid_is_last_id(fid))
		return 0;

	if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
		return osd_obj_map_delete(info, osd, fid, th);

	fid_cpu_to_be(oi_fid, fid);
	return osd_oi_iam_delete(info, osd_fid2oi(osd, fid),
				 (const struct dt_key *)oi_fid, th);
}
Example #9
0
static int osd_find_parent_fid(const struct lu_env *env, struct dt_object *o,
			       struct lu_fid *fid)
{
	struct link_ea_header  *leh;
	struct link_ea_entry   *lee;
	struct lu_buf		buf;
	int			rc;
	ENTRY;

	buf.lb_buf = osd_oti_get(env)->oti_buf;
	buf.lb_len = sizeof(osd_oti_get(env)->oti_buf);

	rc = osd_xattr_get(env, o, &buf, XATTR_NAME_LINK, BYPASS_CAPA);
	if (rc == -ERANGE) {
		rc = osd_xattr_get(env, o, &LU_BUF_NULL,
				   XATTR_NAME_LINK, BYPASS_CAPA);
		if (rc < 0)
			RETURN(rc);
		LASSERT(rc > 0);
		OBD_ALLOC(buf.lb_buf, rc);
		if (buf.lb_buf == NULL)
			RETURN(-ENOMEM);
		buf.lb_len = rc;
		rc = osd_xattr_get(env, o, &buf, XATTR_NAME_LINK, BYPASS_CAPA);
	}
	if (rc < 0)
		GOTO(out, rc);
	if (rc < sizeof(*leh) + sizeof(*lee))
		GOTO(out, rc = -EINVAL);

	leh = buf.lb_buf;
	if (leh->leh_magic == __swab32(LINK_EA_MAGIC)) {
		leh->leh_magic = LINK_EA_MAGIC;
		leh->leh_reccount = __swab32(leh->leh_reccount);
		leh->leh_len = __swab64(leh->leh_len);
	}
	if (leh->leh_magic != LINK_EA_MAGIC)
		GOTO(out, rc = -EINVAL);
	if (leh->leh_reccount == 0)
		GOTO(out, rc = -ENODATA);

	lee = (struct link_ea_entry *)(leh + 1);
	fid_be_to_cpu(fid, (const struct lu_fid *)&lee->lee_parent_fid);
	rc = 0;

out:
	if (buf.lb_buf != osd_oti_get(env)->oti_buf)
		OBD_FREE(buf.lb_buf, buf.lb_len);

#if 0
	/* this block can be enabled for additional verification
	 * it's trying to match FID from LinkEA vs. FID from LMA */
	if (rc == 0) {
		struct lu_fid fid2;
		int rc2;
		rc2 = osd_find_parent_by_dnode(env, o, &fid2);
		if (rc2 == 0)
			if (lu_fid_eq(fid, &fid2) == 0)
				CERROR("wrong parent: "DFID" != "DFID"\n",
				       PFID(fid), PFID(&fid2));
	}
#endif

	/* no LinkEA is found, let's try to find the fid in parent's LMA */
	if (unlikely(rc != 0))
		rc = osd_find_parent_by_dnode(env, o, fid);

	RETURN(rc);
}
Example #10
0
int ll_revalidate_it(struct dentry *de, int lookup_flags,
                     struct lookup_intent *it)
{
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
        struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
        struct obd_export *exp;
        struct inode *parent = de->d_parent->d_inode;
        int rc, first = 0;

        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
               LL_IT2STR(it));

        if (de->d_inode == NULL) {
                /* We can only use negative dentries if this is stat or lookup,
                   for opens and stuff we do need to query server. */
                /* If there is IT_CREAT in intent op set, then we must throw
                   away this negative dentry and actually do the request to
                   kernel to create whatever needs to be created (if possible)*/
                if (it && (it->it_op & IT_CREAT))
                        RETURN(0);

                if (de->d_flags & DCACHE_LUSTRE_INVALID)
                        RETURN(0);

                rc = ll_have_md_lock(parent, MDS_INODELOCK_UPDATE, LCK_MINMODE);
                GOTO(out_sa, rc);
        }

        /* Never execute intents for mount points.
         * Attributes will be fixed up in ll_inode_revalidate_it */
        if (d_mountpoint(de))
                GOTO(out_sa, rc = 1);

        /* need to get attributes in case root got changed from other client */
        if (de == de->d_sb->s_root) {
                rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
                if (rc == 0)
                        rc = 1;
                GOTO(out_sa, rc);
        }

        exp = ll_i2mdexp(de->d_inode);

        OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
        ll_frob_intent(&it, &lookup_it);
        LASSERT(it);

        if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID))
                GOTO(out_sa, rc = 1);

        op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
                                     de->d_name.name, de->d_name.len,
                                     0, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        if ((it->it_op == IT_OPEN) && de->d_inode) {
                struct inode *inode = de->d_inode;
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;

                /*
                 * We used to check for MDS_INODELOCK_OPEN here, but in fact
                 * just having LOOKUP lock is enough to justify inode is the
                 * same. And if inode is the same and we have suitable
                 * openhandle, then there is no point in doing another OPEN RPC
                 * just to throw away newly received openhandle.  There are no
                 * security implications too, if file owner or access mode is
                 * change, LOOKUP lock is revoked.
                 */


                if (it->it_flags & FMODE_WRITE) {
                        och_p = &lli->lli_mds_write_och;
                        och_usecount = &lli->lli_open_fd_write_count;
                } else if (it->it_flags & FMODE_EXEC) {
                        och_p = &lli->lli_mds_exec_och;
                        och_usecount = &lli->lli_open_fd_exec_count;
                } else {
                        och_p = &lli->lli_mds_read_och;
                        och_usecount = &lli->lli_open_fd_read_count;
                }
                /* Check for the proper lock. */
                if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP, LCK_MINMODE))
                        goto do_lock;
                cfs_down(&lli->lli_och_sem);
                if (*och_p) { /* Everything is open already, do nothing */
                        /*(*och_usecount)++;  Do not let them steal our open
                          handle from under us */
                        /* XXX The code above was my original idea, but in case
                           we have the handle, but we cannot use it due to later
                           checks (e.g. O_CREAT|O_EXCL flags set), nobody
                           would decrement counter increased here. So we just
                           hope the lock won't be invalidated in between. But
                           if it would be, we'll reopen the open request to
                           MDS later during file open path */
                        cfs_up(&lli->lli_och_sem);
                        ll_finish_md_op_data(op_data);
                        RETURN(1);
                } else {
                        cfs_up(&lli->lli_och_sem);
                }
        }

        if (it->it_op == IT_GETATTR) {
                first = ll_statahead_enter(parent, &de, 0);
                if (first == 1) {
                        ll_statahead_exit(parent, de, 1);
                        ll_finish_md_op_data(op_data);
                        GOTO(out, rc = 1);
                }
        }

do_lock:
        it->it_create_mode &= ~current->fs->umask;
        it->it_create_mode |= M_CHECK_STALE;
        rc = md_intent_lock(exp, op_data, NULL, 0, it,
                            lookup_flags,
                            &req, ll_md_blocking_ast, 0);
        it->it_create_mode &= ~M_CHECK_STALE;
        ll_finish_md_op_data(op_data);
        if (it->it_op == IT_GETATTR && !first)
                /* If there are too many locks on client-side, then some
                 * locks taken by statahead maybe dropped automatically
                 * before the real "revalidate" using them. */
                ll_statahead_exit(parent, de, req == NULL ? rc : 0);
        else if (first == -EEXIST)
                ll_statahead_mark(parent, de);

        /* If req is NULL, then md_intent_lock only tried to do a lock match;
         * if all was well, it will return 1 if it found locks, 0 otherwise. */
        if (req == NULL && rc >= 0) {
                if (!rc)
                        goto do_lookup;
                GOTO(out, rc);
        }

        if (rc < 0) {
                if (rc != -ESTALE) {
                        CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
                               "%d\n", rc, it->d.lustre.it_status);
                }
                GOTO(out, rc = 0);
        }

revalidate_finish:
        rc = ll_revalidate_it_finish(req, it, de);
        if (rc != 0) {
                if (rc != -ESTALE && rc != -ENOENT)
                        ll_intent_release(it);
                GOTO(out, rc = 0);
        }

        if ((it->it_op & IT_OPEN) && de->d_inode &&
            !S_ISREG(de->d_inode->i_mode) &&
            !S_ISDIR(de->d_inode->i_mode)) {
                ll_release_openhandle(de, it);
        }
        rc = 1;

        /* unfortunately ll_intent_lock may cause a callback and revoke our
         * dentry */
        cfs_spin_lock(&ll_lookup_lock);
        spin_lock(&dcache_lock);
        lock_dentry(de);
        __d_drop(de);
        unlock_dentry(de);
        d_rehash_cond(de, 0);
        spin_unlock(&dcache_lock);
        cfs_spin_unlock(&ll_lookup_lock);

out:
        /* We do not free request as it may be reused during following lookup
         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
         * be freed in ll_lookup_it or in ll_intent_release. But if
         * request was not completed, we need to free it. (bug 5154, 9903) */
        if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                ptlrpc_req_finished(req);
        if (rc == 0) {
                ll_unhash_aliases(de->d_inode);
                /* done in ll_unhash_aliases()
                   dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
        } else {
                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       atomic_read(&de->d_count));
                if (first != 1) {
                        if (de->d_flags & DCACHE_LUSTRE_INVALID) {
                                lock_dentry(de);
                                de->d_flags &= ~DCACHE_LUSTRE_INVALID;
                                unlock_dentry(de);
                        }
                        ll_lookup_finish_locks(it, de);
                }
        }
        RETURN(rc);

        /*
         * This part is here to combat evil-evil race in real_lookup on 2.6
         * kernels.  The race details are: We enter do_lookup() looking for some
         * name, there is nothing in dcache for this name yet and d_lookup()
         * returns NULL.  We proceed to real_lookup(), and while we do this,
         * another process does open on the same file we looking up (most simple
         * reproducer), open succeeds and the dentry is added. Now back to
         * us. In real_lookup() we do d_lookup() again and suddenly find the
         * dentry, so we call d_revalidate on it, but there is no lock, so
         * without this code we would return 0, but unpatched real_lookup just
         * returns -ENOENT in such a case instead of retrying the lookup. Once
         * this is dealt with in real_lookup(), all of this ugly mess can go and
         * we can just check locks in ->d_revalidate without doing any RPCs
         * ever.
         */
do_lookup:
        if (it != &lookup_it) {
                /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
                if (it->it_op == IT_GETATTR)
                        lookup_it.it_op = IT_GETATTR;
                ll_lookup_finish_locks(it, de);
                it = &lookup_it;
        }

        /* Do real lookup here. */
        op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
                                     de->d_name.len, 0, (it->it_op & IT_CREAT ?
                                                         LUSTRE_OPC_CREATE :
                                                         LUSTRE_OPC_ANY), NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        rc = md_intent_lock(exp, op_data, NULL, 0,  it, 0, &req,
                            ll_md_blocking_ast, 0);
        if (rc >= 0) {
                struct mdt_body *mdt_body;
                struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
                mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

                if (de->d_inode)
                        fid = *ll_inode2fid(de->d_inode);

                /* see if we got same inode, if not - return error */
                if (lu_fid_eq(&fid, &mdt_body->fid1)) {
                        ll_finish_md_op_data(op_data);
                        op_data = NULL;
                        goto revalidate_finish;
                }
                ll_intent_release(it);
        }
        ll_finish_md_op_data(op_data);
        GOTO(out, rc = 0);

out_sa:
        /*
         * For rc == 1 case, should not return directly to prevent losing
         * statahead windows; for rc == 0 case, the "lookup" will be done later.
         */
        if (it && it->it_op == IT_GETATTR && rc == 1) {
                first = ll_statahead_enter(parent, &de, 0);
                if (first >= 0)
                        ll_statahead_exit(parent, de, 1);
                else if (first == -EEXIST)
                        ll_statahead_mark(parent, de);
        }

        return rc;
}

#if 0
static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct inode *inode= de->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle *handle;
        struct obd_capa *oc;
        int rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }
        cfs_unlock_kernel();

        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
        oc = ll_mdscapa_get(inode);
        rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
        capa_put(oc);
        if (rc) {
                cfs_lock_kernel();
                memset(handle, 0, sizeof(*handle));
                if (flag == 0)
                        ldd->lld_cwd_count--;
                else
                        ldd->lld_mnt_count--;
                cfs_unlock_kernel();
        }

        EXIT;
        return;
}

static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle handle;
        int count, rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
                /* the "pin" failed */
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag)
                count = --ldd->lld_mnt_count;
        else
                count = --ldd->lld_cwd_count;
        cfs_unlock_kernel();

        if (count != 0) {
                EXIT;
                return;
        }

        rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
        EXIT;
        return;
}
#endif

#ifdef HAVE_VFS_INTENT_PATCHES
int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
{
        int rc;
        ENTRY;

        if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
                rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
        else
                rc = ll_revalidate_it(dentry, 0, NULL);

        RETURN(rc);
}
Example #11
0
File: dcache.c Project: LLNL/lustre
int ll_revalidate_it(struct dentry *de, int lookup_flags,
                     struct lookup_intent *it)
{
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
        struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
        struct obd_export *exp;
        struct inode *parent = de->d_parent->d_inode;
        int rc;

        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
               LL_IT2STR(it));

        if (de->d_inode == NULL) {
                __u64 ibits;

                /* We can only use negative dentries if this is stat or lookup,
                   for opens and stuff we do need to query server. */
                /* If there is IT_CREAT in intent op set, then we must throw
                   away this negative dentry and actually do the request to
                   kernel to create whatever needs to be created (if possible)*/
                if (it && (it->it_op & IT_CREAT))
                        RETURN(0);

                if (de->d_flags & DCACHE_LUSTRE_INVALID)
                        RETURN(0);

                ibits = MDS_INODELOCK_UPDATE;
                rc = ll_have_md_lock(parent, &ibits, LCK_MINMODE);
                GOTO(out_sa, rc);
        }

        /* Never execute intents for mount points.
         * Attributes will be fixed up in ll_inode_revalidate_it */
        if (d_mountpoint(de))
                GOTO(out_sa, rc = 1);

        /* need to get attributes in case root got changed from other client */
        if (de == de->d_sb->s_root) {
                rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
                if (rc == 0)
                        rc = 1;
                GOTO(out_sa, rc);
        }

        exp = ll_i2mdexp(de->d_inode);

        OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
        ll_frob_intent(&it, &lookup_it);
        LASSERT(it);

        if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID))
                RETURN(1);

        if ((it->it_op == IT_OPEN) && de->d_inode) {
                struct inode *inode = de->d_inode;
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;
                __u64 ibits;

                /*
                 * We used to check for MDS_INODELOCK_OPEN here, but in fact
                 * just having LOOKUP lock is enough to justify inode is the
                 * same. And if inode is the same and we have suitable
                 * openhandle, then there is no point in doing another OPEN RPC
                 * just to throw away newly received openhandle.  There are no
                 * security implications too, if file owner or access mode is
                 * change, LOOKUP lock is revoked.
                 */


                if (it->it_flags & FMODE_WRITE) {
                        och_p = &lli->lli_mds_write_och;
                        och_usecount = &lli->lli_open_fd_write_count;
                } else if (it->it_flags & FMODE_EXEC) {
                        och_p = &lli->lli_mds_exec_och;
                        och_usecount = &lli->lli_open_fd_exec_count;
                } else {
                        och_p = &lli->lli_mds_read_och;
                        och_usecount = &lli->lli_open_fd_read_count;
                }
                /* Check for the proper lock. */
                ibits = MDS_INODELOCK_LOOKUP;
                if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE))
                        goto do_lock;
                cfs_mutex_lock(&lli->lli_och_mutex);
                if (*och_p) { /* Everything is open already, do nothing */
                        /*(*och_usecount)++;  Do not let them steal our open
                          handle from under us */
                        SET_BUT_UNUSED(och_usecount);
                        /* XXX The code above was my original idea, but in case
                           we have the handle, but we cannot use it due to later
                           checks (e.g. O_CREAT|O_EXCL flags set), nobody
                           would decrement counter increased here. So we just
                           hope the lock won't be invalidated in between. But
                           if it would be, we'll reopen the open request to
                           MDS later during file open path */
                        cfs_mutex_unlock(&lli->lli_och_mutex);
                        RETURN(1);
                } else {
                        cfs_mutex_unlock(&lli->lli_och_mutex);
                }
        }

        if (it->it_op == IT_GETATTR) {
                rc = ll_statahead_enter(parent, &de, 0);
                if (rc == 1)
                        goto mark;
        }

do_lock:
        op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
                                     de->d_name.name, de->d_name.len,
                                     0, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        it->it_create_mode &= ~cfs_curproc_umask();
        it->it_create_mode |= M_CHECK_STALE;
        rc = md_intent_lock(exp, op_data, NULL, 0, it,
                            lookup_flags,
                            &req, ll_md_blocking_ast, 0);
        it->it_create_mode &= ~M_CHECK_STALE;
        ll_finish_md_op_data(op_data);

        /* If req is NULL, then md_intent_lock only tried to do a lock match;
         * if all was well, it will return 1 if it found locks, 0 otherwise. */
        if (req == NULL && rc >= 0) {
                if (!rc)
                        goto do_lookup;
                GOTO(out, rc);
        }

        if (rc < 0) {
                if (rc != -ESTALE) {
                        CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
                               "%d\n", rc, it->d.lustre.it_status);
                }
                GOTO(out, rc = 0);
        }

revalidate_finish:
        rc = ll_revalidate_it_finish(req, it, de);
        if (rc != 0) {
                if (rc != -ESTALE && rc != -ENOENT)
                        ll_intent_release(it);
                GOTO(out, rc = 0);
        }

        if ((it->it_op & IT_OPEN) && de->d_inode &&
            !S_ISREG(de->d_inode->i_mode) &&
            !S_ISDIR(de->d_inode->i_mode)) {
                ll_release_openhandle(de, it);
        }
        rc = 1;

        /* unfortunately ll_intent_lock may cause a callback and revoke our
         * dentry */
        ll_dentry_rehash(de, 0);

out:
        /* We do not free request as it may be reused during following lookup
         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
         * be freed in ll_lookup_it or in ll_intent_release. But if
         * request was not completed, we need to free it. (bug 5154, 9903) */
        if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                ptlrpc_req_finished(req);
        if (rc == 0) {
                ll_unhash_aliases(de->d_inode);
                /* done in ll_unhash_aliases()
                   dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
        } else {
                __u64 bits = 0;

                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       atomic_read(&de->d_count));
                ll_set_lock_data(exp, de->d_inode, it, &bits);
                ll_dentry_reset_flags(de, bits);
                ll_lookup_finish_locks(it, de);
        }

mark:
        if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
                ll_statahead_mark(parent, de);
        RETURN(rc);

        /*
         * This part is here to combat evil-evil race in real_lookup on 2.6
         * kernels.  The race details are: We enter do_lookup() looking for some
         * name, there is nothing in dcache for this name yet and d_lookup()
         * returns NULL.  We proceed to real_lookup(), and while we do this,
         * another process does open on the same file we looking up (most simple
         * reproducer), open succeeds and the dentry is added. Now back to
         * us. In real_lookup() we do d_lookup() again and suddenly find the
         * dentry, so we call d_revalidate on it, but there is no lock, so
         * without this code we would return 0, but unpatched real_lookup just
         * returns -ENOENT in such a case instead of retrying the lookup. Once
         * this is dealt with in real_lookup(), all of this ugly mess can go and
         * we can just check locks in ->d_revalidate without doing any RPCs
         * ever.
         */
do_lookup:
        if (it != &lookup_it) {
                /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
                if (it->it_op == IT_GETATTR)
                        lookup_it.it_op = IT_GETATTR;
                ll_lookup_finish_locks(it, de);
                it = &lookup_it;
        }

        /* Do real lookup here. */
        op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
                                     de->d_name.len, 0, (it->it_op & IT_CREAT ?
                                                         LUSTRE_OPC_CREATE :
                                                         LUSTRE_OPC_ANY), NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        rc = md_intent_lock(exp, op_data, NULL, 0,  it, 0, &req,
                            ll_md_blocking_ast, 0);
        if (rc >= 0) {
                struct mdt_body *mdt_body;
                struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
                mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

                if (de->d_inode)
                        fid = *ll_inode2fid(de->d_inode);

                /* see if we got same inode, if not - return error */
                if (lu_fid_eq(&fid, &mdt_body->fid1)) {
                        ll_finish_md_op_data(op_data);
                        op_data = NULL;
                        goto revalidate_finish;
                }
                ll_intent_release(it);
        }
        ll_finish_md_op_data(op_data);
        GOTO(out, rc = 0);

out_sa:
        /*
         * For rc == 1 case, should not return directly to prevent losing
         * statahead windows; for rc == 0 case, the "lookup" will be done later.
         */
        if (it != NULL && it->it_op == IT_GETATTR && rc == 1)
                ll_statahead_enter(parent, &de, 1);
        goto mark;
}

int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
{
        int rc;
        ENTRY;

        if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
                struct lookup_intent *it;

                it = ll_convert_intent(&nd->intent.open, nd->flags);
                if (IS_ERR(it))
                        RETURN(0);

                if (it->it_op == (IT_OPEN|IT_CREAT) &&
                    nd->intent.open.flags & O_EXCL) {
                        CDEBUG(D_VFSTRACE, "create O_EXCL, returning 0\n");
                        rc = 0;
                        goto out_it;
                }

                rc = ll_revalidate_it(dentry, nd->flags, it);

                if (rc && (nd->flags & LOOKUP_OPEN) &&
                    it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
// XXX Code duplication with ll_lookup_nd
                        if (S_ISFIFO(dentry->d_inode->i_mode)) {
                                // We cannot call open here as it would
                                // deadlock.
                                ptlrpc_req_finished(
                                               (struct ptlrpc_request *)
                                                  it->d.lustre.it_data);
                        } else {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
/* 2.6.1[456] have a bug in open_namei() that forgets to check
 * nd->intent.open.file for error, so we need to return it as lookup's result
 * instead */
                                struct file *filp;

                                nd->intent.open.file->private_data = it;
                                filp = lookup_instantiate_filp(nd, dentry,NULL);
                                if (IS_ERR(filp)) {
                                        rc = PTR_ERR(filp);
                                }
#else
                                nd->intent.open.file->private_data = it;
                                (void)lookup_instantiate_filp(nd, dentry,NULL);
#endif
                        }
                }
                if (!rc && (nd->flags & LOOKUP_CREATE) &&
                    it_disposition(it, DISP_OPEN_CREATE)) {
                        /* We created something but we may only return
                         * negative dentry here, so save request in dentry,
                         * if lookup will be called later on, it will
                         * pick the request, otherwise it would be freed
                         * with dentry */
                        ll_d2d(dentry)->lld_it = it;
                        it = NULL; /* avoid freeing */
                }

out_it:
                if (it) {
                        ll_intent_release(it);
                        OBD_FREE(it, sizeof(*it));
                }
        } else {
                rc = ll_revalidate_it(dentry, 0, NULL);
        }

        RETURN(rc);
}
Example #12
0
/**
 * \retval +ve	repaired
 * \retval 0	no need to repair
 * \retval -ve	error cases
 */
static int lfsck_namespace_double_scan_one(const struct lu_env *env,
					   struct lfsck_component *com,
					   struct dt_object *child, __u8 flags)
{
	struct lfsck_thread_info *info	  = lfsck_env_info(env);
	struct lu_attr		 *la	  = &info->lti_la;
	struct lu_name		 *cname	  = &info->lti_name;
	struct lu_fid		 *pfid	  = &info->lti_fid;
	struct lu_fid		 *cfid	  = &info->lti_fid2;
	struct lfsck_instance	*lfsck	  = com->lc_lfsck;
	struct lfsck_bookmark	*bk	  = &lfsck->li_bookmark_ram;
	struct lfsck_namespace	*ns	  = com->lc_file_ram;
	struct linkea_data	 ldata	  = { 0 };
	struct thandle		*handle   = NULL;
	bool			 locked   = false;
	bool			 update	  = false;
	int			 rc;
	ENTRY;

	if (com->lc_journal) {

again:
		LASSERT(!locked);

		update = false;
		com->lc_journal = 1;
		handle = dt_trans_create(env, lfsck->li_next);
		if (IS_ERR(handle))
			RETURN(rc = PTR_ERR(handle));

		rc = dt_declare_xattr_set(env, child,
			lfsck_buf_get_const(env, NULL, DEFAULT_LINKEA_SIZE),
			XATTR_NAME_LINK, 0, handle);
		if (rc != 0)
			GOTO(stop, rc);

		rc = dt_trans_start(env, lfsck->li_next, handle);
		if (rc != 0)
			GOTO(stop, rc);

		dt_write_lock(env, child, MOR_TGT_CHILD);
		locked = true;
	}

	if (unlikely(lfsck_is_dead_obj(child)))
		GOTO(stop, rc = 0);

	rc = dt_attr_get(env, child, la, BYPASS_CAPA);
	if (rc == 0)
		rc = lfsck_links_read(env, child, &ldata);
	if (rc != 0) {
		if ((bk->lb_param & LPF_DRYRUN) &&
		    (rc == -EINVAL || rc == -ENODATA))
			rc = 1;

		GOTO(stop, rc);
	}

	linkea_first_entry(&ldata);
	while (ldata.ld_lee != NULL) {
		struct dt_object *parent = NULL;

		rc = lfsck_linkea_entry_unpack(lfsck, &ldata, cname, pfid);
		if (rc > 0)
			update = true;

		if (!fid_is_sane(pfid))
			goto shrink;

		parent = lfsck_object_find(env, lfsck, pfid);
		if (parent == NULL)
			goto shrink;
		else if (IS_ERR(parent))
			GOTO(stop, rc = PTR_ERR(parent));

		if (!dt_object_exists(parent))
			goto shrink;

		/* XXX: Currently, skip remote object, the consistency for
		 *	remote object will be processed in LFSCK phase III. */
		if (dt_object_remote(parent)) {
			lfsck_object_put(env, parent);
			linkea_next_entry(&ldata);
			continue;
		}

		if (unlikely(!dt_try_as_dir(env, parent)))
			goto shrink;

		/* To guarantee the 'name' is terminated with '0'. */
		memcpy(info->lti_key, cname->ln_name, cname->ln_namelen);
		info->lti_key[cname->ln_namelen] = 0;
		cname->ln_name = info->lti_key;
		rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
			       (const struct dt_key *)cname->ln_name,
			       BYPASS_CAPA);
		if (rc != 0 && rc != -ENOENT) {
			lfsck_object_put(env, parent);
			GOTO(stop, rc);
		}

		if (rc == 0) {
			if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
				lfsck_object_put(env, parent);
				linkea_next_entry(&ldata);
				continue;
			}

			goto shrink;
		}

		/* If there is no name entry in the parent dir and the object
		 * link count is less than the linkea entries count, then the
		 * linkea entry should be removed. */
		if (ldata.ld_leh->leh_reccount > la->la_nlink)
			goto shrink;

		/* XXX: For the case of there is a linkea entry, but without
		 *	name entry pointing to the object and its hard links
		 *	count is not less than the object name entries count,
		 *	then seems we should add the 'missed' name entry back
		 *	to namespace, but before LFSCK phase III finished, we
		 *	do not know whether the object has some inconsistency
		 *	on other MDTs. So now, do NOT add the name entry back
		 *	to the namespace, but keep the linkEA entry. LU-2914 */
		lfsck_object_put(env, parent);
		linkea_next_entry(&ldata);
		continue;

shrink:
		if (parent != NULL)
			lfsck_object_put(env, parent);
		if (bk->lb_param & LPF_DRYRUN)
			RETURN(1);

		CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
		       PFID(lfsck_dto2fid(child)), cname->ln_namelen, cname->ln_name,
		       PFID(pfid));
		linkea_del_buf(&ldata, cname);
		update = true;
	}

	if (update) {
		if (!com->lc_journal) {
			com->lc_journal = 1;
			goto again;
		}

		rc = lfsck_links_write(env, child, &ldata, handle);
	}

	GOTO(stop, rc);

stop:
	if (locked) {
	/* XXX: For the case linkea entries count does not match the object hard
	 *	links count, we cannot update the later one simply. Before LFSCK
	 *	phase III finished, we cannot know whether there are some remote
	 *	name entries to be repaired or not. LU-2914 */
		if (rc == 0 && !lfsck_is_dead_obj(child) &&
		    ldata.ld_leh != NULL &&
		    ldata.ld_leh->leh_reccount != la->la_nlink)
			CWARN("%s: the object "DFID" linkEA entry count %u "
			      "may not match its hardlink count %u\n",
			      lfsck_lfsck2name(lfsck), PFID(cfid),
			      ldata.ld_leh->leh_reccount, la->la_nlink);

		dt_write_unlock(env, child);
	}

	if (handle != NULL)
		dt_trans_stop(env, lfsck->li_next, handle);

	if (rc == 0 && update) {
		ns->ln_objs_nlink_repaired++;
		rc = 1;
	}

	return rc;
}
Example #13
0
static int
osd_scrub_check_update(const struct lu_env *env, struct osd_device *dev,
		       const struct lu_fid *fid, uint64_t oid, int val)
{
	struct lustre_scrub *scrub = &dev->od_scrub;
	struct scrub_file *sf = &scrub->os_file;
	struct osd_inconsistent_item *oii = NULL;
	nvlist_t *nvbuf = NULL;
	dnode_t *dn = NULL;
	uint64_t oid2;
	int ops = DTO_INDEX_UPDATE;
	int rc;
	ENTRY;

	down_write(&scrub->os_rwsem);
	scrub->os_new_checked++;
	if (val < 0)
		GOTO(out, rc = val);

	if (scrub->os_in_prior)
		oii = list_entry(scrub->os_inconsistent_items.next,
				 struct osd_inconsistent_item, oii_list);

	if (oid < sf->sf_pos_latest_start && !oii)
		GOTO(out, rc = 0);

	if (oii && oii->oii_insert) {
		ops = DTO_INDEX_INSERT;
		goto zget;
	}

	rc = osd_fid_lookup(env, dev, fid, &oid2);
	if (rc) {
		if (rc != -ENOENT)
			GOTO(out, rc);

		ops = DTO_INDEX_INSERT;

zget:
		rc = __osd_obj2dnode(dev->od_os, oid, &dn);
		if (rc) {
			/* Someone removed the object by race. */
			if (rc == -ENOENT || rc == -EEXIST)
				rc = 0;
			GOTO(out, rc);
		}

		scrub->os_full_speed = 1;
		sf->sf_flags |= SF_INCONSISTENT;
	} else if (oid == oid2) {
		GOTO(out, rc = 0);
	} else {
		struct lustre_mdt_attrs *lma = NULL;
		int size;

		rc = __osd_xattr_load_by_oid(dev, oid2, &nvbuf);
		if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
			goto update;
		if (rc)
			GOTO(out, rc);

		rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
					       (uchar_t **)&lma, &size);
		if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
			goto update;
		if (rc)
			GOTO(out, rc);

		lustre_lma_swab(lma);
		if (unlikely(lu_fid_eq(&lma->lma_self_fid, fid))) {
			CDEBUG(D_LFSCK, "%s: the FID "DFID" is used by "
			       "two objects: %llu and %llu (in OI)\n",
			       osd_name(dev), PFID(fid), oid, oid2);

			GOTO(out, rc = -EEXIST);
		}

update:
		scrub->os_full_speed = 1;
		sf->sf_flags |= SF_INCONSISTENT;
	}

	rc = osd_scrub_refresh_mapping(env, dev, fid, oid, ops, false, NULL);
	if (!rc) {
		if (scrub->os_in_prior)
			sf->sf_items_updated_prior++;
		else
			sf->sf_items_updated++;
	}

	GOTO(out, rc);

out:
	if (nvbuf)
		nvlist_free(nvbuf);

	if (rc < 0) {
		sf->sf_items_failed++;
		if (sf->sf_pos_first_inconsistent == 0 ||
		    sf->sf_pos_first_inconsistent > oid)
			sf->sf_pos_first_inconsistent = oid;
	} else {
		rc = 0;
	}

	/* There may be conflict unlink during the OI scrub,
	 * if happend, then remove the new added OI mapping. */
	if (ops == DTO_INDEX_INSERT && dn && dn->dn_free_txg)
		osd_scrub_refresh_mapping(env, dev, fid, oid,
					  DTO_INDEX_DELETE, false, NULL);
	up_write(&scrub->os_rwsem);

	if (dn)
		osd_dnode_rele(dn);

	if (oii) {
		spin_lock(&scrub->os_lock);
		if (likely(!list_empty(&oii->oii_list)))
			list_del(&oii->oii_list);
		spin_unlock(&scrub->os_lock);
		OBD_FREE_PTR(oii);
	}

	RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
}
Example #14
0
int osd_oi_insert(struct osd_thread_info *info, struct osd_device *osd,
		  const struct lu_fid *fid, const struct osd_inode_id *id,
		  handle_t *th, enum oi_check_flags flags)
{
	struct lu_fid	    *oi_fid = &info->oti_fid2;
	struct osd_inode_id *oi_id  = &info->oti_id2;
	int		     rc     = 0;

	if (unlikely(fid_is_last_id(fid)))
		return osd_obj_spec_insert(info, osd, fid, id, th);

	if (fid_is_on_ost(info, osd, fid, flags) || fid_is_llog(fid))
		return osd_obj_map_insert(info, osd, fid, id, th);

	fid_cpu_to_be(oi_fid, fid);
	osd_id_pack(oi_id, id);
	rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid),
			       (const struct dt_rec *)oi_id,
			       (const struct dt_key *)oi_fid, th, true);
	if (rc != 0) {
		struct inode *inode;
		struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;

		if (rc != -EEXIST)
			return rc;

		rc = osd_oi_lookup(info, osd, fid, oi_id, 0);
		if (rc != 0)
			return rc;

		if (unlikely(osd_id_eq(id, oi_id)))
			return 1;

		/* Check whether the mapping for oi_id is valid or not. */
		inode = osd_iget(info, osd, oi_id);
		if (IS_ERR(inode)) {
			rc = PTR_ERR(inode);
			if (rc == -ENOENT || rc == -ESTALE)
				goto update;
			return rc;
		}

		/* The EA inode should NOT be in OI, old OI scrub may added
		 * such OI mapping by wrong, replace it. */
		if (unlikely(osd_is_ea_inode(inode))) {
			iput(inode);
			goto update;
		}

		rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
		iput(inode);
		if (rc == -ENODATA)
			goto update;

		if (rc != 0)
			return rc;

		if (!(lma->lma_compat & LMAC_NOT_IN_OI) &&
		    lu_fid_eq(fid, &lma->lma_self_fid)) {
			CERROR("%.16s: the FID "DFID" is used by two objects: "
			       "%u/%u %u/%u\n",
			       LDISKFS_SB(osd_sb(osd))->s_es->s_volume_name,
			       PFID(fid), oi_id->oii_ino, oi_id->oii_gen,
			       id->oii_ino, id->oii_gen);
			return -EEXIST;
		}

update:
		osd_id_pack(oi_id, id);
		rc = osd_oi_iam_refresh(info, osd_fid2oi(osd, fid),
					(const struct dt_rec *)oi_id,
					(const struct dt_key *)oi_fid, th, false);
		if (rc != 0)
			return rc;
	}

	if (unlikely(fid_seq(fid) == FID_SEQ_LOCAL_FILE))
		rc = osd_obj_spec_insert(info, osd, fid, id, th);
	return rc;
}
Example #15
0
int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd)
{
	struct mdd_thread_info	*info = mdd_env_info(env);
	struct mdd_object	*root;
	struct dt_object	*o;
	struct lustre_mdt_attrs	*lma;
	struct lu_buf		 buf;
	int			 rc;
	ENTRY;

	/* IGIF FIDS are valid for old 1.8 and 2.[123] ROOT and are kept.
	 * Normal FIDs used by Xyratex 1.8->2.1 upgrade tool are also kept. */
	if (fid_is_igif(&mdd->mdd_root_fid) || fid_is_norm(&mdd->mdd_root_fid))
		RETURN(0);

	/*
	 * FID is supposed to be FID_SEQ_ROOT for:
	 *  - new ldiskfs fs
	 *  - new ZFS fs
	 *  - old ZFS fs, by now processed with osd_convert_root_to_new_seq()
	 */
	if (fid_seq(&mdd->mdd_root_fid) != FID_SEQ_ROOT) {
		CERROR("%s: wrong FID "DFID" is used for /ROOT\n",
		       mdd2obd_dev(mdd)->obd_name,
		       PFID(&mdd->mdd_root_fid));
		RETURN(-EINVAL);
	}

	root = mdd_object_find(env, mdd, &mdd->mdd_root_fid);
	if (IS_ERR(root))
		RETURN(PTR_ERR(root));
	o = mdd_object_child(root);

	CDEBUG(D_OTHER, "/ROOT = "DFID"\n", PFID(&mdd->mdd_root_fid));

	if (dt_try_as_dir(env, o) == 0) {
		CERROR("%s: not a directory\n", mdd2obd_dev(mdd)->obd_name);
		GOTO(out, rc = -ENOTDIR);
	}

	lma = (struct lustre_mdt_attrs *)&info->mti_xattr_buf;
	CLASSERT(sizeof(info->mti_xattr_buf) >= LMA_OLD_SIZE);
	buf.lb_len = LMA_OLD_SIZE;
	buf.lb_buf = lma;
	rc = mdo_xattr_get(env, root, &buf, XATTR_NAME_LMA, BYPASS_CAPA);
	if (rc < 0 && rc != -ENODATA) {
		CERROR("%s: can't fetch LMA: rc = %d\n",
		       mdd2obd_dev(mdd)->obd_name, rc);
		GOTO(out, rc);
	}

	lustre_lma_swab(lma);
	if (lu_fid_eq(&lma->lma_self_fid, &mdd->mdd_root_fid)) {
		/* /ROOT has been converted already
		 * or was correct from the beginning */
		CDEBUG(D_OTHER, "%s: converted already\n",
		       mdd2obd_dev(mdd)->obd_name);
		GOTO(out, rc = 0);
	}

	/* this is supposed to happen only on pre-production ZFS backend */
	if (strcmp(mdd->mdd_bottom->dd_lu_dev.ld_type->ldt_name,
		   LUSTRE_OSD_ZFS_NAME) != 0) {
		CERROR("%s: "DFID" is used on ldiskfs?!\n",
		       mdd2obd_dev(mdd)->obd_name, PFID(&mdd->mdd_root_fid));
		GOTO(out, rc = -ENOTSUPP);
	}

	LCONSOLE_INFO("%s: FID of /ROOT has been changed. "
		      "Please remount the clients.\n",
		      mdd2obd_dev(mdd)->obd_name);

	/* Fill FLDB first */
	rc = mdd_fill_fldb(env, mdd);
	if (rc)
		GOTO(out, rc);

	/* remove ./.. from /ROOT */
	rc = mdd_convert_remove_dots(env, mdd, root);
	if (rc)
		GOTO(out, rc);

	/* go over the directory, fix all the objects */
	rc = mdd_fix_children(env, mdd, o);
	if (rc)
		GOTO(out, rc);

	/* Update LMA on /ROOT.  Done for simplicity in MDD, not in osd-zfs.
	 * Correct LMA will imply the whole directory has been coverted
	 * successfully, otherwise it will be retried on next mount. */
	rc = mdd_convert_lma(env, mdd, root);

out:
	mdd_object_put(env, root);
	RETURN(rc);
}
Example #16
0
static int ll_nfs_test_inode(struct inode *inode, void *opaque)
{
	return lu_fid_eq(&ll_i2info(inode)->lli_fid,
			 (struct lu_fid *)opaque);
}
Example #17
0
/*
 * VBR: save versions in reply: 0 - parent; 1 - child by fid; 2 - target by
 * name.
 */
static int mdt_reint_link(struct mdt_thread_info *info,
                          struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_object       *ms;
        struct mdt_object       *mp;
        struct mdt_lock_handle  *lhs;
        struct mdt_lock_handle  *lhp;
        struct lu_name          *lname;
        int rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "link "DFID" to "DFID"/%s",
                  PFID(rr->rr_fid1), PFID(rr->rr_fid2), rr->rr_name);

        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK))
                RETURN(err_serious(-ENOENT));

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        /* Invalid case so return error immediately instead of
         * processing it */
        if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2))
                RETURN(-EPERM);

        /* step 1: find & lock the target parent dir */
        lhp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lhp, LCK_PW, rr->rr_name,
                          rr->rr_namelen);
        mp = mdt_object_find_lock(info, rr->rr_fid2, lhp,
                                  MDS_INODELOCK_UPDATE);
        if (IS_ERR(mp))
                RETURN(PTR_ERR(mp));

        if (mdt_object_obf(mp))
                GOTO(out_unlock_parent, rc = -EPERM);

        rc = mdt_version_get_check_save(info, mp, 0);
        if (rc)
                GOTO(out_unlock_parent, rc);

        /* step 2: find & lock the source */
        lhs = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(lhs, LCK_EX);

        ms = mdt_object_find(info->mti_env, info->mti_mdt, rr->rr_fid1);
        if (IS_ERR(ms))
                GOTO(out_unlock_parent, rc = PTR_ERR(ms));

	if (mdt_object_remote(ms)) {
		mdt_object_put(info->mti_env, ms);
		CERROR("Target directory "DFID" is on another MDT\n",
			PFID(rr->rr_fid1));
		GOTO(out_unlock_parent, rc = -EXDEV);
	}

        rc = mdt_object_lock(info, ms, lhs, MDS_INODELOCK_UPDATE,
                            MDT_CROSS_LOCK);
        if (rc != 0) {
                mdt_object_put(info->mti_env, ms);
                GOTO(out_unlock_parent, rc);
        }

        /* step 3: link it */
        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_LINK_WRITE);

        info->mti_mos = ms;
        rc = mdt_version_get_check_save(info, ms, 1);
        if (rc)
                GOTO(out_unlock_child, rc);

        lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
        /** check target version by name during replay */
        rc = mdt_lookup_version_check(info, mp, lname, &info->mti_tmp_fid1, 2);
        if (rc != 0 && rc != -ENOENT)
                GOTO(out_unlock_child, rc);
        /* save version of file name for replay, it must be ENOENT here */
        if (!req_is_replay(mdt_info_req(info))) {
                info->mti_ver[2] = ENOENT_VERSION;
                mdt_version_save(mdt_info_req(info), info->mti_ver[2], 2);
        }

        rc = mdo_link(info->mti_env, mdt_object_child(mp),
                      mdt_object_child(ms), lname, ma);

        if (rc == 0)
		mdt_counter_incr(req, LPROC_MDT_LINK);

        EXIT;
out_unlock_child:
        mdt_object_unlock_put(info, ms, lhs, rc);
out_unlock_parent:
        mdt_object_unlock_put(info, mp, lhp, rc);
        return rc;
}
Example #18
0
/*
 * VBR: rename versions in reply: 0 - src parent; 1 - tgt parent;
 * 2 - src child; 3 - tgt child.
 * Update on disk version of src child.
 */
static int mdt_reint_rename(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct md_attr          *ma = &info->mti_attr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct mdt_object       *msrcdir;
        struct mdt_object       *mtgtdir;
        struct mdt_object       *mold;
        struct mdt_object       *mnew = NULL;
        struct mdt_lock_handle  *lh_srcdirp;
        struct mdt_lock_handle  *lh_tgtdirp;
        struct mdt_lock_handle  *lh_oldp;
        struct mdt_lock_handle  *lh_newp;
        struct lu_fid           *old_fid = &info->mti_tmp_fid1;
        struct lu_fid           *new_fid = &info->mti_tmp_fid2;
        struct lustre_handle     rename_lh = { 0 };
        struct lu_name           slname = { 0 };
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        if (info->mti_dlm_req)
                ldlm_request_cancel(req, info->mti_dlm_req, 0);

        DEBUG_REQ(D_INODE, req, "rename "DFID"/%s to "DFID"/%s",
                  PFID(rr->rr_fid1), rr->rr_name,
                  PFID(rr->rr_fid2), rr->rr_tgt);

	rc = mdt_rename_lock(info, &rename_lh);
	if (rc) {
		CERROR("Can't lock FS for rename, rc %d\n", rc);
		RETURN(rc);
	}

        lh_newp = &info->mti_lh[MDT_LH_NEW];

        /* step 1: lock the source dir. */
        lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh_srcdirp, LCK_PW, rr->rr_name,
                          rr->rr_namelen);
        msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp,
                                       MDS_INODELOCK_UPDATE);
        if (IS_ERR(msrcdir))
                GOTO(out_rename_lock, rc = PTR_ERR(msrcdir));

        if (mdt_object_obf(msrcdir))
                GOTO(out_unlock_source, rc = -EPERM);

        rc = mdt_version_get_check_save(info, msrcdir, 0);
        if (rc)
                GOTO(out_unlock_source, rc);

        /* step 2: find & lock the target dir. */
        lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, rr->rr_tgt,
                          rr->rr_tgtlen);
        if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) {
                mdt_object_get(info->mti_env, msrcdir);
                mtgtdir = msrcdir;
                if (lh_tgtdirp->mlh_pdo_hash != lh_srcdirp->mlh_pdo_hash) {
                         rc = mdt_pdir_hash_lock(info, lh_tgtdirp, mtgtdir,
                                                 MDS_INODELOCK_UPDATE);
                         if (rc)
                                 GOTO(out_unlock_source, rc);
                         OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10);
                }
        } else {
                mtgtdir = mdt_object_find(info->mti_env, info->mti_mdt,
                                          rr->rr_fid2);
                if (IS_ERR(mtgtdir))
                        GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir));

                if (mdt_object_obf(mtgtdir))
                        GOTO(out_put_target, rc = -EPERM);

                /* check early, the real version will be saved after locking */
                rc = mdt_version_get_check(info, mtgtdir, 1);
                if (rc)
                        GOTO(out_put_target, rc);

		if (unlikely(mdt_object_remote(mtgtdir))) {
			CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID
			       "on different MDTs\n", PFID(rr->rr_fid1),
			       PFID(rr->rr_fid2));
			GOTO(out_put_target, rc = -EXDEV);
		} else {
			if (likely(mdt_object_exists(mtgtdir))) {
				/* we lock the target dir if it is local */
				rc = mdt_object_lock(info, mtgtdir, lh_tgtdirp,
						     MDS_INODELOCK_UPDATE,
						     MDT_LOCAL_LOCK);
				if (rc != 0)
					GOTO(out_put_target, rc);
				/* get and save correct version after locking */
				mdt_version_get_save(info, mtgtdir, 1);
			} else {
				GOTO(out_put_target, rc = -ESTALE);
			}
		}
	}

        /* step 3: find & lock the old object. */
        lname = mdt_name(info->mti_env, (char *)rr->rr_name, rr->rr_namelen);
        mdt_name_copy(&slname, lname);
        fid_zero(old_fid);
        rc = mdt_lookup_version_check(info, msrcdir, &slname, old_fid, 2);
        if (rc != 0)
                GOTO(out_unlock_target, rc);

        if (lu_fid_eq(old_fid, rr->rr_fid1) || lu_fid_eq(old_fid, rr->rr_fid2))
                GOTO(out_unlock_target, rc = -EINVAL);

	mold = mdt_object_find(info->mti_env, info->mti_mdt, old_fid);
	if (IS_ERR(mold))
		GOTO(out_unlock_target, rc = PTR_ERR(mold));
	if (mdt_object_remote(mold)) {
		mdt_object_put(info->mti_env, mold);
		CDEBUG(D_INFO, "Source child "DFID" is on another MDT\n",
		       PFID(old_fid));
		GOTO(out_unlock_target, rc = -EXDEV);
	}

	if (mdt_object_obf(mold)) {
		mdt_object_put(info->mti_env, mold);
		GOTO(out_unlock_target, rc = -EPERM);
	}

        lh_oldp = &info->mti_lh[MDT_LH_OLD];
        mdt_lock_reg_init(lh_oldp, LCK_EX);
        rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP,
                             MDT_CROSS_LOCK);
        if (rc != 0) {
                mdt_object_put(info->mti_env, mold);
                GOTO(out_unlock_target, rc);
        }

        info->mti_mos = mold;
        /* save version after locking */
        mdt_version_get_save(info, mold, 2);
        mdt_set_capainfo(info, 2, old_fid, BYPASS_CAPA);

        /* step 4: find & lock the new object. */
        /* new target object may not exist now */
        lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen);
        /* lookup with version checking */
        fid_zero(new_fid);
        rc = mdt_lookup_version_check(info, mtgtdir, lname, new_fid, 3);
        if (rc == 0) {
                /* the new_fid should have been filled at this moment */
                if (lu_fid_eq(old_fid, new_fid))
                       GOTO(out_unlock_old, rc);

                if (lu_fid_eq(new_fid, rr->rr_fid1) ||
                    lu_fid_eq(new_fid, rr->rr_fid2))
                        GOTO(out_unlock_old, rc = -EINVAL);

                mdt_lock_reg_init(lh_newp, LCK_EX);
                mnew = mdt_object_find(info->mti_env, info->mti_mdt, new_fid);
                if (IS_ERR(mnew))
                        GOTO(out_unlock_old, rc = PTR_ERR(mnew));

		if (mdt_object_obf(mnew)) {
			mdt_object_put(info->mti_env, mnew);
			GOTO(out_unlock_old, rc = -EPERM);
		}

		if (mdt_object_remote(mnew)) {
			mdt_object_put(info->mti_env, mnew);
			CDEBUG(D_INFO, "src child "DFID" is on another MDT\n",
			       PFID(new_fid));
			GOTO(out_unlock_old, rc = -EXDEV);
		}

                rc = mdt_object_lock(info, mnew, lh_newp,
                                     MDS_INODELOCK_FULL, MDT_CROSS_LOCK);
                if (rc != 0) {
                        mdt_object_put(info->mti_env, mnew);
                        GOTO(out_unlock_old, rc);
                }
                /* get and save version after locking */
                mdt_version_get_save(info, mnew, 3);
                mdt_set_capainfo(info, 3, new_fid, BYPASS_CAPA);
        } else if (rc != -EREMOTE && rc != -ENOENT) {
                GOTO(out_unlock_old, rc);
        } else {
                mdt_enoent_version_save(info, 3);
        }

        /* step 5: rename it */
        mdt_reint_init_ma(info, ma);

        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_RENAME_WRITE);


        /* Check if @dst is subdir of @src. */
        rc = mdt_rename_sanity(info, old_fid);
        if (rc)
                GOTO(out_unlock_new, rc);

        rc = mdo_rename(info->mti_env, mdt_object_child(msrcdir),
                        mdt_object_child(mtgtdir), old_fid, &slname,
                        (mnew ? mdt_object_child(mnew) : NULL),
                        lname, ma);

        /* handle last link of tgt object */
        if (rc == 0) {
		mdt_counter_incr(req, LPROC_MDT_RENAME);
                if (mnew)
                        mdt_handle_last_unlink(info, mnew, ma);

		mdt_rename_counter_tally(info, info->mti_mdt, req,
                                         msrcdir, mtgtdir);
        }

        EXIT;
out_unlock_new:
        if (mnew)
                mdt_object_unlock_put(info, mnew, lh_newp, rc);
out_unlock_old:
        mdt_object_unlock_put(info, mold, lh_oldp, rc);
out_unlock_target:
        mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc);
out_put_target:
        mdt_object_put(info->mti_env, mtgtdir);
out_unlock_source:
        mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc);
out_rename_lock:
	if (lustre_handle_is_used(&rename_lh))
		mdt_rename_unlock(&rename_lh);
	return rc;
}
Example #19
0
/* partial operation for rename */
static int mdt_reint_rename_tgt(struct mdt_thread_info *info)
{
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
        struct md_attr          *ma = &info->mti_attr;
        struct mdt_object       *mtgtdir;
        struct mdt_object       *mtgt = NULL;
        struct mdt_lock_handle  *lh_tgtdir;
        struct mdt_lock_handle  *lh_tgt = NULL;
        struct lu_fid           *tgt_fid = &info->mti_tmp_fid1;
        struct lu_name          *lname;
        int                      rc;
        ENTRY;

        DEBUG_REQ(D_INODE, req, "rename_tgt: insert (%s->"DFID") in "DFID,
                  rr->rr_tgt, PFID(rr->rr_fid2), PFID(rr->rr_fid1));

        /* step 1: lookup & lock the tgt dir. */
        lh_tgtdir = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh_tgtdir, LCK_PW, rr->rr_tgt,
                          rr->rr_tgtlen);
        mtgtdir = mdt_object_find_lock(info, rr->rr_fid1, lh_tgtdir,
                                       MDS_INODELOCK_UPDATE,
                                       MDT_OBJ_MUST_EXIST);
        if (IS_ERR(mtgtdir))
                RETURN(PTR_ERR(mtgtdir));

        /* step 2: find & lock the target object if exists. */
        mdt_set_capainfo(info, 0, rr->rr_fid1, BYPASS_CAPA);
        lname = mdt_name(info->mti_env, (char *)rr->rr_tgt, rr->rr_tgtlen);
        rc = mdo_lookup(info->mti_env, mdt_object_child(mtgtdir),
                        lname, tgt_fid, &info->mti_spec);
        if (rc != 0 && rc != -ENOENT) {
                GOTO(out_unlock_tgtdir, rc);
        } else if (rc == 0) {
                /*
                 * In case of replay that name can be already inserted, check
                 * that and do nothing if so.
                 */
                if (lu_fid_eq(tgt_fid, rr->rr_fid2))
                        GOTO(out_unlock_tgtdir, rc);

                lh_tgt = &info->mti_lh[MDT_LH_CHILD];
                mdt_lock_reg_init(lh_tgt, LCK_EX);

                mtgt = mdt_object_find_lock(info, tgt_fid, lh_tgt,
                                            MDS_INODELOCK_LOOKUP,
                                            MDT_OBJ_MUST_EXIST);
                if (IS_ERR(mtgt))
                        GOTO(out_unlock_tgtdir, rc = PTR_ERR(mtgt));

                mdt_reint_init_ma(info, ma);
                if (!ma->ma_lmm || !ma->ma_cookie)
                        GOTO(out_unlock_tgt, rc = -EINVAL);

                rc = mdo_rename_tgt(info->mti_env, mdt_object_child(mtgtdir),
                                    mdt_object_child(mtgt), rr->rr_fid2,
                                    lname, ma);
        } else /* -ENOENT */ {
                rc = mdo_name_insert(info->mti_env, mdt_object_child(mtgtdir),
                                     lname, rr->rr_fid2, ma);
        }

        /* handle last link of tgt object */
        if (rc == 0 && mtgt)
                mdt_handle_last_unlink(info, mtgt, ma);

        EXIT;
out_unlock_tgt:
        if (mtgt)
                mdt_object_unlock_put(info, mtgt, lh_tgt, rc);
out_unlock_tgtdir:
        mdt_object_unlock_put(info, mtgtdir, lh_tgtdir, rc);
        return rc;
}