Exemplo n.º 1
0
Arquivo: file.c Projeto: DCteam/lustre
void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1,
                         struct inode *i2, const char *name, int namelen,
                         int mode, __u32 opc)
{
        LASSERT(i1 != NULL || i2 != NULL);
        LASSERT(op_data);

        if (i1) {
                ll_i2gids(op_data->op_suppgids, i1, i2);
                op_data->op_fid1 = *ll_inode2fid(i1);
        }else {
                ll_i2gids(op_data->op_suppgids, i2, i1);
                op_data->op_fid1 = *ll_inode2fid(i2);
        }

        if (i2)
                op_data->op_fid2 = *ll_inode2fid(i2);
        else
                fid_zero(&op_data->op_fid2);

        op_data->op_opc = opc;
        op_data->op_name = name;
        op_data->op_mode = mode;
        op_data->op_namelen = namelen;
        op_data->op_mod_time = CFS_CURRENT_TIME;
        op_data->op_data = NULL;
}
Exemplo n.º 2
0
/**
 * \a connectable - is nfsd will connect himself or this should be done
 *		  at lustre
 *
 * The return value is file handle type:
 * 1 -- contains child file handle;
 * 2 -- contains child file handle and parent file handle;
 * 255 -- error.
 */
static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen,
			struct inode *parent)
{
	struct lustre_nfs_fid *nfs_fid = (void *)fh;

	CDEBUG(D_INFO, "encoding for (%lu,"DFID") maxlen=%d minlen=%d\n",
	      inode->i_ino, PFID(ll_inode2fid(inode)), *plen,
	      (int)sizeof(struct lustre_nfs_fid));

	if (*plen < sizeof(struct lustre_nfs_fid) / 4)
		return 255;

	nfs_fid->lnf_child = *ll_inode2fid(inode);
	nfs_fid->lnf_parent = *ll_inode2fid(parent);
	*plen = sizeof(struct lustre_nfs_fid) / 4;

	return LUSTRE_NFS_FID;
}
Exemplo n.º 3
0
int ll_setxattr(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags)
{
        struct inode *inode = dentry->d_inode;

        LASSERT(inode);
        LASSERT(name);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
	       PFID(ll_inode2fid(inode)), inode, name);

        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_SETXATTR, 1);

        if ((strncmp(name, XATTR_TRUSTED_PREFIX,
                     sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
             strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
            (strncmp(name, XATTR_LUSTRE_PREFIX,
                     sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
             strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
                struct lov_user_md *lump = (struct lov_user_md *)value;
                int rc = 0;

                /* Attributes that are saved via getxattr will always have
                 * the stripe_offset as 0.  Instead, the MDS should be
                 * allowed to pick the starting OST index.   b=17846 */
                if (lump != NULL && lump->lmm_stripe_offset == 0)
                        lump->lmm_stripe_offset = -1;

		if (lump != NULL && S_ISREG(inode->i_mode)) {
			struct file	f;
			__u64		it_flags = FMODE_WRITE;
			int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
				sizeof(*lump) : sizeof(struct lov_user_md_v3);

			f.f_dentry = dentry;
			rc = ll_lov_setstripe_ea_info(inode, &f, it_flags, lump,
						      lum_size);
			/* b10667: rc always be 0 here for now */
			rc = 0;
                } else if (S_ISDIR(inode->i_mode)) {
                        rc = ll_dir_setstripe(inode, lump, 0);
                }

                return rc;

        } else if (strcmp(name, XATTR_NAME_LMA) == 0 ||
                   strcmp(name, XATTR_NAME_LINK) == 0)
                return 0;

        return ll_setxattr_common(inode, name, value, size, flags,
                                  OBD_MD_FLXATTR);
}
Exemplo n.º 4
0
int ll_removexattr(struct dentry *dentry, const char *name)
{
        struct inode *inode = dentry->d_inode;

        LASSERT(inode);
        LASSERT(name);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
	       PFID(ll_inode2fid(inode)), inode, name);

        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_REMOVEXATTR, 1);
        return ll_setxattr_common(inode, name, NULL, 0, 0,
                                  OBD_MD_FLXATTRRM);
}
Exemplo n.º 5
0
static int ll_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int count = 0;
	bool printed = false;
	bool retry;
	int result;

	ll_stats_ops_tally(ll_i2sbi(file_inode(vma->vm_file)),
			   LPROC_LL_MKWRITE, 1);

	file_update_time(vma->vm_file);
        do {
                retry = false;
                result = ll_page_mkwrite0(vma, vmf->page, &retry);

                if (!printed && ++count > 16) {
			const struct dentry *de = file_dentry(vma->vm_file);

			CWARN("app(%s): the page %lu of file "DFID" is under"
			      " heavy contention\n",
			      current->comm, vmf->pgoff,
			      PFID(ll_inode2fid(de->d_inode)));
                        printed = true;
                }
        } while (retry);

        switch(result) {
        case 0:
                LASSERT(PageLocked(vmf->page));
                result = VM_FAULT_LOCKED;
                break;
        case -ENODATA:
        case -EFAULT:
                result = VM_FAULT_NOPAGE;
                break;
        case -ENOMEM:
                result = VM_FAULT_OOM;
                break;
        case -EAGAIN:
                result = VM_FAULT_RETRY;
                break;
        default:
                result = VM_FAULT_SIGBUS;
                break;
        }

        return result;
}
Exemplo n.º 6
0
/* find any ldlm lock of the inode in mdc and lov
 * return 0    not find
 *        1    find one
 *      < 0    error */
static int find_cbdata(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        int rc = 0;
        ENTRY;

        LASSERT(inode);
        rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
                            return_if_equal, NULL);
        if (rc != 0)
                 RETURN(rc);

        if (lli->lli_smd)
                rc = obd_find_cbdata(sbi->ll_dt_exp, lli->lli_smd,
                                     return_if_equal, NULL);

        RETURN(rc);
}
Exemplo n.º 7
0
Arquivo: xattr.c Projeto: IDM350/linux
static
int ll_getxattr_common(struct inode *inode, const char *name,
		       void *buffer, size_t size, __u64 valid)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ptlrpc_request *req = NULL;
	struct mdt_body *body;
	int xattr_type, rc;
	void *xdata;
	struct obd_capa *oc;
	struct rmtacl_ctl_entry *rce = NULL;

	CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n",
	       inode->i_ino, inode->i_generation, inode);

	/* listxattr have slightly different behavior from of ext3:
	 * without 'user_xattr' ext3 will list all xattr names but
	 * filtered out "^user..*"; we list them all for simplicity.
	 */
	if (!name) {
		xattr_type = XATTR_OTHER_T;
		goto do_getxattr;
	}

	xattr_type = get_xattr_type(name);
	rc = xattr_type_filter(sbi, xattr_type);
	if (rc)
		return rc;

	/* b15587: ignore security.capability xattr for now */
	if ((xattr_type == XATTR_SECURITY_T &&
	    strcmp(name, "security.capability") == 0))
		return -ENODATA;

	/* LU-549:  Disable security.selinux when selinux is disabled */
	if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
	    strcmp(name, "security.selinux") == 0)
		return -EOPNOTSUPP;

#ifdef CONFIG_FS_POSIX_ACL
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    (xattr_type == XATTR_ACL_ACCESS_T ||
	    xattr_type == XATTR_ACL_DEFAULT_T)) {
		rce = rct_search(&sbi->ll_rct, current_pid());
		if (rce == NULL ||
		    (rce->rce_ops != RMT_LSETFACL &&
		    rce->rce_ops != RMT_LGETFACL &&
		    rce->rce_ops != RMT_RSETFACL &&
		    rce->rce_ops != RMT_RGETFACL))
			return -EOPNOTSUPP;
	}

	/* posix acl is under protection of LOOKUP lock. when calling to this,
	 * we just have path resolution to the target inode, so we have great
	 * chance that cached ACL is uptodate.
	 */
	if (xattr_type == XATTR_ACL_ACCESS_T &&
	    !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
		struct ll_inode_info *lli = ll_i2info(inode);
		struct posix_acl *acl;

		spin_lock(&lli->lli_lock);
		acl = posix_acl_dup(lli->lli_posix_acl);
		spin_unlock(&lli->lli_lock);

		if (!acl)
			return -ENODATA;

		rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
		posix_acl_release(acl);
		return rc;
	}
	if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
		return -ENODATA;
#endif

do_getxattr:
	if (sbi->ll_xattr_cache_enabled && (rce == NULL ||
					    rce->rce_ops == RMT_LGETFACL ||
					    rce->rce_ops == RMT_LSETFACL)) {
		rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
		if (rc < 0)
			GOTO(out_xattr, rc);
	} else {
		oc = ll_mdscapa_get(inode);
		rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
				name, NULL, 0, size, 0, &req);
		capa_put(oc);

		if (rc < 0)
			GOTO(out_xattr, rc);

		body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
		LASSERT(body);

		/* only detect the xattr size */
		if (size == 0)
			GOTO(out, rc = body->eadatasize);

		if (size < body->eadatasize) {
			CERROR("server bug: replied size %u > %u\n",
				body->eadatasize, (int)size);
			GOTO(out, rc = -ERANGE);
		}

		if (body->eadatasize == 0)
			GOTO(out, rc = -ENODATA);

		/* do not need swab xattr data */
		xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
							body->eadatasize);
		if (!xdata)
			GOTO(out, rc = -EFAULT);

		memcpy(buffer, xdata, body->eadatasize);
		rc = body->eadatasize;
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (rce && rce->rce_ops == RMT_LSETFACL) {
		ext_acl_xattr_header *acl;

		acl = lustre_posix_acl_xattr_2ext(
					(posix_acl_xattr_header *)buffer, rc);
		if (IS_ERR(acl))
			GOTO(out, rc = PTR_ERR(acl));

		rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
			    xattr_type, acl);
		if (unlikely(rc < 0)) {
			lustre_ext_acl_xattr_free(acl);
			GOTO(out, rc);
		}
	}
#endif

out_xattr:
	if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
		LCONSOLE_INFO(
			"%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
			ll_get_fsname(inode->i_sb, NULL, 0), rc);
		sbi->ll_flags &= ~LL_SBI_USER_XATTR;
	}
out:
	ptlrpc_req_finished(req);
	return rc;
}
Exemplo n.º 8
0
Arquivo: xattr.c Projeto: IDM350/linux
static
int ll_setxattr_common(struct inode *inode, const char *name,
		       const void *value, size_t size,
		       int flags, __u64 valid)
{
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ptlrpc_request *req = NULL;
	int xattr_type, rc;
	struct obd_capa *oc;
	struct rmtacl_ctl_entry *rce = NULL;
#ifdef CONFIG_FS_POSIX_ACL
	posix_acl_xattr_header *new_value = NULL;
	ext_acl_xattr_header *acl = NULL;
#endif
	const char *pv = value;

	xattr_type = get_xattr_type(name);
	rc = xattr_type_filter(sbi, xattr_type);
	if (rc)
		return rc;

	/* b10667: ignore lustre special xattr for now */
	if ((xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) ||
	    (xattr_type == XATTR_LUSTRE_T && strcmp(name, "lustre.lov") == 0))
		return 0;

	/* b15587: ignore security.capability xattr for now */
	if ((xattr_type == XATTR_SECURITY_T &&
	    strcmp(name, "security.capability") == 0))
		return 0;

	/* LU-549:  Disable security.selinux when selinux is disabled */
	if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
	    strcmp(name, "security.selinux") == 0)
		return -EOPNOTSUPP;

#ifdef CONFIG_FS_POSIX_ACL
	if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
	    (xattr_type == XATTR_ACL_ACCESS_T ||
	    xattr_type == XATTR_ACL_DEFAULT_T)) {
		rce = rct_search(&sbi->ll_rct, current_pid());
		if (rce == NULL ||
		    (rce->rce_ops != RMT_LSETFACL &&
		    rce->rce_ops != RMT_RSETFACL))
			return -EOPNOTSUPP;

		if (rce->rce_ops == RMT_LSETFACL) {
			struct eacl_entry *ee;

			ee = et_search_del(&sbi->ll_et, current_pid(),
					   ll_inode2fid(inode), xattr_type);
			LASSERT(ee != NULL);
			if (valid & OBD_MD_FLXATTR) {
				acl = lustre_acl_xattr_merge2ext(
						(posix_acl_xattr_header *)value,
						size, ee->ee_acl);
				if (IS_ERR(acl)) {
					ee_free(ee);
					return PTR_ERR(acl);
				}
				size =  CFS_ACL_XATTR_SIZE(\
						le32_to_cpu(acl->a_count), \
						ext_acl_xattr);
				pv = (const char *)acl;
			}
			ee_free(ee);
		} else if (rce->rce_ops == RMT_RSETFACL) {
			size = lustre_posix_acl_xattr_filter(
						(posix_acl_xattr_header *)value,
						size, &new_value);
			if (unlikely(size < 0))
				return size;

			pv = (const char *)new_value;
		} else
			return -EOPNOTSUPP;

		valid |= rce_ops2valid(rce->rce_ops);
	}
#endif
	if (sbi->ll_xattr_cache_enabled &&
	    (rce == NULL || rce->rce_ops == RMT_LSETFACL)) {
		rc = ll_xattr_cache_update(inode, name, pv, size, valid, flags);
	} else {
		oc = ll_mdscapa_get(inode);
		rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
				valid, name, pv, size, 0, flags,
				ll_i2suppgid(inode), &req);
		capa_put(oc);
	}

#ifdef CONFIG_FS_POSIX_ACL
	if (new_value != NULL)
		lustre_posix_acl_xattr_free(new_value, size);
	if (acl != NULL)
		lustre_ext_acl_xattr_free(acl);
#endif
	if (rc) {
		if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
			LCONSOLE_INFO("Disabling user_xattr feature because "
				      "it is not supported on the server\n");
			sbi->ll_flags &= ~LL_SBI_USER_XATTR;
		}
		return rc;
	}

	ptlrpc_req_finished(req);
	return 0;
}
Exemplo n.º 9
0
int ll_revalidate_it(struct dentry *de, int lookup_flags,
                     struct lookup_intent *it)
{
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
        struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
        struct obd_export *exp;
        struct inode *parent = de->d_parent->d_inode;
        int rc, first = 0;

        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
               LL_IT2STR(it));

        if (de->d_inode == NULL) {
                /* We can only use negative dentries if this is stat or lookup,
                   for opens and stuff we do need to query server. */
                /* If there is IT_CREAT in intent op set, then we must throw
                   away this negative dentry and actually do the request to
                   kernel to create whatever needs to be created (if possible)*/
                if (it && (it->it_op & IT_CREAT))
                        RETURN(0);

                if (de->d_flags & DCACHE_LUSTRE_INVALID)
                        RETURN(0);

                rc = ll_have_md_lock(parent, MDS_INODELOCK_UPDATE, LCK_MINMODE);
                GOTO(out_sa, rc);
        }

        /* Never execute intents for mount points.
         * Attributes will be fixed up in ll_inode_revalidate_it */
        if (d_mountpoint(de))
                GOTO(out_sa, rc = 1);

        /* need to get attributes in case root got changed from other client */
        if (de == de->d_sb->s_root) {
                rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
                if (rc == 0)
                        rc = 1;
                GOTO(out_sa, rc);
        }

        exp = ll_i2mdexp(de->d_inode);

        OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
        ll_frob_intent(&it, &lookup_it);
        LASSERT(it);

        if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID))
                GOTO(out_sa, rc = 1);

        op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
                                     de->d_name.name, de->d_name.len,
                                     0, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        if ((it->it_op == IT_OPEN) && de->d_inode) {
                struct inode *inode = de->d_inode;
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;

                /*
                 * We used to check for MDS_INODELOCK_OPEN here, but in fact
                 * just having LOOKUP lock is enough to justify inode is the
                 * same. And if inode is the same and we have suitable
                 * openhandle, then there is no point in doing another OPEN RPC
                 * just to throw away newly received openhandle.  There are no
                 * security implications too, if file owner or access mode is
                 * change, LOOKUP lock is revoked.
                 */


                if (it->it_flags & FMODE_WRITE) {
                        och_p = &lli->lli_mds_write_och;
                        och_usecount = &lli->lli_open_fd_write_count;
                } else if (it->it_flags & FMODE_EXEC) {
                        och_p = &lli->lli_mds_exec_och;
                        och_usecount = &lli->lli_open_fd_exec_count;
                } else {
                        och_p = &lli->lli_mds_read_och;
                        och_usecount = &lli->lli_open_fd_read_count;
                }
                /* Check for the proper lock. */
                if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP, LCK_MINMODE))
                        goto do_lock;
                cfs_down(&lli->lli_och_sem);
                if (*och_p) { /* Everything is open already, do nothing */
                        /*(*och_usecount)++;  Do not let them steal our open
                          handle from under us */
                        /* XXX The code above was my original idea, but in case
                           we have the handle, but we cannot use it due to later
                           checks (e.g. O_CREAT|O_EXCL flags set), nobody
                           would decrement counter increased here. So we just
                           hope the lock won't be invalidated in between. But
                           if it would be, we'll reopen the open request to
                           MDS later during file open path */
                        cfs_up(&lli->lli_och_sem);
                        ll_finish_md_op_data(op_data);
                        RETURN(1);
                } else {
                        cfs_up(&lli->lli_och_sem);
                }
        }

        if (it->it_op == IT_GETATTR) {
                first = ll_statahead_enter(parent, &de, 0);
                if (first == 1) {
                        ll_statahead_exit(parent, de, 1);
                        ll_finish_md_op_data(op_data);
                        GOTO(out, rc = 1);
                }
        }

do_lock:
        it->it_create_mode &= ~current->fs->umask;
        it->it_create_mode |= M_CHECK_STALE;
        rc = md_intent_lock(exp, op_data, NULL, 0, it,
                            lookup_flags,
                            &req, ll_md_blocking_ast, 0);
        it->it_create_mode &= ~M_CHECK_STALE;
        ll_finish_md_op_data(op_data);
        if (it->it_op == IT_GETATTR && !first)
                /* If there are too many locks on client-side, then some
                 * locks taken by statahead maybe dropped automatically
                 * before the real "revalidate" using them. */
                ll_statahead_exit(parent, de, req == NULL ? rc : 0);
        else if (first == -EEXIST)
                ll_statahead_mark(parent, de);

        /* If req is NULL, then md_intent_lock only tried to do a lock match;
         * if all was well, it will return 1 if it found locks, 0 otherwise. */
        if (req == NULL && rc >= 0) {
                if (!rc)
                        goto do_lookup;
                GOTO(out, rc);
        }

        if (rc < 0) {
                if (rc != -ESTALE) {
                        CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
                               "%d\n", rc, it->d.lustre.it_status);
                }
                GOTO(out, rc = 0);
        }

revalidate_finish:
        rc = ll_revalidate_it_finish(req, it, de);
        if (rc != 0) {
                if (rc != -ESTALE && rc != -ENOENT)
                        ll_intent_release(it);
                GOTO(out, rc = 0);
        }

        if ((it->it_op & IT_OPEN) && de->d_inode &&
            !S_ISREG(de->d_inode->i_mode) &&
            !S_ISDIR(de->d_inode->i_mode)) {
                ll_release_openhandle(de, it);
        }
        rc = 1;

        /* unfortunately ll_intent_lock may cause a callback and revoke our
         * dentry */
        cfs_spin_lock(&ll_lookup_lock);
        spin_lock(&dcache_lock);
        lock_dentry(de);
        __d_drop(de);
        unlock_dentry(de);
        d_rehash_cond(de, 0);
        spin_unlock(&dcache_lock);
        cfs_spin_unlock(&ll_lookup_lock);

out:
        /* We do not free request as it may be reused during following lookup
         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
         * be freed in ll_lookup_it or in ll_intent_release. But if
         * request was not completed, we need to free it. (bug 5154, 9903) */
        if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                ptlrpc_req_finished(req);
        if (rc == 0) {
                ll_unhash_aliases(de->d_inode);
                /* done in ll_unhash_aliases()
                   dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
        } else {
                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       atomic_read(&de->d_count));
                if (first != 1) {
                        if (de->d_flags & DCACHE_LUSTRE_INVALID) {
                                lock_dentry(de);
                                de->d_flags &= ~DCACHE_LUSTRE_INVALID;
                                unlock_dentry(de);
                        }
                        ll_lookup_finish_locks(it, de);
                }
        }
        RETURN(rc);

        /*
         * This part is here to combat evil-evil race in real_lookup on 2.6
         * kernels.  The race details are: We enter do_lookup() looking for some
         * name, there is nothing in dcache for this name yet and d_lookup()
         * returns NULL.  We proceed to real_lookup(), and while we do this,
         * another process does open on the same file we looking up (most simple
         * reproducer), open succeeds and the dentry is added. Now back to
         * us. In real_lookup() we do d_lookup() again and suddenly find the
         * dentry, so we call d_revalidate on it, but there is no lock, so
         * without this code we would return 0, but unpatched real_lookup just
         * returns -ENOENT in such a case instead of retrying the lookup. Once
         * this is dealt with in real_lookup(), all of this ugly mess can go and
         * we can just check locks in ->d_revalidate without doing any RPCs
         * ever.
         */
do_lookup:
        if (it != &lookup_it) {
                /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
                if (it->it_op == IT_GETATTR)
                        lookup_it.it_op = IT_GETATTR;
                ll_lookup_finish_locks(it, de);
                it = &lookup_it;
        }

        /* Do real lookup here. */
        op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
                                     de->d_name.len, 0, (it->it_op & IT_CREAT ?
                                                         LUSTRE_OPC_CREATE :
                                                         LUSTRE_OPC_ANY), NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        rc = md_intent_lock(exp, op_data, NULL, 0,  it, 0, &req,
                            ll_md_blocking_ast, 0);
        if (rc >= 0) {
                struct mdt_body *mdt_body;
                struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
                mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

                if (de->d_inode)
                        fid = *ll_inode2fid(de->d_inode);

                /* see if we got same inode, if not - return error */
                if (lu_fid_eq(&fid, &mdt_body->fid1)) {
                        ll_finish_md_op_data(op_data);
                        op_data = NULL;
                        goto revalidate_finish;
                }
                ll_intent_release(it);
        }
        ll_finish_md_op_data(op_data);
        GOTO(out, rc = 0);

out_sa:
        /*
         * For rc == 1 case, should not return directly to prevent losing
         * statahead windows; for rc == 0 case, the "lookup" will be done later.
         */
        if (it && it->it_op == IT_GETATTR && rc == 1) {
                first = ll_statahead_enter(parent, &de, 0);
                if (first >= 0)
                        ll_statahead_exit(parent, de, 1);
                else if (first == -EEXIST)
                        ll_statahead_mark(parent, de);
        }

        return rc;
}

#if 0
static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct inode *inode= de->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle *handle;
        struct obd_capa *oc;
        int rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }
        cfs_unlock_kernel();

        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
        oc = ll_mdscapa_get(inode);
        rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
        capa_put(oc);
        if (rc) {
                cfs_lock_kernel();
                memset(handle, 0, sizeof(*handle));
                if (flag == 0)
                        ldd->lld_cwd_count--;
                else
                        ldd->lld_mnt_count--;
                cfs_unlock_kernel();
        }

        EXIT;
        return;
}

static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle handle;
        int count, rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
                /* the "pin" failed */
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag)
                count = --ldd->lld_mnt_count;
        else
                count = --ldd->lld_cwd_count;
        cfs_unlock_kernel();

        if (count != 0) {
                EXIT;
                return;
        }

        rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
        EXIT;
        return;
}
#endif

#ifdef HAVE_VFS_INTENT_PATCHES
int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
{
        int rc;
        ENTRY;

        if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
                rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
        else
                rc = ll_revalidate_it(dentry, 0, NULL);

        RETURN(rc);
}
Exemplo n.º 10
0
Arquivo: rw26.c Projeto: rread/lustre
static ssize_t
ll_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
             loff_t file_offset, unsigned long nr_segs)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_length(iov, nr_segs);
    ssize_t tot_bytes = 0, result = 0;
    unsigned long seg = 0;
    size_t size = MAX_DIO_SIZE;
    ENTRY;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        RETURN(-EINVAL);

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    for (seg = 0; seg < nr_segs; seg++) {
        if (((unsigned long)iov[seg].iov_base & ~PAGE_MASK) ||
                (iov[seg].iov_len & ~PAGE_MASK))
            RETURN(-EINVAL);
    }

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    for (seg = 0; seg < nr_segs; seg++) {
        size_t iov_left = iov[seg].iov_len;
        unsigned long user_addr = (unsigned long)iov[seg].iov_base;

        if (rw == READ) {
            if (file_offset >= i_size_read(inode))
                break;
            if (file_offset + iov_left > i_size_read(inode))
                iov_left = i_size_read(inode) - file_offset;
        }

        while (iov_left > 0) {
            struct page **pages;
            int page_count, max_pages = 0;
            size_t bytes;

            bytes = min(size, iov_left);
            page_count = ll_get_user_pages(rw, user_addr, bytes,
                                           &pages, &max_pages);
            if (likely(page_count > 0)) {
                if (unlikely(page_count <  max_pages))
                    bytes = page_count << PAGE_SHIFT;
                result = ll_direct_IO_seg(env, io, rw, inode,
                                          bytes, file_offset,
                                          pages, page_count);
                ll_free_user_pages(pages, max_pages, rw==READ);
            } else if (page_count == 0) {
                GOTO(out, result = -EFAULT);
            } else {
                result = page_count;
            }
            if (unlikely(result <= 0)) {
                /* If we can't allocate a large enough buffer
                 * for the request, shrink it to a smaller
                 * PAGE_SIZE multiple and try again.
                 * We should always be able to kmalloc for a
                 * page worth of page pointers = 4MB on i386. */
                if (result == -ENOMEM &&
                        size > (PAGE_SIZE / sizeof(*pages)) *
                        PAGE_SIZE) {
                    size = ((((size / 2) - 1) |
                             ~PAGE_MASK) + 1) &
                           PAGE_MASK;
                    CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                           size);
                    continue;
                }

                GOTO(out, result);
            }

            tot_bytes += result;
            file_offset += result;
            iov_left -= result;
            user_addr += result;
        }
    }
out:
    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    RETURN(tot_bytes ? tot_bytes : result);
}
Exemplo n.º 11
0
Arquivo: rw26.c Projeto: rread/lustre
static ssize_t
ll_direct_IO(
# ifndef HAVE_IOV_ITER_RW
    int rw,
# endif
    struct kiocb *iocb, struct iov_iter *iter,
    loff_t file_offset)
{
    struct ll_cl_context *lcc;
    const struct lu_env *env;
    struct cl_io *io;
    struct file *file = iocb->ki_filp;
    struct inode *inode = file->f_mapping->host;
    ssize_t count = iov_iter_count(iter);
    ssize_t tot_bytes = 0, result = 0;
    size_t size = MAX_DIO_SIZE;

    /* FIXME: io smaller than PAGE_SIZE is broken on ia64 ??? */
    if ((file_offset & ~PAGE_MASK) || (count & ~PAGE_MASK))
        return -EINVAL;

    CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), size=%zd (max %lu), "
           "offset=%lld=%llx, pages %zd (max %lu)\n",
           PFID(ll_inode2fid(inode)), inode, count, MAX_DIO_SIZE,
           file_offset, file_offset, count >> PAGE_SHIFT,
           MAX_DIO_SIZE >> PAGE_SHIFT);

    /* Check that all user buffers are aligned as well */
    if (iov_iter_alignment(iter) & ~PAGE_MASK)
        return -EINVAL;

    lcc = ll_cl_find(file);
    if (lcc == NULL)
        RETURN(-EIO);

    env = lcc->lcc_env;
    LASSERT(!IS_ERR(env));
    io = lcc->lcc_io;
    LASSERT(io != NULL);

    /* 0. Need locking between buffered and direct access. and race with
     *    size changing by concurrent truncates and writes.
     * 1. Need inode mutex to operate transient pages.
     */
    if (iov_iter_rw(iter) == READ)
        inode_lock(inode);

    while (iov_iter_count(iter)) {
        struct page **pages;
        size_t offs;

        count = min_t(size_t, iov_iter_count(iter), size);
        if (iov_iter_rw(iter) == READ) {
            if (file_offset >= i_size_read(inode))
                break;

            if (file_offset + count > i_size_read(inode))
                count = i_size_read(inode) - file_offset;
        }

        result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
        if (likely(result > 0)) {
            int n = DIV_ROUND_UP(result + offs, PAGE_SIZE);

            result = ll_direct_IO_seg(env, io, iov_iter_rw(iter),
                                      inode, result, file_offset,
                                      pages, n);
            ll_free_user_pages(pages, n,
                               iov_iter_rw(iter) == READ);

        }
        if (unlikely(result <= 0)) {
            /* If we can't allocate a large enough buffer
             * for the request, shrink it to a smaller
             * PAGE_SIZE multiple and try again.
             * We should always be able to kmalloc for a
             * page worth of page pointers = 4MB on i386. */
            if (result == -ENOMEM &&
                    size > (PAGE_SIZE / sizeof(*pages)) *
                    PAGE_SIZE) {
                size = ((((size / 2) - 1) |
                         ~PAGE_MASK) + 1) & PAGE_MASK;
                CDEBUG(D_VFSTRACE, "DIO size now %zu\n",
                       size);
                continue;
            }

            GOTO(out, result);
        }

        iov_iter_advance(iter, result);
        tot_bytes += result;
        file_offset += result;
    }
out:
    if (iov_iter_rw(iter) == READ)
        inode_unlock(inode);

    if (tot_bytes > 0) {
        struct vvp_io *vio = vvp_env_io(env);

        /* no commit async for direct IO */
        vio->u.write.vui_written += tot_bytes;
    }

    return tot_bytes ? : result;
}
Exemplo n.º 12
0
int lustre_check_remote_perm(struct inode *inode, int mask)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ll_sb_info *sbi = ll_i2sbi(inode);
	struct ptlrpc_request *req = NULL;
	struct mdt_remote_perm *perm;
	struct obd_capa *oc;
	unsigned long save;
	int i = 0, rc;

	do {
		save = lli->lli_rmtperm_time;
		rc = do_check_remote_perm(lli, mask);
		if (!rc || (rc != -ENOENT && i))
			break;

		might_sleep();

		mutex_lock(&lli->lli_rmtperm_mutex);
		/* check again */
		if (save != lli->lli_rmtperm_time) {
			rc = do_check_remote_perm(lli, mask);
			if (!rc || (rc != -ENOENT && i)) {
				mutex_unlock(&lli->lli_rmtperm_mutex);
				break;
			}
		}

		if (i++ > 5) {
			CERROR("check remote perm falls in dead loop!\n");
			LBUG();
		}

		oc = ll_mdscapa_get(inode);
		rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc,
					ll_i2suppgid(inode), &req);
		capa_put(oc);
		if (rc) {
			mutex_unlock(&lli->lli_rmtperm_mutex);
			break;
		}

		perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL,
						   lustre_swab_mdt_remote_perm);
		if (unlikely(perm == NULL)) {
			mutex_unlock(&lli->lli_rmtperm_mutex);
			rc = -EPROTO;
			break;
		}

		rc = ll_update_remote_perm(inode, perm);
		mutex_unlock(&lli->lli_rmtperm_mutex);
		if (rc == -ENOMEM)
			break;

		ptlrpc_req_finished(req);
		req = NULL;
	} while (1);
	ptlrpc_req_finished(req);
	return rc;
}
Exemplo n.º 13
0
ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
        struct inode *inode = dentry->d_inode;
        int rc = 0, rc2 = 0;
        struct lov_mds_md *lmm = NULL;
        struct ptlrpc_request *request = NULL;
        int lmmsize;

        LASSERT(inode);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
	       PFID(ll_inode2fid(inode)), inode);

        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_LISTXATTR, 1);

        rc = ll_getxattr_common(inode, NULL, buffer, size, OBD_MD_FLXATTRLS);
        if (rc < 0)
                GOTO(out, rc);

	if (buffer != NULL) {
		struct ll_sb_info *sbi = ll_i2sbi(inode);
		char *xattr_name = buffer;
		int xlen, rem = rc;

		while (rem > 0) {
			xlen = strnlen(xattr_name, rem - 1) + 1;
			rem -= xlen;
			if (xattr_type_filter(sbi,
					get_xattr_type(xattr_name)) == 0) {
				/* skip OK xattr type
				 * leave it in buffer
				 */
				xattr_name += xlen;
				continue;
			}
			/* move up remaining xattrs in buffer
			 * removing the xattr that is not OK
			 */
			memmove(xattr_name, xattr_name + xlen, rem);
			rc -= xlen;
		}
	}
	if (S_ISREG(inode->i_mode)) {
		if (!ll_i2info(inode)->lli_has_smd)
                        rc2 = -1;
        } else if (S_ISDIR(inode->i_mode)) {
                rc2 = ll_dir_getstripe(inode, &lmm, &lmmsize, &request);
        }

        if (rc2 < 0) {
                GOTO(out, rc2 = 0);
        } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) {
                const int prefix_len = sizeof(XATTR_LUSTRE_PREFIX) - 1;
                const size_t name_len   = sizeof("lov") - 1;
                const size_t total_len  = prefix_len + name_len + 1;

		if (((rc + total_len) > size) && (buffer != NULL)) {
			ptlrpc_req_finished(request);
			return -ERANGE;
		}

		if (buffer != NULL) {
			buffer += rc;
			memcpy(buffer, XATTR_LUSTRE_PREFIX, prefix_len);
			memcpy(buffer + prefix_len, "lov", name_len);
			buffer[prefix_len + name_len] = '\0';
		}
		rc2 = total_len;
	}
out:
        ptlrpc_req_finished(request);
        rc = rc + rc2;

        return rc;
}
Exemplo n.º 14
0
ssize_t ll_getxattr(struct dentry *dentry, const char *name,
                    void *buffer, size_t size)
{
        struct inode *inode = dentry->d_inode;

        LASSERT(inode);
        LASSERT(name);

	CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), xattr %s\n",
	       PFID(ll_inode2fid(inode)), inode, name);

        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_GETXATTR, 1);

        if ((strncmp(name, XATTR_TRUSTED_PREFIX,
                     sizeof(XATTR_TRUSTED_PREFIX) - 1) == 0 &&
             strcmp(name + sizeof(XATTR_TRUSTED_PREFIX) - 1, "lov") == 0) ||
            (strncmp(name, XATTR_LUSTRE_PREFIX,
                     sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
             strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
		struct lov_stripe_md *lsm;
                struct lov_user_md *lump;
                struct lov_mds_md *lmm = NULL;
                struct ptlrpc_request *request = NULL;
                int rc = 0, lmmsize = 0;

                if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
                        return -ENODATA;

                if (size == 0 && S_ISDIR(inode->i_mode)) {
                        /* XXX directory EA is fix for now, optimize to save
                         * RPC transfer */
                        GOTO(out, rc = sizeof(struct lov_user_md));
                }

		lsm = ccc_inode_lsm_get(inode);
		if (lsm == NULL) {
                        if (S_ISDIR(inode->i_mode)) {
                                rc = ll_dir_getstripe(inode, &lmm,
                                                      &lmmsize, &request);
                        } else {
                                rc = -ENODATA;
                        }
                } else {
                        /* LSM is present already after lookup/getattr call.
                         * we need to grab layout lock once it is implemented */
			rc = obd_packmd(ll_i2dtexp(inode), &lmm, lsm);
			lmmsize = rc;
		}
		ccc_inode_lsm_put(inode, lsm);

                if (rc < 0)
                       GOTO(out, rc);

                if (size == 0) {
                        /* used to call ll_get_max_mdsize() forward to get
                         * the maximum buffer size, while some apps (such as
                         * rsync 3.0.x) care much about the exact xattr value
                         * size */
                        rc = lmmsize;
                        GOTO(out, rc);
                }

                if (size < lmmsize) {
                        CERROR("server bug: replied size %d > %d for %s (%s)\n",
                               lmmsize, (int)size, dentry->d_name.name, name);
                        GOTO(out, rc = -ERANGE);
                }

                lump = (struct lov_user_md *)buffer;
                memcpy(lump, lmm, lmmsize);
		/* do not return layout gen for getxattr otherwise it would
		 * confuse tar --xattr by recognizing layout gen as stripe
		 * offset when the file is restored. See LU-2809. */
		lump->lmm_layout_gen = 0;

                rc = lmmsize;
out:
                if (request)
                        ptlrpc_req_finished(request);
                else if (lmm)
                        obd_free_diskmd(ll_i2dtexp(inode), &lmm);
                return(rc);
        }

        return ll_getxattr_common(inode, name, buffer, size, OBD_MD_FLXATTR);
}
Exemplo n.º 15
0
Arquivo: dcache.c Projeto: LLNL/lustre
int ll_revalidate_it(struct dentry *de, int lookup_flags,
                     struct lookup_intent *it)
{
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
        struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
        struct obd_export *exp;
        struct inode *parent = de->d_parent->d_inode;
        int rc;

        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
               LL_IT2STR(it));

        if (de->d_inode == NULL) {
                __u64 ibits;

                /* We can only use negative dentries if this is stat or lookup,
                   for opens and stuff we do need to query server. */
                /* If there is IT_CREAT in intent op set, then we must throw
                   away this negative dentry and actually do the request to
                   kernel to create whatever needs to be created (if possible)*/
                if (it && (it->it_op & IT_CREAT))
                        RETURN(0);

                if (de->d_flags & DCACHE_LUSTRE_INVALID)
                        RETURN(0);

                ibits = MDS_INODELOCK_UPDATE;
                rc = ll_have_md_lock(parent, &ibits, LCK_MINMODE);
                GOTO(out_sa, rc);
        }

        /* Never execute intents for mount points.
         * Attributes will be fixed up in ll_inode_revalidate_it */
        if (d_mountpoint(de))
                GOTO(out_sa, rc = 1);

        /* need to get attributes in case root got changed from other client */
        if (de == de->d_sb->s_root) {
                rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
                if (rc == 0)
                        rc = 1;
                GOTO(out_sa, rc);
        }

        exp = ll_i2mdexp(de->d_inode);

        OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
        ll_frob_intent(&it, &lookup_it);
        LASSERT(it);

        if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID))
                RETURN(1);

        if ((it->it_op == IT_OPEN) && de->d_inode) {
                struct inode *inode = de->d_inode;
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;
                __u64 ibits;

                /*
                 * We used to check for MDS_INODELOCK_OPEN here, but in fact
                 * just having LOOKUP lock is enough to justify inode is the
                 * same. And if inode is the same and we have suitable
                 * openhandle, then there is no point in doing another OPEN RPC
                 * just to throw away newly received openhandle.  There are no
                 * security implications too, if file owner or access mode is
                 * change, LOOKUP lock is revoked.
                 */


                if (it->it_flags & FMODE_WRITE) {
                        och_p = &lli->lli_mds_write_och;
                        och_usecount = &lli->lli_open_fd_write_count;
                } else if (it->it_flags & FMODE_EXEC) {
                        och_p = &lli->lli_mds_exec_och;
                        och_usecount = &lli->lli_open_fd_exec_count;
                } else {
                        och_p = &lli->lli_mds_read_och;
                        och_usecount = &lli->lli_open_fd_read_count;
                }
                /* Check for the proper lock. */
                ibits = MDS_INODELOCK_LOOKUP;
                if (!ll_have_md_lock(inode, &ibits, LCK_MINMODE))
                        goto do_lock;
                cfs_mutex_lock(&lli->lli_och_mutex);
                if (*och_p) { /* Everything is open already, do nothing */
                        /*(*och_usecount)++;  Do not let them steal our open
                          handle from under us */
                        SET_BUT_UNUSED(och_usecount);
                        /* XXX The code above was my original idea, but in case
                           we have the handle, but we cannot use it due to later
                           checks (e.g. O_CREAT|O_EXCL flags set), nobody
                           would decrement counter increased here. So we just
                           hope the lock won't be invalidated in between. But
                           if it would be, we'll reopen the open request to
                           MDS later during file open path */
                        cfs_mutex_unlock(&lli->lli_och_mutex);
                        RETURN(1);
                } else {
                        cfs_mutex_unlock(&lli->lli_och_mutex);
                }
        }

        if (it->it_op == IT_GETATTR) {
                rc = ll_statahead_enter(parent, &de, 0);
                if (rc == 1)
                        goto mark;
        }

do_lock:
        op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
                                     de->d_name.name, de->d_name.len,
                                     0, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        it->it_create_mode &= ~cfs_curproc_umask();
        it->it_create_mode |= M_CHECK_STALE;
        rc = md_intent_lock(exp, op_data, NULL, 0, it,
                            lookup_flags,
                            &req, ll_md_blocking_ast, 0);
        it->it_create_mode &= ~M_CHECK_STALE;
        ll_finish_md_op_data(op_data);

        /* If req is NULL, then md_intent_lock only tried to do a lock match;
         * if all was well, it will return 1 if it found locks, 0 otherwise. */
        if (req == NULL && rc >= 0) {
                if (!rc)
                        goto do_lookup;
                GOTO(out, rc);
        }

        if (rc < 0) {
                if (rc != -ESTALE) {
                        CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
                               "%d\n", rc, it->d.lustre.it_status);
                }
                GOTO(out, rc = 0);
        }

revalidate_finish:
        rc = ll_revalidate_it_finish(req, it, de);
        if (rc != 0) {
                if (rc != -ESTALE && rc != -ENOENT)
                        ll_intent_release(it);
                GOTO(out, rc = 0);
        }

        if ((it->it_op & IT_OPEN) && de->d_inode &&
            !S_ISREG(de->d_inode->i_mode) &&
            !S_ISDIR(de->d_inode->i_mode)) {
                ll_release_openhandle(de, it);
        }
        rc = 1;

        /* unfortunately ll_intent_lock may cause a callback and revoke our
         * dentry */
        ll_dentry_rehash(de, 0);

out:
        /* We do not free request as it may be reused during following lookup
         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
         * be freed in ll_lookup_it or in ll_intent_release. But if
         * request was not completed, we need to free it. (bug 5154, 9903) */
        if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                ptlrpc_req_finished(req);
        if (rc == 0) {
                ll_unhash_aliases(de->d_inode);
                /* done in ll_unhash_aliases()
                   dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
        } else {
                __u64 bits = 0;

                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       atomic_read(&de->d_count));
                ll_set_lock_data(exp, de->d_inode, it, &bits);
                ll_dentry_reset_flags(de, bits);
                ll_lookup_finish_locks(it, de);
        }

mark:
        if (it != NULL && it->it_op == IT_GETATTR && rc > 0)
                ll_statahead_mark(parent, de);
        RETURN(rc);

        /*
         * This part is here to combat evil-evil race in real_lookup on 2.6
         * kernels.  The race details are: We enter do_lookup() looking for some
         * name, there is nothing in dcache for this name yet and d_lookup()
         * returns NULL.  We proceed to real_lookup(), and while we do this,
         * another process does open on the same file we looking up (most simple
         * reproducer), open succeeds and the dentry is added. Now back to
         * us. In real_lookup() we do d_lookup() again and suddenly find the
         * dentry, so we call d_revalidate on it, but there is no lock, so
         * without this code we would return 0, but unpatched real_lookup just
         * returns -ENOENT in such a case instead of retrying the lookup. Once
         * this is dealt with in real_lookup(), all of this ugly mess can go and
         * we can just check locks in ->d_revalidate without doing any RPCs
         * ever.
         */
do_lookup:
        if (it != &lookup_it) {
                /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
                if (it->it_op == IT_GETATTR)
                        lookup_it.it_op = IT_GETATTR;
                ll_lookup_finish_locks(it, de);
                it = &lookup_it;
        }

        /* Do real lookup here. */
        op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
                                     de->d_name.len, 0, (it->it_op & IT_CREAT ?
                                                         LUSTRE_OPC_CREATE :
                                                         LUSTRE_OPC_ANY), NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        rc = md_intent_lock(exp, op_data, NULL, 0,  it, 0, &req,
                            ll_md_blocking_ast, 0);
        if (rc >= 0) {
                struct mdt_body *mdt_body;
                struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
                mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

                if (de->d_inode)
                        fid = *ll_inode2fid(de->d_inode);

                /* see if we got same inode, if not - return error */
                if (lu_fid_eq(&fid, &mdt_body->fid1)) {
                        ll_finish_md_op_data(op_data);
                        op_data = NULL;
                        goto revalidate_finish;
                }
                ll_intent_release(it);
        }
        ll_finish_md_op_data(op_data);
        GOTO(out, rc = 0);

out_sa:
        /*
         * For rc == 1 case, should not return directly to prevent losing
         * statahead windows; for rc == 0 case, the "lookup" will be done later.
         */
        if (it != NULL && it->it_op == IT_GETATTR && rc == 1)
                ll_statahead_enter(parent, &de, 1);
        goto mark;
}

int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
{
        int rc;
        ENTRY;

        if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
                struct lookup_intent *it;

                it = ll_convert_intent(&nd->intent.open, nd->flags);
                if (IS_ERR(it))
                        RETURN(0);

                if (it->it_op == (IT_OPEN|IT_CREAT) &&
                    nd->intent.open.flags & O_EXCL) {
                        CDEBUG(D_VFSTRACE, "create O_EXCL, returning 0\n");
                        rc = 0;
                        goto out_it;
                }

                rc = ll_revalidate_it(dentry, nd->flags, it);

                if (rc && (nd->flags & LOOKUP_OPEN) &&
                    it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
// XXX Code duplication with ll_lookup_nd
                        if (S_ISFIFO(dentry->d_inode->i_mode)) {
                                // We cannot call open here as it would
                                // deadlock.
                                ptlrpc_req_finished(
                                               (struct ptlrpc_request *)
                                                  it->d.lustre.it_data);
                        } else {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
/* 2.6.1[456] have a bug in open_namei() that forgets to check
 * nd->intent.open.file for error, so we need to return it as lookup's result
 * instead */
                                struct file *filp;

                                nd->intent.open.file->private_data = it;
                                filp = lookup_instantiate_filp(nd, dentry,NULL);
                                if (IS_ERR(filp)) {
                                        rc = PTR_ERR(filp);
                                }
#else
                                nd->intent.open.file->private_data = it;
                                (void)lookup_instantiate_filp(nd, dentry,NULL);
#endif
                        }
                }
                if (!rc && (nd->flags & LOOKUP_CREATE) &&
                    it_disposition(it, DISP_OPEN_CREATE)) {
                        /* We created something but we may only return
                         * negative dentry here, so save request in dentry,
                         * if lookup will be called later on, it will
                         * pick the request, otherwise it would be freed
                         * with dentry */
                        ll_d2d(dentry)->lld_it = it;
                        it = NULL; /* avoid freeing */
                }

out_it:
                if (it) {
                        ll_intent_release(it);
                        OBD_FREE(it, sizeof(*it));
                }
        } else {
                rc = ll_revalidate_it(dentry, 0, NULL);
        }

        RETURN(rc);
}
Exemplo n.º 16
0
static int ll_get_name(struct dentry *dentry, char *name,
		       struct dentry *child)
{
	struct inode *dir = d_inode(dentry);
	int rc;
	struct ll_getname_data lgd = {
		.lgd_name = name,
		.lgd_fid = ll_i2info(d_inode(child))->lli_fid,
		.ctx.actor = ll_nfs_get_name_filldir,
	};

	if (!dir || !S_ISDIR(dir->i_mode)) {
		rc = -ENOTDIR;
		goto out;
	}

	if (!dir->i_fop) {
		rc = -EINVAL;
		goto out;
	}

	mutex_lock(&dir->i_mutex);
	rc = ll_dir_read(dir, &lgd.ctx);
	mutex_unlock(&dir->i_mutex);
	if (!rc && !lgd.lgd_found)
		rc = -ENOENT;
out:
	return rc;
}

static struct dentry *ll_fh_to_dentry(struct super_block *sb, struct fid *fid,
				      int fh_len, int fh_type)
{
	struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;

	if (fh_type != LUSTRE_NFS_FID)
		return ERR_PTR(-EPROTO);

	return ll_iget_for_nfs(sb, &nfs_fid->lnf_child, &nfs_fid->lnf_parent);
}

static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
				      int fh_len, int fh_type)
{
	struct lustre_nfs_fid *nfs_fid = (struct lustre_nfs_fid *)fid;

	if (fh_type != LUSTRE_NFS_FID)
		return ERR_PTR(-EPROTO);

	return ll_iget_for_nfs(sb, &nfs_fid->lnf_parent, NULL);
}

static struct dentry *ll_get_parent(struct dentry *dchild)
{
	struct ptlrpc_request *req = NULL;
	struct inode	  *dir = d_inode(dchild);
	struct ll_sb_info     *sbi;
	struct dentry	 *result = NULL;
	struct mdt_body       *body;
	static char	   dotdot[] = "..";
	struct md_op_data     *op_data;
	int		   rc;
	int		      lmmsize;

	LASSERT(dir && S_ISDIR(dir->i_mode));

	sbi = ll_s2sbi(dir->i_sb);

	CDEBUG(D_INFO, "getting parent for (%lu,"DFID")\n",
			dir->i_ino, PFID(ll_inode2fid(dir)));

	rc = ll_get_default_mdsize(sbi, &lmmsize);
	if (rc != 0)
		return ERR_PTR(rc);

	op_data = ll_prep_md_op_data(NULL, dir, NULL, dotdot,
				     strlen(dotdot), lmmsize,
				     LUSTRE_OPC_ANY, NULL);
	if (IS_ERR(op_data))
		return (void *)op_data;

	rc = md_getattr_name(sbi->ll_md_exp, op_data, &req);
	ll_finish_md_op_data(op_data);
	if (rc) {
		CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
		return ERR_PTR(rc);
	}
	body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
	LASSERT(body->valid & OBD_MD_FLID);

	CDEBUG(D_INFO, "parent for "DFID" is "DFID"\n",
		PFID(ll_inode2fid(dir)), PFID(&body->fid1));

	result = ll_iget_for_nfs(dir->i_sb, &body->fid1, NULL);

	ptlrpc_req_finished(req);
	return result;
}

struct export_operations lustre_export_operations = {
       .get_parent = ll_get_parent,
       .encode_fh  = ll_encode_fh,
       .get_name   = ll_get_name,
	.fh_to_dentry = ll_fh_to_dentry,
	.fh_to_parent = ll_fh_to_parent,
};