static int capa_is_sane(const struct lu_env *env, struct osd_device *dev, struct lustre_capa *capa, struct lustre_capa_key *keys) { struct osd_thread_info *oti = osd_oti_get(env); struct obd_capa *oc; int i, rc = 0; ENTRY; oc = capa_lookup(dev->od_capa_hash, capa, 0); if (oc) { if (capa_is_expired(oc)) { DEBUG_CAPA(D_ERROR, capa, "expired"); rc = -ESTALE; } capa_put(oc); RETURN(rc); } spin_lock(&capa_lock); for (i = 0; i < 2; i++) { if (keys[i].lk_keyid == capa->lc_keyid) { oti->oti_capa_key = keys[i]; break; } } spin_unlock(&capa_lock); if (i == 2) { DEBUG_CAPA(D_ERROR, capa, "no matched capa key"); RETURN(-ESTALE); } rc = capa_hmac(oti->oti_capa.lc_hmac, capa, oti->oti_capa_key.lk_key); if (rc) RETURN(rc); if (memcmp(oti->oti_capa.lc_hmac, capa->lc_hmac, sizeof(capa->lc_hmac))) { DEBUG_CAPA(D_ERROR, capa, "HMAC mismatch"); RETURN(-EACCES); } oc = capa_add(dev->od_capa_hash, capa); capa_put(oc); RETURN(0); }
/**
 * Common path for getxattr/listxattr on a Lustre inode.
 *
 * \param inode   inode whose xattr is read
 * \param name    xattr name, or NULL for a listxattr-style request
 * \param buffer  destination buffer (probed for size when @size == 0)
 * \param size    capacity of @buffer; 0 means "return required size only"
 * \param valid   OBD_MD_* flags selecting the MD operation
 *
 * \retval  >=0   number of bytes of xattr data (or required size)
 * \retval  <0    negative errno
 */
static int ll_getxattr_common(struct inode *inode, const char *name, void *buffer, size_t size, __u64 valid)
{
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *req = NULL;
        struct mdt_body *body;
        int xattr_type, rc;
        void *xdata;
        struct obd_capa *oc;
        struct rmtacl_ctl_entry *rce = NULL;

        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
               inode->i_generation, inode);

        /* listxattr have slightly different behavior from of ext3:
         * without 'user_xattr' ext3 will list all xattr names but
         * filtered out "^user..*"; we list them all for simplicity. */
        if (!name) {
                /* NULL name marks a listxattr request. */
                xattr_type = XATTR_OTHER_T;
                goto do_getxattr;
        }

        xattr_type = get_xattr_type(name);
        rc = xattr_type_filter(sbi, xattr_type);
        if (rc)
                return rc;

        /* b15587: ignore security.capability xattr for now */
        if ((xattr_type == XATTR_SECURITY_T &&
             strcmp(name, "security.capability") == 0))
                return -ENODATA;

        /* LU-549: Disable security.selinux when selinux is disabled */
        if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
            strcmp(name, "security.selinux") == 0)
                return -EOPNOTSUPP;

#ifdef CONFIG_FS_POSIX_ACL
        /* Remote-client ACL access is only allowed from a process that
         * registered a remote-ACL control entry with a *SETFACL/*GETFACL
         * operation. */
        if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
            (xattr_type == XATTR_ACL_ACCESS_T ||
             xattr_type == XATTR_ACL_DEFAULT_T)) {
                rce = rct_search(&sbi->ll_rct, current_pid());
                if (rce == NULL ||
                    (rce->rce_ops != RMT_LSETFACL &&
                     rce->rce_ops != RMT_LGETFACL &&
                     rce->rce_ops != RMT_RSETFACL &&
                     rce->rce_ops != RMT_RGETFACL))
                        return -EOPNOTSUPP;
        }

        /* posix acl is under protection of LOOKUP lock. when calling to this,
         * we just have path resolution to the target inode, so we have great
         * chance that cached ACL is uptodate.
         */
        if (xattr_type == XATTR_ACL_ACCESS_T &&
            !(sbi->ll_flags & LL_SBI_RMT_CLIENT)) {
                struct ll_inode_info *lli = ll_i2info(inode);
                struct posix_acl *acl;

                /* Take a reference under lli_lock so the ACL cannot be
                 * replaced while we serialize it. */
                spin_lock(&lli->lli_lock);
                acl = posix_acl_dup(lli->lli_posix_acl);
                spin_unlock(&lli->lli_lock);

                if (!acl)
                        return -ENODATA;

                rc = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
                posix_acl_release(acl);
                return rc;
        }
        if (xattr_type == XATTR_ACL_DEFAULT_T && !S_ISDIR(inode->i_mode))
                return -ENODATA;
#endif

do_getxattr:
        if (sbi->ll_xattr_cache_enabled &&
            (rce == NULL ||
             rce->rce_ops == RMT_LGETFACL || rce->rce_ops == RMT_LSETFACL)) {
                /* Client-side xattr cache path; no RPC, @req stays NULL. */
                rc = ll_xattr_cache_get(inode, name, buffer, size, valid);
                if (rc < 0)
                        GOTO(out_xattr, rc);
        } else {
                /* Fetch from the MDS. */
                oc = ll_mdscapa_get(inode);
                rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                 valid | (rce ? rce_ops2valid(rce->rce_ops) : 0),
                                 name, NULL, 0, size, 0, &req);
                capa_put(oc);

                if (rc < 0)
                        GOTO(out_xattr, rc);

                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
                LASSERT(body);

                /* only detect the xattr size */
                if (size == 0)
                        GOTO(out, rc = body->eadatasize);

                if (size < body->eadatasize) {
                        CERROR("server bug: replied size %u > %u\n",
                               body->eadatasize, (int)size);
                        GOTO(out, rc = -ERANGE);
                }

                if (body->eadatasize == 0)
                        GOTO(out, rc = -ENODATA);

                /* do not need swab xattr data */
                xdata = req_capsule_server_sized_get(&req->rq_pill, &RMF_EADATA,
                                                     body->eadatasize);
                if (!xdata)
                        GOTO(out, rc = -EFAULT);

                memcpy(buffer, xdata, body->eadatasize);
                rc = body->eadatasize;
        }

#ifdef CONFIG_FS_POSIX_ACL
        if (rce && rce->rce_ops == RMT_LSETFACL) {
                /* Remote lsetfacl: convert the fetched posix ACL (in
                 * @buffer, @rc bytes) to extended form and stash it for
                 * the subsequent setxattr to merge against. */
                ext_acl_xattr_header *acl;

                acl = lustre_posix_acl_xattr_2ext(
                                        (posix_acl_xattr_header *)buffer, rc);
                if (IS_ERR(acl))
                        GOTO(out, rc = PTR_ERR(acl));

                rc = ee_add(&sbi->ll_et, current_pid(), ll_inode2fid(inode),
                            xattr_type, acl);
                if (unlikely(rc < 0)) {
                        lustre_ext_acl_xattr_free(acl);
                        GOTO(out, rc);
                }
        }
#endif

out_xattr:
        if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
                /* Server lacks user_xattr support; stop asking. */
                LCONSOLE_INFO(
                        "%s: disabling user_xattr feature because it is not supported on the server: rc = %d\n",
                        ll_get_fsname(inode->i_sb, NULL, 0), rc);
                sbi->ll_flags &= ~LL_SBI_USER_XATTR;
        }
out:
        /* NOTE(review): req may be NULL on the cache path; relies on
         * ptlrpc_req_finished() accepting NULL. */
        ptlrpc_req_finished(req);
        return rc;
}
/**
 * Common path for setxattr/removexattr on a Lustre inode.
 *
 * \param inode  inode whose xattr is modified
 * \param name   xattr name
 * \param value  new value (remote-ACL paths may substitute a converted
 *               buffer via @pv)
 * \param size   length of @value in bytes
 * \param flags  XATTR_CREATE/XATTR_REPLACE style flags, passed through
 * \param valid  OBD_MD_* flags selecting the MD operation
 *
 * \retval 0     success
 * \retval <0    negative errno
 */
static int ll_setxattr_common(struct inode *inode, const char *name, const void *value, size_t size, int flags, __u64 valid)
{
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *req = NULL;
        int xattr_type, rc;
        struct obd_capa *oc;
        struct rmtacl_ctl_entry *rce = NULL;
#ifdef CONFIG_FS_POSIX_ACL
        posix_acl_xattr_header *new_value = NULL;
        ext_acl_xattr_header *acl = NULL;
#endif
        const char *pv = value;

        xattr_type = get_xattr_type(name);
        rc = xattr_type_filter(sbi, xattr_type);
        if (rc)
                return rc;

        /* b10667: ignore lustre special xattr for now */
        if ((xattr_type == XATTR_TRUSTED_T && strcmp(name, "trusted.lov") == 0) ||
            (xattr_type == XATTR_LUSTRE_T && strcmp(name, "lustre.lov") == 0))
                return 0;

        /* b15587: ignore security.capability xattr for now */
        if ((xattr_type == XATTR_SECURITY_T &&
             strcmp(name, "security.capability") == 0))
                return 0;

        /* LU-549: Disable security.selinux when selinux is disabled */
        if (xattr_type == XATTR_SECURITY_T && !selinux_is_enabled() &&
            strcmp(name, "security.selinux") == 0)
                return -EOPNOTSUPP;

#ifdef CONFIG_FS_POSIX_ACL
        if (sbi->ll_flags & LL_SBI_RMT_CLIENT &&
            (xattr_type == XATTR_ACL_ACCESS_T ||
             xattr_type == XATTR_ACL_DEFAULT_T)) {
                rce = rct_search(&sbi->ll_rct, current_pid());
                if (rce == NULL ||
                    (rce->rce_ops != RMT_LSETFACL &&
                     rce->rce_ops != RMT_RSETFACL))
                        return -EOPNOTSUPP;

                if (rce->rce_ops == RMT_LSETFACL) {
                        /* Merge the new posix ACL with the extended ACL
                         * cached by the preceding lgetfacl (see ee_add in
                         * the getxattr path). */
                        struct eacl_entry *ee;

                        ee = et_search_del(&sbi->ll_et, current_pid(),
                                           ll_inode2fid(inode), xattr_type);
                        LASSERT(ee != NULL);
                        if (valid & OBD_MD_FLXATTR) {
                                acl = lustre_acl_xattr_merge2ext(
                                        (posix_acl_xattr_header *)value,
                                        size, ee->ee_acl);
                                if (IS_ERR(acl)) {
                                        ee_free(ee);
                                        return PTR_ERR(acl);
                                }
                                size = CFS_ACL_XATTR_SIZE(
                                                le32_to_cpu(acl->a_count),
                                                ext_acl_xattr);
                                pv = (const char *)acl;
                        }
                        ee_free(ee);
                } else if (rce->rce_ops == RMT_RSETFACL) {
                        /* BUG FIX: @size is size_t (unsigned), so the old
                         * "if (unlikely(size < 0))" check was always false
                         * and a negative error code from the filter was
                         * silently used as a huge xattr size.  Capture the
                         * return value in a signed variable first. */
                        int filtered;

                        filtered = lustre_posix_acl_xattr_filter(
                                        (posix_acl_xattr_header *)value,
                                        size, &new_value);
                        if (unlikely(filtered < 0))
                                return filtered;
                        size = filtered;
                        pv = (const char *)new_value;
                } else
                        return -EOPNOTSUPP;

                valid |= rce_ops2valid(rce->rce_ops);
        }
#endif
        if (sbi->ll_xattr_cache_enabled &&
            (rce == NULL || rce->rce_ops == RMT_LSETFACL)) {
                /* Client-side xattr cache path; no RPC, @req stays NULL. */
                rc = ll_xattr_cache_update(inode, name, pv, size, valid, flags);
        } else {
                oc = ll_mdscapa_get(inode);
                rc = md_setxattr(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                 valid, name, pv, size, 0, flags,
                                 ll_i2suppgid(inode), &req);
                capa_put(oc);
        }

#ifdef CONFIG_FS_POSIX_ACL
        if (new_value != NULL)
                lustre_posix_acl_xattr_free(new_value, size);
        if (acl != NULL)
                lustre_ext_acl_xattr_free(acl);
#endif
        if (rc) {
                if (rc == -EOPNOTSUPP && xattr_type == XATTR_USER_T) {
                        /* Server lacks user_xattr support; stop asking. */
                        LCONSOLE_INFO("Disabling user_xattr feature because "
                                      "it is not supported on the server\n");
                        sbi->ll_flags &= ~LL_SBI_USER_XATTR;
                }
                /* BUG FIX: the old error path returned without finishing
                 * @req, leaking the request when md_setxattr() failed.
                 * The getxattr path finishes @req on all exits, including
                 * when it is NULL, so do the same here. */
                ptlrpc_req_finished(req);
                return rc;
        }

        ptlrpc_req_finished(req);
        return 0;
}
/**
 * ->d_revalidate() work-horse: decide whether dentry @de is still valid,
 * possibly issuing an intent-lock RPC to the MDS.
 *
 * \param de            dentry being revalidated
 * \param lookup_flags  nameidata/lookup flags from the VFS
 * \param it            lookup intent, may be NULL (then a plain IT_LOOKUP
 *                      intent @lookup_it is substituted by ll_frob_intent)
 *
 * \retval 1  dentry is valid
 * \retval 0  dentry is invalid (caller must re-lookup)
 * \retval <0 negative errno from op_data preparation
 */
int ll_revalidate_it(struct dentry *de, int lookup_flags,
                     struct lookup_intent *it)
{
        struct md_op_data *op_data;
        struct ptlrpc_request *req = NULL;
        struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
        struct obd_export *exp;
        struct inode *parent = de->d_parent->d_inode;
        int rc, first = 0;

        ENTRY;
        CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
               LL_IT2STR(it));

        if (de->d_inode == NULL) {
                /* We can only use negative dentries if this is stat or lookup,
                   for opens and stuff we do need to query server. */
                /* If there is IT_CREAT in intent op set, then we must throw
                   away this negative dentry and actually do the request to
                   kernel to create whatever needs to be created (if possible)*/
                if (it && (it->it_op & IT_CREAT))
                        RETURN(0);

                if (de->d_flags & DCACHE_LUSTRE_INVALID)
                        RETURN(0);

                /* Negative dentry is valid only while we hold an UPDATE
                 * lock on the parent directory. */
                rc = ll_have_md_lock(parent, MDS_INODELOCK_UPDATE, LCK_MINMODE);
                GOTO(out_sa, rc);
        }

        /* Never execute intents for mount points.
         * Attributes will be fixed up in ll_inode_revalidate_it */
        if (d_mountpoint(de))
                GOTO(out_sa, rc = 1);

        /* need to get attributes in case root got changed from other client */
        if (de == de->d_sb->s_root) {
                rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
                if (rc == 0)
                        rc = 1;
                GOTO(out_sa, rc);
        }

        exp = ll_i2mdexp(de->d_inode);

        OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
        /* Substitute the local IT_LOOKUP intent when @it is NULL; after
         * this @it is always non-NULL. */
        ll_frob_intent(&it, &lookup_it);
        LASSERT(it);

        /* Plain lookup on a still-valid dentry: nothing more to do. */
        if (it->it_op == IT_LOOKUP && !(de->d_flags & DCACHE_LUSTRE_INVALID))
                GOTO(out_sa, rc = 1);

        op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
                                     de->d_name.name, de->d_name.len,
                                     0, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        if ((it->it_op == IT_OPEN) && de->d_inode) {
                struct inode *inode = de->d_inode;
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle **och_p;
                __u64 *och_usecount;
                /*
                 * We used to check for MDS_INODELOCK_OPEN here, but in fact
                 * just having LOOKUP lock is enough to justify inode is the
                 * same. And if inode is the same and we have suitable
                 * openhandle, then there is no point in doing another OPEN RPC
                 * just to throw away newly received openhandle. There are no
                 * security implications too, if file owner or access mode is
                 * change, LOOKUP lock is revoked.
                 */

                /* Pick the open-handle slot matching the requested mode. */
                if (it->it_flags & FMODE_WRITE) {
                        och_p = &lli->lli_mds_write_och;
                        och_usecount = &lli->lli_open_fd_write_count;
                } else if (it->it_flags & FMODE_EXEC) {
                        och_p = &lli->lli_mds_exec_och;
                        och_usecount = &lli->lli_open_fd_exec_count;
                } else {
                        och_p = &lli->lli_mds_read_och;
                        och_usecount = &lli->lli_open_fd_read_count;
                }
                /* Check for the proper lock. */
                if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP, LCK_MINMODE))
                        goto do_lock;
                cfs_down(&lli->lli_och_sem);
                if (*och_p) { /* Everything is open already, do nothing */
                        /*(*och_usecount)++;  Do not let them steal our open
                          handle from under us */
                        /* XXX The code above was my original idea, but in case
                           we have the handle, but we cannot use it due to later
                           checks (e.g. O_CREAT|O_EXCL flags set), nobody would
                           decrement counter increased here. So we just hope the
                           lock won't be invalidated in between. But if it
                           would be, we'll reopen the open request to MDS later
                           during file open path */
                        cfs_up(&lli->lli_och_sem);
                        ll_finish_md_op_data(op_data);
                        RETURN(1);
                } else {
                        cfs_up(&lli->lli_och_sem);
                }
        }

        if (it->it_op == IT_GETATTR) {
                /* Let statahead serve the getattr if it already fetched
                 * this entry (first == 1 means a statahead hit). */
                first = ll_statahead_enter(parent, &de, 0);
                if (first == 1) {
                        ll_statahead_exit(parent, de, 1);
                        ll_finish_md_op_data(op_data);
                        GOTO(out, rc = 1);
                }
        }

do_lock:
        it->it_create_mode &= ~current->fs->umask;
        it->it_create_mode |= M_CHECK_STALE;
        rc = md_intent_lock(exp, op_data, NULL, 0, it, lookup_flags,
                            &req, ll_md_blocking_ast, 0);
        it->it_create_mode &= ~M_CHECK_STALE;
        ll_finish_md_op_data(op_data);

        if (it->it_op == IT_GETATTR && !first)
                /* If there are too many locks on client-side, then some
                 * locks taken by statahead maybe dropped automatically
                 * before the real "revalidate" using them. */
                ll_statahead_exit(parent, de, req == NULL ? rc : 0);
        else if (first == -EEXIST)
                ll_statahead_mark(parent, de);

        /* If req is NULL, then md_intent_lock only tried to do a lock match;
         * if all was well, it will return 1 if it found locks, 0 otherwise. */
        if (req == NULL && rc >= 0) {
                if (!rc)
                        goto do_lookup;
                GOTO(out, rc);
        }

        if (rc < 0) {
                if (rc != -ESTALE) {
                        CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
                               "%d\n", rc, it->d.lustre.it_status);
                }
                GOTO(out, rc = 0);
        }

revalidate_finish:
        rc = ll_revalidate_it_finish(req, it, de);
        if (rc != 0) {
                if (rc != -ESTALE && rc != -ENOENT)
                        ll_intent_release(it);
                GOTO(out, rc = 0);
        }

        /* Open on a special file (not regular, not directory): drop the
         * open handle we just received; it is not usable here. */
        if ((it->it_op & IT_OPEN) && de->d_inode &&
            !S_ISREG(de->d_inode->i_mode) && !S_ISDIR(de->d_inode->i_mode)) {
                ll_release_openhandle(de, it);
        }
        rc = 1;

        /* unfortunately ll_intent_lock may cause a callback and revoke our
         * dentry */
        /* Drop and re-hash atomically under ll_lookup_lock + dcache_lock
         * so a concurrent lookup cannot observe the dentry unhashed. */
        cfs_spin_lock(&ll_lookup_lock);
        spin_lock(&dcache_lock);
        lock_dentry(de);
        __d_drop(de);
        unlock_dentry(de);
        d_rehash_cond(de, 0);
        spin_unlock(&dcache_lock);
        cfs_spin_unlock(&ll_lookup_lock);

out:
        /* We do not free request as it may be reused during following lookup
         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
         * be freed in ll_lookup_it or in ll_intent_release. But if
         * request was not completed, we need to free it. (bug 5154, 9903) */
        if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                ptlrpc_req_finished(req);
        if (rc == 0) {
                ll_unhash_aliases(de->d_inode);
                /* done in ll_unhash_aliases()
                   dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
        } else {
                CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
                       "inode %p refc %d\n", de->d_name.len,
                       de->d_name.name, de, de->d_parent, de->d_inode,
                       atomic_read(&de->d_count));
                if (first != 1) {
                        if (de->d_flags & DCACHE_LUSTRE_INVALID) {
                                lock_dentry(de);
                                de->d_flags &= ~DCACHE_LUSTRE_INVALID;
                                unlock_dentry(de);
                        }
                        ll_lookup_finish_locks(it, de);
                }
        }
        RETURN(rc);

        /*
         * This part is here to combat evil-evil race in real_lookup on 2.6
         * kernels. The race details are: We enter do_lookup() looking for some
         * name, there is nothing in dcache for this name yet and d_lookup()
         * returns NULL. We proceed to real_lookup(), and while we do this,
         * another process does open on the same file we looking up (most simple
         * reproducer), open succeeds and the dentry is added. Now back to
         * us. In real_lookup() we do d_lookup() again and suddenly find the
         * dentry, so we call d_revalidate on it, but there is no lock, so
         * without this code we would return 0, but unpatched real_lookup just
         * returns -ENOENT in such a case instead of retrying the lookup. Once
         * this is dealt with in real_lookup(), all of this ugly mess can go and
         * we can just check locks in ->d_revalidate without doing any RPCs
         * ever.
         */
do_lookup:
        if (it != &lookup_it) {
                /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
                if (it->it_op == IT_GETATTR)
                        lookup_it.it_op = IT_GETATTR;
                ll_lookup_finish_locks(it, de);
                it = &lookup_it;
        }

        /* Do real lookup here. */
        op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
                                     de->d_name.len, 0,
                                     (it->it_op & IT_CREAT ?
                                      LUSTRE_OPC_CREATE : LUSTRE_OPC_ANY),
                                     NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        rc = md_intent_lock(exp, op_data, NULL, 0, it, 0, &req,
                            ll_md_blocking_ast, 0);
        if (rc >= 0) {
                struct mdt_body *mdt_body;
                struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
                mdt_body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);

                if (de->d_inode)
                        fid = *ll_inode2fid(de->d_inode);

                /* see if we got same inode, if not - return error */
                if (lu_fid_eq(&fid, &mdt_body->fid1)) {
                        ll_finish_md_op_data(op_data);
                        op_data = NULL;
                        goto revalidate_finish;
                }
                ll_intent_release(it);
        }
        ll_finish_md_op_data(op_data);
        GOTO(out, rc = 0);

out_sa:
        /*
         * For rc == 1 case, should not return directly to prevent losing
         * statahead windows; for rc == 0 case, the "lookup" will be done later.
         */
        if (it && it->it_op == IT_GETATTR && rc == 1) {
                first = ll_statahead_enter(parent, &de, 0);
                if (first >= 0)
                        ll_statahead_exit(parent, de, 1);
                else if (first == -EEXIST)
                        ll_statahead_mark(parent, de);
        }

        return rc;
}

#if 0
/* Dead code (compiled out): pin an open handle on the MDS for a dentry
 * used as cwd (flag == 0) or mount point (flag != 0).  Kept for
 * reference only. */
static void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct inode *inode= de->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle *handle;
        struct obd_capa *oc;
        int rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
                cfs_unlock_kernel();
                EXIT;
                return;
        }
        cfs_unlock_kernel();

        handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
        oc = ll_mdscapa_get(inode);
        rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
        capa_put(oc);
        if (rc) {
                /* Pin RPC failed: roll back the count taken above. */
                cfs_lock_kernel();
                memset(handle, 0, sizeof(*handle));
                if (flag == 0)
                        ldd->lld_cwd_count--;
                else
                        ldd->lld_mnt_count--;
                cfs_unlock_kernel();
        }

        EXIT;
        return;
}

/* Dead code (compiled out): counterpart of ll_pin(); unpins when the
 * last reference for the given flag is dropped. */
static void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
{
        struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
        struct ll_dentry_data *ldd = ll_d2d(de);
        struct obd_client_handle handle;
        int count, rc = 0;
        ENTRY;
        LASSERT(ldd);

        cfs_lock_kernel();
        /* Strictly speaking this introduces an additional race: the
         * increments should wait until the rpc has returned.
         * However, given that at present the function is void, this
         * issue is moot. */
        handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
        if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
                /* the "pin" failed */
                cfs_unlock_kernel();
                EXIT;
                return;
        }

        if (flag)
                count = --ldd->lld_mnt_count;
        else
                count = --ldd->lld_cwd_count;
        cfs_unlock_kernel();

        if (count != 0) {
                EXIT;
                return;
        }

        rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
        EXIT;
        return;
}
#endif

#ifdef HAVE_VFS_INTENT_PATCHES
/* ->d_revalidate entry point for kernels with the VFS intent patches:
 * pass the nameidata's intent through only for the last, non-link
 * component of the path. */
int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
{
        int rc;
        ENTRY;

        if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
                rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
        else
                rc = ll_revalidate_it(dentry, 0, NULL);

        RETURN(rc);
}
/**
 * Check permission @mask for the current process on @inode using the
 * cached remote-permission table, refreshing it from the MDS on a miss.
 *
 * Retries until either the cached check succeeds, a non--ENOENT error is
 * seen after at least one refresh, or the refresh itself fails; LBUGs if
 * the refresh loop spins more than ~5 times.
 *
 * \retval 0   permission granted
 * \retval <0  negative errno
 */
int lustre_check_remote_perm(struct inode *inode, int mask)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *req = NULL;
        struct mdt_remote_perm *perm;
        struct obd_capa *oc;
        unsigned long save;
        int i = 0, rc;

        do {
                /* Snapshot the perm-table generation before the lockless
                 * check so a concurrent refresh can be detected below. */
                save = lli->lli_rmtperm_time;
                rc = do_check_remote_perm(lli, mask);
                /* Done if granted, or if the error is final (-ENOENT means
                 * "no cached entry" and only justifies a refresh; other
                 * errors are final once we refreshed at least once, i > 0). */
                if (!rc || (rc != -ENOENT && i))
                        break;

                might_sleep();

                mutex_lock(&lli->lli_rmtperm_mutex);
                /* check again */
                if (save != lli->lli_rmtperm_time) {
                        /* Someone else refreshed while we waited for the
                         * mutex; re-check before issuing our own RPC. */
                        rc = do_check_remote_perm(lli, mask);
                        if (!rc || (rc != -ENOENT && i)) {
                                mutex_unlock(&lli->lli_rmtperm_mutex);
                                break;
                        }
                }

                if (i++ > 5) {
                        CERROR("check remote perm falls in dead loop!\n");
                        LBUG();
                }

                /* Fetch fresh remote permissions from the MDS. */
                oc = ll_mdscapa_get(inode);
                rc = md_get_remote_perm(sbi->ll_md_exp, ll_inode2fid(inode), oc,
                                        ll_i2suppgid(inode), &req);
                capa_put(oc);
                if (rc) {
                        mutex_unlock(&lli->lli_rmtperm_mutex);
                        break;
                }

                perm = req_capsule_server_swab_get(&req->rq_pill, &RMF_ACL,
                                                   lustre_swab_mdt_remote_perm);
                if (unlikely(perm == NULL)) {
                        mutex_unlock(&lli->lli_rmtperm_mutex);
                        rc = -EPROTO;
                        break;
                }

                /* Install the new table while still holding the mutex. */
                rc = ll_update_remote_perm(inode, perm);
                mutex_unlock(&lli->lli_rmtperm_mutex);
                if (rc == -ENOMEM)
                        break;

                /* Release this iteration's reply before looping to
                 * re-check against the freshly installed table. */
                ptlrpc_req_finished(req);
                req = NULL;
        } while (1);

        /* NOTE(review): relies on ptlrpc_req_finished() accepting NULL
         * for the break paths where no request is outstanding. */
        ptlrpc_req_finished(req);
        return rc;
}