/** * * cache_inode_lookup_sw: looks up for a name in a directory indicated by a * cached entry. * * Looks up for a name in a directory indicated by a cached entry. The directory * should have been cached before. * * @param pentry_parent [IN] entry for the parent directory to be managed. * @param name [IN] name of the entry that we are looking for in the * cache. * @param pattr [OUT] attributes for the entry that we have found. * @param ht [IN] hash table used for the cache, unused in this * call. * @param pclient [INOUT] ressource allocated by the client for the nfs * management. * @param pcontext [IN] FSAL credentials * @param pstatus [OUT] returned status. * @param use_mutex [IN] if TRUE, mutex management is done, not if equal * to FALSE. * * @return CACHE_INODE_SUCCESS if operation is a success \n * @return CACHE_INODE_LRU_ERROR if allocation error occured when validating the * entry * */ cache_entry_t *cache_inode_lookup_sw(cache_entry_t * pentry_parent, fsal_name_t * pname, cache_inode_policy_t policy, fsal_attrib_list_t * pattr, hash_table_t * ht, cache_inode_client_t * pclient, fsal_op_context_t * pcontext, cache_inode_status_t * pstatus, int use_mutex) { cache_inode_dir_entry_t dirent_key[1], *dirent; struct avltree_node *dirent_node; cache_inode_dir_entry_t *new_dir_entry; cache_entry_t *pentry = NULL; fsal_status_t fsal_status; #ifdef _USE_MFSL mfsl_object_t object_handle; #else fsal_handle_t object_handle; #endif fsal_handle_t dir_handle; fsal_attrib_list_t object_attributes; cache_inode_create_arg_t create_arg; cache_inode_file_type_t type; cache_inode_status_t cache_status; cache_inode_fsal_data_t new_entry_fsdata; fsal_accessflags_t access_mask = 0; memset(&create_arg, 0, sizeof(create_arg)); memset( (char *)&new_entry_fsdata, 0, sizeof( new_entry_fsdata ) ) ; /* Set the return default to CACHE_INODE_SUCCESS */ *pstatus = CACHE_INODE_SUCCESS; /* stats */ (pclient->stat.nb_call_total)++; (pclient->stat.func_stats.nb_call[CACHE_INODE_LOOKUP])++; /* We 
should not renew entries when !use_mutex (because unless we * make the flag explicit (shared vs. exclusive), we don't know * whether a mutating operation is safe--and, the caller should have * already renewed the entry */ if(use_mutex == TRUE) { P_w(&pentry_parent->lock); cache_status = cache_inode_renew_entry(pentry_parent, pattr, ht, pclient, pcontext, pstatus); if(cache_status != CACHE_INODE_SUCCESS) { V_w(&pentry_parent->lock); inc_func_err_retryable(pclient, CACHE_INODE_GETATTR); LogDebug(COMPONENT_CACHE_INODE, "cache_inode_lookup: returning %d(%s) from cache_inode_renew_entry", *pstatus, cache_inode_err_str(*pstatus)); return NULL; } /* RW Lock goes for writer to reader */ rw_lock_downgrade(&pentry_parent->lock); } if(pentry_parent->internal_md.type != DIRECTORY) { /* Parent is no directory base, return NULL */ *pstatus = CACHE_INODE_NOT_A_DIRECTORY; /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_LOOKUP])++; if(use_mutex == TRUE) V_r(&pentry_parent->lock); return NULL; } /* if name is ".", use the input value */ if(!FSAL_namecmp(pname, (fsal_name_t *) & FSAL_DOT)) { pentry = pentry_parent; } else if(!FSAL_namecmp(pname, (fsal_name_t *) & FSAL_DOT_DOT)) { /* Directory do only have exactly one parent. This a limitation in all FS, * which implies that hard link are forbidden on directories (so that * they exists only in one dir). Because of this, the parent list is * always limited to one element for a dir. Clients SHOULD never * 'lookup( .. )' in something that is no dir. */ pentry = cache_inode_lookupp_no_mutex(pentry_parent, ht, pclient, pcontext, pstatus); } else { /* This is a "regular lookup" (not on "." 
or "..") */ /* Check is user (as specified by the credentials) is authorized to * lookup the directory or not */ access_mask = FSAL_MODE_MASK_SET(FSAL_X_OK) | FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_LIST_DIR); if(cache_inode_access_no_mutex(pentry_parent, access_mask, ht, pclient, pcontext, pstatus) != CACHE_INODE_SUCCESS) { if(use_mutex == TRUE) V_r(&pentry_parent->lock); (pclient->stat.func_stats.nb_err_retryable[CACHE_INODE_GETATTR])++; return NULL; } /* We first try avltree_lookup by name. If that fails, we dispatch to * the fsal. */ FSAL_namecpy(&dirent_key->name, pname); dirent_node = avltree_lookup(&dirent_key->node_n, &pentry_parent->object.dir.dentries); if (dirent_node) { dirent = avltree_container_of(dirent_node, cache_inode_dir_entry_t, node_n); pentry = dirent->pentry; } if(pentry == NULL) { LogDebug(COMPONENT_CACHE_INODE, "Cache Miss detected"); dir_handle = pentry_parent->handle; object_attributes.asked_attributes = pclient->attrmask; #ifdef _USE_MFSL #ifdef _USE_MFSL_ASYNC if(!mfsl_async_is_object_asynchronous(&pentry_parent->mobject)) { /* If the parent is asynchronous, rely on the content of the cache * inode parent entry. * * /!\ If the fs behind the FSAL is touched in a non-nfs way, * there will be huge incoherencies. 
*/ #endif /* _USE_MFSL_ASYNC */ fsal_status = MFSL_lookup(&pentry_parent->mobject, pname, pcontext, &pclient->mfsl_context, &object_handle, &object_attributes, NULL); #ifdef _USE_MFSL_ASYNC } else { LogMidDebug(COMPONENT_CACHE_INODE, "cache_inode_lookup chose to bypass FSAL and trusted his cache for name=%s", pname->name); fsal_status.major = ERR_FSAL_NOENT; fsal_status.minor = ENOENT; } #endif /* _USE_MFSL_ASYNC */ #else fsal_status = FSAL_lookup(&dir_handle, pname, pcontext, &object_handle, &object_attributes); #endif /* _USE_MFSL */ if(FSAL_IS_ERROR(fsal_status)) { *pstatus = cache_inode_error_convert(fsal_status); if(use_mutex == TRUE) V_r(&pentry_parent->lock); /* Stale File Handle to be detected and managed */ if(fsal_status.major == ERR_FSAL_STALE) { cache_inode_status_t kill_status; LogEvent(COMPONENT_CACHE_INODE, "cache_inode_lookup: Stale FSAL File Handle detected for pentry = %p, fsal_status=(%u,%u)", pentry_parent, fsal_status.major, fsal_status.minor); if(cache_inode_kill_entry(pentry_parent, NO_LOCK, ht, pclient, &kill_status) != CACHE_INODE_SUCCESS) LogCrit(COMPONENT_CACHE_INODE, "cache_inode_pentry_parent: Could not kill entry %p, status = %u", pentry_parent, kill_status); *pstatus = CACHE_INODE_FSAL_ESTALE; } /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_LOOKUP])++; return NULL; } type = cache_inode_fsal_type_convert(object_attributes.type); /* If entry is a symlink, this value for be cached */ if(type == SYMBOLIC_LINK) { if( CACHE_INODE_KEEP_CONTENT( policy ) ) #ifdef _USE_MFSL { fsal_status = MFSL_readlink(&object_handle, pcontext, &pclient->mfsl_context, &create_arg.link_content, &object_attributes, NULL); } #else { fsal_status = FSAL_readlink(&object_handle, pcontext, &create_arg.link_content, &object_attributes); } else { fsal_status.major = ERR_FSAL_NO_ERROR ; fsal_status.minor = 0 ; } #endif if(FSAL_IS_ERROR(fsal_status)) { *pstatus = cache_inode_error_convert(fsal_status); if(use_mutex == TRUE) 
V_r(&pentry_parent->lock); /* Stale File Handle to be detected and managed */ if(fsal_status.major == ERR_FSAL_STALE) { cache_inode_status_t kill_status; LogEvent(COMPONENT_CACHE_INODE, "cache_inode_lookup: Stale FSAL File Handle detected for pentry = %p, fsal_status=(%u,%u)", pentry_parent, fsal_status.major, fsal_status.minor); if(cache_inode_kill_entry(pentry_parent, NO_LOCK, ht, pclient, &kill_status) != CACHE_INODE_SUCCESS) LogCrit(COMPONENT_CACHE_INODE, "cache_inode_pentry_parent: Could not kill entry %p, status = %u", pentry_parent, kill_status); *pstatus = CACHE_INODE_FSAL_ESTALE; } /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_LOOKUP])++; return NULL; } } /* Allocation of a new entry in the cache */ #ifdef _USE_MFSL new_entry_fsdata.handle = object_handle.handle; #else new_entry_fsdata.handle = object_handle; #endif new_entry_fsdata.cookie = 0; if((pentry = cache_inode_new_entry( &new_entry_fsdata, &object_attributes, type, policy, &create_arg, NULL, ht, pclient, pcontext, FALSE, /* This is a population and not a creation */ pstatus ) ) == NULL ) { if(use_mutex == TRUE) V_r(&pentry_parent->lock); /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_LOOKUP])++; return NULL; } /* Entry was found in the FSAL, add this entry to the parent * directory */ cache_status = cache_inode_add_cached_dirent(pentry_parent, pname, pentry, ht, &new_dir_entry, pclient, pcontext, pstatus); if(cache_status != CACHE_INODE_SUCCESS && cache_status != CACHE_INODE_ENTRY_EXISTS) { if(use_mutex == TRUE) V_r(&pentry_parent->lock); /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_LOOKUP])++; return NULL; } } /* cached lookup fail (try fsal) */
/**
 * @brief Decide whether the caller may apply a set of attribute changes
 *
 * Works out which access rights the requested changes need and then
 * checks them against the object's ACL or, when the object has no ACL,
 * against its mode bits.  The credentials examined are those of the
 * current operation (op_ctx->creds).
 *
 * This function itself takes no lock; the access checks are delegated
 * to cache_inode_access_no_mutex().
 *
 * @param[in] entry         The object whose attributes would be changed
 * @param[in] attr          Attributes being set; attr->mask selects which
 *                          ones, attr->owner and attr->group are consulted
 *                          for ownership changes
 * @param[in] is_open_write When true, a size change adds no write-data
 *                          requirement
 *
 * @return CACHE_INODE_SUCCESS if the change is permitted,
 *         CACHE_INODE_FSAL_EPERM if it is refused here, otherwise
 *         whatever cache_inode_access_no_mutex() reports.
 */
cache_inode_status_t cache_inode_check_setattr_perms(cache_entry_t *entry,
						     struct attrlist *attr,
						     bool is_open_write)
{
	const struct user_cred *creds = op_ctx->creds;
	cache_inode_status_t status = CACHE_INODE_SUCCESS;
	fsal_accessflags_t needed = 0;
	char *note = "";
	bool is_owner;

	if (isDebug(COMPONENT_CACHE_INODE) || isDebug(COMPONENT_NFS_V4_ACL)) {
		/* One printable fragment per attribute named in the mask;
		 * an explicit time takes precedence over its *_SERVER form.
		 */
		char *txt_size =
		    FSAL_TEST_MASK(attr->mask, ATTR_SIZE) ? " SIZE" : "";
		char *txt_owner =
		    FSAL_TEST_MASK(attr->mask, ATTR_OWNER) ? " OWNER" : "";
		char *txt_group =
		    FSAL_TEST_MASK(attr->mask, ATTR_GROUP) ? " GROUP" : "";
		char *txt_mode =
		    FSAL_TEST_MASK(attr->mask, ATTR_MODE) ? " MODE" : "";
		char *txt_acl =
		    FSAL_TEST_MASK(attr->mask, ATTR_ACL) ? " ACL" : "";
		char *txt_atime =
		    FSAL_TEST_MASK(attr->mask, ATTR_ATIME)
			? " ATIME"
			: FSAL_TEST_MASK(attr->mask, ATTR_ATIME_SERVER)
			      ? " ATIME_SERVER"
			      : "";
		char *txt_mtime =
		    FSAL_TEST_MASK(attr->mask, ATTR_MTIME)
			? " MTIME"
			: FSAL_TEST_MASK(attr->mask, ATTR_MTIME_SERVER)
			      ? " MTIME_SERVER"
			      : "";

		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "SETATTR %s%s%s%s%s%s%s",
			    txt_size, txt_owner, txt_group, txt_mode,
			    txt_acl, txt_mtime, txt_atime);
	}

	/* uid 0 is never refused: nothing else needs to be looked at. */
	if (creds->caller_uid == 0) {
		note = " (Ok for root user)";
		goto out;
	}

	is_owner = (creds->caller_uid == entry->obj_handle->attributes.owner);

	if (FSAL_TEST_MASK(attr->mask, ATTR_OWNER)) {
		/* A non-root caller may only make itself the owner. */
		if (attr->owner != creds->caller_uid) {
			status = CACHE_INODE_FSAL_EPERM;
			note = " (new OWNER was not user)";
			goto out;
		}

		/* For the current owner this is a no-op; anybody else is
		 * taking ownership and needs WRITE_OWNER for that.
		 */
		if (!is_owner) {
			needed |=
			    FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_OWNER);
			LogDebugCIA(COMPONENT_CACHE_INODE,
				    COMPONENT_NFS_V4_ACL,
				    "Change OWNER requires FSAL_ACE_PERM_WRITE_OWNER");
		}
	}

	if (FSAL_TEST_MASK(attr->mask, ATTR_GROUP)) {
		/* The new group must be one the caller belongs to. */
		if (not_in_group_list(attr->group)) {
			status = CACHE_INODE_FSAL_EPERM;
			note = " (user is not member of new GROUP)";
			goto out;
		}

		/* The owner may always do this; others need WRITE_OWNER. */
		if (!is_owner) {
			needed |=
			    FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_OWNER);
			LogDebugCIA(COMPONENT_CACHE_INODE,
				    COMPONENT_NFS_V4_ACL,
				    "Change GROUP requires FSAL_ACE_PERM_WRITE_OWNER");
		}
	}

	/* Every remaining attribute may be changed freely by the owner, and
	 * the owner/group changes above were already validated, so the
	 * owner is done here.
	 */
	if (is_owner) {
		note = " (Ok for owner)";
		goto out;
	}

	/* From here on the caller is neither root nor the owner. */

	if (FSAL_TEST_MASK(attr->mask, ATTR_MODE) ||
	    FSAL_TEST_MASK(attr->mask, ATTR_ACL)) {
		needed |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_ACL);
		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "Change MODE or ACL requires FSAL_ACE_PERM_WRITE_ACL");
	}

	if (!is_open_write && FSAL_TEST_MASK(attr->mask, ATTR_SIZE)) {
		/** @todo: does FSAL_ACE_PERM_APPEND_DATA allow enlarging
		 * the file?
		 */
		needed |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_DATA);
		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "Change SIZE requires FSAL_ACE_PERM_WRITE_DATA");
	}

	if (FSAL_TEST_MASK(attr->mask, ATTR_MTIME) ||
	    FSAL_TEST_MASK(attr->mask, ATTR_ATIME)) {
		/* An explicit timestamp is being set (possibly alongside a
		 * "now" update): that takes WRITE_ATTR.  An atime-only
		 * update to "now" is deliberately not special-cased; see
		 * section 10.6 of RFC 3530 / RFC 5661 on why clients should
		 * not send atime updates.
		 */
		needed |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_ATTR);
		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "Change ATIME and/or MTIME requires FSAL_ACE_PERM_WRITE_ATTR");
	} else if (FSAL_TEST_MASK(attr->mask, ATTR_MTIME_SERVER) ||
		   FSAL_TEST_MASK(attr->mask, ATTR_ATIME_SERVER)) {
		/* Only "set to now" was requested: plain write permission
		 * is enough.
		 */
		needed |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_DATA);
		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "Change ATIME and MTIME to NOW requires FSAL_ACE_PERM_WRITE_DATA");
	}

	if (isDebug(COMPONENT_CACHE_INODE) || isDebug(COMPONENT_NFS_V4_ACL)) {
		char *txt_owner =
		    (needed & FSAL_ACE_PERM_WRITE_OWNER) ? " WRITE_OWNER" : "";
		char *txt_acl =
		    (needed & FSAL_ACE_PERM_WRITE_ACL) ? " WRITE_ACL" : "";
		char *txt_data =
		    (needed & FSAL_ACE_PERM_WRITE_DATA) ? " WRITE_DATA" : "";
		char *txt_attr =
		    (needed & FSAL_ACE_PERM_WRITE_ATTR) ? " WRITE_ATTR" : "";

		LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
			    "Requires %s%s%s%s",
			    txt_owner, txt_acl, txt_data, txt_attr);
	}

	if (entry->obj_handle->attributes.acl) {
		/* An ACL is present: let it decide on the accumulated mask. */
		status = cache_inode_access_no_mutex(entry, needed);
		note = " (checked ACL)";
	} else if (needed != FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_WRITE_DATA)) {
		/* Mode bits can only answer "may write data"; any other
		 * requirement set is refused when there is no ACL.
		 */
		status = CACHE_INODE_FSAL_EPERM;
		note = " (no ACL to check)";
	} else {
		status = cache_inode_access_no_mutex(entry, FSAL_W_OK);
		note = " (checked mode)";
	}

 out:

	LogDebugCIA(COMPONENT_CACHE_INODE, COMPONENT_NFS_V4_ACL,
		    "Access check returned %s%s",
		    cache_inode_err_str(status), note);

	return status;
}
/** * * cache_inode_readdir: Reads a directory. * * Looks up for a name in a directory indicated by a cached entry. The * directory should have been cached before. * * NEW: pending new (C-language) callback based dirent unpacking into caller * structures, we eliminate copies by returning dir entries by pointer. To * permit this, we introduce lock donation. If new int pointer argument * unlock is 1 on return, the calling thread holds pentry read-locked and * must release this lock after dirent processing. * * This is the only function in the cache_inode_readdir.c file that manages MT * safety on a directory cache entry. * * @param pentry [IN] entry for the parent directory to be read. * @param cookie [IN] cookie for the readdir operation (basically the offset). * @param nbwanted [IN] Maximum number of directory entries wanted. * @param peod_met [OUT] A flag to know if end of directory was met during this call. * @param dirent_array [OUT] the resulting array of found directory entries. * @param ht [IN] hash table used for the cache, unused in this call. * @param unlock [OUT] the caller shall release read-lock on pentry when done * @param pclient [INOUT] ressource allocated by the client for the nfs management. * @param pcontext [IN] FSAL credentials * @param pstatus [OUT] returned status. 
* * @return CACHE_INODE_SUCCESS if operation is a success \n * @return CACHE_INODE_BAD_TYPE if entry is not related to a directory\n * @return CACHE_INODE_LRU_ERROR if allocation error occured when validating the entry * */ cache_inode_status_t cache_inode_readdir(cache_entry_t * dir_pentry, cache_inode_policy_t policy, uint64_t cookie, unsigned int nbwanted, unsigned int *pnbfound, uint64_t *pend_cookie, cache_inode_endofdir_t *peod_met, cache_inode_dir_entry_t **dirent_array, hash_table_t *ht, int *unlock, cache_inode_client_t *pclient, fsal_op_context_t *pcontext, cache_inode_status_t *pstatus) { cache_inode_dir_entry_t dirent_key[1], *dirent; struct avltree_node *dirent_node; fsal_accessflags_t access_mask = 0; uint64_t inoff = cookie; int i = 0; /* Guide to parameters: * the first cookie is parameter 'cookie' * number of entries queried is set by parameter 'nbwanted' * number of found entries before eod is return is '*pnbfound' * '*peod_met' is set if end of directory is encountered */ /* Set the return default to CACHE_INODE_SUCCESS */ *pstatus = CACHE_INODE_SUCCESS; dirent = NULL; /* Set initial value of unlock */ *unlock = FALSE; /* end cookie initial value is the begin cookie */ LogFullDebug(COMPONENT_NFS_READDIR, "--> Cache_inode_readdir: setting pend_cookie to cookie=%" PRIu64, cookie); *pend_cookie = cookie; /* stats */ pclient->stat.nb_call_total++; (pclient->stat.func_stats.nb_call[CACHE_INODE_READDIR])++; LogFullDebug(COMPONENT_NFS_READDIR, "--> Cache_inode_readdir: parameters are cookie=%"PRIu64 "nbwanted=%u", cookie, nbwanted); /* Sanity check */ if(nbwanted == 0) { /* Asking for nothing is not a crime !!!!! 
* build a 'dummy' return in this case */ *pstatus = CACHE_INODE_SUCCESS; *pnbfound = 0; *peod_met = TO_BE_CONTINUED; /* stats */ (pclient->stat.func_stats.nb_success[CACHE_INODE_READDIR])++; return *pstatus; } /* Force dir content invalidation if policy enforced no name cache */ if( !CACHE_INODE_KEEP_CONTENT( dir_pentry->policy ) ) return cache_inode_readdir_nonamecache( dir_pentry, policy, cookie, nbwanted, pnbfound, pend_cookie, peod_met, dirent_array, ht, unlock, pclient, pcontext, pstatus ) ; P_w(&dir_pentry->lock); /* Renew the entry (to avoid having it being garbagged */ if(cache_inode_renew_entry(dir_pentry, NULL, ht, pclient, pcontext, pstatus) != CACHE_INODE_SUCCESS) { (pclient->stat.func_stats.nb_err_retryable[CACHE_INODE_GETATTR])++; V_w(&dir_pentry->lock); return *pstatus; } /* readdir can be done only with a directory */ if(dir_pentry->internal_md.type != DIRECTORY) { V_w(&dir_pentry->lock); *pstatus = CACHE_INODE_BAD_TYPE; /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_READDIR])++; return *pstatus; } /* Check is user (as specified by the credentials) is authorized to read * the directory or not */ access_mask = FSAL_MODE_MASK_SET(FSAL_R_OK) | FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_LIST_DIR); if(cache_inode_access_no_mutex(dir_pentry, access_mask, ht, pclient, pcontext, pstatus) != CACHE_INODE_SUCCESS) { V_w(&dir_pentry->lock); (pclient->stat.func_stats.nb_err_retryable[CACHE_INODE_READDIR])++; return *pstatus; } /* Is the directory fully cached (this is done if a readdir call is done on the directory) */ if(dir_pentry->object.dir.has_been_readdir != CACHE_INODE_YES) { /* populate the cache */ if(cache_inode_readdir_populate(dir_pentry, policy, ht, pclient, pcontext, pstatus) != CACHE_INODE_SUCCESS) { /* stats */ (pclient->stat.func_stats.nb_err_unrecover[CACHE_INODE_READDIR])++; V_w(&dir_pentry->lock); return *pstatus; } } /* deal with dentry cache invalidates */ revalidate_cookie_cache(dir_pentry, pclient); /* Downgrade Writer lock to a 
reader one. */ rw_lock_downgrade(&dir_pentry->lock); /* deal with initial cookie value: * 1. cookie is invalid (-should- be checked by caller) * 2. cookie is 0 (first cookie) -- ok * 3. cookie is > than highest dirent position (error) * 4. cookie <= highest dirent position but > highest cached cookie * (currently equivalent to #2, because we pre-populate the cookie avl) * 5. cookie is in cached range -- ok */ if (cookie > 0) { if (cookie < 3) { *pstatus = CACHE_INODE_BAD_COOKIE; V_r(&dir_pentry->lock); return *pstatus; } if ((inoff-3) > avltree_size(&dir_pentry->object.dir.dentries)) { LogCrit(COMPONENT_NFS_V4, "Bad initial cookie %"PRIu64, inoff); *pstatus = CACHE_INODE_BAD_COOKIE; V_r(&dir_pentry->lock); return *pstatus; } /* we assert this can now succeed */ dirent_key->cookie = inoff; dirent_node = avltree_lookup(&dirent_key->node_c, &dir_pentry->object.dir.cookies); if (! dirent_node) { LogCrit(COMPONENT_NFS_READDIR, "%s: seek to cookie=%"PRIu64" fail", __func__, inoff); *pstatus = CACHE_INODE_NOT_FOUND; V_r(&dir_pentry->lock); return *pstatus; } /* switch avls */ dirent = avltree_container_of(dirent_node, cache_inode_dir_entry_t, node_c); dirent_node = &dirent->node_n; /* client wants the cookie -after- the last we sent, and * the Linux 3.0 and 3.1.0-rc7 clients misbehave if we * resend the last one */ dirent_node = avltree_next(dirent_node); } else { /* initial readdir */ dirent_node = avltree_first(&dir_pentry->object.dir.dentries); } LogFullDebug(COMPONENT_NFS_READDIR, "About to readdir in cache_inode_readdir: pentry=%p " "cookie=%"PRIu64, dir_pentry, cookie); /* Now satisfy the request from the cached readdir--stop when either * the requested sequence or dirent sequence is exhausted */ *pnbfound = 0; *peod_met = TO_BE_CONTINUED; for(i = 0; i < nbwanted; ++i) { if (!dirent_node) break; dirent = avltree_container_of(dirent_node, cache_inode_dir_entry_t, node_n); dirent_array[i] = dirent; (*pnbfound)++; dirent_node = avltree_next(dirent_node); } if 
(*pnbfound > 0) { if (!dirent) { LogCrit(COMPONENT_CACHE_INODE, "cache_inode_readdir: " "UNEXPECTED CASE: dirent is NULL whereas nbfound>0"); *pstatus = CACHE_INODE_INCONSISTENT_ENTRY; return CACHE_INODE_INCONSISTENT_ENTRY; } *pend_cookie = dirent->cookie; } if (! dirent_node) *peod_met = END_OF_DIR; *pstatus = cache_inode_valid(dir_pentry, CACHE_INODE_OP_GET, pclient); /* stats */ if(*pstatus != CACHE_INODE_SUCCESS) { (pclient->stat.func_stats.nb_err_retryable[CACHE_INODE_READDIR])++; V_r(&dir_pentry->lock); } else { (pclient->stat.func_stats.nb_success[CACHE_INODE_READDIR])++; *unlock = TRUE; } return *pstatus; } /* cache_inode_readdir */
/**
 * @brief Read a directory from the cache, invoking a callback per entry
 *
 * Checks that the caller may list the directory, makes sure the cached
 * directory content is populated, then walks the cached entries from the
 * position given by cookie.  For each entry the cache entry is fetched by
 * key and cache_inode_getattr() is called with cb and a
 * cache_inode_readdir_cb_parms carrying the entry name, its cookie, the
 * caller's opaque pointer and whether attributes may be returned.
 *
 * The walk stops at the end of the directory, when the callback clears
 * in_result, or on an error other than a vanished/stale entry.  Entries
 * that cannot be found or stay stale after one retry are skipped and the
 * directory's CACHE_INODE_TRUST_CONTENT flag is cleared.
 *
 * The directory's content_lock is held (read, or write if a populate was
 * needed) for the duration of the walk and released before returning.
 *
 * @param[in]  directory The directory to read; must be of type DIRECTORY
 * @param[in]  cookie    0 to start at the first entry; otherwise the walk
 *                       resumes at the next active entry after this cookie.
 *                       Values 1 and 2 are rejected.
 * @param[out] nbfound   Number of entries for which cache_inode_getattr()
 *                       succeeded.  Set to 0 on every early return.
 * @param[out] eod_met   true if the end of the directory was reached with
 *                       every visited entry accepted, false otherwise
 * @param[in]  attrmask  Attributes the caller wants.  If non-zero, an extra
 *                       permission check decides attr_allowed; ATTR_ACL
 *                       additionally requires READ_ACL.
 * @param[in]  cb        Callback handed to cache_inode_getattr()
 * @param[in]  opaque    Caller data passed through in the callback parms
 *
 * @return CACHE_INODE_SUCCESS on success,
 *         CACHE_INODE_NOT_A_DIRECTORY if directory is not a directory,
 *         CACHE_INODE_BAD_COOKIE if cookie cannot be used as a position,
 *         otherwise the status of the helper that failed.
 */
cache_inode_status_t cache_inode_readdir(cache_entry_t *directory,
					 uint64_t cookie,
					 unsigned int *nbfound,
					 bool *eod_met,
					 attrmask_t attrmask,
					 cache_inode_getattr_cb_t cb,
					 void *opaque)
{
	/* The entry being examined */
	cache_inode_dir_entry_t *dirent = NULL;
	/* The node in the tree being traversed */
	struct avltree_node *dirent_node;
	/* The access mask corresponding to permission to list directory
	   entries */
	fsal_accessflags_t access_mask =
	    (FSAL_MODE_MASK_SET(FSAL_R_OK) |
	     FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_LIST_DIR));
	/* The access mask needed to also hand out entry attributes */
	fsal_accessflags_t access_mask_attr =
	    (FSAL_MODE_MASK_SET(FSAL_R_OK) | FSAL_MODE_MASK_SET(FSAL_X_OK) |
	     FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_LIST_DIR) |
	     FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_EXECUTE));
	cache_inode_status_t status = CACHE_INODE_SUCCESS;
	cache_inode_status_t attr_status;
	struct cache_inode_readdir_cb_parms cb_parms = { opaque, NULL,
		true, 0, true
	};

	LogFullDebug(COMPONENT_NFS_READDIR, "Enter....");

	/* Give the out-parameters defined values up front.  Several exits
	 * below (including the successful "EOD because empty result" one)
	 * leave before the traversal, and the caller must not be handed an
	 * unset entry count.
	 */
	*nbfound = 0;
	*eod_met = false;

	/* readdir can be done only with a directory */
	if (directory->type != DIRECTORY) {
		status = CACHE_INODE_NOT_A_DIRECTORY;
		/* no lock acquired so far, just return status */
		LogFullDebug(COMPONENT_NFS_READDIR, "Not a directory");
		return status;
	}

	/* cache_inode_lock_trust_attrs can return an error, and no lock will
	 * be acquired */
	status = cache_inode_lock_trust_attrs(directory, false);
	if (status != CACHE_INODE_SUCCESS) {
		LogDebug(COMPONENT_NFS_READDIR,
			 "cache_inode_lock_trust_attrs status=%s",
			 cache_inode_err_str(status));
		return status;
	}

	/* Adjust access mask if ACL is asked for.
	 * NOTE: We intentionally do NOT check ACE4_READ_ATTR.
	 */
	if ((attrmask & ATTR_ACL) != 0) {
		access_mask |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_READ_ACL);
		access_mask_attr |= FSAL_ACE4_MASK_SET(FSAL_ACE_PERM_READ_ACL);
	}

	/* Check if user (as specified by the credentials) is authorized to
	 * read the directory or not */
	status = cache_inode_access_no_mutex(directory, access_mask);
	if (status != CACHE_INODE_SUCCESS) {
		LogFullDebug(COMPONENT_NFS_READDIR,
			     "permission check for directory status=%s",
			     cache_inode_err_str(status));
		PTHREAD_RWLOCK_unlock(&directory->attr_lock);
		return status;
	}

	if (attrmask != 0) {
		/* Check for access permission to get attributes.  A failure
		 * here is not fatal: it only turns off attr_allowed for the
		 * callback. */
		attr_status = cache_inode_access_no_mutex(directory,
							  access_mask_attr);
		if (attr_status != CACHE_INODE_SUCCESS) {
			LogFullDebug(COMPONENT_NFS_READDIR,
				     "permission check for attributes "
				     "status=%s",
				     cache_inode_err_str(attr_status));
		}
	} else {
		/* No attributes requested, we don't need permission */
		attr_status = CACHE_INODE_SUCCESS;
	}

	/* Take the content lock before letting go of the attribute lock */
	PTHREAD_RWLOCK_rdlock(&directory->content_lock);
	PTHREAD_RWLOCK_unlock(&directory->attr_lock);

	if (!((directory->flags & CACHE_INODE_TRUST_CONTENT)
	      && (directory->flags & CACHE_INODE_DIR_POPULATED))) {
		/* Content must be (re)loaded: trade the read lock for a
		 * write lock, which is then kept until unlock_dir. */
		PTHREAD_RWLOCK_unlock(&directory->content_lock);
		PTHREAD_RWLOCK_wrlock(&directory->content_lock);
		status = cache_inode_readdir_populate(directory);
		if (status != CACHE_INODE_SUCCESS) {
			LogFullDebug(COMPONENT_NFS_READDIR,
				     "cache_inode_readdir_populate status=%s",
				     cache_inode_err_str(status));
			goto unlock_dir;
		}
	}

	/* deal with initial cookie value:
	 * 1. cookie is invalid (-should- be checked by caller)
	 * 2. cookie is 0 (first cookie) -- ok
	 * 3. cookie is > than highest dirent position (error)
	 * 4. cookie <= highest dirent position but > highest cached cookie
	 *    (currently equivalent to #2, because we pre-populate the cookie
	 *    avl)
	 * 5. cookie is in cached range -- ok */

	if (cookie > 0) {
		/* N.B., cache_inode_avl_qp_insert_s ensures k > 2 */
		if (cookie < 3) {
			status = CACHE_INODE_BAD_COOKIE;
			LogFullDebug(COMPONENT_NFS_READDIR, "Bad cookie");
			goto unlock_dir;
		}

		/* we assert this can now succeed */
		dirent =
		    cache_inode_avl_lookup_k(directory, cookie,
					     CACHE_INODE_FLAG_NEXT_ACTIVE);
		if (!dirent) {
			/* Linux (3.4, etc) has been observed to send readdir
			 * at the offset of the last entry's cookie, and
			 * returns no dirents to userland if that readdir
			 * notfound or badcookie. */
			if (cache_inode_avl_lookup_k
			    (directory, cookie, CACHE_INODE_FLAG_NONE)) {
				/* yup, it was the last entry */
				LogFullDebug(COMPONENT_NFS_READDIR,
					     "EOD because empty result");
				*eod_met = true;
				goto unlock_dir;
			}
			LogFullDebug(COMPONENT_NFS_READDIR,
				     "seek to cookie=%" PRIu64 " fail",
				     cookie);
			status = CACHE_INODE_BAD_COOKIE;
			goto unlock_dir;
		}

		/* dirent is the NEXT entry to return, since we sent
		 * CACHE_INODE_FLAG_NEXT_ACTIVE */
		dirent_node = &dirent->node_hk;

	} else {
		/* initial readdir */
		dirent_node = avltree_first(&directory->object.dir.avl.t);
	}

	LogFullDebug(COMPONENT_NFS_READDIR,
		     "About to readdir in cache_inode_readdir: directory=%p "
		     "cookie=%" PRIu64 " collisions %d", directory, cookie,
		     directory->object.dir.avl.collisions);

	/* Now satisfy the request from the cached readdir--stop when either
	 * the requested sequence or dirent sequence is exhausted */

	for (; cb_parms.in_result && dirent_node;
	     dirent_node = avltree_next(dirent_node)) {
		cache_entry_t *entry = NULL;
		cache_inode_status_t tmp_status = 0;
		/* One ESTALE retry is allowed for each dirent, so the
		 * budget is re-armed on every iteration. */
		bool retry_stale = true;

		dirent =
		    avltree_container_of(dirent_node, cache_inode_dir_entry_t,
					 node_hk);

 estale_retry:
		LogFullDebug(COMPONENT_NFS_READDIR,
			     "Lookup direct %s", dirent->name);

		entry = cache_inode_get_keyed(&dirent->ckey,
					      CIG_KEYED_FLAG_NONE,
					      &tmp_status);
		if (!entry) {
			LogFullDebug(COMPONENT_NFS_READDIR,
				     "Lookup returned %s",
				     cache_inode_err_str(tmp_status));

			if (retry_stale
			    && tmp_status == CACHE_INODE_ESTALE) {
				LogDebug(COMPONENT_NFS_READDIR,
					 "cache_inode_get_keyed returned %s "
					 "for %s - retrying entry",
					 cache_inode_err_str(tmp_status),
					 dirent->name);
				retry_stale = false; /* only one retry per
						      * dirent */
				goto estale_retry;
			}

			if (tmp_status == CACHE_INODE_NOT_FOUND
			    || tmp_status == CACHE_INODE_ESTALE) {
				/* Directory changed out from under us.
				   Invalidate it, skip the name, and keep
				   going. */
				atomic_clear_uint32_t_bits(
					&directory->flags,
					CACHE_INODE_TRUST_CONTENT);
				LogDebug(COMPONENT_NFS_READDIR,
					 "cache_inode_get_keyed returned %s "
					 "for %s - skipping entry",
					 cache_inode_err_str(tmp_status),
					 dirent->name);
				continue;
			} else {
				/* Something is more seriously wrong,
				   probably an inconsistency. */
				status = tmp_status;
				LogCrit(COMPONENT_NFS_READDIR,
					"cache_inode_get_keyed returned %s "
					"for %s - bailing out",
					cache_inode_err_str(status),
					dirent->name);
				goto unlock_dir;
			}
		}

		LogFullDebug(COMPONENT_NFS_READDIR,
			     "cache_inode_readdir: dirent=%p name=%s "
			     "cookie=%" PRIu64 " (probes %d)", dirent,
			     dirent->name, dirent->hk.k, dirent->hk.p);

		cb_parms.name = dirent->name;
		cb_parms.attr_allowed = attr_status == CACHE_INODE_SUCCESS;
		cb_parms.cookie = dirent->hk.k;

		tmp_status =
		    cache_inode_getattr(entry, &cb_parms, cb, CB_ORIGINAL);

		if (tmp_status != CACHE_INODE_SUCCESS) {
			/* Drop the reference obtained from
			 * cache_inode_get_keyed before retrying, skipping
			 * or bailing out. */
			cache_inode_lru_unref(entry, LRU_FLAG_NONE);
			if (tmp_status == CACHE_INODE_ESTALE) {
				if (retry_stale) {
					LogDebug(COMPONENT_NFS_READDIR,
						 "cache_inode_getattr returned "
						 "%s for %s - retrying entry",
						 cache_inode_err_str
						 (tmp_status),
						 dirent->name);
					retry_stale = false; /* only one retry
							      * per dirent */
					goto estale_retry;
				}

				/* Directory changed out from under us.
				   Invalidate it, skip the name, and keep
				   going. */
				atomic_clear_uint32_t_bits(
					&directory->flags,
					CACHE_INODE_TRUST_CONTENT);

				LogDebug(COMPONENT_NFS_READDIR,
					 "cache_inode_getattr "
					 "returned %s for %s - skipping entry",
					 cache_inode_err_str(tmp_status),
					 dirent->name);
				continue;
			}

			status = tmp_status;

			LogCrit(COMPONENT_NFS_READDIR,
				"cache_inode_getattr returned %s for "
				"%s - bailing out",
				cache_inode_err_str(status), dirent->name);

			goto unlock_dir;
		}

		(*nbfound)++;

		cache_inode_lru_unref(entry, LRU_FLAG_NONE);

		if (!cb_parms.in_result) {
			LogDebug(COMPONENT_NFS_READDIR,
				 "bailing out due to entry not in result");
			break;
		}
	}

	/* We have reached the last node and every node traversed was
	   added to the result */

	LogDebug(COMPONENT_NFS_READDIR,
		 "dirent_node = %p, nbfound = %u, in_result = %s", dirent_node,
		 *nbfound, cb_parms.in_result ? "TRUE" : "FALSE");

	*eod_met = (!dirent_node && cb_parms.in_result);

 unlock_dir:

	PTHREAD_RWLOCK_unlock(&directory->content_lock);
	return status;
}				/* cache_inode_readdir */