/* * routine to check that the specified directory is empty (for rmdir) */ int ocfs2_empty_dir(struct inode *inode) { unsigned long offset; struct buffer_head * bh; struct ocfs2_dir_entry * de, * de1; struct super_block * sb; int err; sb = inode->i_sb; if ((i_size_read(inode) < (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) || !(bh = ocfs2_bread(inode, 0, &err, 0))) { mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - " "no data block\n", OCFS2_I(inode)->ip_blkno); return 1; } de = (struct ocfs2_dir_entry *) bh->b_data; de1 = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len)); if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) || !le64_to_cpu(de1->inode) || strcmp(".", de->name) || strcmp("..", de1->name)) { mlog(ML_ERROR, "bad directory (dir #%"MLFu64") - " "no `.' or `..'\n", OCFS2_I(inode)->ip_blkno); brelse(bh); return 1; } offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len)); while (offset < i_size_read(inode) ) { if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) { brelse(bh); bh = ocfs2_bread(inode, offset >> sb->s_blocksize_bits, &err, 0); if (!bh) { mlog(ML_ERROR, "directory #%"MLFu64" contains " "a hole at offset %lu\n", OCFS2_I(inode)->ip_blkno, offset); offset += sb->s_blocksize; continue; } de = (struct ocfs2_dir_entry *) bh->b_data; } if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { brelse(bh); return 1; } if (le64_to_cpu(de->inode)) { brelse(bh); return 0; } offset += le16_to_cpu(de->rec_len); de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len)); }
/*
 * Decide whether the length fields of a directory entry look corrupt.
 *
 * 'left' is compared against rec_len as an upper bound and is also handed
 * to dirent_leaves_partial().
 *
 * An entry passes only when every one of these holds, tested in this order:
 *   - rec_len is at least OCFS2_DIR_REC_LEN(1)
 *   - rec_len has none of the OCFS2_DIR_ROUND bits set
 *   - rec_len does not exceed 'left'
 *   - the record length implied by name_len fits inside rec_len
 *   - dirent_leaves_partial() does not object
 *
 * Returns 0 when the lengths pass, 1 (after a verbose message) otherwise.
 */
static int corrupt_dirent_lengths(struct ocfs2_dir_entry *dirent, int left)
{
	if (dirent->rec_len < OCFS2_DIR_REC_LEN(1))
		goto corrupt;

	if ((dirent->rec_len & OCFS2_DIR_ROUND) != 0)
		goto corrupt;

	if (dirent->rec_len > left)
		goto corrupt;

	if (OCFS2_DIR_REC_LEN(dirent->name_len) > dirent->rec_len)
		goto corrupt;

	if (dirent_leaves_partial(dirent, left))
		goto corrupt;

	return 0;

corrupt:
	verbosef("corrupt dirent: %"PRIu64" rec_len %u name_len %u\n",
		 (uint64_t)dirent->inode, dirent->rec_len, dirent->name_len);
	return 1;
}
/*
 * Report which dot entry, if any, is expected at 'offset'.
 *
 * Only entries with dbe->e_blkcount == 0 can hold dot entries.  Within
 * such a block the offset is matched either from the start of the block or,
 * when dbe->e_ino == dbe->e_blkno, from the offset of id2.i_data.id_data
 * inside struct ocfs2_dinode (presumably the inline-data directory case --
 * confirm against the callers).
 *
 * 'ost' is not used here.
 *
 * Returns 1 where "." is expected, 2 where ".." is expected (one
 * OCFS2_DIR_REC_LEN(1) further on), and 0 everywhere else.
 */
static int expected_dots(o2fsck_state *ost, o2fsck_dirblock_entry *dbe,
			 int offset)
{
	int data_start = offsetof(struct ocfs2_dinode, id2.i_data.id_data);
	int in_inode_block;

	if (dbe->e_blkcount != 0)
		return 0;

	in_inode_block = (dbe->e_ino == dbe->e_blkno);

	if (offset == 0)
		return 1;
	if (in_inode_block && offset == data_start)
		return 1;

	if (offset == OCFS2_DIR_REC_LEN(1))
		return 2;
	if (in_inode_block && offset == data_start + OCFS2_DIR_REC_LEN(1))
		return 2;

	return 0;
}
/*
 * ocfs2_readdir()
 *
 * Walk the entries of the directory behind 'filp', starting at
 * filp->f_pos, and pass every entry whose inode field is non-zero to
 * 'filldir'.
 *
 * filp:    open directory; f_pos and f_version are updated as the walk
 *          advances.
 * dirent:  passed through to 'filldir' untouched.
 * filldir: called once per in-use entry; a non-zero return stops the walk.
 *
 * Return value:
 *   - the negative result of ocfs2_meta_lock() if the lock is not obtained;
 *   - the number of entries stored so far in this call if an entry fails
 *     ocfs2_check_dir_entry();
 *   - 0 in every other case, because 'stored' is reset once the outer loop
 *     ends (this includes the case where filldir stopped the walk).
 */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset, blk;
	int i, num, stored;
	struct buffer_head * bh, * tmp;
	struct ocfs2_dir_entry * de;
	int err;
	struct inode *inode = filp->f_dentry->d_inode;
	struct super_block * sb = inode->i_sb;
	int have_disk_lock = 0;

	mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno);

	stored = 0;
	bh = NULL;

	error = ocfs2_meta_lock(inode, NULL, NULL, 0);
	if (error < 0) {
		if (error != -ENOENT)
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		stored = error;
		goto bail;
	}
	/* remember that bail: has a lock to drop */
	have_disk_lock = 1;

	/* byte offset of f_pos inside its block */
	offset = filp->f_pos & (sb->s_blocksize - 1);

	/* one block per iteration; stop after the first block that yields
	 * entries, on a filldir error, or at the end of the directory */
	while (!error && !stored && filp->f_pos < i_size_read(inode)) {
		blk = (filp->f_pos) >> sb->s_blocksize_bits;
		bh = ocfs2_bread(inode, blk, &err, 0);
		if (!bh) {
			mlog(ML_ERROR, "directory #%"MLFu64" contains a hole "
			     "at offset %lld\n",
			     OCFS2_I(inode)->ip_blkno,
			     filp->f_pos);
			/* skip what is left of this block.
			 * NOTE(review): 'offset' keeps its old value across
			 * this continue, so the next block can be entered
			 * with a non-zero offset while f_pos is block
			 * aligned -- confirm this is intended. */
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

		/*
		 * Do the readahead (8k)
		 *
		 * Only done when starting at the beginning of a block.
		 * 16 >> (s_blocksize_bits - 9) turns sixteen 512-byte units
		 * into a count of blocks.  'num' is set but never read.
		 */
		if (!offset) {
			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
			     i > 0; i--) {
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				if (tmp)
					brelse(tmp);
			}
		}

revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now.  Scan from the start of the block
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero.  A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			/* resume at the entry boundary the scan stopped on
			 * and rebuild f_pos from block base + that offset */
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		/* hand out the entries of this block */
		while (!error && filp->f_pos < i_size_read(inode)
		       && offset < sb->s_blocksize) {
			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
				filp->f_pos = (filp->f_pos |
					       (sb->s_blocksize - 1)) + 1;
				brelse(bh);
				/* 'stored' is returned as it stands */
				goto bail;
			}
			/* offset moves on now; f_pos only after filldir */
			offset += le16_to_cpu(de->rec_len);
			if (le64_to_cpu(de->inode)) {
				/* We might block in the next section
				 * if the data destination is
				 * currently swapped out.  So, use a
				 * version stamp to detect whether or
				 * not the directory has been modified
				 * during the copy operation.
				 */
				unsigned long version = filp->f_version;
				unsigned char d_type = DT_UNKNOWN;

				/* out-of-range file types stay DT_UNKNOWN */
				if (de->file_type < OCFS2_FT_MAX)
					d_type = ocfs2_filetype_table[de->file_type];
				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						ino_from_blkno(sb, le64_to_cpu(de->inode)),
						d_type);
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		/* the next block is read from its beginning */
		offset = 0;
		brelse(bh);
	}

	/* normal completion reports 0, not the number of entries */
	stored = 0;
bail:
	if (have_disk_lock)
		ocfs2_meta_unlock(inode, 0);

	mlog_exit(stored);

	return stored;
}
/* * The caller has found that either of rec_len or name_len are garbage. The * caller trusts us to fix them up in place and will be checking them again * before proceeding. We have to update the lengths to make forward progress. * 'left' is the number of bytes from the start of this dirent struct that * remain in the block. * * We're called for invalid dirents, and having a dirent * that leaves a partial dirent at the end of the block is considered invalid, * and we pad out partials at the end of this call so we can't be called here * with left < OCFS2_DIR_MEMBER_LEN. * * we're pretty limited in the repairs we can make: * * - We can't just set name_len if rec_len looks valid, we might guess * name_len wrong and create a bogus file name. * - we can't just set rec_len based on name_len. rec_len could have * included an arbitrary part of the name from a previously freed dirent. */ static void fix_dirent_lengths(struct ocfs2_dir_entry *dirent, int left, struct ocfs2_dir_entry *prev, unsigned int *flags) { /* * as described above we can't reconstruct either value if it is * complete nonsense. We can only proceed if we can work off of * one that is kind of valid looking. * name_len could well be 0 from the dirent being cleared. 
*/ if (dirent->rec_len < OCFS2_DIR_MEMBER_LEN || (dirent->rec_len > left || dirent->name_len > left)) goto wipe; /* if we see a dirent with no file name then we remove it by * shifting the remaining dirents forward */ if ((dirent->rec_len == OCFS2_DIR_MEMBER_LEN)) { char *cp = (char *)dirent; left -= dirent->rec_len; memmove(cp, cp + dirent->rec_len, left); memset(cp + left, 0, dirent->rec_len); goto out; } /* if rec_len just appears to be mis-rounded in a way that doesn't * affect following dirents then we can probably save this dirent */ if (OCFS2_DIR_REC_LEN(dirent->name_len) != dirent->rec_len && OCFS2_DIR_REC_LEN(dirent->name_len) == OCFS2_DIR_REC_LEN(dirent->rec_len)) { dirent->rec_len = OCFS2_DIR_REC_LEN(dirent->name_len); left -= dirent->rec_len; goto out; } /* if name_len is too far off, however, we're going to lose this * dirent.. we might be able to just lose this one dirent if rec_len * appears to be intact. */ if ((dirent->rec_len & OCFS2_DIR_ROUND) == 0 && !dirent_leaves_partial(dirent, left)) { left -= dirent->rec_len; dirent->name_len = 0; dirent->inode = 0; dirent->file_type = OCFS2_FT_UNKNOWN; goto out; } /* * if we can't trust rec_len, however, then we don't know where the * next dirent might begin. We've lost the trail of dirents created by * the file system and run the risk of parsing file names as dirents. * So we're forced to wipe the block and leave the rest to lost+found. */ wipe: dirent->rec_len = left; dirent->name_len = 0; dirent->inode = 0; dirent->file_type = OCFS2_FT_UNKNOWN; left = 0; out: /* * rec_len must be valid and left must reflect the space *after* the * current dirent by this point. if there isn't enough room for * another dirent after the one we've just repaired then we tack the * remaining space onto the current dirent. */ if (dirent_leaves_partial(dirent, left)) dirent->rec_len += left; *flags |= OCFS2_DIRENT_CHANGED; }
/*
 * Check a directory entry against the "." / ".." entry that
 * expected_dots() says should live at 'offset', prompting before each
 * repair.
 *
 * ost:    fsck state; passed to prompt() and used to look up the
 *         directory's parent record.
 * dbe:    directory block being checked; dbe->e_ino is the inode the "."
 *         entry is compared against.
 * dirent: entry to check, modified in place.
 * offset: handed to expected_dots().
 * left:   not used by this function.
 * flags:  OCFS2_DIRENT_CHANGED is or'ed in whenever 'dirent' (or the entry
 *         carved out after it) is modified.
 *
 * Returns 0, or OCFS2_ET_INTERNAL_FAILURE when no parent record exists for
 * the directory while handling "..".
 */
static errcode_t fix_dirent_dots(o2fsck_state *ost,
				 o2fsck_dirblock_entry *dbe,
				 struct ocfs2_dir_entry *dirent,
				 int offset, int left,
				 unsigned int *flags)
{
	int expect_dots = expected_dots(ost, dbe, offset);
	int changed_len = 0;
	struct ocfs2_dir_entry *next;
	int excess;
	errcode_t ret = 0;

	if (!expect_dots) {
		/* only in-use entries named "." or ".." are of interest at
		 * a position where no dot entry belongs */
		if (!dirent->inode ||
		    (!dirent_has_dots(dirent, 1) &&
		     !dirent_has_dots(dirent, 2)))
			goto out;

		if (prompt(ost, PY, PR_DIRENT_DOTTY_DUP,
			   "Duplicate '%.*s' directory entry found, remove "
			   "it?", dirent->name_len, dirent->name)) {
			dirent->inode = 0;
			*flags |= OCFS2_DIRENT_CHANGED;
		}
		/*
		 * Whatever the answer, this entry is not at a dot position,
		 * so none of the "." / ".." repairs below apply to it.
		 * Falling through with expect_dots == 0 would offer to
		 * clobber its name with a zero-length one and then treat it
		 * as the "." entry.
		 */
		goto out;
	}

	if (!dirent_has_dots(dirent, expect_dots) &&
	    prompt(ost, PY, PR_DIRENT_NOT_DOTTY,
		   "The %s directory entry in directory inode "
		   "%"PRIu64" is '%.*s' instead of '%.*s'. Clobber the "
		   "current name with the expected dot name?",
		   expect_dots == 1 ? "first" : "second", dbe->e_ino,
		   dirent->name_len, dirent->name, expect_dots, "..")) {
		dirent->name_len = expect_dots;
		memset(dirent->name, '.', expect_dots);
		dirent->file_type = OCFS2_FT_DIR;
		changed_len = 1;
		*flags |= OCFS2_DIRENT_CHANGED;
	}

	/* we only record where .. points for now and that ends the
	 * checks for .. */
	if (expect_dots == 2) {
		o2fsck_dir_parent *dp;

		dp = o2fsck_dir_parent_lookup(&ost->ost_dir_parents,
					      dbe->e_ino);
		if (dp == NULL) {
			ret = OCFS2_ET_INTERNAL_FAILURE;
			com_err(whoami, ret, "no dir parents for '..' entry "
				"for inode %"PRIu64, dbe->e_ino);
		} else
			dp->dp_dot_dot = dirent->inode;

		goto out;
	}

	if ((dirent->inode != dbe->e_ino) &&
	    prompt(ost, PY, PR_DIRENT_DOT_INODE,
		   "The '.' entry in directory inode %"PRIu64" "
		   "points to inode %"PRIu64" instead of itself. Fix "
		   "the '.' entry?", dbe->e_ino, (uint64_t)dirent->inode)) {
		dirent->inode = dbe->e_ino;
		*flags |= OCFS2_DIRENT_CHANGED;
	}

	/*
	 * we might have slop at the end of this "." dirent.  split
	 * it into another separate dirent if there is enough room and
	 * we've just updated its name_len or the user says we should.
	 *
	 * The excess is computed as a signed value so that a rec_len that is
	 * not larger than what the name needs never triggers a split.
	 * "Enough room" means the excess is itself an acceptable rec_len:
	 * corrupt_dirent_lengths() rejects anything below
	 * OCFS2_DIR_REC_LEN(1).
	 */
	excess = (int)dirent->rec_len -
		 (int)OCFS2_DIR_REC_LEN(dirent->name_len);
	if (excess >= (int)OCFS2_DIR_REC_LEN(1) &&
	    (changed_len ||
	     prompt(ost, PY, PR_DIRENT_DOT_EXCESS,
		    "The '.' entry in directory inode "
		    "%"PRIu64" is too long. Try to create another "
		    "directory entry from the excess?", dbe->e_ino))) {
		dirent->rec_len = OCFS2_DIR_REC_LEN(dirent->name_len);

		next = (struct ocfs2_dir_entry *)((char *)dirent +
						  dirent->rec_len);
		next->inode = 0;
		next->name_len = 0;
		/* the new empty entry covers exactly the space given up by
		 * "."; its previous contents are slop and must not be used
		 * to derive a length */
		next->rec_len = excess;
		*flags |= OCFS2_DIRENT_CHANGED;
	}

out:
	return ret;
}
/*
 * ocfs2_readdir()
 *
 * Walk the entries of the directory behind 'filp', starting at
 * filp->f_pos, and pass every entry whose inode field is non-zero to
 * 'filldir'.
 *
 * filp:    open directory; f_pos and f_version are updated as the walk
 *          advances.
 * dirent:  passed through to 'filldir' untouched.
 * filldir: called once per in-use entry; a non-zero return stops the walk.
 *
 * Return value:
 *   - the negative locking error if the meta lock is not obtained (no
 *     unlock is attempted on that path);
 *   - the number of entries stored so far in this call if an entry fails
 *     ocfs2_check_dir_entry();
 *   - 0 in every other case, because 'stored' is reset once the outer loop
 *     ends (this includes the case where filldir stopped the walk).
 */
int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	int error = 0;
	unsigned long offset, blk, last_ra_blk = 0;
	int i, stored;
	struct buffer_head * bh, * tmp;
	struct ocfs2_dir_entry * de;
	int err;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct super_block * sb = inode->i_sb;
	unsigned int ra_sectors = 16;
	int lock_level = 0;

	mlog_entry("dirino=%llu\n",
		   (unsigned long long)OCFS2_I(inode)->ip_blkno);

	stored = 0;
	bh = NULL;

	error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
	if (lock_level && error >= 0) {
		/* We release EX lock which used to update atime
		 * and get PR lock again to reduce contention
		 * on commonly accessed directories. */
		ocfs2_meta_unlock(inode, 1);
		lock_level = 0;
		error = ocfs2_meta_lock(inode, NULL, 0);
	}
	if (error < 0) {
		if (error != -ENOENT)
			mlog_errno(error);
		/* we haven't got any yet, so propagate the error. */
		stored = error;
		goto bail_nolock;
	}

	/* byte offset of f_pos inside its block */
	offset = filp->f_pos & (sb->s_blocksize - 1);

	/* one block per iteration; stop after the first block that yields
	 * entries, on a filldir error, or at the end of the directory */
	while (!error && !stored && filp->f_pos < i_size_read(inode)) {
		blk = (filp->f_pos) >> sb->s_blocksize_bits;
		bh = ocfs2_bread(inode, blk, &err, 0);
		if (!bh) {
			mlog(ML_ERROR,
			     "directory #%llu contains a hole at offset %lld\n",
			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
			     filp->f_pos);
			/* skip what is left of this block.
			 * NOTE(review): 'offset' keeps its old value across
			 * this continue, so the next block can be entered
			 * with a non-zero offset while f_pos is block
			 * aligned -- confirm this is intended. */
			filp->f_pos += sb->s_blocksize - offset;
			continue;
		}

		/* The idea here is to begin with 8k read-ahead and to stay
		 * 4k ahead of our current position.
		 *
		 * TODO: Use the pagecache for this. We just need to
		 * make sure it's cluster-safe...
		 *
		 * ra_sectors starts at 16 and drops to 8 after the first
		 * round; ra_sectors >> (s_blocksize_bits - 9) turns a count
		 * of 512-byte units into a count of blocks.
		 * NOTE(review): the test below compares a block distance
		 * shifted left by 9 against a sector count -- confirm the
		 * units are what was intended. */
		if (!last_ra_blk
		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
			     i > 0; i--) {
				tmp = ocfs2_bread(inode, ++blk, &err, 1);
				if (tmp)
					brelse(tmp);
			}
			/* blk now names the last block requested above; it
			 * is recomputed from f_pos on the next iteration */
			last_ra_blk = blk;
			ra_sectors = 8;
		}

revalidate:
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now.  Scan from the start of the block
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero.  A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			/* resume at the entry boundary the scan stopped on
			 * and rebuild f_pos from block base + that offset */
			offset = i;
			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
				| offset;
			filp->f_version = inode->i_version;
		}

		/* hand out the entries of this block */
		while (!error && filp->f_pos < i_size_read(inode)
		       && offset < sb->s_blocksize) {
			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
				filp->f_pos = (filp->f_pos |
					       (sb->s_blocksize - 1)) + 1;
				brelse(bh);
				/* 'stored' is returned as it stands */
				goto bail;
			}
			/* offset moves on now; f_pos only after filldir */
			offset += le16_to_cpu(de->rec_len);
			if (le64_to_cpu(de->inode)) {
				/* We might block in the next section
				 * if the data destination is
				 * currently swapped out.  So, use a
				 * version stamp to detect whether or
				 * not the directory has been modified
				 * during the copy operation.
				 */
				unsigned long version = filp->f_version;
				unsigned char d_type = DT_UNKNOWN;

				/* out-of-range file types stay DT_UNKNOWN */
				if (de->file_type < OCFS2_FT_MAX)
					d_type = ocfs2_filetype_table[de->file_type];
				error = filldir(dirent, de->name,
						de->name_len,
						filp->f_pos,
						ino_from_blkno(sb, le64_to_cpu(de->inode)),
						d_type);
				if (error)
					break;
				if (version != filp->f_version)
					goto revalidate;
				stored ++;
			}
			filp->f_pos += le16_to_cpu(de->rec_len);
		}
		/* the next block is read from its beginning */
		offset = 0;
		brelse(bh);
	}

	/* normal completion reports 0, not the number of entries */
	stored = 0;
bail:
	ocfs2_meta_unlock(inode, lock_level);

bail_nolock:
	mlog_exit(stored);

	return stored;
}