static int ocfs2_filecheck_read_inode_block_full(struct inode *inode, struct buffer_head **bh, int flags, int type) { int rc; struct buffer_head *tmp = *bh; if (!type) /* Check inode block */ rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, 1, &tmp, flags, ocfs2_filecheck_validate_inode_block); else /* Repair inode block */ rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, 1, &tmp, flags, ocfs2_filecheck_repair_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ if (!rc && !*bh) *bh = tmp; return rc; }
/*
 * Walk the dirblock entries starting at @node and pre-read them through
 * ocfs2_read_blocks(), coalescing runs of adjacent block numbers into a
 * single read of at most @pre_cache_blocks blocks.
 *
 * @fs:               filesystem to read from
 * @node:             first rb-tree node to visit; iteration uses rb_next()
 * @pre_cache_buf:    scratch buffer that receives each read
 * @pre_cache_blocks: upper bound on the number of blocks merged per read
 *
 * Errors are deliberately not reported: this is only a cache pre-fill, so
 * the walk simply stops at the first failed read or at the first run that
 * o2fsck_worth_caching() rejects.
 *
 * Returns the number of blocks that were read successfully.
 */
static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node,
			char *pre_cache_buf, int pre_cache_blocks)
{
	int cached_blocks = 0;
	o2fsck_dirblock_entry *dbe;
	uint64_t io_blkno = 0, next_blkno = 0;
	int count = 0;
	errcode_t err;

	o2fsck_reset_blocks_cached();

	for (; node; node = rb_next(node)) {
		dbe = rb_entry(node, o2fsck_dirblock_entry, e_node);

		if (io_blkno) {
			assert(count);
			assert(next_blkno > io_blkno);

			/* Adjacent block and room left: grow the pending run. */
			if ((next_blkno == dbe->e_blkno) &&
			    (count < pre_cache_blocks)) {
				count++;
				next_blkno++;
				continue;
			}

			/*
			 * The run has ended.  Give up entirely when it is
			 * not worth caching; clearing io_blkno also keeps
			 * the trailing read below from firing.
			 */
			if (!o2fsck_worth_caching(count)) {
				io_blkno = 0;
				break;
			}

			err = ocfs2_read_blocks(fs, io_blkno, count,
						pre_cache_buf);
			io_blkno = 0;
			next_blkno = 0;

			if (err)
				break;

			cached_blocks += count;
			count = 0;
		}

		/* Start a new run at the current entry. */
		assert(!io_blkno);
		io_blkno = dbe->e_blkno;
		next_blkno = io_blkno + 1;
		count = 1;
	}

	/* Catch the last pre-fill buffer */
	if (io_blkno && o2fsck_worth_caching(count)) {
		assert(count);
		err = ocfs2_read_blocks(fs, io_blkno, count, pre_cache_buf);
		if (!err)
			cached_blocks += count;
	}

	return cached_blocks;
}
/*
 * Block-iteration callback: read block @blkno into wb->buf, OR the
 * pattern 0x55555555 into every 32-bit word, add the resulting bit counts
 * to wb->used and write the block back.
 *
 * @priv_data points at the struct walk_block holding the scratch buffer
 * and the running counter.  @bcount and @ext_flags are unused.
 *
 * Returns 0 on success, or OCFS2_BLOCK_ABORT after reporting a read or
 * write failure through com_err().
 */
static int walk_blocks_func(ocfs2_filesys *fs, uint64_t blkno,
			    uint64_t bcount, uint16_t ext_flags,
			    void *priv_data)
{
	struct walk_block *wb = priv_data;
	uint32_t *words;
	size_t nwords, idx;
	errcode_t err;

	err = ocfs2_read_blocks(fs, blkno, 1, wb->buf);
	if (err) {
		com_err("walk_blocks_func", err,
			"while reading block %"PRIu64, blkno);
		return OCFS2_BLOCK_ABORT;
	}

	/* set every other bit */
	words = (uint32_t *)wb->buf;
	nwords = fs->fs_blocksize / sizeof(uint32_t);
	for (idx = 0; idx < nwords; idx++) {
		words[idx] |= 0x55555555;
		wb->used += BITCOUNT(words[idx]);
	}

	err = io_write_block(fs->fs_io, blkno, 1, wb->buf);
	if (err) {
		com_err("walk_blocks_func", err,
			"while writing block %"PRIu64, blkno);
		return OCFS2_BLOCK_ABORT;
	}

	return 0;
}
/*
 * Inspect block 0 of @fs for an OCFS1 volume header.
 *
 * Returns OCFS2_ET_OCFS_REV when the header's major version equals
 * OCFS1_MAJOR_VERSION or its signature starts with
 * OCFS1_VOLUME_SIGNATURE, 0 when neither matches, or the error from the
 * block allocation / read.
 */
static errcode_t ocfs2_validate_ocfs1_header(ocfs2_filesys *fs)
{
	struct ocfs1_vol_disk_hdr *hdr;
	char *block;
	errcode_t err;

	err = ocfs2_malloc_block(fs->fs_io, &block);
	if (err)
		return err;

	err = ocfs2_read_blocks(fs, 0, 1, block);
	if (!err) {
		hdr = (struct ocfs1_vol_disk_hdr *)block;

		/* Either marker alone is enough to call this an OCFS1 volume. */
		if (le32_to_cpu(hdr->major_version) == OCFS1_MAJOR_VERSION ||
		    memcmp(hdr->signature, OCFS1_VOLUME_SIGNATURE,
			   strlen(OCFS1_VOLUME_SIGNATURE)) == 0)
			err = OCFS2_ET_OCFS_REV;
	}

	ocfs2_free(&block);
	return err;
}
int ocfs2_refresh_slot_info(struct ocfs2_super *osb) { int ret; struct ocfs2_slot_info *si = osb->slot_info; if (si == NULL) return 0; BUG_ON(si->si_blocks == 0); BUG_ON(si->si_bh == NULL); trace_ocfs2_refresh_slot_info(si->si_blocks); /* * We pass -1 as blocknr because we expect all of si->si_bh to * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If * this is not true, the read of -1 (UINT64_MAX) will fail. */ ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks, si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); spin_unlock(&osb->osb_lock); } return ret; }
int ocfs2_refresh_slot_info(struct ocfs2_super *osb) { int ret; struct ocfs2_slot_info *si = osb->slot_info; if (si == NULL) return 0; BUG_ON(si->si_blocks == 0); BUG_ON(si->si_bh == NULL); trace_ocfs2_refresh_slot_info(si->si_blocks); /* */ ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks, si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL); if (ret == 0) { spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); spin_unlock(&osb->osb_lock); } return ret; }
/* * JBD Might read a cached version of another nodes journal file. We * don't want this as this file changes often and we get no * notification on those changes. The only way to be sure that we've * got the most up to date version of those blocks then is to force * read them off disk. Just searching through the buffer cache won't * work as there may be pages backing this file which are still marked * up to date. We know things can't change on this file underneath us * as we have the lock by now :) */ static int ocfs2_force_read_journal(struct inode *inode) { int status = 0; int i; u64 v_blkno, p_blkno, p_blocks, num_blocks; #define CONCURRENT_JOURNAL_FILL 32ULL struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; mlog_entry_void(); memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, &p_blocks, NULL); if (status < 0) { mlog_errno(status); goto bail; } if (p_blocks > CONCURRENT_JOURNAL_FILL) p_blocks = CONCURRENT_JOURNAL_FILL; /* We are reading journal data which should not * be put in the uptodate cache */ status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), p_blkno, p_blocks, bhs, 0, NULL); if (status < 0) { mlog_errno(status); goto bail; } for(i = 0; i < p_blocks; i++) { brelse(bhs[i]); bhs[i] = NULL; } v_blkno += p_blocks; } bail: for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) if (bhs[i]) brelse(bhs[i]); mlog_exit(status); return status; }
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, int flags) { int rc; struct buffer_head *tmp = *bh; rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno, 1, &tmp, flags, ocfs2_validate_inode_block); /* If ocfs2_read_blocks() got us a new bh, pass it up. */ if (!rc && !*bh) *bh = tmp; return rc; }
/*
 * Read the extent block at @blkno into @eb_buf in CPU byte order.
 *
 * The block is read into a temporary buffer, its metadata ECC and
 * signature are verified, and only then is it copied to @eb_buf
 * (fs->fs_blocksize bytes) and byte-swapped.  @eb_buf is left untouched
 * on any failure.
 *
 * Returns 0 on success, OCFS2_ET_BAD_BLKNO for a block number below the
 * superblock or above fs->fs_blocks, OCFS2_ET_BAD_EXTENT_BLOCK_MAGIC for
 * a signature mismatch, or the error from allocation, read or ECC
 * validation.
 */
errcode_t ocfs2_read_extent_block_nocheck(ocfs2_filesys *fs, uint64_t blkno,
					  char *eb_buf)
{
	struct ocfs2_extent_block *disk_eb;
	char *raw;
	errcode_t ret;

	if (blkno < OCFS2_SUPER_BLOCK_BLKNO || blkno > fs->fs_blocks)
		return OCFS2_ET_BAD_BLKNO;

	ret = ocfs2_malloc_block(fs->fs_io, &raw);
	if (ret)
		return ret;

	disk_eb = (struct ocfs2_extent_block *)raw;

	ret = ocfs2_read_blocks(fs, blkno, 1, raw);
	if (!ret)
		ret = ocfs2_validate_meta_ecc(fs, raw, &disk_eb->h_check);
	if (!ret && memcmp(disk_eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE,
			   strlen(OCFS2_EXTENT_BLOCK_SIGNATURE)) != 0)
		ret = OCFS2_ET_BAD_EXTENT_BLOCK_MAGIC;

	if (!ret) {
		memcpy(eb_buf, raw, fs->fs_blocksize);
		ocfs2_swap_extent_block_to_cpu(fs,
				(struct ocfs2_extent_block *)eb_buf);
	}

	ocfs2_free(&raw);
	return ret;
}
/*
 * Read one block of a slot's journal into @buf.
 *
 * @blkoff is translated to a physical block number with
 * lookup_journal_block() (which also receives @check_dup); a failed read
 * is reported through com_err().  A failed lookup is returned silently.
 *
 * Returns 0 on success or the error from the lookup or the read.
 */
static errcode_t read_journal_block(ocfs2_filesys *fs,
				    struct journal_info *ji,
				    uint64_t blkoff, char *buf,
				    int check_dup)
{
	uint64_t phys_blkno;
	errcode_t ret;

	ret = lookup_journal_block(fs, ji, blkoff, &phys_blkno, check_dup);
	if (!ret) {
		ret = ocfs2_read_blocks(fs, phys_blkno, 1, buf);
		if (ret)
			com_err(whoami, ret,
				"while reading block %"PRIu64" of slot "
				"%d's journal", phys_blkno, ji->ji_slot);
	}

	return ret;
}
/*
 * Read the group descriptor at @blkno into @gd_buf in CPU byte order.
 *
 * The block goes through a temporary buffer so that @gd_buf is written
 * (fs->fs_blocksize bytes) only after the metadata ECC and the
 * descriptor signature have both checked out.
 *
 * Returns 0 on success, OCFS2_ET_BAD_BLKNO for a block number below the
 * superblock or above fs->fs_blocks, OCFS2_ET_BAD_GROUP_DESC_MAGIC for a
 * signature mismatch, or the error from allocation, read or ECC
 * validation.
 */
errcode_t ocfs2_read_group_desc(ocfs2_filesys *fs, uint64_t blkno,
				char *gd_buf)
{
	struct ocfs2_group_desc *disk_gd;
	char *raw;
	errcode_t ret;

	if (blkno < OCFS2_SUPER_BLOCK_BLKNO || blkno > fs->fs_blocks)
		return OCFS2_ET_BAD_BLKNO;

	ret = ocfs2_malloc_block(fs->fs_io, &raw);
	if (ret)
		return ret;

	disk_gd = (struct ocfs2_group_desc *)raw;

	ret = ocfs2_read_blocks(fs, blkno, 1, raw);
	if (!ret)
		ret = ocfs2_validate_meta_ecc(fs, raw, &disk_gd->bg_check);
	if (!ret && memcmp(disk_gd->bg_signature, OCFS2_GROUP_DESC_SIGNATURE,
			   strlen(OCFS2_GROUP_DESC_SIGNATURE)) != 0)
		ret = OCFS2_ET_BAD_GROUP_DESC_MAGIC;

	if (!ret) {
		memcpy(gd_buf, raw, fs->fs_blocksize);
		ocfs2_swap_group_desc_to_cpu(fs,
				(struct ocfs2_group_desc *)gd_buf);
	}

	ocfs2_free(&raw);
	return ret;
}
/*
 * Block-iteration callback that fills the buffer tracked by the
 * struct read_whole_context in @priv_data, one block per call.
 *
 * A block from an unwritten extent (OCFS2_EXT_UNWRITTEN) is zero-filled
 * instead of being read.  The outcome is recorded in ctx->errcode; on
 * success ctx->ptr and ctx->offset advance by one block.  @bcount is
 * unused.
 *
 * Returns 0 to continue the iteration, OCFS2_BLOCK_ABORT on a read error.
 */
static int read_whole_func(ocfs2_filesys *fs, uint64_t blkno,
			   uint64_t bcount, uint16_t ext_flags,
			   void *priv_data)
{
	struct read_whole_context *ctx = priv_data;

	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
		ctx->errcode = ocfs2_read_blocks(fs, blkno, 1, ctx->ptr);
		if (ctx->errcode)
			return OCFS2_BLOCK_ABORT;
	} else {
		memset(ctx->ptr, 0, fs->fs_blocksize);
		ctx->errcode = 0;
	}

	ctx->ptr += fs->fs_blocksize;
	ctx->offset += fs->fs_blocksize;

	return 0;
}
static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, struct ocfs2_slot_info *si) { int status = 0; u64 blkno; unsigned long long blocks, bytes = 0; unsigned int i; struct buffer_head *bh; status = ocfs2_slot_map_physical_size(osb, si->si_inode, &bytes); if (status) goto bail; blocks = ocfs2_blocks_for_bytes(si->si_inode->i_sb, bytes); BUG_ON(blocks > UINT_MAX); si->si_blocks = blocks; if (!si->si_blocks) goto bail; if (si->si_extended) si->si_slots_per_block = (osb->sb->s_blocksize / sizeof(struct ocfs2_extended_slot)); else si->si_slots_per_block = osb->sb->s_blocksize / sizeof(__le16); /* The size checks above should ensure this */ BUG_ON((osb->max_slots / si->si_slots_per_block) > blocks); trace_ocfs2_map_slot_buffers(bytes, si->si_blocks); si->si_bh = kcalloc(si->si_blocks, sizeof(struct buffer_head *), GFP_KERNEL); if (!si->si_bh) { status = -ENOMEM; mlog_errno(status); goto bail; } for (i = 0; i < si->si_blocks; i++) { status = ocfs2_extent_map_get_blocks(si->si_inode, i, &blkno, NULL, NULL); if (status < 0) { mlog_errno(status); goto bail; } trace_ocfs2_map_slot_buffers_block((unsigned long long)blkno, i); bh = NULL; /* Acquire a fresh bh */ status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno, 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL); if (status < 0) { mlog_errno(status); goto bail; } si->si_bh[i] = bh; } bail: return status; }
static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args) { struct super_block *sb; struct ocfs2_super *osb; struct ocfs2_dinode *fe; struct buffer_head *bh = NULL; int status, can_lock; u32 generation = 0; mlog_entry("(0x%p, 0x%p)\n", inode, args); status = -EINVAL; if (inode == NULL || inode->i_sb == NULL) { mlog(ML_ERROR, "bad inode\n"); return status; } sb = inode->i_sb; osb = OCFS2_SB(sb); if (!args) { mlog(ML_ERROR, "bad inode args\n"); make_bad_inode(inode); return status; } /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. * * Generally, OCFS2 never trusts the contents of an inode * unless it's holding a cluster lock, so taking it here isn't * a correctness issue as much as it is a performance * improvement. * * There are three times when taking the lock is not a good idea: * * 1) During startup, before we have initialized the DLM. * * 2) If we are reading certain system files which never get * cluster locks (local alloc, truncate log). * * 3) If the process doing the iget() is responsible for * orphan dir recovery. We're holding the orphan dir lock and * can get into a deadlock with another process on another * node in ->delete_inode(). * * #1 and #2 can be simply solved by never taking the lock * here for system files (which are the only type we read * during mount). It's a heavier approach, but our main * concern is user-accesible files anyway. * * #3 works itself out because we'll eventually take the * cluster lock before trusting anything anyway. */ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) && !ocfs2_mount_local(osb); /* * To maintain backwards compatibility with older versions of * ocfs2-tools, we still store the generation value for system * files. The only ones that actually matter to userspace are * the journals, but it's easier and inexpensive to just flag * all system files similarly. 
*/ if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) generation = osb->fs_generation; ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_inode_lockres, OCFS2_LOCK_TYPE_META, generation, inode); ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, OCFS2_LOCK_TYPE_OPEN, 0, inode); if (can_lock) { status = ocfs2_open_lock(inode); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } status = ocfs2_inode_lock(inode, NULL, 0); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } } if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { status = ocfs2_try_open_lock(inode, 0); if (status) { make_bad_inode(inode); return status; } } if (can_lock) status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, OCFS2_BH_IGNORE_CACHE); else status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); if (status < 0) { mlog_errno(status); goto bail; } status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { mlog(0, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)args->fi_blkno, 7, fe->i_signature); goto bail; } /* * This is a code bug. Right now the caller needs to * understand whether it is asking for a system file inode or * not so the proper lock names can be built. */ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), "Inode %llu: system file state is ambigous\n", (unsigned long long)args->fi_blkno); if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); if (ocfs2_populate_inode(inode, fe, 0) < 0) goto bail; BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); status = 0; bail: if (can_lock) ocfs2_inode_unlock(inode, 0); if (status < 0) make_bad_inode(inode); if (args && bh) brelse(bh); mlog_exit(status); return status; }
/*
 * Read and validate the superblock stored at block @superblock.
 *
 * @sb: when NULL, the validated CPU-order block becomes fs->fs_super and
 *      its buffer is kept; otherwise fs->fs_blocksize bytes are copied to
 *      @sb and the temporary buffer is freed.
 *
 * Returns 0 on success, OCFS2_ET_BAD_MAGIC on a signature mismatch, or
 * the error from allocation, read or ECC validation.
 */
errcode_t ocfs2_read_super(ocfs2_filesys *fs, uint64_t superblock, char *sb)
{
	int io_blocksize = io_get_blksize(fs->fs_io);
	struct ocfs2_dinode *di, *saved_super;
	int saved_blocksize;
	char *blk, *scratch;
	errcode_t ret;

	ret = ocfs2_malloc_block(fs->fs_io, &blk);
	if (ret)
		return ret;

	ret = ocfs2_read_blocks(fs, superblock, 1, blk);
	if (ret)
		goto free_blk;

	di = (struct ocfs2_dinode *)blk;
	if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
		   strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) != 0) {
		ret = OCFS2_ET_BAD_MAGIC;
		goto free_blk;
	}

	/*
	 * Validation should use the superblock just read.
	 * ocfs2_validate_meta_ecc() consults fs->fs_super and
	 * fs->fs_blocksize, so temporarily point them at a local-endian
	 * copy while the unswapped block in blk is being checked, then
	 * restore the previous values.
	 */
	ret = ocfs2_malloc_block(fs->fs_io, &scratch);
	if (ret)
		goto free_blk;
	memcpy(scratch, blk, io_blocksize);

	saved_super = fs->fs_super;
	saved_blocksize = fs->fs_blocksize;

	fs->fs_super = (struct ocfs2_dinode *)scratch;
	fs->fs_blocksize = io_blocksize;
	ocfs2_swap_inode_to_cpu(fs, fs->fs_super);

	ret = ocfs2_validate_meta_ecc(fs, blk, &di->i_check);

	fs->fs_super = saved_super;
	fs->fs_blocksize = saved_blocksize;
	ocfs2_free(&scratch);

	if (ret)
		goto free_blk;

	ocfs2_swap_inode_to_cpu(fs, di);

	if (sb == NULL) {
		/* Ownership of blk moves to the filesystem handle. */
		fs->fs_super = di;
		return 0;
	}

	/* ret is 0 here; the caller gets a copy and blk is released. */
	memcpy(sb, blk, fs->fs_blocksize);

free_blk:
	ocfs2_free(&blk);
	return ret;
}
/*
 * Clear the bytes that lie beyond @new_size but still inside the
 * allocated cluster containing it, so that a later extend of the file
 * does not expose stale non-zero data.
 *
 * Nothing is done for @new_size == 0 or when the tail block is a hole.
 * A refcounted tail extent is CoW'ed first.
 *
 * Returns 0 on success or the first error from the extent lookup, CoW,
 * allocation, read or write.
 */
static errcode_t ocfs2_zero_tail_for_truncate(ocfs2_cached_inode *ci,
					      uint64_t new_size)
{
	ocfs2_filesys *fs = ci->ci_fs;
	int bpc = fs->fs_clustersize / fs->fs_blocksize;
	uint64_t start_blk, p_blkno, contig_blocks, start_off;
	int count, byte_counts;
	uint16_t ext_flags;
	char *buf = NULL;
	errcode_t ret;

	if (!new_size)
		return 0;

	start_blk = new_size / fs->fs_blocksize;

	ret = ocfs2_extent_map_get_blocks(ci, start_blk, 1, &p_blkno,
					  &contig_blocks, &ext_flags);
	if (ret)
		return ret;

	/* Tail is a hole. */
	if (!p_blkno)
		return ret;

	if (ext_flags & OCFS2_EXT_REFCOUNTED) {
		uint32_t cpos = ocfs2_blocks_to_clusters(fs, start_blk);

		ret = ocfs2_refcount_cow(ci, cpos, 1, cpos + 1);
		if (ret)
			return ret;

		/* Map the block again now that the cluster has been CoW'ed. */
		ret = ocfs2_extent_map_get_blocks(ci, start_blk, 1, &p_blkno,
						  &contig_blocks, &ext_flags);
		if (ret)
			return ret;

		assert(!(ext_flags & OCFS2_EXT_REFCOUNTED) && p_blkno);
	}

	/* Number of blocks from p_blkno through the end of its cluster. */
	count = bpc - (p_blkno & (bpc - 1));

	ret = ocfs2_malloc_blocks(fs->fs_io, count, &buf);
	if (ret)
		goto out;

	ret = ocfs2_read_blocks(fs, p_blkno, count, buf);
	if (ret)
		goto out;

	/* Zero from new_size's offset in the first block to the cluster end. */
	start_off = new_size % fs->fs_blocksize;
	byte_counts = count * fs->fs_blocksize - start_off;
	memset(buf + start_off, 0, byte_counts);

	ret = io_write_block(fs->fs_io, p_blkno, count, buf);

out:
	if (buf)
		ocfs2_free(&buf);
	return ret;
}
/*
 * Read up to @count bytes at @offset from the file behind @ci into @buf.
 *
 * Inline-data inodes are delegated to ocfs2_inline_data_read().
 * Otherwise @count, @offset and the address of @buf must all be multiples
 * of the filesystem block size (O_DIRECT-style alignment).  Holes and
 * unwritten extents read back as zeros.
 *
 * @got: receives the number of bytes produced, clamped so that
 *       @offset + *got does not exceed i_size.  It is not written when
 *       the alignment check fails; after a mid-read error it holds the
 *       bytes produced so far.
 *
 * Returns 0 on success (including a read starting at or past EOF),
 * OCFS2_ET_INVALID_ARGUMENT for misaligned arguments, or the error from
 * the extent lookup or the block read.
 */
errcode_t ocfs2_file_read(ocfs2_cached_inode *ci, void *buf, uint32_t count,
			  uint64_t offset, uint32_t *got)
{
	ocfs2_filesys *fs = ci->ci_fs;
	char *ptr = (char *)buf;
	errcode_t ret = 0;
	uint32_t align_mask;
	uint32_t bits;
	uint32_t wanted_blocks;
	uint64_t num_blocks;
	uint64_t v_blkno;
	uint64_t p_blkno;
	uint64_t contig_blocks;
	uint16_t extent_flags;

	if (ci->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)
		return ocfs2_inline_data_read(ci->ci_inode, buf, count,
					      offset, got);

	/* o_direct requires aligned io */
	align_mask = fs->fs_blocksize - 1;
	if ((count & align_mask) ||
	    (offset & (uint64_t)align_mask) ||
	    ((unsigned long)ptr & align_mask))
		return OCFS2_ET_INVALID_ARGUMENT;

	bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;

	wanted_blocks = count >> bits;
	v_blkno = offset >> bits;
	*got = 0;

	/* Never read past the last block that holds file data. */
	num_blocks = (ci->ci_inode->i_size + fs->fs_blocksize - 1) >> bits;
	if (v_blkno >= num_blocks)
		return 0;
	if (v_blkno + wanted_blocks > num_blocks)
		wanted_blocks = (uint32_t)(num_blocks - v_blkno);

	while (wanted_blocks) {
		ret = ocfs2_extent_map_get_blocks(ci, v_blkno, 1, &p_blkno,
						  &contig_blocks,
						  &extent_flags);
		if (ret)
			return ret;

		if (contig_blocks > wanted_blocks)
			contig_blocks = wanted_blocks;

		if (p_blkno && !(extent_flags & OCFS2_EXT_UNWRITTEN)) {
			ret = ocfs2_read_blocks(fs, p_blkno, contig_blocks,
						ptr);
			if (ret)
				return ret;
		} else {
			/* A hole or an unwritten extent reads as zeros. */
			memset(ptr, 0, contig_blocks * fs->fs_blocksize);
		}

		*got += (contig_blocks << bits);
		wanted_blocks -= contig_blocks;

		if (!wanted_blocks) {
			/* Last chunk: trim the byte count back to i_size. */
			if (*got + offset > ci->ci_inode->i_size)
				*got = (uint32_t)(ci->ci_inode->i_size -
						  offset);
			break;
		}

		ptr += (contig_blocks << bits);
		v_blkno += (uint64_t)contig_blocks;
	}

	return ret;
}