uint64_t ocfs2_get_block_from_group(ocfs2_filesys *fs,
				    struct ocfs2_group_desc *grp,
				    int bpc, int bit_offset)
{
	int cpos, i;
	struct ocfs2_extent_rec *rec = NULL;
	int block_per_bit = ocfs2_clusters_to_blocks(fs, 1) / bpc;

	if (!ocfs2_gd_is_discontig(grp))
		return grp->bg_blkno + bit_offset * block_per_bit;

	/* handle discontiguous group. */
	cpos = bit_offset / bpc;
	for (i = 0; i < grp->bg_list.l_next_free_rec; i++) {
		rec = &grp->bg_list.l_recs[i];
		if (rec->e_cpos <= cpos &&
		    rec->e_cpos + rec->e_leaf_clusters > cpos)
			break;
	}

	if (!rec || i == grp->bg_list.l_next_free_rec)
		abort();

	return rec->e_blkno + (bit_offset * block_per_bit -
			       ocfs2_clusters_to_blocks(fs, rec->e_cpos));
}
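/*
 * Every snippet in this section leans on ocfs2_clusters_to_blocks().  As a
 * minimal standalone sketch (not the libocfs2 or kernel implementation, and
 * with the *_bits parameters standing in for the superblock fields the real
 * helper reads), the conversion is a left shift by the difference of the two
 * power-of-two size exponents:
 */
#include <stdint.h>

static uint64_t sketch_clusters_to_blocks(uint32_t clusters,
					  int clustersize_bits,
					  int blocksize_bits)
{
	/* e.g. a 1MB cluster (bits = 20) of 4KB blocks (bits = 12)
	 * covers 1 << 8 = 256 blocks */
	return (uint64_t)clusters << (clustersize_bits - blocksize_bits);
}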
static int block_iterate_func(ocfs2_filesys *fs,
			      struct ocfs2_extent_rec *rec,
			      int tree_depth, uint32_t ccount,
			      uint64_t ref_blkno, int ref_recno,
			      void *priv_data)
{
	struct block_context *ctxt = priv_data;
	uint64_t blkno, bcount, bend;
	int iret = 0;

	bcount = ocfs2_clusters_to_blocks(fs, rec->e_cpos);
	bend = bcount + ocfs2_clusters_to_blocks(fs,
				ocfs2_rec_clusters(tree_depth, rec));

	for (blkno = rec->e_blkno; bcount < bend; blkno++, bcount++) {
		if (((bcount * fs->fs_blocksize) >= ctxt->inode->i_size) &&
		    !(ctxt->flags & OCFS2_BLOCK_FLAG_APPEND))
			break;
		iret = (*ctxt->func)(fs, blkno, bcount, rec->e_flags,
				     ctxt->priv_data);
		if (iret & OCFS2_BLOCK_ABORT)
			break;
	}

	return iret;
}
/* given a cluster offset, calculate which block group it belongs to
 * and return that block offset. */
static inline u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u32 group_no;

	BUG_ON(!ocfs2_is_cluster_bitmap(inode));

	group_no = cluster / osb->bitmap_cpg;
	if (!group_no)
		return osb->first_cluster_group_blkno;
	return ocfs2_clusters_to_blocks(inode->i_sb,
					group_no * osb->bitmap_cpg);
}
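/*
 * A standalone restatement of the lookup above, with the on-disk fields
 * passed in as plain parameters (hypothetical names, following the earlier
 * sketch).  The one subtlety is group 0: the first cluster group sits at a
 * fixed block (first_cluster_group_blkno) rather than at the block that
 * cluster 0 would map to.
 */
#include <stdint.h>

static uint64_t sketch_which_cluster_group(uint32_t cluster,
					   uint32_t bitmap_cpg,
					   uint64_t first_group_blkno,
					   int c_to_b_bits)
{
	uint32_t group_no = cluster / bitmap_cpg;

	if (!group_no)
		return first_group_blkno;
	/* later groups start exactly on a cluster boundary */
	return (uint64_t)(group_no * bitmap_cpg) << c_to_b_bits;
}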
errcode_t ocfs2_extent_map_get_blocks(ocfs2_cached_inode *cinode,
				      uint64_t v_blkno, int count,
				      uint64_t *p_blkno, uint64_t *ret_count,
				      uint16_t *extent_flags)
{
	errcode_t ret;
	int bpc;
	uint32_t cpos, num_clusters = -1, p_cluster = -1;
	uint64_t boff = 0;
	ocfs2_filesys *fs = cinode->ci_fs;

	bpc = ocfs2_clusters_to_blocks(fs, 1);
	cpos = ocfs2_blocks_to_clusters(fs, v_blkno);

	ret = ocfs2_get_clusters(cinode, cpos, &p_cluster,
				 &num_clusters, extent_flags);
	if (ret)
		goto out;

	/*
	 * p_cluster == 0 indicates a hole.
	 */
	if (p_cluster) {
		boff = ocfs2_clusters_to_blocks(fs, p_cluster);
		boff += (v_blkno & (uint64_t)(bpc - 1));
	}

	*p_blkno = boff;

	if (ret_count) {
		*ret_count = ocfs2_clusters_to_blocks(fs, num_clusters);
		*ret_count -= v_blkno & (uint64_t)(bpc - 1);
	}

out:
	return ret;
}
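/*
 * A hedged usage sketch for the function above, assuming the libocfs2
 * declarations are in scope: a caller walks a file's virtual blocks by
 * advancing v_blkno by *ret_count each iteration and treats *p_blkno == 0
 * as a hole.  This mirrors the loop in ocfs2_allocate_unwritten_extents
 * further down; error handling is elided.
 */
static void sketch_walk_file_blocks(ocfs2_cached_inode *ci,
				    uint64_t v_blkno, uint64_t v_end)
{
	uint64_t p_blkno, count;

	while (v_blkno <= v_end) {
		if (ocfs2_extent_map_get_blocks(ci, v_blkno, 1, &p_blkno,
						&count, NULL))
			break;
		if (!p_blkno) {
			/* hole: no physical blocks back this range */
		}
		v_blkno += count;
	}
}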
static struct buffer_head *
ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
			       struct inode *alloc_inode,
			       struct ocfs2_alloc_context *ac,
			       struct ocfs2_chain_list *cl)
{
	int status;
	u32 bit_off, num_bits;
	u64 bg_blkno;
	struct buffer_head *bg_bh;
	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);

	status = ocfs2_claim_clusters(handle, ac,
				      le16_to_cpu(cl->cl_cpg), &bit_off,
				      &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	/* setup the group */
	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_block_group_alloc_contig(
	     (unsigned long long)bg_blkno, alloc_rec);

	bg_bh = sb_getblk(osb->sb, bg_blkno);
	if (!bg_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);

	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
					bg_blkno, num_bits, alloc_rec, cl);
	if (status < 0) {
		brelse(bg_bh);
		mlog_errno(status);
	}

bail:
	return status ? ERR_PTR(status) : bg_bh;
}
static PyObject *
fs_blocks_to_clusters (Filesystem *self,
		       PyObject   *args,
		       PyObject   *kwargs)
{
	unsigned long long blocks;
	uint32_t clusters;

	static char *kwlist[] = { "blocks", NULL };

	if (!PyArg_ParseTupleAndKeywords (args, kwargs,
					  "K:blocks_to_clusters", kwlist,
					  &blocks))
		return NULL;

	/* convert in the direction the method name promises; the original
	 * called ocfs2_clusters_to_blocks() here by mistake */
	clusters = ocfs2_blocks_to_clusters (self->fs, blocks);

	return PyInt_FromLong (clusters);
}
static PyObject *
fs_clusters_to_blocks (Filesystem *self,
		       PyObject   *args,
		       PyObject   *kwargs)
{
	unsigned int clusters;
	uint64_t blocks;

	static char *kwlist[] = { "clusters", NULL };

	if (!PyArg_ParseTupleAndKeywords (args, kwargs,
					  "I:clusters_to_blocks", kwlist,
					  &clusters))
		return NULL;

	blocks = ocfs2_clusters_to_blocks (self->fs, clusters);

	return PyLong_FromUnsignedLongLong (blocks);
}
int ocfs2_free_clusters(handle_t *handle,
			struct inode *bitmap_inode,
			struct buffer_head *bitmap_bh,
			u64 start_blk,
			unsigned int num_clusters)
{
	int status;
	u16 bg_start_bit;
	u64 bg_blkno;
	struct ocfs2_dinode *fe;

	/* You can't ever have a contiguous set of clusters
	 * bigger than a block group bitmap so we never have to worry
	 * about looping on them. */

	mlog_entry_void();

	/* This is expensive. We can safely remove once this stuff has
	 * gotten tested really well. */
	BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb,
				ocfs2_blocks_to_clusters(bitmap_inode->i_sb,
							 start_blk)));

	fe = (struct ocfs2_dinode *) bitmap_bh->b_data;

	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
				     &bg_start_bit);

	mlog(0, "want to free %u clusters starting at block %llu\n",
	     num_clusters, (unsigned long long)start_blk);
	mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
	     (unsigned long long)bg_blkno, bg_start_bit);

	status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
					  bg_start_bit, bg_blkno,
					  num_clusters);
	if (status < 0)
		mlog_errno(status);

	mlog_exit(status);
	return status;
}
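/*
 * The BUG_ON above verifies that start_blk falls on a cluster boundary by
 * round-tripping blocks -> clusters -> blocks.  With power-of-two sizes the
 * same check reduces to a mask test; c_to_b_bits is the hypothetical
 * "blocks per cluster" exponent from the earlier sketch.
 */
#include <stdint.h>

static int sketch_blkno_is_cluster_aligned(uint64_t blkno, int c_to_b_bits)
{
	/* aligned iff the low "blocks per cluster" bits are clear */
	return (blkno & ((1ULL << c_to_b_bits) - 1)) == 0;
}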
/* Check to see if the local alloc window is within ac->ac_max_block */
static int ocfs2_local_alloc_in_range(struct inode *inode,
				      struct ocfs2_alloc_context *ac,
				      u32 bits_wanted)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_dinode *alloc;
	struct ocfs2_local_alloc *la;
	int start;
	u64 block_off;

	if (!ac->ac_max_block)
		return 1;

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
	if (start == -1) {
		mlog_errno(-ENOSPC);
		return 0;
	}

	/*
	 * Converting (bm_off + start + bits_wanted) to blocks gives us
	 * the blkno just past our actual allocation.  This is perfect
	 * to compare with ac_max_block.
	 */
	block_off = ocfs2_clusters_to_blocks(inode->i_sb,
					     le32_to_cpu(la->la_bm_off) +
					     start + bits_wanted);
	mlog(0, "Checking %llu against %llu\n",
	     (unsigned long long)block_off,
	     (unsigned long long)ac->ac_max_block);
	if (block_off > ac->ac_max_block)
		return 0;

	return 1;
}
/* return 0 on success, -ENOSPC to keep searching and any other < 0
 * value on error. */
static int ocfs2_cluster_group_search(struct inode *inode,
				      struct buffer_head *group_bh,
				      u32 bits_wanted, u32 min_bits,
				      u64 max_block,
				      u16 *bit_off, u16 *bits_found)
{
	int search = -ENOSPC;
	int ret;
	u64 blkoff;
	struct ocfs2_group_desc *gd =
		(struct ocfs2_group_desc *) group_bh->b_data;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	u16 tmp_off, tmp_found;
	unsigned int max_bits, gd_cluster_off;

	BUG_ON(!ocfs2_is_cluster_bitmap(inode));

	if (gd->bg_free_bits_count) {
		max_bits = le16_to_cpu(gd->bg_bits);

		/* Tail groups in cluster bitmaps which aren't cpg
		 * aligned are prone to partial extension by a failed
		 * fs resize.  If the file system resize never got to
		 * update the dinode cluster count, then we don't want
		 * to trust any clusters past it, regardless of what
		 * the group descriptor says. */
		gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
						le64_to_cpu(gd->bg_blkno));
		if ((gd_cluster_off + max_bits) >
		    OCFS2_I(inode)->ip_clusters) {
			max_bits = OCFS2_I(inode)->ip_clusters -
				   gd_cluster_off;
			mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
			     (unsigned long long)le64_to_cpu(gd->bg_blkno),
			     le16_to_cpu(gd->bg_bits),
			     OCFS2_I(inode)->ip_clusters, max_bits);
		}

		ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
							group_bh, bits_wanted,
							max_bits,
							&tmp_off, &tmp_found);
		if (ret)
			return ret;

		if (max_block) {
			blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
							  gd_cluster_off +
							  tmp_off + tmp_found);
			mlog(0, "Checking %llu against %llu\n",
			     (unsigned long long)blkoff,
			     (unsigned long long)max_block);
			if (blkoff > max_block)
				return -ENOSPC;
		}

		/* ocfs2_block_group_find_clear_bits() might
		 * return success, but we still want to return
		 * -ENOSPC unless it found the minimum number
		 * of bits. */
		if (min_bits <= tmp_found) {
			*bit_off = tmp_off;
			*bits_found = tmp_found;
			search = 0; /* success */
		} else if (tmp_found) {
			/*
			 * Don't show bits which we'll be returning
			 * for allocation to the local alloc bitmap.
			 */
			ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
		}
	}

	return search;
}
/*
 * sync the local alloc to main bitmap.
 *
 * assumes you've already locked the main bitmap -- the bitmap inode
 * passed is used for caching.
 */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh)
{
	int status = 0;
	int bit_off, left, count, start;
	u64 la_start_blk;
	u64 blkno;
	void *bitmap;
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);

	mlog_entry("total = %u, used = %u\n",
		   le32_to_cpu(alloc->id1.bitmap1.i_total),
		   le32_to_cpu(alloc->id1.bitmap1.i_used));

	if (!alloc->id1.bitmap1.i_total) {
		mlog(0, "nothing to sync!\n");
		goto bail;
	}

	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
		mlog(0, "all bits were taken!\n");
		goto bail;
	}

	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
						le32_to_cpu(la->la_bm_off));
	bitmap = la->la_bitmap;
	start = count = bit_off = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);

	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
	       != -1) {
		if ((bit_off < left) && (bit_off == start)) {
			count++;
			start++;
			continue;
		}
		if (count) {
			blkno = la_start_blk +
				ocfs2_clusters_to_blocks(osb->sb,
							 start - count);

			mlog(0, "freeing %u bits starting at local alloc bit "
			     "%u (la_start_blk = %llu, blkno = %llu)\n",
			     count, start - count,
			     (unsigned long long)la_start_blk,
			     (unsigned long long)blkno);

			status = ocfs2_free_clusters(handle, main_bm_inode,
						     main_bm_bh, blkno, count);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		if (bit_off >= left)
			break;
		count = 1;
		start = bit_off + 1;
	}

bail:
	mlog_exit(status);
	return status;
}
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	trace_ocfs2_symlink_get_block(
			(unsigned long long)OCFS2_I(inode)->ip_blkno,
			(unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_inode_block(inode, &bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr);
			set_buffer_uptodate(bh_result);
		}
		brelse(buffer_cache_bh);
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	brelse(bh);

	return err;
}
static struct buffer_head *
ocfs2_block_group_alloc_discontig(handle_t *handle,
				  struct inode *alloc_inode,
				  struct ocfs2_alloc_context *ac,
				  struct ocfs2_chain_list *cl)
{
	int status;
	u32 bit_off, num_bits;
	u64 bg_blkno;
	unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
	struct buffer_head *bg_bh = NULL;
	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);

	if (!ocfs2_supports_discontig_bg(osb)) {
		status = -ENOSPC;
		goto bail;
	}

	status = ocfs2_extend_trans(handle,
				    ocfs2_calc_bg_discontig_credits(osb->sb));
	if (status) {
		mlog_errno(status);
		goto bail;
	}

	/*
	 * We're going to be grabbing from multiple cluster groups.
	 * We don't have enough credits to relink them all, and the
	 * cluster groups will be staying in cache for the duration of
	 * this operation.
	 */
	ac->ac_allow_chain_relink = 0;

	/* Claim the first region */
	status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
					      &bit_off, &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}
	min_bits = num_bits;

	/* setup the group */
	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	trace_ocfs2_block_group_alloc_discontig(
				(unsigned long long)bg_blkno, alloc_rec);

	bg_bh = sb_getblk(osb->sb, bg_blkno);
	if (!bg_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);

	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
					bg_blkno, num_bits, alloc_rec, cl);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
						  bg_bh, ac, cl, min_bits);
	if (status)
		mlog_errno(status);

bail:
	if (status)
		ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
	return status ? ERR_PTR(status) : bg_bh;
}
static int ocfs2_block_group_grow_discontig(handle_t *handle,
					    struct inode *alloc_inode,
					    struct buffer_head *bg_bh,
					    struct ocfs2_alloc_context *ac,
					    struct ocfs2_chain_list *cl,
					    unsigned int min_bits)
{
	int status;
	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
	struct ocfs2_group_desc *bg =
		(struct ocfs2_group_desc *)bg_bh->b_data;
	unsigned int needed = le16_to_cpu(cl->cl_cpg) -
			 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
	u32 p_cpos, clusters;
	u64 p_blkno;
	struct ocfs2_extent_list *el = &bg->bg_list;

	status = ocfs2_journal_access_gd(handle,
					 INODE_CACHE(alloc_inode),
					 bg_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
				le16_to_cpu(el->l_count))) {
		if (min_bits > needed)
			min_bits = needed;
		status = ocfs2_block_group_claim_bits(osb, handle, ac,
						      min_bits, &p_cpos,
						      &clusters);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}
		p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
		ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
					      clusters);

		min_bits = clusters;
		needed = le16_to_cpu(cl->cl_cpg) -
			 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
	}

	if (needed > 0) {
		/*
		 * We have used up all the extent records but still can't
		 * fill the group.  Bail out.
		 */
		status = -ENOSPC;
		goto bail;
	}

	ocfs2_journal_dirty(handle, bg_bh);

bail:
	return status;
}
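/*
 * The "needed" computation in the loop above, restated as a standalone
 * sketch with parameter names following the snippet: a cluster group must
 * span cl_cpg clusters, bg_bits counts the suballocation bits already
 * backed by storage, and cl_bpc is bits per cluster, so the shortfall in
 * clusters is the difference below (assuming bg_bits never exceeds a full
 * group's worth of bits).
 */
static unsigned int sketch_clusters_still_needed(unsigned int cl_cpg,
						 unsigned int bg_bits,
						 unsigned int cl_bpc)
{
	return cl_cpg - bg_bits / cl_bpc;
}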
/*
 * We expect the block group allocator to already be locked.
 */
static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
				   struct inode *alloc_inode,
				   struct buffer_head *bh)
{
	int status, credits;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
	struct ocfs2_chain_list *cl;
	struct ocfs2_alloc_context *ac = NULL;
	handle_t *handle = NULL;
	u32 bit_off, num_bits;
	u16 alloc_rec;
	u64 bg_blkno;
	struct buffer_head *bg_bh = NULL;
	struct ocfs2_group_desc *bg;

	BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));

	mlog_entry_void();

	cl = &fe->id2.i_chain;
	status = ocfs2_reserve_clusters(osb, le16_to_cpu(cl->cl_cpg), &ac);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	credits = ocfs2_calc_group_alloc_credits(osb->sb,
						 le16_to_cpu(cl->cl_cpg));
	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto bail;
	}

	status = ocfs2_claim_clusters(osb, handle, ac,
				      le16_to_cpu(cl->cl_cpg),
				      &bit_off, &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto bail;
	}

	alloc_rec = ocfs2_find_smallest_chain(cl);

	/* setup the group */
	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "new descriptor, record %u, at block %llu\n",
	     alloc_rec, (unsigned long long)bg_blkno);

	bg_bh = sb_getblk(osb->sb, bg_blkno);
	if (!bg_bh) {
		status = -EIO;
		mlog_errno(status);
		goto bail;
	}
	ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);

	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
					bg_blkno, alloc_rec, cl);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	bg = (struct ocfs2_group_desc *) bg_bh->b_data;

	status = ocfs2_journal_access(handle, alloc_inode,
				      bh, OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
		     le16_to_cpu(bg->bg_free_bits_count));
	le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
		     le16_to_cpu(bg->bg_bits));
	cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno);
	if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
		le16_add_cpu(&cl->cl_next_free_rec, 1);

	le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
					le16_to_cpu(bg->bg_free_bits_count));
	le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
	le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
	OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
	fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
					     le32_to_cpu(fe->i_clusters)));
	spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
	i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
	alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);

	status = 0;
bail:
	if (handle)
		ocfs2_commit_trans(osb, handle);

	if (ac)
		ocfs2_free_alloc_context(ac);

	if (bg_bh)
		brelse(bg_bh);

	mlog_exit(status);
	return status;
}
/*
 * sync the local alloc to main bitmap.
 *
 * assumes you've already locked the main bitmap -- the bitmap inode
 * passed is used for caching.
 */
static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
				    handle_t *handle,
				    struct ocfs2_dinode *alloc,
				    struct inode *main_bm_inode,
				    struct buffer_head *main_bm_bh)
{
	int status = 0;
	int bit_off, left, count, start;
	u64 la_start_blk;
	u64 blkno;
	void *bitmap;
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);

	trace_ocfs2_sync_local_to_main(
	     le32_to_cpu(alloc->id1.bitmap1.i_total),
	     le32_to_cpu(alloc->id1.bitmap1.i_used));

	if (!alloc->id1.bitmap1.i_total)
		goto bail;

	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
	    le32_to_cpu(alloc->id1.bitmap1.i_total))
		goto bail;

	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
						le32_to_cpu(la->la_bm_off));
	bitmap = la->la_bitmap;
	start = count = bit_off = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);

	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
	       != -1) {
		if ((bit_off < left) && (bit_off == start)) {
			count++;
			start++;
			continue;
		}
		if (count) {
			blkno = la_start_blk +
				ocfs2_clusters_to_blocks(osb->sb,
							 start - count);

			trace_ocfs2_sync_local_to_main_free(
			     count, start - count,
			     (unsigned long long)la_start_blk,
			     (unsigned long long)blkno);

			status = ocfs2_release_clusters(handle,
							main_bm_inode,
							main_bm_bh, blkno,
							count);
			if (status < 0) {
				mlog_errno(status);
				goto bail;
			}
		}
		if (bit_off >= left)
			break;
		count = 1;
		start = bit_off + 1;
	}

bail:
	if (status)
		mlog_errno(status);
	return status;
}
static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	int err = -EIO;
	int status;
	struct ocfs2_dinode *fe = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *buffer_cache_bh = NULL;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	void *kaddr;

	mlog_entry("(0x%p, %llu, 0x%p, %d)\n", inode,
		   (unsigned long long)iblock, bh_result, create);

	BUG_ON(ocfs2_inode_is_fast_symlink(inode));

	if ((iblock << inode->i_sb->s_blocksize_bits) > PATH_MAX + 1) {
		mlog(ML_ERROR, "block offset > PATH_MAX: %llu",
		     (unsigned long long)iblock);
		goto bail;
	}

	status = ocfs2_read_inode_block(inode, &bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	fe = (struct ocfs2_dinode *) bh->b_data;

	if ((u64)iblock >= ocfs2_clusters_to_blocks(inode->i_sb,
						    le32_to_cpu(fe->i_clusters))) {
		mlog(ML_ERROR, "block offset is outside the allocated size: "
		     "%llu\n", (unsigned long long)iblock);
		goto bail;
	}

	/* We don't use the page cache to create symlink data, so if
	 * need be, copy it over from the buffer cache. */
	if (!buffer_uptodate(bh_result) && ocfs2_inode_is_new(inode)) {
		u64 blkno = le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) +
			    iblock;
		buffer_cache_bh = sb_getblk(osb->sb, blkno);
		if (!buffer_cache_bh) {
			mlog(ML_ERROR, "couldn't getblock for symlink!\n");
			goto bail;
		}

		/* we haven't locked out transactions, so a commit
		 * could've happened. Since we've got a reference on
		 * the bh, even if it commits while we're doing the
		 * copy, the data is still good. */
		if (buffer_jbd(buffer_cache_bh)
		    && ocfs2_inode_is_new(inode)) {
			kaddr = kmap_atomic(bh_result->b_page, KM_USER0);
			if (!kaddr) {
				mlog(ML_ERROR, "couldn't kmap!\n");
				goto bail;
			}
			memcpy(kaddr + (bh_result->b_size * iblock),
			       buffer_cache_bh->b_data,
			       bh_result->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			set_buffer_uptodate(bh_result);
		}
		brelse(buffer_cache_bh);
	}

	map_bh(bh_result, inode->i_sb,
	       le64_to_cpu(fe->id2.i_list.l_recs[0].e_blkno) + iblock);

	err = 0;

bail:
	brelse(bh);

	mlog_exit(err);
	return err;
}
static int __ocfs2_move_extent(handle_t *handle,
			       struct ocfs2_move_extents_context *context,
			       u32 cpos, u32 len, u32 p_cpos, u32 new_p_cpos,
			       int ext_flags)
{
	int ret = 0, index;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_extent_rec *rec, replace_rec;
	struct ocfs2_path *path = NULL;
	struct ocfs2_extent_list *el;
	u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
	u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);

	ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, p_cpos,
					       new_p_cpos, len);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	memset(&replace_rec, 0, sizeof(replace_rec));
	replace_rec.e_cpos = cpu_to_le32(cpos);
	replace_rec.e_leaf_clusters = cpu_to_le16(len);
	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
								   new_p_cpos));

	path = ocfs2_new_path_from_et(&context->et);
	if (!path) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_find_path(INODE_CACHE(inode), path, cpos);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	el = path_leaf_el(path);

	index = ocfs2_search_extent_list(el, cpos);
	if (index == -1) {
		ret = ocfs2_error(inode->i_sb,
				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
				  (unsigned long long)ino, cpos);
		goto out;
	}

	rec = &el->l_recs[index];

	BUG_ON(ext_flags != rec->e_flags);
	/*
	 * After moving/defragging to the new location, the extent is no
	 * longer going to be refcounted.
	 */
	replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED;

	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
				      context->et.et_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_split_extent(handle, &context->et, path, index,
				 &replace_rec, context->meta_ac,
				 &context->dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	ocfs2_journal_dirty(handle, context->et.et_root_bh);

	context->new_phys_cpos = new_p_cpos;

	/*
	 * Do we need to append a truncate log record for the old clusters?
	 */
	if (old_blkno) {
		if (ext_flags & OCFS2_EXT_REFCOUNTED)
			ret = ocfs2_decrease_refcount(inode, handle,
					ocfs2_blocks_to_clusters(osb->sb,
								 old_blkno),
					len, context->meta_ac,
					&context->dealloc, 1);
		else
			ret = ocfs2_truncate_log_append(osb, handle,
							old_blkno, len);
	}

	ocfs2_update_inode_fsync_trans(handle, inode, 0);
out:
	ocfs2_free_path(path);
	return ret;
}
static void mess_up_sys_file(ocfs2_filesys *fs, uint64_t blkno,
			     enum fsck_type type)
{
	errcode_t ret;
	char *buf = NULL, *bufgroup = NULL;
	struct ocfs2_dinode *di;
	struct ocfs2_chain_list *cl;
	struct ocfs2_chain_rec *cr;
	uint64_t oldblkno;
	struct ocfs2_group_desc *bg = NULL;

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (ret)
		FSWRK_COM_FATAL(progname, ret);

	ret = ocfs2_read_inode(fs, blkno, buf);
	if (ret)
		FSWRK_COM_FATAL(progname, ret);

	di = (struct ocfs2_dinode *)buf;

	if (!(di->i_flags & OCFS2_BITMAP_FL))
		FSWRK_COM_FATAL(progname, ret);

	if (!(di->i_flags & OCFS2_CHAIN_FL))
		FSWRK_COM_FATAL(progname, ret);

	cl = &(di->id2.i_chain);

	/* for CHAIN_EMPTY, CHAIN_HEAD_LINK_RANGE, CHAIN_LINK_RANGE,
	 * CHAIN_BITS, CHAIN_LINK_GEN, CHAIN_LINK_MAGIC,
	 * we need to corrupt some chain rec, so check it first.
	 */
	if (type == CHAIN_EMPTY || type == CHAIN_HEAD_LINK_RANGE ||
	    type == CHAIN_LINK_RANGE || type == CHAIN_BITS ||
	    type == CHAIN_LINK_GEN || type == CHAIN_LINK_MAGIC)
		if (!cl->cl_next_free_rec) {
			FSWRK_WARN("No chain record found at block#%"PRIu64
				   ", so can't corrupt it for type[%d].\n",
				   blkno, type);
			goto bail;
		}

	switch (type) {
	case CHAIN_COUNT:
		fprintf(stdout, "Corrupt CHAIN_COUNT: "
			"Modified cl_count in block#%"PRIu64" from %u to %u\n",
			blkno, cl->cl_count, (cl->cl_count + 100));
		cl->cl_count += 100;
		break;
	case CHAIN_NEXT_FREE:
		fprintf(stdout, "Corrupt CHAIN_NEXT_FREE: "
			"Modified cl_next_free_rec in block#%"PRIu64
			" from %u to %u\n",
			blkno, cl->cl_next_free_rec, (cl->cl_count + 10));
		cl->cl_next_free_rec = cl->cl_count + 10;
		break;
	case CHAIN_EMPTY:
		cr = cl->cl_recs;
		fprintf(stdout, "Corrupt CHAIN_EMPTY: "
			"Modified e_blkno in block#%"PRIu64
			" from %"PRIu64" to 0\n",
			blkno, (uint64_t)cr->c_blkno);
		cr->c_blkno = 0;
		break;
	case CHAIN_I_CLUSTERS:
		fprintf(stdout, "Corrupt CHAIN_I_CLUSTERS: "
			"change i_clusters in block#%"PRIu64" from %u to %u\n",
			blkno, di->i_clusters, (di->i_clusters + 10));
		di->i_clusters += 10;
		break;
	case CHAIN_I_SIZE:
		fprintf(stdout, "Corrupt CHAIN_I_SIZE: "
			"change i_size in block#%"PRIu64
			" from %"PRIu64" to %"PRIu64"\n",
			blkno, (uint64_t)di->i_size,
			((uint64_t)di->i_size + 10));
		di->i_size += 10;
		break;
	case CHAIN_GROUP_BITS:
		fprintf(stdout, "Corrupt CHAIN_GROUP_BITS: "
			"change i_used of bitmap in block#%"PRIu64
			" from %u to %u\n",
			blkno, di->id1.bitmap1.i_used,
			(di->id1.bitmap1.i_used + 10));
		di->id1.bitmap1.i_used += 10;
		break;
	case CHAIN_HEAD_LINK_RANGE:
		cr = cl->cl_recs;
		oldblkno = cr->c_blkno;
		cr->c_blkno =
			ocfs2_clusters_to_blocks(fs, fs->fs_clusters) + 10;
		fprintf(stdout, "Corrupt CHAIN_HEAD_LINK_RANGE: "
			"change c_blkno in block#%"PRIu64
			" from %"PRIu64" to %"PRIu64"\n",
			blkno, oldblkno, (uint64_t)cr->c_blkno);
		break;
	case CHAIN_LINK_GEN:
	case CHAIN_LINK_MAGIC:
	case CHAIN_LINK_RANGE:
		ret = ocfs2_malloc_block(fs->fs_io, &bufgroup);
		if (ret)
			FSWRK_COM_FATAL(progname, ret);

		bg = (struct ocfs2_group_desc *)bufgroup;
		cr = cl->cl_recs;

		ret = ocfs2_read_group_desc(fs, cr->c_blkno, (char *)bg);
		if (ret)
			FSWRK_COM_FATAL(progname, ret);

		if (type == CHAIN_LINK_GEN) {
			fprintf(stdout, "Corrupt CHAIN_LINK_GEN: "
				"change generation num from %u to 0x1234\n",
				bg->bg_generation);
			bg->bg_generation = 0x1234;
		} else if (type == CHAIN_LINK_MAGIC) {
			fprintf(stdout, "Corrupt CHAIN_LINK_MAGIC: "
				"change signature to '1234'\n");
			sprintf((char *)bg->bg_signature, "1234");
		} else {
			oldblkno = bg->bg_next_group;
			bg->bg_next_group =
				ocfs2_clusters_to_blocks(fs,
							 fs->fs_clusters) + 10;
			fprintf(stdout, "Corrupt CHAIN_LINK_RANGE: "
				"change next group from %"PRIu64
				" to %"PRIu64"\n",
				oldblkno, (uint64_t)bg->bg_next_group);
		}

		ret = ocfs2_write_group_desc(fs, cr->c_blkno, (char *)bg);
		if (ret)
			FSWRK_COM_FATAL(progname, ret);
		break;
	case CHAIN_BITS:
		cr = cl->cl_recs;
		fprintf(stdout, "Corrupt CHAIN_BITS: "
			"change inode#%"PRIu64" c_total from %u to %u\n",
			blkno, cr->c_total, (cr->c_total + 10));
		cr->c_total += 10;
		break;
	case CHAIN_CPG:
		fprintf(stdout, "Corrupt CHAIN_CPG: "
			"change cl_cpg of global_bitmap from %u to %u.\n",
			cl->cl_cpg, (cl->cl_cpg + 16));
		cl->cl_cpg += 16;
		cl->cl_next_free_rec = 1;
		break;
	default:
		FSWRK_FATAL("Unknown fsck_type[%d]\n", type);
	}

	ret = ocfs2_write_inode(fs, blkno, buf);
	if (ret)
		FSWRK_COM_FATAL(progname, ret);

bail:
	if (bufgroup)
		ocfs2_free(&bufgroup);
	if (buf)
		ocfs2_free(&buf);

	return;
}
errcode_t ocfs2_allocate_unwritten_extents(ocfs2_filesys *fs, uint64_t ino,
					   uint64_t offset, uint64_t len)
{
	errcode_t ret = 0;
	uint32_t n_clusters = 0, cpos;
	uint64_t p_blkno = 0, v_blkno, v_end, contig_blocks, wanted_blocks;
	ocfs2_cached_inode *ci = NULL;

	if (!(fs->fs_flags & OCFS2_FLAG_RW))
		return OCFS2_ET_RO_FILESYS;

	if (!ocfs2_writes_unwritten_extents(OCFS2_RAW_SB(fs->fs_super)))
		return OCFS2_ET_RO_UNSUPP_FEATURE;

	ret = ocfs2_read_cached_inode(fs, ino, &ci);
	if (ret)
		goto out;

	if (!(ci->ci_inode->i_flags & OCFS2_VALID_FL))
		return OCFS2_ET_INODE_NOT_VALID;

	if (ci->ci_inode->i_flags & OCFS2_SYSTEM_FL)
		return OCFS2_ET_INVALID_ARGUMENT;

	if (!S_ISREG(ci->ci_inode->i_mode))
		return OCFS2_ET_INVALID_ARGUMENT;

	v_blkno = offset / fs->fs_blocksize;
	v_end = (offset + len - 1) / fs->fs_blocksize;

	while (v_blkno <= v_end) {
		ret = ocfs2_extent_map_get_blocks(ci, v_blkno, 1,
						  &p_blkno, &contig_blocks,
						  NULL);
		if (ret)
			goto out;	/* was "continue", which would loop
					 * forever on a persistent error */

		if (p_blkno) {
			v_blkno += contig_blocks;
			continue;
		}

		/*
		 * There is a hole, so we have to allocate the space and
		 * insert the unwritten extents.
		 */
		wanted_blocks = ocfs2_min(contig_blocks,
					  v_end - v_blkno + 1);
		n_clusters = ocfs2_clusters_in_blocks(fs, wanted_blocks);
		ret = ocfs2_new_clusters(fs, 1, n_clusters,
					 &p_blkno, &n_clusters);
		if (ret || n_clusters == 0)
			break;

		cpos = ocfs2_blocks_to_clusters(fs, v_blkno);
		ret = ocfs2_cached_inode_insert_extent(ci, cpos,
						       p_blkno, n_clusters,
						       OCFS2_EXT_UNWRITTEN);
		if (ret) {
			/*
			 * XXX: We don't want to overwrite the error
			 * from insert_extent().  But we probably need
			 * to BE LOUDLY UPSET.
			 */
			ocfs2_free_clusters(fs, n_clusters, p_blkno);
			goto out;
		}

		/* save up what we have done. */
		ret = ocfs2_write_cached_inode(fs, ci);
		if (ret)
			goto out;

		v_blkno = ocfs2_clusters_to_blocks(fs, cpos + n_clusters);
	}

	if (ci->ci_inode->i_size <= offset + len) {
		ci->ci_inode->i_size = offset + len;
		ret = ocfs2_write_cached_inode(fs, ci);
	}

out:
	if (ci)
		ocfs2_free_cached_inode(fs, ci);

	return ret;
}
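/*
 * The hole-filling loop above rounds a block span up to whole clusters
 * before allocating.  Assuming ocfs2_clusters_in_blocks() has round-up
 * semantics (hedged; check libocfs2 before relying on this), the arithmetic
 * reduces to the classic add-then-shift idiom:
 */
#include <stdint.h>

static uint32_t sketch_clusters_in_blocks(uint64_t blocks, int c_to_b_bits)
{
	/* adding (blocks per cluster - 1) before the shift rounds up */
	return (uint32_t)((blocks + (1ULL << c_to_b_bits) - 1) >> c_to_b_bits);
}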
/*
 * Use one journal handle to guarantee data consistency in case a crash
 * happens anywhere.
 *
 * XXX: defrag can end up moving only part of the requested extent when
 * not enough contiguous clusters can be found in the allocator.
 */
static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
			       u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
{
	int ret, credits = 0, extra_blocks = 0, partial = context->partial;
	handle_t *handle;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	u32 new_phys_cpos, new_len;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);

	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
			 OCFS2_HAS_REFCOUNT_FL));
		BUG_ON(!context->refcount_loc);

		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
					       &ref_tree, NULL);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		ret = ocfs2_prepare_refcount_change_for_del(inode,
							context->refcount_loc,
							phys_blkno,
							*len,
							&credits,
							&extra_blocks);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
						 &context->meta_ac,
						 &context->data_ac,
						 extra_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * Should we be using an allocation reservation strategy here?
	 *
	 * if (context->data_ac)
	 *	context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
	 */

	inode_lock(tl_inode);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock_mutex;
		}
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock_mutex;
	}

	ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
				     &new_phys_cpos, &new_len);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * Allowing partial extent moving is a trade-off: it makes the whole
	 * defragmentation less likely to fail, but it may also leave the fs
	 * even more fragmented after moving.  Let userspace make the call.
	 */
	if (new_len != *len) {
		mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
		if (!partial) {
			context->range->me_flags &=
				~OCFS2_MOVE_EXT_FL_COMPLETE;
			ret = -ENOSPC;
			goto out_commit;
		}
	}

	mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
	     phys_cpos, new_phys_cpos);

	ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
				  new_phys_cpos, ext_flags);
	if (ret)
		mlog_errno(ret);

	if (partial && (new_len != *len))
		*len = new_len;

	/*
	 * Here we should write the new page out first if we are
	 * in write-back mode.
	 */
	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos,
				       *len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock_mutex:
	inode_unlock(tl_inode);

	if (context->data_ac) {
		ocfs2_free_alloc_context(context->data_ac);
		context->data_ac = NULL;
	}

	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}

out:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);

	return ret;
}
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
			     u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
			     u32 len, int ext_flags)
{
	int ret, credits = 0, extra_blocks = 0, goal_bit = 0;
	handle_t *handle;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *gb_inode = NULL;
	struct buffer_head *gb_bh = NULL;
	struct buffer_head *gd_bh = NULL;
	struct ocfs2_group_desc *gd;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb,
						context->range->me_threshold);
	u64 phys_blkno, new_phys_blkno;

	phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);

	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
			 OCFS2_HAS_REFCOUNT_FL));
		BUG_ON(!context->refcount_loc);

		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
					       &ref_tree, NULL);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		ret = ocfs2_prepare_refcount_change_for_del(inode,
							context->refcount_loc,
							phys_blkno,
							len,
							&credits,
							&extra_blocks);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
						 &context->meta_ac,
						 NULL, extra_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/*
	 * need to count 2 extra credits for global_bitmap inode and
	 * group descriptor.
	 */
	credits += OCFS2_INODE_UPDATE_CREDITS + 1;

	/*
	 * ocfs2_move_extent() didn't reserve any clusters in
	 * lock_allocators() logic, while we still need to lock the
	 * global_bitmap.
	 */
	gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
					       OCFS2_INVALID_SLOT);
	if (!gb_inode) {
		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
		ret = -EIO;
		goto out;
	}

	inode_lock(gb_inode);

	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_unlock_gb_mutex;
	}

	inode_lock(tl_inode);

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock_tl_inode;
	}

	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
	ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno,
					    GLOBAL_BITMAP_SYSTEM_INODE,
					    OCFS2_INVALID_SLOT,
					    &goal_bit, &gd_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/*
	 * probe the victim cluster group to find a proper
	 * region to fit wanted movement, it will even perform
	 * a best-effort attempt by compromising to a threshold
	 * around the goal.
	 */
	ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop,
				new_phys_cpos);
	if (!*new_phys_cpos) {
		/* the original snippet was truncated here; the rest is
		 * reconstructed from the parallel mutex_lock() variant of
		 * this function below, with the unlock calls matched to
		 * inode_lock() */
		ret = -ENOSPC;
		goto out_commit;
	}

	ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
				  *new_phys_cpos, ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	gd = (struct ocfs2_group_desc *)gd_bh->b_data;
	ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len,
					       le16_to_cpu(gd->bg_chain));
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
					 goal_bit, len);
	if (ret)
		mlog_errno(ret);

	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
	brelse(gd_bh);

out_unlock_tl_inode:
	inode_unlock(tl_inode);

	ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
	inode_unlock(gb_inode);
	brelse(gb_bh);
	iput(gb_inode);

out:
	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}

	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);

	return ret;
}
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
			     u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
			     u32 len, int ext_flags)
{
	int ret, credits = 0, extra_blocks = 0, goal_bit = 0;
	handle_t *handle;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct inode *gb_inode = NULL;
	struct buffer_head *gb_bh = NULL;
	struct buffer_head *gd_bh = NULL;
	struct ocfs2_group_desc *gd;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	u32 move_max_hop = ocfs2_blocks_to_clusters(inode->i_sb,
						context->range->me_threshold);
	u64 phys_blkno, new_phys_blkno;

	phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);

	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && len) {
		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
			 OCFS2_HAS_REFCOUNT_FL));
		BUG_ON(!context->refcount_loc);

		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
					       &ref_tree, NULL);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		ret = ocfs2_prepare_refcount_change_for_del(inode,
							context->refcount_loc,
							phys_blkno,
							len,
							&credits,
							&extra_blocks);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
						 &context->meta_ac,
						 NULL, extra_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	credits += OCFS2_INODE_UPDATE_CREDITS + 1;

	gb_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE,
					       OCFS2_INVALID_SLOT);
	if (!gb_inode) {
		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
		ret = -EIO;
		goto out;
	}

	mutex_lock(&gb_inode->i_mutex);

	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
	if (ret) {
		mlog_errno(ret);
		goto out_unlock_gb_mutex;
	}

	mutex_lock(&tl_inode->i_mutex);

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock_tl_inode;
	}

	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
	ret = ocfs2_find_victim_alloc_group(inode, new_phys_blkno,
					    GLOBAL_BITMAP_SYSTEM_INODE,
					    OCFS2_INVALID_SLOT,
					    &goal_bit, &gd_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ocfs2_probe_alloc_group(inode, gd_bh, &goal_bit, len, move_max_hop,
				new_phys_cpos);
	if (!*new_phys_cpos) {
		ret = -ENOSPC;
		goto out_commit;
	}

	ret = __ocfs2_move_extent(handle, context, cpos, len, phys_cpos,
				  *new_phys_cpos, ext_flags);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	gd = (struct ocfs2_group_desc *)gd_bh->b_data;
	ret = ocfs2_alloc_dinode_update_counts(gb_inode, handle, gb_bh, len,
					       le16_to_cpu(gd->bg_chain));
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
					 goal_bit, len);
	if (ret)
		mlog_errno(ret);

	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
	brelse(gd_bh);

out_unlock_tl_inode:
	mutex_unlock(&tl_inode->i_mutex);

	ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
	mutex_unlock(&gb_inode->i_mutex);
	brelse(gb_bh);
	iput(gb_inode);

out:
	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}

	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);

	return ret;
}
int ocfs2_extent_map_get_blocks(struct inode *inode,
				u64 v_blkno, int count,
				u64 *p_blkno, int *ret_count)
{
	int ret;
	u64 boff;
	u32 cpos, clusters;
	int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
	struct ocfs2_extent_map_entry *ent = NULL;
	struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
	struct ocfs2_extent_rec *rec;

	*p_blkno = 0;

	cpos = ocfs2_blocks_to_clusters(inode->i_sb, v_blkno);
	clusters = ocfs2_blocks_to_clusters(inode->i_sb,
					    (u64)count + bpc - 1);
	if ((cpos + clusters) > OCFS2_I(inode)->ip_clusters) {
		ret = -EINVAL;
		mlog_errno(ret);
		return ret;
	}

	if ((cpos + clusters) > em->em_clusters) {
		/*
		 * Size changed underneath us on disk.  Drop any
		 * straddling records and update our idea of
		 * i_clusters
		 */
		ocfs2_extent_map_drop(inode, em->em_clusters - 1);
		em->em_clusters = OCFS2_I(inode)->ip_clusters;
	}

	ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	if (ent) {
		rec = &ent->e_rec;

		/* We should never find ourselves straddling an interval */
		if (!ocfs2_extent_rec_contains_clusters(rec, cpos, clusters)) {
			ret = -ESRCH;
			mlog_errno(ret);
			return ret;
		}

		boff = ocfs2_clusters_to_blocks(inode->i_sb,
						cpos -
						le32_to_cpu(rec->e_cpos));
		boff += (v_blkno & (u64)(bpc - 1));
		*p_blkno = le64_to_cpu(rec->e_blkno) + boff;

		if (ret_count) {
			*ret_count = ocfs2_clusters_to_blocks(inode->i_sb,
					le32_to_cpu(rec->e_clusters)) - boff;
		}

		return 0;
	}

	return -ENOENT;
}
/* the caller will check if er->e_blkno is out of range to determine if it
 * should try removing the record */
static errcode_t check_er(o2fsck_state *ost, struct extent_info *ei,
			  struct ocfs2_dinode *di,
			  struct ocfs2_extent_list *el,
			  struct ocfs2_extent_rec *er, int *changed)
{
	errcode_t ret = 0;
	uint64_t first_block;
	uint32_t last_cluster, clusters;

	clusters = ocfs2_rec_clusters(el->l_tree_depth, er);

	verbosef("cpos %u clusters %u blkno %"PRIu64"\n", er->e_cpos,
		 clusters, (uint64_t)er->e_blkno);

	if (ocfs2_block_out_of_range(ost->ost_fs, er->e_blkno))
		goto out;

	if (el->l_tree_depth) {
		int is_valid = 0;
		/* we only expect a given depth when we descend to extent
		 * blocks from a previous depth.  these start at 0 when the
		 * inode is checked */
		ei->ei_expect_depth = 1;
		ei->ei_expected_depth = el->l_tree_depth - 1;
		check_eb(ost, ei, di, er->e_blkno, &is_valid);
		if (!is_valid &&
		    prompt(ost, PY, PR_EXTENT_EB_INVALID,
			   "The extent record for cluster offset "
			   "%"PRIu32" in inode %"PRIu64" refers to an invalid "
			   "extent block at %"PRIu64".  Clear the reference "
			   "to this invalid block?", er->e_cpos,
			   (uint64_t)di->i_blkno, (uint64_t)er->e_blkno)) {
			er->e_blkno = 0;
			*changed = 1;
		}
		ret = 0;
		goto out;
	}

	if (!ocfs2_writes_unwritten_extents(OCFS2_RAW_SB(ost->ost_fs->fs_super)) &&
	    (er->e_flags & OCFS2_EXT_UNWRITTEN) &&
	    prompt(ost, PY, PR_EXTENT_MARKED_UNWRITTEN,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" has the UNWRITTEN flag set, but "
		   "this filesystem does not support unwritten extents.  "
		   "Clear the UNWRITTEN flag?", er->e_cpos,
		   (uint64_t)di->i_blkno)) {
		er->e_flags &= ~OCFS2_EXT_UNWRITTEN;
	}

	first_block = ocfs2_blocks_to_clusters(ost->ost_fs, er->e_blkno);
	first_block = ocfs2_clusters_to_blocks(ost->ost_fs, first_block);

	if (first_block != er->e_blkno &&
	    prompt(ost, PY, PR_EXTENT_BLKNO_UNALIGNED,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" refers to block %"PRIu64" which isn't "
		   "aligned with the start of a cluster.  Point the extent "
		   "record at block %"PRIu64" which starts this cluster?",
		   er->e_cpos, (uint64_t)di->i_blkno,
		   (uint64_t)er->e_blkno, first_block)) {
		er->e_blkno = first_block;
		*changed = 1;
	}

	/* imagine blkno 0, 1 er_clusters.  last_cluster is 1 and
	 * fs_clusters is 1, which is ok.. */
	last_cluster = ocfs2_blocks_to_clusters(ost->ost_fs, er->e_blkno) +
		       clusters;

	if (last_cluster > ost->ost_fs->fs_clusters &&
	    prompt(ost, PY, PR_EXTENT_CLUSTERS_OVERRUN,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" refers to an extent that goes beyond "
		   "the end of the volume.  Truncate the extent by %"PRIu32" "
		   "clusters to fit it in the volume?", er->e_cpos,
		   (uint64_t)di->i_blkno,
		   last_cluster - ost->ost_fs->fs_clusters)) {
		clusters -= last_cluster - ost->ost_fs->fs_clusters;
		ocfs2_set_rec_clusters(el->l_tree_depth, er, clusters);
		*changed = 1;
	}

	/* XXX offer to remove leaf records with er_clusters set to 0? */

	/* XXX check that the blocks that are referenced aren't already
	 * used */

out:
	return ret;
}
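/*
 * check_er() repairs an unaligned e_blkno with the same
 * blocks -> clusters -> blocks round trip seen in ocfs2_free_clusters()
 * above, which truncates the intra-cluster remainder.  The equivalent mask
 * form, using the hypothetical c_to_b_bits exponent from the earlier
 * sketches:
 */
#include <stdint.h>

static uint64_t sketch_align_blkno_down_to_cluster(uint64_t blkno,
						   int c_to_b_bits)
{
	/* clear the low "blocks per cluster" bits */
	return blkno & ~((1ULL << c_to_b_bits) - 1);
}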
/*
 * Delete and free clusters if needed.  This only works with DEPTH_TRAVERSE.
 */
static int truncate_iterate(ocfs2_filesys *fs,
			    struct ocfs2_extent_rec *rec,
			    int tree_depth, uint32_t ccount,
			    uint64_t ref_blkno, int ref_recno,
			    void *priv_data)
{
	struct truncate_ctxt *ctxt = (struct truncate_ctxt *)priv_data;
	uint32_t len = 0, new_size_in_clusters = ctxt->new_size_in_clusters;
	uint64_t start = 0;
	errcode_t ret;
	int func_ret = OCFS2_EXTENT_ERROR;
	char *buf = NULL;
	struct ocfs2_extent_list *el = NULL;
	int cleanup_rec = 0;

	if ((rec->e_cpos + ocfs2_rec_clusters(tree_depth, rec)) <=
	    new_size_in_clusters)
		return 0;

	if (rec->e_cpos >= new_size_in_clusters) {
		/* the rec is entirely outside the new size, free it */
		if (!tree_depth) {
			start = rec->e_blkno;
			len = ocfs2_rec_clusters(tree_depth, rec);
		} else {
			/* here we meet with a full empty extent block, delete
			 * it.  The extent list it contains should already be
			 * iterated and all the clusters have been freed.
			 */
			ret = ocfs2_delete_extent_block(fs, rec->e_blkno);
			if (ret)
				goto bail;
		}

		cleanup_rec = 1;
	} else {
		/* we're truncating into the middle of the rec */
		len = rec->e_cpos + ocfs2_rec_clusters(tree_depth, rec);
		len -= new_size_in_clusters;
		if (!tree_depth) {
			ocfs2_set_rec_clusters(tree_depth, rec,
					new_size_in_clusters - rec->e_cpos);
			start = rec->e_blkno +
				ocfs2_clusters_to_blocks(fs,
						ocfs2_rec_clusters(tree_depth,
								   rec));
		} else {
			ocfs2_set_rec_clusters(tree_depth, rec,
					new_size_in_clusters - rec->e_cpos);
			/*
			 * For a sparse file, we may meet with another
			 * situation here:
			 * The start of the left most extent rec is greater
			 * than the new size we truncate the file to, but the
			 * start of the extent block is less than that size.
			 * In this case, actually all the extent records in
			 * this extent block have been removed.  So we have
			 * to remove the extent block also.
			 * In this function, we have to reread the extent list
			 * to see whether the extent block is empty or not.
			 */
			ret = ocfs2_malloc_block(fs->fs_io, &buf);
			if (ret)
				goto bail;

			ret = ocfs2_read_extent_block(fs, rec->e_blkno, buf);
			if (ret)
				goto bail;

			el = &((struct ocfs2_extent_block *)buf)->h_list;
			if (el->l_next_free_rec == 0) {
				ret = ocfs2_delete_extent_block(fs,
								rec->e_blkno);
				if (ret)
					goto bail;
				cleanup_rec = 1;
			}
		}
	}

	if (start) {
		if (ctxt->free_clusters)
			ret = ctxt->free_clusters(fs, len, start,
						  ctxt->free_data);
		else
			ret = ocfs2_truncate_clusters(fs, rec, ctxt->ino,
						      len, start);
		if (ret)
			goto bail;
		ctxt->new_i_clusters -= len;
	}

	func_ret = OCFS2_EXTENT_CHANGED;
bail:
	if (cleanup_rec)
		memset(rec, 0, sizeof(struct ocfs2_extent_rec));
	if (buf)
		ocfs2_free(&buf);
	return func_ret;
}
/*
 * extend allocation only here.
 * we'll update all the disk stuff, and oip->alloc_size
 *
 * expect stuff to be locked, a transaction started and enough data /
 * metadata reservations in the contexts.
 *
 * Will return -EAGAIN, and a reason if a restart is needed.
 * If passed in, *reason will always be set, even in error.
 */
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
			       struct inode *inode,
			       u32 *logical_offset,
			       u32 clusters_to_add,
			       struct buffer_head *fe_bh,
			       handle_t *handle,
			       struct ocfs2_alloc_context *data_ac,
			       struct ocfs2_alloc_context *meta_ac,
			       enum ocfs2_alloc_restarted *reason_ret)
{
	int status = 0;
	int free_extents;
	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
	enum ocfs2_alloc_restarted reason = RESTART_NONE;
	u32 bit_off, num_bits;
	u64 block;

	BUG_ON(!clusters_to_add);

	free_extents = ocfs2_num_free_extents(osb, inode, fe);
	if (free_extents < 0) {
		status = free_extents;
		mlog_errno(status);
		goto leave;
	}

	/* there are two cases which could cause us to EAGAIN in the
	 * we-need-more-metadata case:
	 * 1) we haven't reserved *any*
	 * 2) we are so fragmented, we've needed to add metadata too
	 *    many times. */
	if (!free_extents && !meta_ac) {
		mlog(0, "we haven't reserved any metadata!\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	} else if ((!free_extents)
		   && (ocfs2_alloc_context_bits_left(meta_ac)
		       < ocfs2_extend_meta_needed(fe))) {
		mlog(0, "filesystem is really fragmented...\n");
		status = -EAGAIN;
		reason = RESTART_META;
		goto leave;
	}

	status = ocfs2_claim_clusters(osb, handle, data_ac, 1,
				      &bit_off, &num_bits);
	if (status < 0) {
		if (status != -ENOSPC)
			mlog_errno(status);
		goto leave;
	}

	BUG_ON(num_bits > clusters_to_add);

	/* reserve our write early -- insert_extent may update the inode */
	status = ocfs2_journal_access(handle, inode, fe_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
	mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
	     num_bits, bit_off,
	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
	status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
				     *logical_offset, block, num_bits,
				     meta_ac);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	status = ocfs2_journal_dirty(handle, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto leave;
	}

	clusters_to_add -= num_bits;
	*logical_offset += num_bits;

	if (clusters_to_add) {
		mlog(0, "need to alloc once more, clusters = %u, wanted = "
		     "%u\n", fe->i_clusters, clusters_to_add);
		status = -EAGAIN;
		reason = RESTART_TRANS;
	}

leave:
	mlog_exit(status);
	if (reason_ret)
		*reason_ret = reason;
	return status;
}
errcode_t ocfs2_open(const char *name, int flags,
		     unsigned int superblock, unsigned int block_size,
		     ocfs2_filesys **ret_fs)
{
	ocfs2_filesys *fs;
	errcode_t ret;
	int i, len;
	char *ptr;
	unsigned char *raw_uuid;

	ret = ocfs2_malloc0(sizeof(ocfs2_filesys), &fs);
	if (ret)
		return ret;

	fs->fs_flags = flags;
	fs->fs_umask = 022;

	ret = io_open(name, (flags & (OCFS2_FLAG_RO | OCFS2_FLAG_RW |
				      OCFS2_FLAG_BUFFERED)),
		      &fs->fs_io);
	if (ret)
		goto out;

	ret = ocfs2_malloc(strlen(name)+1, &fs->fs_devname);
	if (ret)
		goto out;
	strcpy(fs->fs_devname, name);

	/*
	 * If OCFS2_FLAG_IMAGE_FILE is specified, it needs to be handled
	 * differently
	 */
	if (flags & OCFS2_FLAG_IMAGE_FILE) {
		ret = ocfs2_image_load_bitmap(fs);
		if (ret)
			goto out;
		if (!superblock)
			superblock = fs->ost->ost_superblocks[0];
		if (!block_size)
			block_size = fs->ost->ost_fsblksz;
	}

	/* image file is not a device */
	if (!(flags & OCFS2_FLAG_IMAGE_FILE)) {
		if (io_is_device_readonly(fs->fs_io))
			fs->fs_flags |= OCFS2_FLAG_HARD_RO;
	}

	/*
	 * If OCFS2_FLAG_NO_REV_CHECK is specified, fsck (or someone
	 * like it) is asking to ignore the OCFS vol_header at
	 * block 0.
	 */
	if (!(flags & OCFS2_FLAG_NO_REV_CHECK)) {
		ret = ocfs2_validate_ocfs1_header(fs);
		if (ret)
			goto out;
	}

	if (superblock) {
		ret = OCFS2_ET_INVALID_ARGUMENT;
		if (!block_size)
			goto out;
		io_set_blksize(fs->fs_io, block_size);
		ret = ocfs2_read_super(fs, (uint64_t)superblock, NULL);
	} else {
		superblock = OCFS2_SUPER_BLOCK_BLKNO;
		if (block_size) {
			io_set_blksize(fs->fs_io, block_size);
			ret = ocfs2_read_super(fs, (uint64_t)superblock, NULL);
		} else {
			for (block_size = io_get_blksize(fs->fs_io);
			     block_size <= OCFS2_MAX_BLOCKSIZE;
			     block_size <<= 1) {
				io_set_blksize(fs->fs_io, block_size);
				ret = ocfs2_read_super(fs,
						       (uint64_t)superblock,
						       NULL);
				if ((ret == OCFS2_ET_BAD_MAGIC) ||
				    (ret == OCFS2_ET_IO))
					continue;
				break;
			}
		}
	}
	if (ret)
		goto out;

	fs->fs_blocksize = block_size;
	if (superblock == OCFS2_SUPER_BLOCK_BLKNO) {
		ret = ocfs2_malloc_block(fs->fs_io, &fs->fs_orig_super);
		if (ret)
			goto out;
		memcpy((char *)fs->fs_orig_super,
		       (char *)fs->fs_super, fs->fs_blocksize);
	}

#if 0
	ret = OCFS2_ET_REV_TOO_HIGH;
	if (fs->fs_super->id2.i_super.s_major_rev_level >
	    OCFS2_LIB_CURRENT_REV)
		goto out;
#endif

	if (flags & OCFS2_FLAG_STRICT_COMPAT_CHECK) {
		ret = OCFS2_ET_UNSUPP_FEATURE;
		if (OCFS2_RAW_SB(fs->fs_super)->s_feature_compat &
		    ~OCFS2_LIB_FEATURE_COMPAT_SUPP)
			goto out;

		/* We need to check s_tunefs_flag also to make sure
		 * fsck.ocfs2 won't try to clean up an aborted tunefs
		 * that it doesn't know.
		 */
		if (OCFS2_HAS_INCOMPAT_FEATURE(OCFS2_RAW_SB(fs->fs_super),
				OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) &&
		    (OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag &
		     ~OCFS2_LIB_ABORTED_TUNEFS_SUPP))
			goto out;
	}

	ret = OCFS2_ET_UNSUPP_FEATURE;
	if (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
	    ~OCFS2_LIB_FEATURE_INCOMPAT_SUPP)
		goto out;

	ret = OCFS2_ET_RO_UNSUPP_FEATURE;
	if ((flags & OCFS2_FLAG_RW) &&
	    (OCFS2_RAW_SB(fs->fs_super)->s_feature_ro_compat &
	     ~OCFS2_LIB_FEATURE_RO_COMPAT_SUPP))
		goto out;

	ret = OCFS2_ET_UNSUPP_FEATURE;
	if (!(flags & OCFS2_FLAG_HEARTBEAT_DEV_OK) &&
	    (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
	     OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV))
		goto out;

	ret = OCFS2_ET_CORRUPT_SUPERBLOCK;
	if (!OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits)
		goto out;
	if (fs->fs_super->i_blkno != superblock)
		goto out;
	if ((OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits < 12) ||
	    (OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits > 20))
		goto out;
	if (!OCFS2_RAW_SB(fs->fs_super)->s_root_blkno ||
	    !OCFS2_RAW_SB(fs->fs_super)->s_system_dir_blkno)
		goto out;
	if (OCFS2_RAW_SB(fs->fs_super)->s_max_slots > OCFS2_MAX_SLOTS)
		goto out;

	ret = ocfs2_malloc0(OCFS2_RAW_SB(fs->fs_super)->s_max_slots *
			    sizeof(ocfs2_cached_inode *),
			    &fs->fs_inode_allocs);
	if (ret)
		goto out;

	ret = ocfs2_malloc0(OCFS2_RAW_SB(fs->fs_super)->s_max_slots *
			    sizeof(ocfs2_cached_inode *),
			    &fs->fs_eb_allocs);
	if (ret)
		goto out;

	ret = OCFS2_ET_UNEXPECTED_BLOCK_SIZE;
	if (block_size !=
	    (1U << OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits))
		goto out;

	fs->fs_clustersize =
		1 << OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;

	/* FIXME: Read the system dir */

	fs->fs_root_blkno =
		OCFS2_RAW_SB(fs->fs_super)->s_root_blkno;
	fs->fs_sysdir_blkno =
		OCFS2_RAW_SB(fs->fs_super)->s_system_dir_blkno;

	fs->fs_clusters = fs->fs_super->i_clusters;
	fs->fs_blocks = ocfs2_clusters_to_blocks(fs, fs->fs_clusters);
	fs->fs_first_cg_blkno =
		OCFS2_RAW_SB(fs->fs_super)->s_first_cluster_group;

	raw_uuid = OCFS2_RAW_SB(fs->fs_super)->s_uuid;
	for (i = 0, ptr = fs->uuid_str; i < OCFS2_VOL_UUID_LEN; i++) {
		/* print with null */
		len = snprintf(ptr, 3, "%02X", raw_uuid[i]);
		if (len != 2) {
			ret = OCFS2_ET_INTERNAL_FAILURE;
			goto out;
		}
		/* then only advance past the last char */
		ptr += 2;
	}

	*ret_fs = fs;
	return 0;

out:
	if (fs->fs_inode_allocs)
		ocfs2_free(&fs->fs_inode_allocs);

	ocfs2_freefs(fs);

	return ret;
}
static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
			       u32 cpos, u32 phys_cpos, u32 *len, int ext_flags)
{
	int ret, credits = 0, extra_blocks = 0, partial = context->partial;
	handle_t *handle;
	struct inode *inode = context->inode;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	struct ocfs2_refcount_tree *ref_tree = NULL;
	u32 new_phys_cpos, new_len;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);

	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
			 OCFS2_HAS_REFCOUNT_FL));
		BUG_ON(!context->refcount_loc);

		ret = ocfs2_lock_refcount_tree(osb, context->refcount_loc, 1,
					       &ref_tree, NULL);
		if (ret) {
			mlog_errno(ret);
			return ret;
		}

		ret = ocfs2_prepare_refcount_change_for_del(inode,
							context->refcount_loc,
							phys_blkno,
							*len,
							&credits,
							&extra_blocks);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
						 &context->meta_ac,
						 &context->data_ac,
						 extra_blocks, &credits);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out_unlock_mutex;
		}
	}

	handle = ocfs2_start_trans(osb, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlog_errno(ret);
		goto out_unlock_mutex;
	}

	ret = __ocfs2_claim_clusters(handle, context->data_ac, 1, *len,
				     &new_phys_cpos, &new_len);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	if (new_len != *len) {
		mlog(0, "len_claimed: %u, len: %u\n", new_len, *len);
		if (!partial) {
			context->range->me_flags &=
				~OCFS2_MOVE_EXT_FL_COMPLETE;
			ret = -ENOSPC;
			goto out_commit;
		}
	}

	mlog(0, "cpos: %u, phys_cpos: %u, new_phys_cpos: %u\n", cpos,
	     phys_cpos, new_phys_cpos);

	ret = __ocfs2_move_extent(handle, context, cpos, new_len, phys_cpos,
				  new_phys_cpos, ext_flags);
	if (ret)
		mlog_errno(ret);

	if (partial && (new_len != *len))
		*len = new_len;

	ret = ocfs2_cow_sync_writeback(inode->i_sb, context->inode, cpos,
				       *len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock_mutex:
	mutex_unlock(&tl_inode->i_mutex);

	if (context->data_ac) {
		ocfs2_free_alloc_context(context->data_ac);
		context->data_ac = NULL;
	}

	if (context->meta_ac) {
		ocfs2_free_alloc_context(context->meta_ac);
		context->meta_ac = NULL;
	}

out:
	if (ref_tree)
		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);

	return ret;
}