/*
 * Extent-iteration callback that prints one line per extent record to
 * stdout.
 *
 * priv_data must point to a struct walk_it; its inode's root tree depth is
 * used to indent records by how far they sit above the current depth
 * (root depth minus tree_depth).  An "EXTENTS:" header is emitted when
 * ccount is zero and no indentation applies, and a "TOTAL:" line once a
 * leaf record brings the running cluster count up to the inode's
 * i_clusters.
 *
 * Always returns 0.
 */
static int walk_extents_func(ocfs2_filesys *fs,
			     struct ocfs2_extent_rec *rec,
			     int tree_depth,
			     uint32_t ccount,
			     uint64_t ref_blkno,
			     int ref_recno,
			     void *priv_data)
{
	struct walk_it *wi = priv_data;
	uint32_t rec_clusters = ocfs2_rec_clusters(tree_depth, rec);
	uint32_t running_total = ccount + rec_clusters;
	int indent = wi->di->id2.i_list.l_tree_depth - tree_depth;

	if (!ccount && !indent)
		fputs("EXTENTS:\n", stdout);

	fprintf(stdout, "0x%08" PRIX64 ":%02u ", ref_blkno, ref_recno);

	/* one pad string per level between this record and the leaves */
	while (indent-- > 0)
		fputs(" ", stdout);

	fprintf(stdout,
		"(%08" PRIu32 ", %08" PRIu32 ", %08" PRIu64 ") |"
		" + %08" PRIu32 " = %08" PRIu32 " / %08" PRIu32 "\n",
		rec->e_cpos, rec_clusters, rec->e_blkno,
		ccount, running_total, wi->di->i_clusters);

	if (tree_depth == 0 && running_total == wi->di->i_clusters)
		fprintf(stdout, "TOTAL: %u\n", wi->di->i_clusters);

	return 0;
}
/*
 * Extent-iteration callback that invokes the user's per-block function for
 * every block covered by one extent record.
 *
 * priv_data must point to a struct block_context.  Blocks are visited in
 * ascending order, pairing each physical block number (starting at
 * rec->e_blkno) with its logical block offset (starting at the block
 * equivalent of rec->e_cpos).  The walk stops early when the logical
 * offset reaches the inode's i_size and OCFS2_BLOCK_FLAG_APPEND is not
 * set, or when the callback's result has OCFS2_BLOCK_ABORT set.
 *
 * Returns the result of the last callback invocation, or 0 if the callback
 * was never called.
 */
static int block_iterate_func(ocfs2_filesys *fs,
			      struct ocfs2_extent_rec *rec,
			      int tree_depth,
			      uint32_t ccount,
			      uint64_t ref_blkno,
			      int ref_recno,
			      void *priv_data)
{
	struct block_context *ctxt = priv_data;
	uint64_t logical = ocfs2_clusters_to_blocks(fs, rec->e_cpos);
	uint64_t logical_end = logical +
		ocfs2_clusters_to_blocks(fs,
					 ocfs2_rec_clusters(tree_depth, rec));
	uint64_t physical = rec->e_blkno;
	int status = 0;

	while (logical < logical_end) {
		int past_eof =
			(logical * fs->fs_blocksize) >= ctxt->inode->i_size;

		/* blocks beyond i_size are only visited in append mode */
		if (past_eof && !(ctxt->flags & OCFS2_BLOCK_FLAG_APPEND))
			break;

		status = (*ctxt->func)(fs, physical, logical, rec->e_flags,
				       ctxt->priv_data);
		if (status & OCFS2_BLOCK_ABORT)
			break;

		physical++;
		logical++;
	}

	return status;
}
void dump_extent_list(FILE *out, struct ocfs2_extent_list *ext) { struct ocfs2_extent_rec *rec; int i; uint32_t clusters; char flags[PATH_MAX]; fprintf(out, "\tTree Depth: %u Count: %u Next Free Rec: %u\n", ext->l_tree_depth, ext->l_count, ext->l_next_free_rec); if (!ext->l_next_free_rec) goto bail; if (ext->l_tree_depth) fprintf(out, "\t## %-11s %-12s %-s\n", "Offset", "Clusters", "Block#"); else fprintf(out, "\t## %-11s %-12s %-13s %s\n", "Offset", "Clusters", "Block#", "Flags"); for (i = 0; i < ext->l_next_free_rec; ++i) { rec = &(ext->l_recs[i]); clusters = ocfs2_rec_clusters(ext->l_tree_depth, rec); if (ext->l_tree_depth) fprintf(out, "\t%-2d %-11u %-12u %"PRIu64"\n", i, rec->e_cpos, clusters, (uint64_t)rec->e_blkno); else { flags[0] = '\0'; if (ocfs2_snprint_extent_flags(flags, PATH_MAX, rec->e_flags)) flags[0] = '\0'; fprintf(out, "\t%-2d %-11u %-12u " "%-13"PRIu64" 0x%x %s\n", i, rec->e_cpos, clusters, (uint64_t)rec->e_blkno, rec->e_flags, flags); } } bail: return ; }
/*
 * Fill in an o2cb region descriptor for this filesystem's heartbeat area.
 *
 * Steps:
 *   - query the device sector size; if it cannot be determined, fall back
 *     to OCFS2_MIN_BLOCKSIZE (any other error is returned);
 *   - look up and read the heartbeat system inode from the system
 *     directory;
 *   - require the inode to have an in-inode (depth 0) extent list with
 *     exactly one record, otherwise fail with OCFS2_ET_BAD_HEARTBEAT_FILE;
 *   - reject filesystems whose block size is smaller than the sector size
 *     (OCFS2_ET_BLOCK_SIZE_TOO_SMALL_FOR_HARDWARE);
 *   - compute the region length in filesystem blocks (capped at
 *     O2NM_MAX_NODES) and its start expressed in sectors.
 *
 * On success desc->r_name and desc->r_device_name are assigned directly
 * from fs->uuid_str and fs->fs_devname; nothing is duplicated here.
 *
 * Returns 0 on success or an error code; desc is only written on success.
 */
errcode_t ocfs2_fill_heartbeat_desc(ocfs2_filesys *fs,
				    struct o2cb_region_desc *desc)
{
	errcode_t ret;
	char *filename;
	char *buf = NULL;
	uint64_t blkno, blocks, start_block;
	uint32_t block_bits, cluster_bits;
	int sectsize, sectsize_bits;
	struct ocfs2_dinode *di;
	struct ocfs2_extent_rec *rec;

	ret = ocfs2_get_device_sectsize(fs->fs_devname, &sectsize);
	if (ret) {
		if (ret == OCFS2_ET_CANNOT_DETERMINE_SECTOR_SIZE)
			sectsize = OCFS2_MIN_BLOCKSIZE;
		else
			goto leave;
	}

	/* sectsize is expected to be a power of two: log2 via ffs() */
	sectsize_bits = ffs(sectsize) - 1;

	filename = ocfs2_system_inodes[HEARTBEAT_SYSTEM_INODE].si_name;

	ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, filename,
			   strlen(filename), NULL, &blkno);
	if (ret)
		goto leave;

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (ret)
		goto leave;

	ret = ocfs2_read_inode(fs, blkno, buf);
	if (ret)
		goto leave;

	di = (struct ocfs2_dinode *)buf;
	if (di->id2.i_list.l_tree_depth ||
	    di->id2.i_list.l_next_free_rec != 1) {
		ret = OCFS2_ET_BAD_HEARTBEAT_FILE;
		goto leave;
	}
	rec = &(di->id2.i_list.l_recs[0]);

	block_bits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits;
	cluster_bits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits;

	if (block_bits < sectsize_bits) {
		ret = OCFS2_ET_BLOCK_SIZE_TOO_SMALL_FOR_HARDWARE;
		goto leave;
	}

	/*
	 * Widen before shifting: the cluster count is 32 bits wide and the
	 * byte length of the extent may not fit in 32 bits.
	 */
	blocks = (uint64_t)ocfs2_rec_clusters(0, rec) << cluster_bits;
	blocks >>= block_bits;

	if (blocks > O2NM_MAX_NODES)
		blocks = O2NM_MAX_NODES;

	/* convert the starting block number into sector units */
	start_block = rec->e_blkno << block_bits;
	start_block >>= sectsize_bits;

	desc->r_name = fs->uuid_str;
	desc->r_device_name = fs->fs_devname;
	desc->r_block_bytes = sectsize;
	desc->r_start_block = start_block;
	desc->r_blocks = blocks;

leave:
	if (buf)
		ocfs2_free(&buf);

	return ret;
}
/*
 * Extent-iteration callback that trims one extent record down to the new
 * file size, deleting extent blocks and freeing clusters as needed.
 * This only works with DEPTH_TRAVERSE.
 *
 * priv_data must point to a struct truncate_ctxt.  Three cases:
 *   - the record ends at or before the new size: untouched, returns 0;
 *   - the record starts at or after the new size: a leaf record has all
 *     of its clusters freed, an interior record has its extent block
 *     deleted; either way the record is zeroed;
 *   - the new size falls inside the record: the record is shortened.  A
 *     leaf record has its tail clusters freed; an interior record has its
 *     extent block re-read and, if that block's list is empty, the block
 *     is deleted and the record zeroed.
 *
 * Clusters are released through ctxt->free_clusters when that is set, and
 * through ocfs2_truncate_clusters() otherwise; ctxt->new_i_clusters is
 * reduced by the number freed.
 *
 * Returns OCFS2_EXTENT_CHANGED on success, OCFS2_EXTENT_ERROR on failure,
 * or 0 when the record needed no change.
 */
static int truncate_iterate(ocfs2_filesys *fs,
			    struct ocfs2_extent_rec *rec,
			    int tree_depth,
			    uint32_t ccount,
			    uint64_t ref_blkno,
			    int ref_recno,
			    void *priv_data)
{
	struct truncate_ctxt *ctxt = (struct truncate_ctxt *)priv_data;
	uint32_t new_clusters = ctxt->new_size_in_clusters;
	uint32_t old_rec_clusters = ocfs2_rec_clusters(tree_depth, rec);
	uint32_t rec_end = rec->e_cpos + old_rec_clusters;
	uint32_t free_len = 0;
	uint64_t free_start = 0;
	struct ocfs2_extent_list *child_el = NULL;
	char *eb_buf = NULL;
	int zero_rec = 0;
	int result = OCFS2_EXTENT_ERROR;
	errcode_t err;

	/* record lies completely inside the new size: nothing to do */
	if (rec_end <= new_clusters)
		return 0;

	if (rec->e_cpos < new_clusters) {
		/* we're truncating into the middle of the rec */
		free_len = rec_end - new_clusters;
		ocfs2_set_rec_clusters(tree_depth, rec,
				       new_clusters - rec->e_cpos);

		if (tree_depth == 0) {
			/* first block past the part of the extent we keep */
			free_start = rec->e_blkno +
				ocfs2_clusters_to_blocks(fs,
					ocfs2_rec_clusters(tree_depth, rec));
		} else {
			/*
			 * For a sparse file, we may meet with another
			 * situation here:
			 * The start of the left most extent rec is greater
			 * than the new size we truncate the file to, but the
			 * start of the extent block is less than that size.
			 * In this case, actually all the extent records in
			 * this extent block have been removed. So we have
			 * to remove the extent block also.
			 * In this function, we have to reread the extent list
			 * to see whether the extent block is empty or not.
			 */
			err = ocfs2_malloc_block(fs->fs_io, &eb_buf);
			if (err)
				goto bail;

			err = ocfs2_read_extent_block(fs, rec->e_blkno,
						      eb_buf);
			if (err)
				goto bail;

			child_el =
				&((struct ocfs2_extent_block *)eb_buf)->h_list;
			if (child_el->l_next_free_rec == 0) {
				err = ocfs2_delete_extent_block(fs,
								rec->e_blkno);
				if (err)
					goto bail;
				zero_rec = 1;
			}
		}
	} else {
		/* the rec is entirely outside the new size, free it */
		if (tree_depth == 0) {
			free_start = rec->e_blkno;
			free_len = old_rec_clusters;
		} else {
			/*
			 * here we meet with a full empty extent block, delete
			 * it. The extent list it contains should already be
			 * iterated and all the clusters have been freed.
			 */
			err = ocfs2_delete_extent_block(fs, rec->e_blkno);
			if (err)
				goto bail;
		}
		zero_rec = 1;
	}

	if (free_start) {
		if (ctxt->free_clusters)
			err = ctxt->free_clusters(fs, free_len, free_start,
						  ctxt->free_data);
		else
			err = ocfs2_truncate_clusters(fs, rec, ctxt->ino,
						      free_len, free_start);
		if (err)
			goto bail;

		ctxt->new_i_clusters -= free_len;
	}

	result = OCFS2_EXTENT_CHANGED;

bail:
	if (zero_rec)
		memset(rec, 0, sizeof(struct ocfs2_extent_rec));
	if (eb_buf)
		ocfs2_free(&eb_buf);

	return result;
}
/*
 * Verify (and, with the user's consent, repair) one extent list.
 *
 * ost       fsck state; supplies the filesystem handle and prompting.
 * ei        running extent accounting; ei_clusters and ei_max_size are
 *           updated from leaf records, and the expected depth is compared
 *           against the list when ei_expect_depth is set.
 * di        inode owning the list (used for messages and passed down).
 * el        the extent list to check; may be modified.
 * max_recs  the most records the containing block can hold.
 * changed   set to 1 whenever the list is modified here.
 *
 * Checks performed: tree depth against the expected depth, l_count against
 * max_recs, l_next_free_rec against the usable record count, then each
 * record via check_er().  Records whose block number is out of range may
 * be removed, with the remaining records shifted down.  For leaf lists the
 * referenced clusters are marked allocated.
 *
 * Always returns 0.
 */
errcode_t check_el(o2fsck_state *ost, struct extent_info *ei,
		   struct ocfs2_dinode *di,
		   struct ocfs2_extent_list *el,
		   uint16_t max_recs, int *changed)
{
	int trust_next_free = 1;
	struct ocfs2_extent_rec *er;
	uint64_t max_size;
	uint16_t i;
	uint32_t clusters;
	size_t cpy;

	verbosef("depth %u count %u next_free %u\n", el->l_tree_depth,
		 el->l_count, el->l_next_free_rec);

	if (ei->ei_expect_depth &&
	    el->l_tree_depth != ei->ei_expected_depth &&
	    prompt(ost, PY, PR_EXTENT_LIST_DEPTH,
		   "Extent list in inode %"PRIu64" is recorded as "
		   "being at depth %u but we expect it to be at depth %u. "
		   "update the list?", (uint64_t)di->i_blkno,
		   el->l_tree_depth, ei->ei_expected_depth)) {
		el->l_tree_depth = ei->ei_expected_depth;
		*changed = 1;
	}

	if (el->l_count > max_recs &&
	    prompt(ost, PY, PR_EXTENT_LIST_COUNT,
		   "Extent list in inode %"PRIu64" claims to have %u "
		   "records, but the maximum is %u. Fix the list's count?",
		   (uint64_t)di->i_blkno, el->l_count, max_recs)) {
		el->l_count = max_recs;
		*changed = 1;
	}

	/* never look at more records than the list itself claims to hold */
	if (max_recs > el->l_count)
		max_recs = el->l_count;

	if (el->l_next_free_rec > max_recs) {
		if (prompt(ost, PY, PR_EXTENT_LIST_FREE,
			   "Extent list in inode %"PRIu64" claims %u "
			   "as the next free chain record, but fsck believes "
			   "the largest valid value is %u. Clamp the next "
			   "record value?", (uint64_t)di->i_blkno,
			   el->l_next_free_rec, max_recs)) {
			el->l_next_free_rec = el->l_count;
			*changed = 1;
		} else {
			trust_next_free = 0;
		}
	}

	if (trust_next_free)
		max_recs = el->l_next_free_rec;

	for (i = 0; i < max_recs; i++) {
		er = &el->l_recs[i];
		clusters = ocfs2_rec_clusters(el->l_tree_depth, er);

		/*
		 * For a sparse file, we may find an empty record
		 * in the left most record. Just skip it.
		 */
		if ((OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_feature_incompat
		     & OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) &&
		    el->l_tree_depth && !i && !clusters)
			continue;

		/* returns immediately if blkno is out of range.
		 * descends into eb.  checks that data er doesn't
		 * reference past the volume or anything crazy. */
		check_er(ost, ei, di, el, er, changed);

		/* offer to remove records that point to nowhere */
		if (ocfs2_block_out_of_range(ost->ost_fs, er->e_blkno) &&
		    prompt(ost, PY, PR_EXTENT_BLKNO_RANGE,
			   "Extent record %u in inode %"PRIu64" "
			   "refers to a block that is out of range. Remove "
			   "this record from the extent list?", i,
			   (uint64_t)di->i_blkno)) {

			if (!trust_next_free) {
				printf("Can't remove the record becuase "
				       "next_free_rec hasn't been fixed\n");
				continue;
			}

			cpy = (max_recs - i - 1) * sizeof(*er);
			/*
			 * shift the remaining recs into this ones place.
			 * Source and destination overlap within l_recs[],
			 * so this must be memmove(), not memcpy().
			 */
			if (cpy != 0) {
				memmove(er, er + 1, cpy);
				memset(&el->l_recs[max_recs - 1], 0,
				       sizeof(*er));
				/* re-examine the record now in slot i */
				i--;
			}
			el->l_next_free_rec--;
			max_recs--;
			*changed = 1;
			continue;
		}

		/* we've already accounted for the extent block as part of
		 * the extent block chain groups */
		if (el->l_tree_depth)
			continue;

		/* mark the data clusters as used */
		o2fsck_mark_clusters_allocated(ost,
			ocfs2_blocks_to_clusters(ost->ost_fs, er->e_blkno),
			clusters);

		ei->ei_clusters += clusters;

		/*
		 * Widen to 64 bits before adding and shifting; the byte
		 * offset of the end of an extent does not fit in 32 bits
		 * for extents that end past 4GiB.
		 */
		max_size = ((uint64_t)er->e_cpos + clusters) <<
			OCFS2_RAW_SB(ost->ost_fs->fs_super)->s_clustersize_bits;
		if (max_size > ei->ei_max_size)
			ei->ei_max_size = max_size;
	}

	return 0;
}
/*
 * Verify (and, with the user's consent, repair) a single extent record.
 *
 * the caller will check if er->e_blkno is out of range to determine if it
 * should try removing the record; this function returns immediately in
 * that case without touching the record.
 *
 * Interior records (el->l_tree_depth != 0) descend into the referenced
 * extent block via check_eb(); if that block is invalid the reference may
 * be cleared by zeroing e_blkno.
 *
 * Leaf records are checked for:
 *   - an UNWRITTEN flag on a filesystem that does not support unwritten
 *     extents;
 *   - a starting block that is not aligned to a cluster boundary;
 *   - an extent that runs past the end of the volume.
 *
 * *changed is set to 1 whenever the record is modified.  Always returns 0.
 */
static errcode_t check_er(o2fsck_state *ost, struct extent_info *ei,
			  struct ocfs2_dinode *di,
			  struct ocfs2_extent_list *el,
			  struct ocfs2_extent_rec *er,
			  int *changed)
{
	errcode_t ret = 0;
	uint64_t first_block;
	uint32_t last_cluster, clusters;

	clusters = ocfs2_rec_clusters(el->l_tree_depth, er);
	verbosef("cpos %u clusters %u blkno %"PRIu64"\n",
		 er->e_cpos, clusters, (uint64_t)er->e_blkno);

	if (ocfs2_block_out_of_range(ost->ost_fs, er->e_blkno))
		goto out;

	if (el->l_tree_depth) {
		int is_valid = 0;
		/* we only expect a given depth when we descend to extent blocks
		 * from a previous depth.  these start at 0 when the inode
		 * is checked */
		ei->ei_expect_depth = 1;
		ei->ei_expected_depth = el->l_tree_depth - 1;
		check_eb(ost, ei, di, er->e_blkno, &is_valid);
		if (!is_valid &&
		    prompt(ost, PY, PR_EXTENT_EB_INVALID,
			   "The extent record for cluster offset "
			   "%"PRIu32" in inode %"PRIu64" refers to an invalid "
			   "extent block at %"PRIu64". Clear the reference "
			   "to this invalid block?", er->e_cpos,
			   (uint64_t)di->i_blkno, (uint64_t)er->e_blkno)) {
			er->e_blkno = 0;
			*changed = 1;
		}
		ret = 0;
		goto out;
	}

	if (!ocfs2_writes_unwritten_extents(OCFS2_RAW_SB(ost->ost_fs->fs_super)) &&
	    (er->e_flags & OCFS2_EXT_UNWRITTEN) &&
	    prompt(ost, PY, PR_EXTENT_MARKED_UNWRITTEN,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" has the UNWRITTEN flag set, but "
		   "this filesystem does not support unwritten extents. "
		   "Clear the UNWRITTEN flag?", er->e_cpos,
		   (uint64_t)di->i_blkno)) {
		er->e_flags &= ~OCFS2_EXT_UNWRITTEN;
		/* the record was modified, so the caller must write it back */
		*changed = 1;
	}

	/* round the block number down to the start of its cluster */
	first_block = ocfs2_blocks_to_clusters(ost->ost_fs, er->e_blkno);
	first_block = ocfs2_clusters_to_blocks(ost->ost_fs, first_block);

	if (first_block != er->e_blkno &&
	    prompt(ost, PY, PR_EXTENT_BLKNO_UNALIGNED,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" refers to block %"PRIu64" which isn't "
		   "aligned with the start of a cluster. Point the extent "
		   "record at block %"PRIu64" which starts this cluster?",
		   er->e_cpos, (uint64_t)di->i_blkno,
		   (uint64_t)er->e_blkno, first_block)) {
		er->e_blkno = first_block;
		*changed = 1;
	}

	/* imagine blkno 0, 1 er_clusters.  last_cluster is 1 and
	 * fs_clusters is 1, which is ok.. */
	last_cluster = ocfs2_blocks_to_clusters(ost->ost_fs, er->e_blkno) +
		       clusters;

	if (last_cluster > ost->ost_fs->fs_clusters &&
	    prompt(ost, PY, PR_EXTENT_CLUSTERS_OVERRUN,
		   "The extent record for cluster offset %"PRIu32" "
		   "in inode %"PRIu64" refers to an extent that goes beyond "
		   "the end of the volume. Truncate the extent by %"PRIu32" "
		   "clusters to fit it in the volume?", er->e_cpos,
		   (uint64_t)di->i_blkno,
		   last_cluster - ost->ost_fs->fs_clusters)) {
		clusters -= last_cluster - ost->ost_fs->fs_clusters;
		ocfs2_set_rec_clusters(el->l_tree_depth, er, clusters);
		*changed = 1;
	}

	/* XXX offer to remove leaf records with er_clusters set to 0? */

	/* XXX check that the blocks that are referenced aren't already
	 * used */

out:
	return ret;
}
/*
 * Compute the length, in clusters, of the hole that begins at v_cluster
 * in the given leaf extent list and store it in *num_clusters.
 *
 * When no allocation exists past v_cluster the result is
 * UINT32_MAX - v_cluster.
 *
 * For in-inode extents, el is the dinode's list and eb_buf is NULL.
 * Otherwise eb_buf must point to the extent block that contains el; if the
 * hole runs to the end of that block, the next leaf (h_next_leaf_blk) is
 * read and searched as well.
 *
 * Returns 0 on success, OCFS2_ET_CORRUPT_EXTENT_BLOCK if the next leaf has
 * allocated records ahead of the hole position, or the error from
 * allocating/reading the next leaf.
 */
static int ocfs2_figure_hole_clusters(ocfs2_cached_inode *cinode,
				      struct ocfs2_extent_list *el,
				      char *eb_buf,
				      uint32_t v_cluster,
				      uint32_t *num_clusters)
{
	struct ocfs2_extent_block *cur_eb, *following_eb;
	char *following_buf = NULL;
	int status;
	int idx;

	idx = ocfs2_search_for_hole_index(el, v_cluster);

	if (eb_buf && idx == el->l_next_free_rec) {
		cur_eb = (struct ocfs2_extent_block *)eb_buf;

		/*
		 * Check the next leaf for any extents.
		 */
		if (cur_eb->h_next_leaf_blk != 0) {
			status = ocfs2_malloc_block(cinode->ci_fs->fs_io,
						    &following_buf);
			if (status)
				goto out;

			status = ocfs2_read_extent_block(cinode->ci_fs,
						cur_eb->h_next_leaf_blk,
						following_buf);
			if (status)
				goto out;

			following_eb =
				(struct ocfs2_extent_block *)following_buf;
			el = &following_eb->h_list;
			idx = ocfs2_search_for_hole_index(el, v_cluster);

			/*
			 * Only an empty first record may precede the hole
			 * position in the next leaf.
			 */
			if (idx > 1 ||
			    (idx == 1 &&
			     ocfs2_rec_clusters(el->l_tree_depth,
						&el->l_recs[0]))) {
				status = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
				goto out;
			}
		}
	}

	if (idx != el->l_next_free_rec) {
		/* the hole ends where the next record begins */
		*num_clusters = el->l_recs[idx].e_cpos - v_cluster;
	} else {
		/*
		 * We're at the end of our existing allocation. Just
		 * return the maximum number of clusters we could
		 * possibly allocate.
		 */
		*num_clusters = UINT32_MAX - v_cluster;
	}

	status = 0;

out:
	if (following_buf)
		ocfs2_free(&following_buf);

	return status;
}
/*
 * Map a virtual cluster offset within a cached inode to its physical
 * cluster.
 *
 * cinode        cached inode whose extent tree is searched.
 * v_cluster     virtual cluster offset to look up.
 * p_cluster     receives the physical cluster, or 0 if v_cluster falls in
 *               a hole.
 * num_clusters  optional; receives the number of clusters from v_cluster
 *               to the end of the extent, or the size of the hole.
 * extent_flags  optional; receives the record's flags (0 for a hole).
 *
 * Returns 0 on success, OCFS2_ET_CORRUPT_EXTENT_BLOCK if the located leaf
 * is not at depth 0, OCFS2_ET_BAD_BLKNO if the matching record has a zero
 * block number, or an error from the lookup helpers.
 */
errcode_t ocfs2_get_clusters(ocfs2_cached_inode *cinode,
			     uint32_t v_cluster,
			     uint32_t *p_cluster,
			     uint32_t *num_clusters,
			     uint16_t *extent_flags)
{
	ocfs2_filesys *fs = cinode->ci_fs;
	struct ocfs2_dinode *di = cinode->ci_inode;
	struct ocfs2_extent_list *el = &di->id2.i_list;
	struct ocfs2_extent_rec *rec;
	char *leaf_buf = NULL;
	uint16_t found_flags = 0;
	uint32_t offset_in_rec;
	errcode_t ret = 0;
	int idx;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(fs, di, v_cluster, &leaf_buf);
		if (ret)
			goto out;

		el = &((struct ocfs2_extent_block *)leaf_buf)->h_list;
		if (el->l_tree_depth) {
			ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
			goto out;
		}
	}

	idx = ocfs2_search_extent_list(el, v_cluster);
	if (idx != -1) {
		rec = &el->l_recs[idx];

		assert(v_cluster >= rec->e_cpos);

		if (!rec->e_blkno) {
			ret = OCFS2_ET_BAD_BLKNO;
			goto out;
		}

		offset_in_rec = v_cluster - rec->e_cpos;

		*p_cluster = ocfs2_blocks_to_clusters(fs, rec->e_blkno);
		*p_cluster += offset_in_rec;

		if (num_clusters)
			*num_clusters =
				ocfs2_rec_clusters(el->l_tree_depth, rec) -
				offset_in_rec;

		found_flags = rec->e_flags;
	} else {
		/*
		 * A hole was found. Return some canned values that
		 * callers can key on. If asked for, num_clusters will
		 * be populated with the size of the hole.
		 */
		*p_cluster = 0;
		if (num_clusters) {
			ret = ocfs2_figure_hole_clusters(cinode, el, leaf_buf,
							 v_cluster,
							 num_clusters);
			if (ret)
				goto out;
		}
	}

	if (extent_flags)
		*extent_flags = found_flags;

out:
	if (leaf_buf)
		ocfs2_free(&leaf_buf);
	return ret;
}
/*
 * Map a virtual cluster offset to its physical cluster within an extent
 * tree rooted at an arbitrary extent list.
 *
 * fs            filesystem handle.
 * el            root extent list of the tree.
 * el_blkno      block number passed through to ocfs2_tree_find_leaf().
 * el_blk        block buffer passed through to ocfs2_tree_find_leaf().
 * v_cluster     virtual cluster offset to look up.
 * p_cluster     receives the physical cluster.
 * num_clusters  optional; receives the clusters remaining in the extent
 *               from v_cluster onward.
 * extent_flags  optional; receives the record's flags.
 *
 * Unlike a lookup that tolerates holes, an offset with no covering record
 * is an error here and yields -1.  Other returns: 0 on success,
 * OCFS2_ET_CORRUPT_EXTENT_BLOCK if the located leaf is not at depth 0,
 * OCFS2_ET_BAD_BLKNO if the record's block number is zero, or the error
 * from ocfs2_tree_find_leaf().
 */
errcode_t ocfs2_xattr_get_clusters(ocfs2_filesys *fs,
				   struct ocfs2_extent_list *el,
				   uint64_t el_blkno,
				   char *el_blk,
				   uint32_t v_cluster,
				   uint32_t *p_cluster,
				   uint32_t *num_clusters,
				   uint16_t *extent_flags)
{
	struct ocfs2_extent_rec *rec;
	char *leaf_buf = NULL;
	uint32_t offset_in_rec;
	errcode_t ret = 0;
	int idx;

	if (el->l_tree_depth) {
		ret = ocfs2_tree_find_leaf(fs, el, el_blkno, el_blk,
					   v_cluster, &leaf_buf);
		if (ret)
			goto out;

		el = &((struct ocfs2_extent_block *)leaf_buf)->h_list;
		if (el->l_tree_depth) {
			ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK;
			goto out;
		}
	}

	idx = ocfs2_search_extent_list(el, v_cluster);
	if (idx == -1) {
		/* no record covers v_cluster */
		ret = -1;
		goto out;
	}

	rec = &el->l_recs[idx];

	assert(v_cluster >= rec->e_cpos);

	if (!rec->e_blkno) {
		ret = OCFS2_ET_BAD_BLKNO;
		goto out;
	}

	offset_in_rec = v_cluster - rec->e_cpos;

	*p_cluster = ocfs2_blocks_to_clusters(fs, rec->e_blkno);
	*p_cluster += offset_in_rec;

	if (num_clusters)
		*num_clusters = ocfs2_rec_clusters(el->l_tree_depth, rec) -
				offset_in_rec;

	if (extent_flags)
		*extent_flags = rec->e_flags;

out:
	if (leaf_buf)
		ocfs2_free(&leaf_buf);
	return ret;
}
static int extent_iterate_el(struct ocfs2_extent_list *el, uint64_t ref_blkno, struct extent_context *ctxt) { struct ocfs2_extent_rec before; int iret = 0; int i; for (i = 0; i < el->l_next_free_rec; i++) { /* XXX we could put some constraints on how the rec * is allowed to change.. */ before = el->l_recs[i]; if (el->l_tree_depth) { iret |= extent_iterate_eb(&el->l_recs[i], el->l_tree_depth, ref_blkno, i, ctxt); if (iret & OCFS2_EXTENT_CHANGED) iret |= update_eb_rec(ctxt, &before, &el->l_recs[i]); if (el->l_recs[i].e_int_clusters && (el->l_recs[i].e_cpos >= ctxt->last_eb_cpos)) { /* * Only set last_eb_blkno if current extent * list point to leaf blocks. */ if (el->l_tree_depth == 1) ctxt->last_eb_blkno = el->l_recs[i].e_blkno; ctxt->last_eb_cpos = el->l_recs[i].e_cpos; } } else { /* * For a sparse file, we may find an empty record * in the left most record. Just skip it. */ if (!i && !el->l_recs[i].e_leaf_clusters) continue; iret |= (*ctxt->func)(ctxt->fs, &el->l_recs[i], el->l_tree_depth, ctxt->ccount, ref_blkno, i, ctxt->priv_data); if (iret & OCFS2_EXTENT_CHANGED) iret |= update_leaf_rec(ctxt, &before, &el->l_recs[i]); ctxt->ccount += ocfs2_rec_clusters(el->l_tree_depth, &el->l_recs[i]); } if (iret & (OCFS2_EXTENT_ABORT | OCFS2_EXTENT_ERROR)) break; } if (iret & OCFS2_EXTENT_CHANGED) { for (i = 0; i < el->l_count; i++) { if (ocfs2_rec_clusters(el->l_tree_depth, &el->l_recs[i])) continue; el->l_next_free_rec = i; break; } } return iret; }