int
xfs_discard_extents(
	struct xfs_mount	*mp,
	struct list_head	*list)
{
	struct xfs_busy_extent	*busyp;
	int			error = 0;

	list_for_each_entry(busyp, list, list) {
		trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
					 busyp->length);

		error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
				XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
				XFS_FSB_TO_BB(mp, busyp->length),
				GFP_NOFS, 0);
		if (error && error != EOPNOTSUPP) {
			xfs_info(mp,
				 "discard failed for extent [0x%llx,%u], error %d",
				 (unsigned long long)busyp->bno,
				 busyp->length,
				 error);
			return error;
		}
	}

	return error;
}
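The daddr and sector-count arguments handed to blkdev_issue_discard() above come from two unit conversions. A minimal standalone sketch follows, assuming the standard macro definitions (XFS_AGB_TO_DADDR expanding to XFS_FSB_TO_BB over agno * sb_agblocks + agbno, and XFS_FSB_TO_BB shifting by blocklog - BBSHIFT); the helper name agb_to_daddr and the example geometry are illustrative, not from the source.

#include <stdint.h>
#include <stdio.h>

#define BBSHIFT 9	/* 512-byte "basic blocks" */

/* assumed example geometry, standing in for mp->m_sb fields */
static const uint64_t sb_agblocks = 1048576;	/* 4 GiB AGs at 4k blocks */
static const int      sb_blocklog = 12;		/* 4096-byte fs blocks */

/* mirrors XFS_AGB_TO_DADDR: AG-relative fs block -> absolute 512B sector */
static uint64_t agb_to_daddr(uint32_t agno, uint32_t agbno)
{
	uint64_t fsbno = (uint64_t)agno * sb_agblocks + agbno;
	return fsbno << (sb_blocklog - BBSHIFT);	/* XFS_FSB_TO_BB */
}

int main(void)
{
	/* a busy extent at agno=2, bno=100, length=16 fs blocks */
	uint64_t sector = agb_to_daddr(2, 100);
	uint64_t nr_sects = (uint64_t)16 << (sb_blocklog - BBSHIFT);

	printf("discard sector %llu, count %llu\n",
	       (unsigned long long)sector, (unsigned long long)nr_sects);
	return 0;
}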
static void
scanfunc_ino(
	struct xfs_btree_block	*block,
	int			level,
	xfs_agf_t		*agf)
{
	xfs_agino_t		agino;
	xfs_agnumber_t		seqno = be32_to_cpu(agf->agf_seqno);
	int			i;
	int			j;
	int			off;
	xfs_inobt_ptr_t		*pp;
	xfs_inobt_rec_t		*rp;

	if (level == 0) {
		rp = XFS_INOBT_REC_ADDR(mp, block, 1);
		for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++) {
			agino = be32_to_cpu(rp[i].ir_startino);
			off = XFS_INO_TO_OFFSET(mp, agino);
			push_cur();
			set_cur(&typtab[TYP_INODE],
				XFS_AGB_TO_DADDR(mp, seqno,
					XFS_AGINO_TO_AGBNO(mp, agino)),
				XFS_FSB_TO_BB(mp, XFS_IALLOC_BLOCKS(mp)),
				DB_RING_IGN, NULL);
			if (iocur_top->data == NULL) {
				dbprintf(_("can't read inode block %u/%u\n"),
					seqno, XFS_AGINO_TO_AGBNO(mp, agino));
				continue;
			}
			for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
				if (XFS_INOBT_IS_FREE_DISK(&rp[i], j))
					continue;
				process_inode(agf, agino + j, (xfs_dinode_t *)
					((char *)iocur_top->data +
					 ((off + j) << mp->m_sb.sb_inodelog)));
			}
			pop_cur();
		}
		return;
	}
	pp = XFS_INOBT_PTR_ADDR(mp, block, 1, mp->m_inobt_mxr[1]);
	for (i = 0; i < be16_to_cpu(block->bb_numrecs); i++)
		scan_sbtree(agf, be32_to_cpu(pp[i]), level, scanfunc_ino,
			TYP_INOBT);
}
static void
scan_sbtree(
	xfs_agf_t	*agf,
	xfs_agblock_t	root,
	int		nlevels,
	scan_sbtree_f_t	func,
	typnm_t		btype)
{
	xfs_agnumber_t	seqno = be32_to_cpu(agf->agf_seqno);

	push_cur();
	set_cur(&typtab[btype], XFS_AGB_TO_DADDR(mp, seqno, root),
		blkbb, DB_RING_IGN, NULL);
	if (iocur_top->data == NULL) {
		dbprintf(_("can't read btree block %u/%u\n"), seqno, root);
		return;
	}
	(*func)(iocur_top->data, nlevels - 1, agf);
	pop_cur();
}
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
	struct xfs_btree_cur		*cur,
	struct xfs_alloc_rec_incore	*rec,
	void				*priv)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_getfsmap_info	*info = priv;
	struct xfs_rmap_irec		irec;
	xfs_daddr_t			rec_daddr;

	rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_private.a.agno,
			rec->ar_startblock);

	irec.rm_startblock = rec->ar_startblock;
	irec.rm_blockcount = rec->ar_blockcount;
	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
	irec.rm_offset = 0;
	irec.rm_flags = 0;

	return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr);
}
/*
 * Initialise a new set of inodes.
 */
STATIC int
xfs_ialloc_inode_init(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_agblock_t		length,
	unsigned int		gen)
{
	struct xfs_buf		*fbuf;
	struct xfs_dinode	*free;
	int			blks_per_cluster, nbufs, ninodes;
	int			version;
	int			i, j;
	xfs_daddr_t		d;

	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the blocks size.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		nbufs = length;
		ninodes = mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
				   mp->m_sb.sb_blocksize;
		nbufs = length / blks_per_cluster;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
	}

	/*
	 * Figure out what version number to use in the inodes we create.
	 * If the superblock version has caught up to the one that supports
	 * the new inode format, then use the new inode version.  Otherwise
	 * use the old version so that old kernels will continue to be
	 * able to use the file system.
	 */
	if (xfs_sb_version_hasnlink(&mp->m_sb))
		version = 2;
	else
		version = 1;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					 mp->m_bsize * blks_per_cluster,
					 XBF_LOCK);
		if (!fbuf)
			return ENOMEM;
		/*
		 * Initialize all inodes in this buffer and then log them.
		 *
		 * XXX: It would be much better if we had just one transaction
		 *	to log a whole cluster of inodes instead of all the
		 *	individual transactions causing a lot of log traffic.
		 */
		xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
		for (i = 0; i < ninodes; i++) {
			int	ioffset = i << mp->m_sb.sb_inodelog;
			uint	isize = sizeof(struct xfs_dinode);

			free = xfs_make_iptr(mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);
			xfs_trans_log_buf(tp, fbuf, ioffset,
					  ioffset + isize - 1);
		}
		xfs_trans_inode_alloc_buf(tp, fbuf);
	}
	return 0;
}
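The cluster-sizing branch at the top of this function determines how many buffers the chunk needs and how many inodes land in each. A standalone sketch of that arithmetic, with assumed example geometry (4k blocks, 512-byte inodes, 8k inode clusters) that is illustrative rather than taken from the source:

#include <stdio.h>

int main(void)
{
	/* assumed example geometry, not from the source */
	int blocksize = 4096;		/* mp->m_sb.sb_blocksize */
	int inopblock = 8;		/* mp->m_sb.sb_inopblock */
	int cluster_size = 8192;	/* XFS_INODE_CLUSTER_SIZE(mp) */
	int length = 16;		/* new chunk length in fs blocks */
	int blks_per_cluster, nbufs, ninodes;

	if (blocksize >= cluster_size) {
		blks_per_cluster = 1;
		nbufs = length;
		ninodes = inopblock;
	} else {
		blks_per_cluster = cluster_size / blocksize;	/* 2 */
		nbufs = length / blks_per_cluster;		/* 8 buffers */
		ninodes = blks_per_cluster * inopblock;		/* 16 per buffer */
	}
	printf("%d buffers of %d blocks, %d inodes per buffer\n",
	       nbufs, blks_per_cluster, ninodes);
	return 0;
}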
extern void
readbitmap(char *device, image_head image_hdr, unsigned long *bitmap, int pui)
{
	xfs_agnumber_t	agno = 0;
	xfs_agblock_t	first_agbno;
	xfs_agnumber_t	num_ags;
	ag_header_t	ag_hdr;
	xfs_daddr_t	read_ag_off;
	int		read_ag_length;
	void		*read_ag_buf = NULL;
	xfs_off_t	read_ag_position;	/* xfs_types.h: typedef __s64 */
	uint64_t	sk, res, s_pos = 0;
	void		*btree_buf_data = NULL;
	int		btree_buf_length;
	xfs_off_t	btree_buf_position;
	xfs_agblock_t	bno;
	uint		current_level;
	uint		btree_levels;
	xfs_daddr_t	begin, next_begin, ag_begin, new_begin, ag_end;
						/* xfs_types.h: typedef __s64 */
	xfs_off_t	pos;
	xfs_alloc_ptr_t	*ptr;
	xfs_alloc_rec_t	*rec_ptr;
	int		length;
	int		i;
	uint64_t	size, sizeb;
	xfs_off_t	w_position;
	int		w_length;
	int		wblocks;
	int		w_size = 1 * 1024 * 1024;
	uint64_t	numblocks = 0;
	xfs_off_t	logstart, logend;
	xfs_off_t	logstart_pos, logend_pos;
	int		log_length;
	struct xfs_btree_block	*block;
	uint64_t	current_block, block_count, prog_cur_block = 0;
	int		start = 0;
	int		bit_size = 1;
	progress_bar	prog;
	uint64_t	bused = 0;
	uint64_t	bfree = 0;

	/// init progress
	progress_init(&prog, start, image_hdr.totalblock, image_hdr.totalblock,
		      BITMAP, bit_size);

	fs_open(device);

	first_agbno = (((XFS_AGFL_DADDR(mp) + 1) * source_sectorsize) +
		       first_residue) / source_blocksize;
	num_ags = mp->m_sb.sb_agcount;
	log_mesg(1, 0, 0, fs_opt.debug, "ags = %i\n", num_ags);

	for (agno = 0; agno < num_ags; agno++) {
		/* read in first blocks of the ag */
		/* initial settings */
		log_mesg(2, 0, 0, fs_opt.debug, "read ag %i header\n", agno);
		read_ag_off = XFS_AG_DADDR(mp, agno, XFS_SB_DADDR);
		read_ag_length = first_agbno * source_blocksize;
		read_ag_position = (xfs_off_t)read_ag_off * (xfs_off_t)BBSIZE;
		read_ag_buf = malloc(read_ag_length);
		if (read_ag_buf == NULL) {
			log_mesg(0, 1, 1, fs_opt.debug, "%s, %i, ERROR:%s",
				 __func__, __LINE__, strerror(errno));
		}
		memset(read_ag_buf, 0, read_ag_length);

		log_mesg(2, 0, 0, fs_opt.debug,
			 "seek to read_ag_position %lli\n", read_ag_position);
		sk = lseek(source_fd, read_ag_position, SEEK_SET);
		current_block = (sk / source_blocksize);
		block_count = (read_ag_length / source_blocksize);
		set_bitmap(bitmap, sk, read_ag_length);
		log_mesg(2, 0, 0, fs_opt.debug,
			 "read ag header fd = %llu(%i), length = %i(%i)\n",
			 sk, current_block, read_ag_length, block_count);
		if ((res = read(source_fd, read_ag_buf, read_ag_length)) < 0) {
			log_mesg(1, 0, 1, fs_opt.debug,
				 "read failure at offset %lld\n",
				 read_ag_position);
		}

		ag_hdr.xfs_sb = (xfs_dsb_t *)(read_ag_buf);
		ASSERT(be32_to_cpu(ag_hdr.xfs_sb->sb_magicnum) == XFS_SB_MAGIC);
		ag_hdr.xfs_agf = (xfs_agf_t *)(read_ag_buf + source_sectorsize);
		ASSERT(be32_to_cpu(ag_hdr.xfs_agf->agf_magicnum) == XFS_AGF_MAGIC);
		ag_hdr.xfs_agi = (xfs_agi_t *)(read_ag_buf + 2 * source_sectorsize);
		ASSERT(be32_to_cpu(ag_hdr.xfs_agi->agi_magicnum) == XFS_AGI_MAGIC);
		ag_hdr.xfs_agfl = (xfs_agfl_t *)(read_ag_buf + 3 * source_sectorsize);

		log_mesg(2, 0, 0, fs_opt.debug, "ag header read ok\n");

		/* save what we need (agf) in the btree buffer */
		btree_buf_data = malloc(source_blocksize);
		if (btree_buf_data == NULL) {
			log_mesg(0, 1, 1, fs_opt.debug, "%s, %i, ERROR:%s",
				 __func__, __LINE__, strerror(errno));
		}
		memset(btree_buf_data, 0, source_blocksize);
		memmove(btree_buf_data, ag_hdr.xfs_agf, source_sectorsize);
		ag_hdr.xfs_agf = (xfs_agf_t *)btree_buf_data;
		btree_buf_length = source_blocksize;

		///* traverse btree until we get to the leftmost leaf node */
		bno = be32_to_cpu(ag_hdr.xfs_agf->agf_roots[XFS_BTNUM_BNOi]);
		current_level = 0;
		btree_levels = be32_to_cpu(ag_hdr.xfs_agf->agf_levels[XFS_BTNUM_BNOi]);

		ag_end = XFS_AGB_TO_DADDR(mp, agno,
				be32_to_cpu(ag_hdr.xfs_agf->agf_length) - 1) +
			 source_blocksize / BBSIZE;

		for (;;) {
			/* none of this touches the w_buf buffer */
			current_level++;

			btree_buf_position = pos =
				(xfs_off_t)XFS_AGB_TO_DADDR(mp, agno, bno) << BBSHIFT;
			btree_buf_length = source_blocksize;

			sk = lseek(source_fd, btree_buf_position, SEEK_SET);
			current_block = (sk / source_blocksize);
			block_count = (btree_buf_length / source_blocksize);
			set_bitmap(bitmap, sk, btree_buf_length);
			log_mesg(2, 0, 0, fs_opt.debug,
				 "read btree fd = %llu(%i), length = %i(%i)\n",
				 sk, current_block, btree_buf_length, block_count);
			read(source_fd, btree_buf_data, btree_buf_length);
			block = (struct xfs_btree_block *)
				((char *)btree_buf_data + pos - btree_buf_position);

			if (be16_to_cpu(block->bb_level) == 0)
				break;

			ptr = XFS_ALLOC_PTR_ADDR(mp, block, 1, mp->m_alloc_mxr[1]);
			bno = be32_to_cpu(ptr[0]);
		}
		log_mesg(2, 0, 0, fs_opt.debug, "btree read done\n");

		/* align first data copy but don't overwrite ag header */
		pos = read_ag_position >> BBSHIFT;
		length = read_ag_length >> BBSHIFT;
		next_begin = pos + length;
		ag_begin = next_begin;

		///* handle the rest of the ag */
		for (;;) {
			if (be16_to_cpu(block->bb_level) != 0) {
				log_mesg(0, 1, 1, fs_opt.debug,
					 "WARNING: source filesystem inconsistent.\n"
					 "A leaf btree rec isn't a leaf. Aborting now.\n");
			}

			rec_ptr = XFS_ALLOC_REC_ADDR(mp, block, 1);
			for (i = 0; i < be16_to_cpu(block->bb_numrecs);
							i++, rec_ptr++) {
				/* calculate in daddr's */
				begin = next_begin;

				/*
				 * protect against pathological case of a
				 * hole right after the ag header in a
				 * mis-aligned case
				 */
				if (begin < ag_begin)
					begin = ag_begin;

				/*
				 * round size up to ensure we copy a
				 * range bigger than required
				 */
				log_mesg(3, 0, 0, fs_opt.debug,
					 "XFS_AGB_TO_DADDR = %llu, agno = %i, be32_to_cpu=%llu\n",
					 XFS_AGB_TO_DADDR(mp, agno,
						be32_to_cpu(rec_ptr->ar_startblock)),
					 agno, be32_to_cpu(rec_ptr->ar_startblock));
				sizeb = XFS_AGB_TO_DADDR(mp, agno,
						be32_to_cpu(rec_ptr->ar_startblock)) -
					begin;
				size = roundup(sizeb << BBSHIFT, source_sectorsize);
				log_mesg(3, 0, 0, fs_opt.debug,
					 "BB = %i size %i and sizeb %llu begin = %llu\n",
					 BBSHIFT, size, sizeb, begin);

				if (size > 0) {
					/* copy extent */
					log_mesg(2, 0, 0, fs_opt.debug,
						 "copy extent\n");
					w_position = (xfs_off_t)begin << BBSHIFT;

					while (size > 0) {
						/*
						 * let lower layer do alignment
						 */
						if (size > w_size) {
							w_length = w_size;
							size -= w_size;
							sizeb -= wblocks;
							numblocks += wblocks;
						} else {
							w_length = size;
							numblocks += sizeb;
							size = 0;
						}

						//read_wbuf(source_fd, &w_buf, mp);
						sk = lseek(source_fd, w_position,
							   SEEK_SET);
						current_block = (sk / source_blocksize);
						block_count = (w_length / source_blocksize);
						set_bitmap(bitmap, sk, w_length);
						log_mesg(2, 0, 0, fs_opt.debug,
							 "read ext sourcefd to w_buf source_fd=%llu(%i), length=%i(%i)\n",
							 sk, current_block, w_length,
							 block_count);
						sk = lseek(source_fd, w_length,
							   SEEK_CUR);

						w_position += w_length;
					}
				}

				/* round next starting point down */
				new_begin = XFS_AGB_TO_DADDR(mp, agno,
						be32_to_cpu(rec_ptr->ar_startblock) +
						be32_to_cpu(rec_ptr->ar_blockcount));
				next_begin = rounddown(new_begin,
						source_sectorsize >> BBSHIFT);
			}

			if (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK) {
				log_mesg(2, 0, 0, fs_opt.debug, "NULLAGBLOCK\n");
				break;
			}

			/* read in next btree record block */
			btree_buf_position = pos = (xfs_off_t)XFS_AGB_TO_DADDR(mp,
					agno,
					be32_to_cpu(block->bb_u.s.bb_rightsib))
					<< BBSHIFT;
			btree_buf_length = source_blocksize;

			/* let read_wbuf handle alignment */
			//read_wbuf(source_fd, &btree_buf, mp);
			sk = lseek(source_fd, btree_buf_position, SEEK_SET);
			current_block = (sk / source_blocksize);
			block_count = (btree_buf_length / source_blocksize);
			set_bitmap(bitmap, sk, btree_buf_length);
			log_mesg(2, 0, 0, fs_opt.debug,
				 "read btreebuf fd = %llu(%i), length = %i(%i)\n",
				 sk, current_block, btree_buf_length, block_count);
			read(source_fd, btree_buf_data, btree_buf_length);
			block = (struct xfs_btree_block *)
				((char *)btree_buf_data + pos - btree_buf_position);
			ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC);
		}

		/*
		 * write out range of used blocks after last range
		 * of free blocks in AG
		 */
		if (next_begin < ag_end) {
			begin = next_begin;
			sizeb = ag_end - begin;
			size = roundup(sizeb << BBSHIFT, source_sectorsize);

			if (size > 0) {
				/* copy extent */
				w_position = (xfs_off_t)begin << BBSHIFT;

				while (size > 0) {
					/*
					 * let lower layer do alignment
					 */
					if (size > w_size) {
						w_length = w_size;
						size -= w_size;
						sizeb -= wblocks;
						numblocks += wblocks;
					} else {
						w_length = size;
						numblocks += sizeb;
						size = 0;
					}

					sk = lseek(source_fd, w_position, SEEK_SET);
					current_block = (sk / source_blocksize);
					block_count = (w_length / source_blocksize);
					set_bitmap(bitmap, sk, w_length);
					log_mesg(2, 0, 0, fs_opt.debug,
						 "read ext fd = %llu(%i), length = %i(%i)\n",
						 sk, current_block, w_length,
						 block_count);
					//read_wbuf(source_fd, &w_buf, mp);
					lseek(source_fd, w_length, SEEK_CUR);

					w_position += w_length;
				}
			}
		}

		log_mesg(2, 0, 0, fs_opt.debug, "write a clean log\n");
		log_length = 1 * 1024 * 1024;

		logstart = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart) << BBSHIFT;
		logstart_pos = rounddown(logstart, (xfs_off_t)log_length);
		if (logstart % log_length) {	/* unaligned */
			sk = lseek(source_fd, logstart_pos, SEEK_SET);
			current_block = (sk / source_blocksize);
			block_count = (log_length / source_blocksize);
			set_bitmap(bitmap, sk, log_length);
			log_mesg(2, 0, 0, fs_opt.debug,
				 "read log start from %llu(%i) %i(%i)\n",
				 sk, current_block, log_length, block_count);
			sk = lseek(source_fd, log_length, SEEK_CUR);
		}

		logend = XFS_FSB_TO_DADDR(mp, mp->m_sb.sb_logstart) << BBSHIFT;
		logend += XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks);
		logend_pos = rounddown(logend, (xfs_off_t)log_length);
		if (logend % log_length) {	/* unaligned */
			sk = lseek(source_fd, logend_pos, SEEK_SET);
			current_block = (sk / source_blocksize);
			block_count = (log_length / source_blocksize);
			set_bitmap(bitmap, sk, log_length);
			log_mesg(2, 0, 0, fs_opt.debug,
				 "read log end from %llu(%i) %i(%i)\n",
				 sk, current_block, log_length, block_count);
			sk = lseek(source_fd, log_length, SEEK_CUR);
		}
		log_mesg(2, 0, 0, fs_opt.debug, "write a clean log done\n");

		prog_cur_block = image_hdr.totalblock / num_ags * (agno + 1) - 1;
		update_pui(&prog, prog_cur_block, prog_cur_block, 0);
	}

	for (current_block = 0; current_block <= image_hdr.totalblock;
							current_block++) {
		if (pc_test_bit(current_block, bitmap))
			bused++;
		else
			bfree++;
	}
	log_mesg(0, 0, 0, fs_opt.debug, "bused = %lli, bfree = %lli\n",
		 bused, bfree);

	fs_close();
	update_pui(&prog, 1, 1, 1);
}
STATIC int
xfs_trim_extents(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno,
	xfs_fsblock_t		start,
	xfs_fsblock_t		len,
	xfs_fsblock_t		minlen,
	__uint64_t		*blocks_trimmed)
{
	struct block_device	*bdev = mp->m_ddev_targp->bt_bdev;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agbp;
	struct xfs_perag	*pag;
	int			error;
	int			i;

	pag = xfs_perag_get(mp, agno);

	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
	if (error || !agbp)
		goto out_put_perag;

	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);

	/*
	 * Force out the log.  This means any transactions that might have freed
	 * space before we took the AGF buffer lock are now on disk, and the
	 * volatile disk cache is flushed.
	 */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/*
	 * Look up the longest btree in the AGF and start with it.
	 */
	error = xfs_alloc_lookup_le(cur, 0,
			be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
	if (error)
		goto out_del_cursor;

	/*
	 * Loop until we are done with all extents that are large
	 * enough to be worth discarding.
	 */
	while (i) {
		xfs_agblock_t	fbno;
		xfs_extlen_t	flen;

		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
		if (error)
			goto out_del_cursor;
		XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
		ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));

		/*
		 * Too small?  Give up.
		 */
		if (flen < minlen) {
			trace_xfs_discard_toosmall(mp, agno, fbno, flen);
			goto out_del_cursor;
		}

		/*
		 * If the extent is entirely outside of the range we are
		 * supposed to discard skip it.  Do not bother to trim
		 * down partially overlapping ranges for now.
		 */
		if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
		    XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
			trace_xfs_discard_exclude(mp, agno, fbno, flen);
			goto next_extent;
		}

		/*
		 * If any blocks in the range are still busy, skip the
		 * discard and try again the next time.
		 */
		if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
			trace_xfs_discard_busy(mp, agno, fbno, flen);
			goto next_extent;
		}

		trace_xfs_discard_extent(mp, agno, fbno, flen);
		error = -blkdev_issue_discard(bdev,
				XFS_AGB_TO_DADDR(mp, agno, fbno),
				XFS_FSB_TO_BB(mp, flen),
				GFP_NOFS, 0);
		if (error)
			goto out_del_cursor;
		*blocks_trimmed += flen;

next_extent:
		error = xfs_btree_decrement(cur, 0, &i);
		if (error)
			goto out_del_cursor;
	}

out_del_cursor:
	xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	xfs_buf_relse(agbp);
out_put_perag:
	xfs_perag_put(pag);
	return error;
}
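The "entirely outside the range" test above reads naturally as a pure predicate. A minimal sketch (editor's addition, with plain integers standing in for xfs_fsblock_t and the AG-to-fsblock conversion already applied):

#include <stdbool.h>
#include <stdint.h>

/*
 * Mirrors the exclusion check in xfs_trim_extents(): skip the free
 * extent [fbno_fsb, fbno_fsb + flen) when it lies entirely before or
 * entirely after the requested trim range [start, start + len).
 */
static bool outside_trim_range(uint64_t fbno_fsb, uint64_t flen,
			       uint64_t start, uint64_t len)
{
	return fbno_fsb + flen < start || fbno_fsb >= start + len;
}

Partially overlapping extents fail this test and are discarded whole, which matches the comment in the source: it does not bother trimming them down to the requested range.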
/*
 * rebuilds an inode tree given a cursor.  We're lazy here and call
 * the routine that builds the agi
 */
static void
build_ino_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
		bt_status_t *btree_curs, __uint32_t magic,
		struct agi_stat *agi_stat, int finobt)
{
	xfs_agnumber_t		i;
	xfs_agblock_t		j;
	xfs_agblock_t		agbno;
	xfs_agino_t		first_agino;
	struct xfs_btree_block	*bt_hdr;
	xfs_inobt_rec_t		*bt_rec;
	ino_tree_node_t		*ino_rec;
	bt_stat_level_t		*lptr;
	xfs_agino_t		count = 0;
	xfs_agino_t		freecount = 0;
	int			inocnt;
	uint8_t			finocnt;
	int			k;
	int			level = btree_curs->num_levels;
	int			spmask;
	uint64_t		sparse;
	uint16_t		holemask;

	for (i = 0; i < level; i++) {
		lptr = &btree_curs->level[i];

		agbno = get_next_blockaddr(agno, i, btree_curs);
		lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, agbno),
					XFS_FSB_TO_BB(mp, 1));

		if (i == btree_curs->num_levels - 1)
			btree_curs->root = agbno;

		lptr->agbno = agbno;
		lptr->prev_agbno = NULLAGBLOCK;
		lptr->prev_buf_p = NULL;
		/*
		 * initialize block header
		 */
		lptr->buf_p->b_ops = &xfs_inobt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, magic,
						i, 0, agno,
						XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, magic,
						i, 0, agno, 0);
	}

	/*
	 * run along leaf, setting up records.  as we have to switch
	 * blocks, call the prop_ino_cursor routine to set up the new
	 * pointers for the parent.  that can recurse up to the root
	 * if required.  set the sibling pointers for leaf level here.
	 */
	if (finobt)
		ino_rec = findfirst_free_inode_rec(agno);
	else
		ino_rec = findfirst_inode_rec(agno);

	if (ino_rec != NULL)
		first_agino = ino_rec->ino_startnum;
	else
		first_agino = NULLAGINO;

	lptr = &btree_curs->level[0];

	for (i = 0; i < lptr->num_blocks; i++) {
		/*
		 * block initialization, lay in block header
		 */
		lptr->buf_p->b_ops = &xfs_inobt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, magic,
						0, 0, agno,
						XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, magic,
						0, 0, agno, 0);

		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
							(lptr->modulo > 0));

		if (lptr->modulo > 0)
			lptr->modulo--;

		if (lptr->num_recs_pb > 0)
			prop_ino_cursor(mp, agno, btree_curs,
					ino_rec->ino_startnum, 0);

		bt_rec = (xfs_inobt_rec_t *)
			  ((char *)bt_hdr + XFS_INOBT_BLOCK_LEN(mp));
		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
			ASSERT(ino_rec != NULL);
			bt_rec[j].ir_startino =
					cpu_to_be32(ino_rec->ino_startnum);
			bt_rec[j].ir_free = cpu_to_be64(ino_rec->ir_free);

			inocnt = finocnt = 0;
			for (k = 0; k < sizeof(xfs_inofree_t) * NBBY; k++) {
				ASSERT(is_inode_confirmed(ino_rec, k));

				if (is_inode_sparse(ino_rec, k))
					continue;
				if (is_inode_free(ino_rec, k))
					finocnt++;
				inocnt++;
			}

			/*
			 * Set the freecount and check whether we need to update
			 * the sparse format fields. Otherwise, skip to the next
			 * record.
			 */
			inorec_set_freecount(mp, &bt_rec[j], finocnt);
			if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
				goto nextrec;

			/*
			 * Convert the 64-bit in-core sparse inode state to the
			 * 16-bit on-disk holemask.
			 */
			holemask = 0;
			spmask = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1;
			sparse = ino_rec->ir_sparse;
			for (k = 0; k < XFS_INOBT_HOLEMASK_BITS; k++) {
				if (sparse & spmask) {
					ASSERT((sparse & spmask) == spmask);
					holemask |= (1 << k);
				} else
					ASSERT((sparse & spmask) == 0);
				sparse >>= XFS_INODES_PER_HOLEMASK_BIT;
			}

			bt_rec[j].ir_u.sp.ir_count = inocnt;
			bt_rec[j].ir_u.sp.ir_holemask = cpu_to_be16(holemask);

nextrec:
			freecount += finocnt;
			count += inocnt;

			if (finobt)
				ino_rec = next_free_ino_rec(ino_rec);
			else
				ino_rec = next_ino_rec(ino_rec);
		}

		if (ino_rec != NULL) {
			/*
			 * get next leaf level block
			 */
			if (lptr->prev_buf_p != NULL) {
#ifdef XR_BLD_INO_TRACE
				fprintf(stderr, "writing inobt agbno %u\n",
					lptr->prev_agbno);
#endif
				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
				libxfs_writebuf(lptr->prev_buf_p, 0);
			}
			lptr->prev_buf_p = lptr->buf_p;
			lptr->prev_agbno = lptr->agbno;
			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);

			lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
					XFS_FSB_TO_BB(mp, 1));
		}
	}

	if (agi_stat) {
		agi_stat->first_agino = first_agino;
		agi_stat->count = count;
		agi_stat->freecount = freecount;
	}
}
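The 64-bit-to-16-bit holemask conversion above is self-contained enough to lift out. A standalone sketch (editor's addition), assuming the on-disk constants XFS_INODES_PER_HOLEMASK_BIT == 4 and XFS_INOBT_HOLEMASK_BITS == 16, i.e. each holemask bit summarises a 4-inode group of the 64-inode chunk:

#include <assert.h>
#include <stdint.h>

#define INODES_PER_HOLEMASK_BIT	4	/* XFS_INODES_PER_HOLEMASK_BIT */
#define HOLEMASK_BITS		16	/* XFS_INOBT_HOLEMASK_BITS */

/* Collapse a 64-bit per-inode sparse bitmap into a 16-bit holemask. */
static uint16_t sparse_to_holemask(uint64_t sparse)
{
	uint64_t spmask = (1ULL << INODES_PER_HOLEMASK_BIT) - 1;
	uint16_t holemask = 0;
	int k;

	for (k = 0; k < HOLEMASK_BITS; k++) {
		if (sparse & spmask) {
			/* each 4-inode group must be all-sparse or all-present */
			assert((sparse & spmask) == spmask);
			holemask |= (1U << k);
		}
		sparse >>= INODES_PER_HOLEMASK_BIT;
	}
	return holemask;
}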
int
main(int argc, char **argv)
{
	int		i, j;
	int		howfar = 0;
	int		open_flags;
	xfs_off_t	pos, end_pos;
	size_t		length;
	int		c, first_residue, tmp_residue;
	__uint64_t	size, sizeb;
	__uint64_t	numblocks = 0;
	int		wblocks = 0;
	int		num_threads = 0;
	struct dioattr	d;
	int		wbuf_size;
	int		wbuf_align;
	int		wbuf_miniosize;
	int		source_is_file = 0;
	int		buffered_output = 0;
	int		duplicate = 0;
	uint		btree_levels, current_level;
	ag_header_t	ag_hdr;
	xfs_mount_t	*mp;
	xfs_mount_t	mbuf;
	xfs_buf_t	*sbp;
	xfs_sb_t	*sb;
	xfs_agnumber_t	num_ags, agno;
	xfs_agblock_t	bno;
	xfs_daddr_t	begin, next_begin, ag_begin, new_begin, ag_end;
	struct xfs_btree_block *block;
	xfs_alloc_ptr_t	*ptr;
	xfs_alloc_rec_t	*rec_ptr;
	extern char	*optarg;
	extern int	optind;
	libxfs_init_t	xargs;
	thread_args	*tcarg;
	struct stat64	statbuf;

	progname = basename(argv[0]);

	setlocale(LC_ALL, "");
	bindtextdomain(PACKAGE, LOCALEDIR);
	textdomain(PACKAGE);

	while ((c = getopt(argc, argv, "bdL:V")) != EOF) {
		switch (c) {
		case 'b':
			buffered_output = 1;
			break;
		case 'd':
			duplicate = 1;
			break;
		case 'L':
			logfile_name = optarg;
			break;
		case 'V':
			printf(_("%s version %s\n"), progname, VERSION);
			exit(0);
		case '?':
			usage();
		}
	}

	if (argc - optind < 2)
		usage();

	if (logfile_name) {
		logfd = open(logfile_name, O_CREAT|O_WRONLY|O_EXCL, 0600);
	} else {
		logfile_name = LOGFILE_NAME;
		logfd = mkstemp(logfile_name);
	}

	if (logfd < 0) {
		fprintf(stderr, _("%s: couldn't open log file \"%s\"\n"),
			progname, logfile_name);
		perror(_("Aborting XFS copy - reason"));
		exit(1);
	}

	if ((logerr = fdopen(logfd, "w")) == NULL) {
		fprintf(stderr, _("%s: couldn't set up logfile stream\n"),
			progname);
		perror(_("Aborting XFS copy - reason"));
		exit(1);
	}

	source_name = argv[optind];
	source_fd = -1;
	optind++;

	num_targets = argc - optind;
	if ((target = malloc(sizeof(target_control) * num_targets)) == NULL) {
		do_log(_("Couldn't allocate target array\n"));
		die_perror();
	}
	for (i = 0; optind < argc; i++, optind++) {
		target[i].name = argv[optind];
		target[i].fd = -1;
		target[i].position = -1;
		target[i].state = INACTIVE;
		target[i].error = 0;
		target[i].err_type = 0;
	}

	parent_pid = getpid();

	if (atexit(killall)) {
		do_log(_("%s: couldn't register atexit function.\n"), progname);
		die_perror();
	}

	/* open up source -- is it a file? */

	open_flags = O_RDONLY;

	if ((source_fd = open(source_name, open_flags)) < 0) {
		do_log(_("%s: couldn't open source \"%s\"\n"),
			progname, source_name);
		die_perror();
	}

	if (fstat64(source_fd, &statbuf) < 0) {
		do_log(_("%s: couldn't stat source \"%s\"\n"),
			progname, source_name);
		die_perror();
	}

	if (S_ISREG(statbuf.st_mode))
		source_is_file = 1;

	if (source_is_file && platform_test_xfs_fd(source_fd)) {
		if (fcntl(source_fd, F_SETFL, open_flags | O_DIRECT) < 0) {
			do_log(_("%s: Cannot set direct I/O flag on \"%s\".\n"),
				progname, source_name);
			die_perror();
		}
		if (xfsctl(source_name, source_fd, XFS_IOC_DIOINFO, &d) < 0) {
			do_log(_("%s: xfsctl on file \"%s\" failed.\n"),
				progname, source_name);
			die_perror();
		}

		wbuf_align = d.d_mem;
		wbuf_size = MIN(d.d_maxiosz, 1 * 1024 * 1024);
		wbuf_miniosize = d.d_miniosz;
	} else {
		/* set arbitrary I/O params, miniosize at least 1 disk block */

		wbuf_align = getpagesize();
		wbuf_size = 1 * 1024 * 1024;
		wbuf_miniosize = -1;	/* set after mounting source fs */
	}

	if (!source_is_file) {
		/*
		 * check to make sure a filesystem isn't mounted
		 * on the device
		 */
		if (platform_check_ismounted(source_name, NULL, &statbuf, 0)) {
			do_log(
	_("%s: Warning -- a filesystem is mounted on the source device.\n"),
				progname);
			do_log(
	_("\t\tGenerated copies may be corrupt unless the source is\n"));
			do_log(
	_("\t\tunmounted or mounted read-only.  Copy proceeding...\n"));
		}
	}

	/* prepare the libxfs_init structure */

	memset(&xargs, 0, sizeof(xargs));
	xargs.isdirect = LIBXFS_DIRECT;
	xargs.isreadonly = LIBXFS_ISREADONLY;

	if (source_is_file) {
		xargs.dname = source_name;
		xargs.disfile = 1;
	} else
		xargs.volname = source_name;

	if (!libxfs_init(&xargs)) {
		do_log(_("%s: couldn't initialize XFS library\n"
			"%s: Aborting.\n"), progname, progname);
		exit(1);
	}

	/* prepare the mount structure */

	sbp = libxfs_readbuf(xargs.ddev, XFS_SB_DADDR, 1, 0);
	memset(&mbuf, 0, sizeof(xfs_mount_t));
	sb = &mbuf.m_sb;
	libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbp));

	mp = libxfs_mount(&mbuf, sb, xargs.ddev, xargs.logdev, xargs.rtdev, 1);
	if (mp == NULL) {
		do_log(_("%s: %s filesystem failed to initialize\n"
			"%s: Aborting.\n"), progname, source_name, progname);
		exit(1);
	} else if (mp->m_sb.sb_inprogress) {
		do_log(_("%s: %s filesystem failed to initialize\n"
			"%s: Aborting.\n"), progname, source_name, progname);
		exit(1);
	} else if (mp->m_sb.sb_logstart == 0) {
		do_log(_("%s: %s has an external log.\n%s: Aborting.\n"),
			progname, source_name, progname);
		exit(1);
	} else if (mp->m_sb.sb_rextents != 0) {
		do_log(_("%s: %s has a real-time section.\n"
			"%s: Aborting.\n"), progname, source_name, progname);
		exit(1);
	}

	source_blocksize = mp->m_sb.sb_blocksize;
	source_sectorsize = mp->m_sb.sb_sectsize;

	if (wbuf_miniosize == -1)
		wbuf_miniosize = source_sectorsize;

	ASSERT(source_blocksize % source_sectorsize == 0);
	ASSERT(source_sectorsize % BBSIZE == 0);

	if (source_blocksize > source_sectorsize) {
		/* get number of leftover sectors in last block of ag header */

		tmp_residue = ((XFS_AGFL_DADDR(mp) + 1) * source_sectorsize)
					% source_blocksize;
		first_residue = (tmp_residue == 0) ? 0 :
			source_blocksize - tmp_residue;
		ASSERT(first_residue % source_sectorsize == 0);
	} else if (source_blocksize == source_sectorsize) {
		first_residue = 0;
	} else {
		do_log(_("Error: filesystem block size is smaller than the"
			" disk sectorsize.\nAborting XFS copy now.\n"));
		exit(1);
	}

	first_agbno = (((XFS_AGFL_DADDR(mp) + 1) * source_sectorsize)
				+ first_residue) / source_blocksize;
	ASSERT(first_agbno != 0);
	ASSERT(((((XFS_AGFL_DADDR(mp) + 1) * source_sectorsize)
				+ first_residue) % source_blocksize) == 0);

	/* now open targets */

	open_flags = O_RDWR;

	for (i = 0; i < num_targets; i++) {
		int	write_last_block = 0;

		if (stat64(target[i].name, &statbuf) < 0) {
			/* ok, assume it's a file and create it */

			do_out(_("Creating file %s\n"), target[i].name);

			open_flags |= O_CREAT;
			if (!buffered_output)
				open_flags |= O_DIRECT;
			write_last_block = 1;
		} else if (S_ISREG(statbuf.st_mode)) {
			open_flags |= O_TRUNC;
			if (!buffered_output)
				open_flags |= O_DIRECT;
			write_last_block = 1;
		} else {
			/*
			 * check to make sure a filesystem isn't mounted
			 * on the device
			 */
			if (platform_check_ismounted(target[i].name,
							NULL, &statbuf, 0)) {
				do_log(_("%s: a filesystem is mounted "
					"on target device \"%s\".\n"
					"%s cannot copy to mounted filesystems."
					"  Aborting\n"),
					progname, target[i].name, progname);
				exit(1);
			}
		}

		target[i].fd = open(target[i].name, open_flags, 0644);
		if (target[i].fd < 0) {
			do_log(_("%s: couldn't open target \"%s\"\n"),
				progname, target[i].name);
			die_perror();
		}

		if (write_last_block) {
			/* ensure regular files are correctly sized */

			if (ftruncate64(target[i].fd, mp->m_sb.sb_dblocks *
						source_blocksize)) {
				do_log(_("%s: cannot grow data section.\n"),
					progname);
				die_perror();
			}
			if (platform_test_xfs_fd(target[i].fd)) {
				if (xfsctl(target[i].name, target[i].fd,
						XFS_IOC_DIOINFO, &d) < 0) {
					do_log(
				_("%s: xfsctl on \"%s\" failed.\n"),
						progname, target[i].name);
					die_perror();
				} else {
					wbuf_align = MAX(wbuf_align, d.d_mem);
					wbuf_size = MIN(d.d_maxiosz, wbuf_size);
					wbuf_miniosize = MAX(d.d_miniosz,
								wbuf_miniosize);
				}
			}
		} else {
			char	*lb[XFS_MAX_SECTORSIZE] = { NULL };
			off64_t	off;

			/* ensure device files are sufficiently large */

			off = mp->m_sb.sb_dblocks * source_blocksize;
			off -= sizeof(lb);
			if (pwrite64(target[i].fd, lb, sizeof(lb), off) < 0) {
				do_log(_("%s: failed to write last block\n"),
					progname);
				do_log(_("\tIs target \"%s\" too small?\n"),
					target[i].name);
				die_perror();
			}
		}
	}

	/* initialize locks and bufs */

	if (pthread_mutex_init(&glob_masks.mutex, NULL) != 0) {
		do_log(_("Couldn't initialize global thread mask\n"));
		die_perror();
	}
	glob_masks.num_working = 0;

	if (wbuf_init(&w_buf, wbuf_size, wbuf_align,
					wbuf_miniosize, 0) == NULL) {
		do_log(_("Error initializing wbuf 0\n"));
		die_perror();
	}

	wblocks = wbuf_size / BBSIZE;

	if (wbuf_init(&btree_buf, MAX(source_blocksize, wbuf_miniosize),
				wbuf_align, wbuf_miniosize, 1) == NULL) {
		do_log(_("Error initializing btree buf 1\n"));
		die_perror();
	}

	if (pthread_mutex_init(&mainwait, NULL) != 0) {
		do_log(_("Error creating first semaphore.\n"));
		die_perror();
		exit(1);
	}
	/* need to start out blocking */
	pthread_mutex_lock(&mainwait);

	/* set up sigchild signal handler */

	signal(SIGCHLD, handler);
	signal_maskfunc(SIGCHLD, SIG_BLOCK);

	/* make children */

	if ((targ = malloc(num_targets * sizeof(thread_args))) == NULL) {
		do_log(_("Couldn't malloc space for thread args\n"));
		die_perror();
		exit(1);
	}

	for (i = 0, tcarg = targ; i < num_targets; i++, tcarg++) {
		if (!duplicate)
			platform_uuid_generate(&tcarg->uuid);
		else
			platform_uuid_copy(&tcarg->uuid, &mp->m_sb.sb_uuid);

		if (pthread_mutex_init(&tcarg->wait, NULL) != 0) {
			do_log(_("Error creating thread mutex %d\n"), i);
			die_perror();
			exit(1);
		}
		/* need to start out blocking */
		pthread_mutex_lock(&tcarg->wait);
	}

	for (i = 0, tcarg = targ; i < num_targets; i++, tcarg++) {
		tcarg->id = i;
		tcarg->fd = target[i].fd;

		target[i].state = ACTIVE;
		num_threads++;

		if (pthread_create(&target[i].pid, NULL,
					begin_reader, (void *)tcarg)) {
			do_log(_("Error creating thread for target %d\n"), i);
			die_perror();
		}
	}

	ASSERT(num_targets == num_threads);

	/* set up statistics */

	num_ags = mp->m_sb.sb_agcount;

	init_bar(mp->m_sb.sb_blocksize / BBSIZE
			* ((__uint64_t)mp->m_sb.sb_dblocks
			    - (__uint64_t)mp->m_sb.sb_fdblocks + 10 * num_ags));

	kids = num_targets;
	block = (struct xfs_btree_block *)btree_buf.data;

	for (agno = 0; agno < num_ags && kids > 0; agno++) {
		/* read in first blocks of the ag */

		read_ag_header(source_fd, agno, &w_buf, &ag_hdr, mp,
			source_blocksize, source_sectorsize);

		/* set the in_progress bit for the first AG */

		if (agno == 0)
			ag_hdr.xfs_sb->sb_inprogress = 1;

		/* save what we need (agf) in the btree buffer */

		memmove(btree_buf.data, ag_hdr.xfs_agf, source_sectorsize);
		ag_hdr.xfs_agf = (xfs_agf_t *)btree_buf.data;
		btree_buf.length = source_blocksize;

		/* write the ag header out */

		write_wbuf();

		/* traverse btree until we get to the leftmost leaf node */

		bno = be32_to_cpu(ag_hdr.xfs_agf->agf_roots[XFS_BTNUM_BNOi]);
		current_level = 0;
		btree_levels = be32_to_cpu(ag_hdr.xfs_agf->
						agf_levels[XFS_BTNUM_BNOi]);

		ag_end = XFS_AGB_TO_DADDR(mp, agno,
				be32_to_cpu(ag_hdr.xfs_agf->agf_length) - 1)
				+ source_blocksize / BBSIZE;

		for (;;) {
			/* none of this touches the w_buf buffer */

			ASSERT(current_level < btree_levels);

			current_level++;

			btree_buf.position = pos = (xfs_off_t)
				XFS_AGB_TO_DADDR(mp, agno, bno) << BBSHIFT;
			btree_buf.length = source_blocksize;

			read_wbuf(source_fd, &btree_buf, mp);
			block = (struct xfs_btree_block *)
				 ((char *)btree_buf.data +
				  pos - btree_buf.position);

			ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC);

			if (be16_to_cpu(block->bb_level) == 0)
				break;

			ptr = XFS_ALLOC_PTR_ADDR(mp, block, 1,
							mp->m_alloc_mxr[1]);
			bno = be32_to_cpu(ptr[0]);
		}

		/* align first data copy but don't overwrite ag header */

		pos = w_buf.position >> BBSHIFT;
		length = w_buf.length >> BBSHIFT;
		next_begin = pos + length;
		ag_begin = next_begin;

		ASSERT(w_buf.position % source_sectorsize == 0);

		/* handle the rest of the ag */

		for (;;) {
			if (be16_to_cpu(block->bb_level) != 0) {
				do_log(
			_("WARNING: source filesystem inconsistent.\n"));
				do_log(
			_("  A leaf btree rec isn't a leaf.  Aborting now.\n"));
				exit(1);
			}

			rec_ptr = XFS_ALLOC_REC_ADDR(mp, block, 1);
			for (i = 0; i < be16_to_cpu(block->bb_numrecs);
							i++, rec_ptr++) {
				/* calculate in daddr's */

				begin = next_begin;

				/*
				 * protect against pathological case of a
				 * hole right after the ag header in a
				 * mis-aligned case
				 */

				if (begin < ag_begin)
					begin = ag_begin;

				/*
				 * round size up to ensure we copy a
				 * range bigger than required
				 */

				sizeb = XFS_AGB_TO_DADDR(mp, agno,
					be32_to_cpu(rec_ptr->ar_startblock))
					- begin;
				size = roundup(sizeb << BBSHIFT, wbuf_miniosize);

				if (size > 0) {
					/* copy extent */

					w_buf.position = (xfs_off_t)
						begin << BBSHIFT;

					while (size > 0) {
						/*
						 * let lower layer do alignment
						 */
						if (size > w_buf.size) {
							w_buf.length = w_buf.size;
							size -= w_buf.size;
							sizeb -= wblocks;
							numblocks += wblocks;
						} else {
							w_buf.length = size;
							numblocks += sizeb;
							size = 0;
						}

						read_wbuf(source_fd, &w_buf, mp);
						write_wbuf();

						w_buf.position += w_buf.length;

						howfar = bump_bar(
								howfar, numblocks);
					}
				}

				/* round next starting point down */

				new_begin = XFS_AGB_TO_DADDR(mp, agno,
						be32_to_cpu(rec_ptr->ar_startblock) +
						be32_to_cpu(rec_ptr->ar_blockcount));
				next_begin = rounddown(new_begin,
						w_buf.min_io_size >> BBSHIFT);
			}

			if (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK)
				break;

			/* read in next btree record block */

			btree_buf.position = pos = (xfs_off_t)
				XFS_AGB_TO_DADDR(mp, agno, be32_to_cpu(
					block->bb_u.s.bb_rightsib)) << BBSHIFT;
			btree_buf.length = source_blocksize;

			/* let read_wbuf handle alignment */

			read_wbuf(source_fd, &btree_buf, mp);

			block = (struct xfs_btree_block *)
				 ((char *)btree_buf.data +
				  pos - btree_buf.position);

			ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC);
		}

		/*
		 * write out range of used blocks after last range
		 * of free blocks in AG
		 */
		if (next_begin < ag_end) {
			begin = next_begin;

			sizeb = ag_end - begin;
			size = roundup(sizeb << BBSHIFT, wbuf_miniosize);

			if (size > 0) {
				/* copy extent */

				w_buf.position = (xfs_off_t)begin << BBSHIFT;

				while (size > 0) {
					/*
					 * let lower layer do alignment
					 */
					if (size > w_buf.size) {
						w_buf.length = w_buf.size;
						size -= w_buf.size;
						sizeb -= wblocks;
						numblocks += wblocks;
					} else {
						w_buf.length = size;
						numblocks += sizeb;
						size = 0;
					}

					read_wbuf(source_fd, &w_buf, mp);
					write_wbuf();

					w_buf.position += w_buf.length;

					howfar = bump_bar(howfar, numblocks);
				}
			}
		}
	}

	if (kids > 0) {
		if (!duplicate) {
			/* write a clean log using the specified UUID */
			for (j = 0, tcarg = targ; j < num_targets; j++) {
				w_buf.owner = tcarg;
				w_buf.length = rounddown(w_buf.size,
							 w_buf.min_io_size);
				pos = write_log_header(
						source_fd, &w_buf, mp);
				end_pos = write_log_trailer(
						source_fd, &w_buf, mp);
				w_buf.position = pos;
				memset(w_buf.data, 0, w_buf.length);

				while (w_buf.position < end_pos) {
					do_write(tcarg);
					w_buf.position += w_buf.length;
				}
				tcarg++;
			}
		} else {
			num_ags = 1;
		}

		/* reread and rewrite superblocks (UUID and in-progress) */
		/* [backwards, so inprogress bit only updated when done] */

		for (i = num_ags - 1; i >= 0; i--) {
			read_ag_header(source_fd, i, &w_buf, &ag_hdr, mp,
				source_blocksize, source_sectorsize);
			if (i == 0)
				ag_hdr.xfs_sb->sb_inprogress = 0;

			/* do each thread in turn, each has its own UUID */

			for (j = 0, tcarg = targ; j < num_targets; j++) {
				platform_uuid_copy(&ag_hdr.xfs_sb->sb_uuid,
							&tcarg->uuid);
				do_write(tcarg);
				tcarg++;
			}
		}

		bump_bar(100, 0);
	}

	check_errors();
	killall();
	pthread_exit(NULL);
	/*NOTREACHED*/
	return 0;
}
/* Does this block match the btree information passed in? */
STATIC int
xrep_findroot_block(
	struct xrep_findroot		*ri,
	struct xrep_find_ag_btree	*fab,
	uint64_t			owner,
	xfs_agblock_t			agbno,
	bool				*done_with_block)
{
	struct xfs_mount		*mp = ri->sc->mp;
	struct xfs_buf			*bp;
	struct xfs_btree_block		*btblock;
	xfs_daddr_t			daddr;
	int				block_level;
	int				error = 0;

	daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);

	/*
	 * Blocks in the AGFL have stale contents that might just happen to
	 * have a matching magic and uuid.  We don't want to pull these blocks
	 * in as part of a tree root, so we have to filter out the AGFL stuff
	 * here.  If the AGFL looks insane we'll just refuse to repair.
	 */
	if (owner == XFS_RMAP_OWN_AG) {
		error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
				xrep_findroot_agfl_walk, &agbno);
		if (error == XFS_BTREE_QUERY_RANGE_ABORT)
			return 0;
		if (error)
			return error;
	}

	/*
	 * Read the buffer into memory so that we can see if it's a match for
	 * our btree type.  We have no clue if it is beforehand, and we want to
	 * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
	 * will cause needless disk reads in subsequent calls to this function)
	 * and logging metadata verifier failures.
	 *
	 * Therefore, pass in NULL buffer ops.  If the buffer was already in
	 * memory from some other caller it will already have b_ops assigned.
	 * If it was in memory from a previous unsuccessful findroot_block
	 * call, the buffer won't have b_ops but it should be clean and ready
	 * for us to try to verify if the read call succeeds.  The same applies
	 * if the buffer wasn't in memory at all.
	 *
	 * Note: If we never match a btree type with this buffer, it will be
	 * left in memory with NULL b_ops.  This shouldn't be a problem unless
	 * the buffer gets written.
	 */
	error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
			mp->m_bsize, 0, &bp, NULL);
	if (error)
		return error;

	/* Ensure the block magic matches the btree type we're looking for. */
	btblock = XFS_BUF_TO_BLOCK(bp);
	ASSERT(fab->buf_ops->magic[1] != 0);
	if (btblock->bb_magic != fab->buf_ops->magic[1])
		goto out;

	/*
	 * If the buffer already has ops applied and they're not the ones for
	 * this btree type, we know this block doesn't match the btree and we
	 * can bail out.
	 *
	 * If the buffer ops match ours, someone else has already validated
	 * the block for us, so we can move on to checking if this is a root
	 * block candidate.
	 *
	 * If the buffer does not have ops, nobody has successfully validated
	 * the contents and the buffer cannot be dirty.  If the magic, uuid,
	 * and structure match this btree type then we'll move on to checking
	 * if it's a root block candidate.  If there is no match, bail out.
	 */
	if (bp->b_ops) {
		if (bp->b_ops != fab->buf_ops)
			goto out;
	} else {
		ASSERT(!xfs_trans_buf_is_dirty(bp));
		if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
				&mp->m_sb.sb_meta_uuid))
			goto out;
		/*
		 * Read verifiers can reference b_ops, so we set the pointer
		 * here.  If the verifier fails we'll reset the buffer state
		 * to what it was before we touched the buffer.
		 */
		bp->b_ops = fab->buf_ops;
		fab->buf_ops->verify_read(bp);
		if (bp->b_error) {
			bp->b_ops = NULL;
			bp->b_error = 0;
			goto out;
		}

		/*
		 * Some read verifiers will (re)set b_ops, so we must be
		 * careful not to change b_ops after running the verifier.
		 */
	}

	/*
	 * This block passes the magic/uuid and verifier tests for this btree
	 * type.  We don't need the caller to try the other tree types.
	 */
	*done_with_block = true;

	/*
	 * Compare this btree block's level to the height of the current
	 * candidate root block.
	 *
	 * If the level matches the root we found previously, throw away both
	 * blocks because there can't be two candidate roots.
	 *
	 * If level is lower in the tree than the root we found previously,
	 * ignore this block.
	 */
	block_level = xfs_btree_get_level(btblock);
	if (block_level + 1 == fab->height) {
		fab->root = NULLAGBLOCK;
		goto out;
	} else if (block_level < fab->height) {
		goto out;
	}

	/*
	 * This is the highest block in the tree that we've found so far.
	 * Update the btree height to reflect what we've learned from this
	 * block.
	 */
	fab->height = block_level + 1;

	/*
	 * If this block doesn't have sibling pointers, then it's the new root
	 * block candidate.  Otherwise, the root will be found farther up the
	 * tree.
	 */
	if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
	    btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
		fab->root = agbno;
	else
		fab->root = NULLAGBLOCK;

	trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
			be32_to_cpu(btblock->bb_magic), fab->height - 1);
out:
	xfs_trans_brelse(ri->sc->tp, bp);
	return error;
}
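The candidate-root bookkeeping at the end of this function condenses to a small state update. A sketch (editor's addition, plain integers in place of the btree block; NO_ROOT stands in for NULLAGBLOCK):

#include <stdbool.h>
#include <stdint.h>

#define NO_ROOT	((uint32_t)-1)	/* stands in for NULLAGBLOCK */

struct root_candidate {
	uint32_t root;		/* fab->root */
	int	 height;	/* fab->height: highest level seen + 1 */
};

/* Mirrors the level bookkeeping in xrep_findroot_block(). */
static void consider_block(struct root_candidate *fab, int block_level,
			   bool has_siblings, uint32_t agbno)
{
	if (block_level + 1 == fab->height) {
		/* second block at the root level: no unique root */
		fab->root = NO_ROOT;
		return;
	}
	if (block_level < fab->height)
		return;		/* lower in the tree, ignore */

	/* highest block seen so far; record the new height */
	fab->height = block_level + 1;
	/* a sibling-less block at the new top is the candidate root */
	fab->root = has_siblings ? NO_ROOT : agbno;
}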
/* Make sure the free mask is consistent with what the inodes think. */
STATIC int
xfs_scrub_iallocbt_check_freemask(
	struct xfs_scrub_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	struct xfs_owner_info		oinfo;
	struct xfs_imap			imap;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_dinode		*dip;
	struct xfs_buf			*bp;
	xfs_ino_t			fsino;
	xfs_agino_t			nr_inodes;
	xfs_agino_t			agino;
	xfs_agino_t			chunkino;
	xfs_agino_t			clusterino;
	xfs_agblock_t			agbno;
	int				blks_per_cluster;
	uint16_t			holemask;
	uint16_t			ir_holemask;
	int				error = 0;

	/* Make sure the freemask matches the inode records. */
	blks_per_cluster = xfs_icluster_size_fsb(mp);
	nr_inodes = XFS_OFFBNO_TO_AGINO(mp, blks_per_cluster, 0);
	xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES);

	for (agino = irec->ir_startino;
	     agino < irec->ir_startino + XFS_INODES_PER_CHUNK;
	     agino += blks_per_cluster * mp->m_sb.sb_inopblock) {
		fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_private.a.agno, agino);
		chunkino = agino - irec->ir_startino;
		agbno = XFS_AGINO_TO_AGBNO(mp, agino);

		/* Compute the holemask mask for this cluster. */
		for (clusterino = 0, holemask = 0; clusterino < nr_inodes;
		     clusterino += XFS_INODES_PER_HOLEMASK_BIT)
			holemask |= XFS_INOBT_MASK((chunkino + clusterino) /
					XFS_INODES_PER_HOLEMASK_BIT);

		/* The whole cluster must be a hole or not a hole. */
		ir_holemask = (irec->ir_holemask & holemask);
		if (ir_holemask != holemask && ir_holemask != 0) {
			xfs_scrub_btree_set_corrupt(bs->sc, bs->cur, 0);
			continue;
		}

		/* If any part of this is a hole, skip it. */
		if (ir_holemask)
			continue;

		/* Grab the inode cluster buffer. */
		imap.im_blkno = XFS_AGB_TO_DADDR(mp, bs->cur->bc_private.a.agno,
				agbno);
		imap.im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
		imap.im_boffset = 0;

		error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap,
				&dip, &bp, 0, 0);
		if (!xfs_scrub_btree_process_error(bs->sc, bs->cur, 0, &error))
			continue;

		/* Which inodes are free? */
		for (clusterino = 0; clusterino < nr_inodes; clusterino++) {
			error = xfs_scrub_iallocbt_check_cluster_freemask(bs,
					fsino, chunkino, clusterino, irec, bp);
			if (error) {
				xfs_trans_brelse(bs->cur->bc_tp, bp);
				return error;
			}
		}
		xfs_trans_brelse(bs->cur->bc_tp, bp);
	}

	return error;
}
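The inner loop above derives which holemask bits cover one inode cluster from the cluster's offset within the 64-inode chunk. A standalone sketch of that bit math (editor's addition, assuming XFS_INODES_PER_HOLEMASK_BIT == 4 and treating XFS_INOBT_MASK(i) as 1 << i):

#include <stdint.h>

#define INODES_PER_HOLEMASK_BIT	4	/* XFS_INODES_PER_HOLEMASK_BIT */

/*
 * Which holemask bits cover one inode cluster?  chunkino is the
 * cluster's first inode offset within the 64-inode chunk, nr_inodes
 * the number of inodes per cluster.
 */
static uint16_t cluster_holemask(uint32_t chunkino, uint32_t nr_inodes)
{
	uint16_t holemask = 0;
	uint32_t clusterino;

	for (clusterino = 0; clusterino < nr_inodes;
	     clusterino += INODES_PER_HOLEMASK_BIT)
		holemask |= 1U << ((chunkino + clusterino) /
				   INODES_PER_HOLEMASK_BIT);
	return holemask;
}

For example, a 16-inode cluster starting at chunk offset 32 covers holemask bits 8 through 11, and the scrubber then requires those four bits to be all set (a hole) or all clear.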
/*
 * rebuilds a freespace tree given a cursor and magic number of type
 * of tree to build (bno or bcnt).  returns the number of free blocks
 * represented by the tree.
 */
static xfs_extlen_t
build_freespace_tree(xfs_mount_t *mp, xfs_agnumber_t agno,
		bt_status_t *btree_curs, __uint32_t magic)
{
	xfs_agnumber_t		i;
	xfs_agblock_t		j;
	struct xfs_btree_block	*bt_hdr;
	xfs_alloc_rec_t		*bt_rec;
	int			level;
	xfs_agblock_t		agbno;
	extent_tree_node_t	*ext_ptr;
	bt_stat_level_t		*lptr;
	xfs_extlen_t		freeblks;
	__uint32_t		crc_magic;

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "in build_freespace_tree, agno = %d\n", agno);
#endif
	level = btree_curs->num_levels;
	freeblks = 0;

	ASSERT(level > 0);
	if (magic == XFS_ABTB_MAGIC)
		crc_magic = XFS_ABTB_CRC_MAGIC;
	else
		crc_magic = XFS_ABTC_CRC_MAGIC;

	/*
	 * initialize the first block on each btree level
	 */
	for (i = 0; i < level; i++) {
		lptr = &btree_curs->level[i];

		agbno = get_next_blockaddr(agno, i, btree_curs);
		lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, agbno),
					XFS_FSB_TO_BB(mp, 1));

		if (i == btree_curs->num_levels - 1)
			btree_curs->root = agbno;

		lptr->agbno = agbno;
		lptr->prev_agbno = NULLAGBLOCK;
		lptr->prev_buf_p = NULL;
		/*
		 * initialize block header
		 */
		lptr->buf_p->b_ops = &xfs_allocbt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, crc_magic, i,
						0, agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, magic, i,
						0, agno, 0);
	}
	/*
	 * run along leaf, setting up records.  as we have to switch
	 * blocks, call the prop_freespace_cursor routine to set up the new
	 * pointers for the parent.  that can recurse up to the root
	 * if required.  set the sibling pointers for leaf level here.
	 */
	if (magic == XFS_ABTB_MAGIC)
		ext_ptr = findfirst_bno_extent(agno);
	else
		ext_ptr = findfirst_bcnt_extent(agno);

#ifdef XR_BLD_FREE_TRACE
	fprintf(stderr, "bft, agno = %d, start = %u, count = %u\n",
		agno, ext_ptr->ex_startblock, ext_ptr->ex_blockcount);
#endif

	lptr = &btree_curs->level[0];

	for (i = 0; i < btree_curs->level[0].num_blocks; i++) {
		/*
		 * block initialization, lay in block header
		 */
		lptr->buf_p->b_ops = &xfs_allocbt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, crc_magic, 0,
						0, agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, magic, 0,
						0, agno, 0);

		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);
		bt_hdr->bb_numrecs = cpu_to_be16(lptr->num_recs_pb +
							(lptr->modulo > 0));
#ifdef XR_BLD_FREE_TRACE
		fprintf(stderr, "bft, bb_numrecs = %d\n",
				be16_to_cpu(bt_hdr->bb_numrecs));
#endif

		if (lptr->modulo > 0)
			lptr->modulo--;

		/*
		 * initialize values in the path up to the root if
		 * this is a multi-level btree
		 */
		if (btree_curs->num_levels > 1)
			prop_freespace_cursor(mp, agno, btree_curs,
					ext_ptr->ex_startblock,
					ext_ptr->ex_blockcount,
					0, magic);

		bt_rec = (xfs_alloc_rec_t *)
			  ((char *)bt_hdr + XFS_ALLOC_BLOCK_LEN(mp));
		for (j = 0; j < be16_to_cpu(bt_hdr->bb_numrecs); j++) {
			ASSERT(ext_ptr != NULL);
			bt_rec[j].ar_startblock = cpu_to_be32(
							ext_ptr->ex_startblock);
			bt_rec[j].ar_blockcount = cpu_to_be32(
							ext_ptr->ex_blockcount);
			freeblks += ext_ptr->ex_blockcount;
			if (magic == XFS_ABTB_MAGIC)
				ext_ptr = findnext_bno_extent(ext_ptr);
			else
				ext_ptr = findnext_bcnt_extent(agno, ext_ptr);
#if 0
#ifdef XR_BLD_FREE_TRACE
			if (ext_ptr == NULL)
				fprintf(stderr, "null extent pointer, j = %d\n",
					j);
			else
				fprintf(stderr,
				"bft, agno = %d, start = %u, count = %u\n",
					agno, ext_ptr->ex_startblock,
					ext_ptr->ex_blockcount);
#endif
#endif
		}

		if (ext_ptr != NULL) {
			/*
			 * get next leaf level block
			 */
			if (lptr->prev_buf_p != NULL) {
#ifdef XR_BLD_FREE_TRACE
				fprintf(stderr, " writing fst agbno %u\n",
					lptr->prev_agbno);
#endif
				ASSERT(lptr->prev_agbno != NULLAGBLOCK);
				libxfs_writebuf(lptr->prev_buf_p, 0);
			}
			lptr->prev_buf_p = lptr->buf_p;
			lptr->prev_agbno = lptr->agbno;
			lptr->agbno = get_next_blockaddr(agno, 0, btree_curs);
			bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(lptr->agbno);

			lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, lptr->agbno),
					XFS_FSB_TO_BB(mp, 1));
		}
	}

	return(freeblks);
}
static void
prop_freespace_cursor(xfs_mount_t *mp, xfs_agnumber_t agno,
		bt_status_t *btree_curs, xfs_agblock_t startblock,
		xfs_extlen_t blockcount, int level, __uint32_t magic)
{
	struct xfs_btree_block	*bt_hdr;
	xfs_alloc_key_t		*bt_key;
	xfs_alloc_ptr_t		*bt_ptr;
	xfs_agblock_t		agbno;
	bt_stat_level_t		*lptr;
	__uint32_t		crc_magic;

	if (magic == XFS_ABTB_MAGIC)
		crc_magic = XFS_ABTB_CRC_MAGIC;
	else
		crc_magic = XFS_ABTC_CRC_MAGIC;

	level++;

	if (level >= btree_curs->num_levels)
		return;

	lptr = &btree_curs->level[level];
	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);

	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0) {
		/*
		 * only happens once when initializing the
		 * left-hand side of the tree.
		 */
		prop_freespace_cursor(mp, agno, btree_curs, startblock,
				blockcount, level, magic);
	}

	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
				lptr->num_recs_pb + (lptr->modulo > 0)) {
		/*
		 * write out current prev block, grab us a new block,
		 * and set the rightsib pointer of current block
		 */
#ifdef XR_BLD_FREE_TRACE
		fprintf(stderr, " %d ", lptr->prev_agbno);
#endif
		if (lptr->prev_agbno != NULLAGBLOCK) {
			ASSERT(lptr->prev_buf_p != NULL);
			libxfs_writebuf(lptr->prev_buf_p, 0);
		}
		lptr->prev_agbno = lptr->agbno;
		lptr->prev_buf_p = lptr->buf_p;
		agbno = get_next_blockaddr(agno, level, btree_curs);

		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);

		lptr->buf_p = libxfs_getbuf(mp->m_dev,
					XFS_AGB_TO_DADDR(mp, agno, agbno),
					XFS_FSB_TO_BB(mp, 1));
		lptr->agbno = agbno;

		if (lptr->modulo)
			lptr->modulo--;

		/*
		 * initialize block header
		 */
		lptr->buf_p->b_ops = &xfs_allocbt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, crc_magic, level,
						0, agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, magic, level,
						0, agno, 0);

		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);

		/*
		 * propagate extent record for first extent in new block up
		 */
		prop_freespace_cursor(mp, agno, btree_curs, startblock,
				blockcount, level, magic);
	}
	/*
	 * add extent info to current block
	 */
	be16_add_cpu(&bt_hdr->bb_numrecs, 1);

	bt_key = XFS_ALLOC_KEY_ADDR(mp, bt_hdr,
				be16_to_cpu(bt_hdr->bb_numrecs));
	bt_ptr = XFS_ALLOC_PTR_ADDR(mp, bt_hdr,
				be16_to_cpu(bt_hdr->bb_numrecs),
				mp->m_alloc_mxr[1]);

	bt_key->ar_startblock = cpu_to_be32(startblock);
	bt_key->ar_blockcount = cpu_to_be32(blockcount);
	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
}
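The tail of prop_freespace_cursor() appends one key/pointer pair to the current interior block. A simplified in-memory sketch of that append step (editor's addition; CPU-endian fields and fixed-size arrays instead of the on-disk big-endian block layout, so the struct and helper here are illustrative only):

#include <stdint.h>

struct demo_key  { uint32_t startblock; uint32_t blockcount; };
struct demo_node {
	uint16_t	numrecs;	/* bb_numrecs, CPU-endian here */
	struct demo_key	keys[16];	/* XFS_ALLOC_KEY_ADDR() slots */
	uint32_t	ptrs[16];	/* XFS_ALLOC_PTR_ADDR() slots */
};

/* Append the (key, child) pair describing a child block's first extent. */
static void node_append(struct demo_node *node, uint32_t startblock,
			uint32_t blockcount, uint32_t child_agbno)
{
	node->numrecs++;		/* be16_add_cpu(&bb_numrecs, 1) */
	node->keys[node->numrecs - 1].startblock = startblock;
	node->keys[node->numrecs - 1].blockcount = blockcount;
	node->ptrs[node->numrecs - 1] = child_agbno;
}

The on-disk macros index slots from 1, so the key and pointer land at slot bb_numrecs after the increment; the array version above uses numrecs - 1 for the same effect.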
/*
 * Allocate new inodes in the allocation group specified by agbp.
 * Return 0 for success, else error code.
 */
STATIC int				/* error code or 0 */
xfs_ialloc_ag_alloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_buf_t	*agbp,		/* alloc group buffer */
	int		*alloc)
{
	xfs_agi_t	*agi;		/* allocation group header */
	xfs_alloc_arg_t	args;		/* allocation argument structure */
	int		blks_per_cluster;  /* fs blocks per inode cluster */
	xfs_btree_cur_t	*cur;		/* inode btree cursor */
	xfs_daddr_t	d;		/* disk addr of buffer */
	xfs_agnumber_t	agno;
	int		error;
	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
	xfs_dinode_t	*free;		/* new free inode structure */
	int		i;		/* inode counter */
	int		j;		/* block counter */
	int		nbufs;		/* num bufs of new inodes */
	xfs_agino_t	newino;		/* new first inode's number */
	xfs_agino_t	newlen;		/* new number of inodes */
	int		ninodes;	/* num inodes per buf */
	xfs_agino_t	thisino;	/* current inode number, for loop */
	int		version;	/* inode version number to use */
	int		isaligned = 0;	/* inode allocation at stripe unit */
					/* boundary */
	unsigned int	gen;

	args.tp = tp;
	args.mp = tp->t_mountp;

	/*
	 * Locking will ensure that we don't have two callers in here
	 * at one time.
	 */
	newlen = XFS_IALLOC_INODES(args.mp);
	if (args.mp->m_maxicount &&
	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
		return XFS_ERROR(ENOSPC);
	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
	/*
	 * First try to allocate inodes contiguous with the last-allocated
	 * chunk of inodes.  If the filesystem is striped, this will fill
	 * an entire stripe unit with inodes.
	 */
	agi = XFS_BUF_TO_AGI(agbp);
	newino = be32_to_cpu(agi->agi_newino);
	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
			XFS_IALLOC_BLOCKS(args.mp);
	if (likely(newino != NULLAGINO &&
		  (args.agbno < be32_to_cpu(agi->agi_length)))) {
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		args.type = XFS_ALLOCTYPE_THIS_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;

		/*
		 * We need to take into account alignment here to ensure that
		 * we don't modify the free list if we fail to have an exact
		 * block.  If we don't have an exact match, and every other
		 * allocation attempt fails, we'll end up cancelling a dirty
		 * transaction and shutting down.
		 *
		 * For an exact allocation, alignment must be 1,
		 * however we need to take cluster alignment into account when
		 * fixing up the freelist.  Use the minalignslop field to
		 * indicate that extra blocks might be required for alignment,
		 * but not to use them in the actual exact allocation.
		 */
		args.alignment = 1;
		args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;

		/* Allow space for the inode btree to split. */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	} else
		args.fsbno = NULLFSBLOCK;

	if (unlikely(args.fsbno == NULLFSBLOCK)) {
		/*
		 * Set the alignment for the allocation.
		 * If stripe alignment is turned on then align at stripe unit
		 * boundary.
		 * If the cluster size is smaller than a filesystem block
		 * then we're doing I/O for inodes in filesystem block size
		 * pieces, so don't need alignment anyway.
		 */
		isaligned = 0;
		if (args.mp->m_sinoalign) {
			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
			args.alignment = args.mp->m_dalign;
			isaligned = 1;
		} else
			args.alignment = xfs_ialloc_cluster_alignment(&args);
		/*
		 * Need to figure out where to allocate the inode blocks.
		 * Ideally they should be spaced out through the a.g.
		 * For now, just allocate blocks up front.
		 */
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		/*
		 * Allocate a fixed-size extent of inodes.
		 */
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/*
	 * If stripe alignment is turned on, then try again with cluster
	 * alignment.
	 */
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		args.alignment = xfs_ialloc_cluster_alignment(&args);
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	if (args.fsbno == NULLFSBLOCK) {
		*alloc = 0;
		return 0;
	}
	ASSERT(args.len == args.minlen);
	/*
	 * Convert the results.
	 */
	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the blocks size.
	 */
	if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
		blks_per_cluster = 1;
		nbufs = (int)args.len;
		ninodes = args.mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
				   args.mp->m_sb.sb_blocksize;
		nbufs = (int)args.len / blks_per_cluster;
		ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
	}
	/*
	 * Figure out what version number to use in the inodes we create.
	 * If the superblock version has caught up to the one that supports
	 * the new inode format, then use the new inode version.  Otherwise
	 * use the old version so that old kernels will continue to be
	 * able to use the file system.
	 */
	if (xfs_sb_version_hasnlink(&args.mp->m_sb))
		version = XFS_DINODE_VERSION_2;
	else
		version = XFS_DINODE_VERSION_1;

	/*
	 * Seed the new inode cluster with a random generation number.  This
	 * prevents short-term reuse of generation numbers if a chunk is
	 * freed and then immediately reallocated.  We use random numbers
	 * rather than a linear progression to prevent the next generation
	 * number from being easily guessable.
	 */
	gen = random32();
	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
				     args.agbno + (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
					 args.mp->m_bsize * blks_per_cluster,
					 XFS_BUF_LOCK);
		ASSERT(fbuf);
		ASSERT(!XFS_BUF_GETERROR(fbuf));

		/*
		 * Set initial values for the inodes in this buffer.
		 */
		xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
		for (i = 0; i < ninodes; i++) {
			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
			free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_core.di_version = version;
			free->di_core.di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);
			xfs_ialloc_log_di(tp, fbuf, i,
				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
		}
		xfs_trans_inode_alloc_buf(tp, fbuf);
	}
	be32_add_cpu(&agi->agi_count, newlen);
	be32_add_cpu(&agi->agi_freecount, newlen);
	agno = be32_to_cpu(agi->agi_seqno);
	down_read(&args.mp->m_peraglock);
	args.mp->m_perag[agno].pagi_freecount += newlen;
	up_read(&args.mp->m_peraglock);
	agi->agi_newino = cpu_to_be32(newino);
	/*
	 * Insert records describing the new inode chunk into the btree.
	 */
	cur = xfs_btree_init_cursor(args.mp, tp, agbp, agno,
			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
	for (thisino = newino;
	     thisino < newino + newlen;
	     thisino += XFS_INODES_PER_CHUNK) {
		if ((error = xfs_inobt_lookup_eq(cur, thisino,
				XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE,
				&i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 0);
		if ((error = xfs_inobt_insert(cur, &i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 1);
	}
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	/*
	 * Log allocation group header fields
	 */
	xfs_ialloc_log_agi(tp, agbp,
		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
	/*
	 * Modify/log superblock values for inode count and inode free count.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
	*alloc = 1;
	return 0;
}
/*
 * Initialise a new set of inodes.
 */
STATIC int
xfs_ialloc_inode_init(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_agblock_t		length,
	unsigned int		gen)
{
	struct xfs_buf		*fbuf;
	struct xfs_dinode	*free;
	int			blks_per_cluster, nbufs, ninodes;
	int			version;
	int			i, j;
	xfs_daddr_t		d;
	xfs_ino_t		ino = 0;

	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the block size.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		nbufs = length;
		ninodes = mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
				   mp->m_sb.sb_blocksize;
		nbufs = length / blks_per_cluster;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
	}

	/*
	 * Figure out what version number to use in the inodes we create.  If
	 * the superblock version has caught up to the one that supports the
	 * new inode format, then use the new inode version.  Otherwise use
	 * the old version so that old kernels will continue to be able to
	 * use the file system.
	 *
	 * For v3 inodes, we also need to write the inode number into the
	 * inode, so calculate the first inode number of the chunk here as
	 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
	 * across multiple filesystem blocks (such as a cluster) and so
	 * cannot be used in the cluster buffer loop below.
	 *
	 * Further, because we are writing the inode directly into the buffer
	 * and calculating a CRC on the entire inode, we have to log the
	 * entire inode so that the entire range the CRC covers is present
	 * in the log.  That means for v3 inodes we log the entire buffer
	 * rather than just the inode cores.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		version = 3;
		ino = XFS_AGINO_TO_INO(mp, agno,
				       XFS_OFFBNO_TO_AGINO(mp, agbno, 0));
	} else if (xfs_sb_version_hasnlink(&mp->m_sb))
		version = 2;
	else
		version = 1;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno +
				     (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					 mp->m_bsize * blks_per_cluster,
					 XBF_UNMAPPED);
		if (!fbuf)
			return ENOMEM;
		/*
		 * Initialize all inodes in this buffer and then log them.
		 *
		 * XXX: It would be much better if we had just one transaction
		 * to log a whole cluster of inodes instead of all the
		 * individual transactions causing a lot of log traffic.
		 */
		fbuf->b_ops = &xfs_inode_buf_ops;
		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
		for (i = 0; i < ninodes; i++) {
			int	ioffset = i << mp->m_sb.sb_inodelog;
			uint	isize = xfs_dinode_size(version);

			free = xfs_make_iptr(mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);

			if (version == 3) {
				free->di_ino = cpu_to_be64(ino);
				ino++;
				uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
				xfs_dinode_calc_crc(mp, free);
			} else {
				/* just log the inode core */
				xfs_trans_log_buf(tp, fbuf, ioffset,
						  ioffset + isize - 1);
			}
		}
		if (version == 3) {
			/* need to log the entire buffer */
			xfs_trans_log_buf(tp, fbuf, 0,
					  BBTOB(fbuf->b_length) - 1);
		}
		xfs_trans_inode_alloc_buf(tp, fbuf);
	}
	return 0;
}
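/*
 * Hedged sketch of what xfs_dinode_size() above evaluates to: for v1/v2
 * inodes only the legacy core (everything before the v3 CRC field) is
 * logged, while v3 inodes are sized, CRC'd, and logged over the whole
 * structure.  Assumes struct xfs_dinode and offsetof() are in scope; the
 * helper name is illustrative, not the canonical header definition.
 */
static inline unsigned int
example_dinode_size(int version)
{
	if (version == 3)
		return sizeof(struct xfs_dinode);	/* full v3 inode */
	return offsetof(struct xfs_dinode, di_crc);	/* legacy core only */
}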
xfs_daddr_t
xfs_agb_to_daddr(xfs_mount_t *mp, xfs_agnumber_t agno, xfs_agblock_t agbno)
{
	return XFS_AGB_TO_DADDR(mp, agno, agbno);
}
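/*
 * Illustrative expansion of XFS_AGB_TO_DADDR(): the AG-relative block is
 * made filesystem-absolute by scaling the AG number by sb_agblocks, then
 * the filesystem block number is converted to 512-byte basic blocks.
 * A sketch with a hypothetical name, not the header definition.
 */
static xfs_daddr_t
example_agb_to_daddr(struct xfs_mount *mp, xfs_agnumber_t agno,
		     xfs_agblock_t agbno)
{
	xfs_fsblock_t	fsbno;

	/* linearize: AG number times blocks-per-AG, plus AG-relative block */
	fsbno = (xfs_fsblock_t)agno * mp->m_sb.sb_agblocks + agbno;
	return XFS_FSB_TO_BB(mp, fsbno);	/* fsblocks -> daddrs */
}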
/*
 * Initialise a new set of inodes.  When called without a transaction
 * context (e.g. from recovery) we initiate a delayed write of the inode
 * buffers rather than logging them (which in a transaction context puts
 * them into the AIL for writeback rather than the xfsbufd queue).
 */
int
xfs_ialloc_inode_init(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct list_head	*buffer_list,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_agblock_t		length,
	unsigned int		gen)
{
	struct xfs_buf		*fbuf;
	struct xfs_dinode	*free;
	int			blks_per_cluster, nbufs, ninodes;
	int			version;
	int			i, j;
	xfs_daddr_t		d;
	xfs_ino_t		ino = 0;

	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the block size.
	 */
	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
		blks_per_cluster = 1;
		nbufs = length;
		ninodes = mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
				   mp->m_sb.sb_blocksize;
		nbufs = length / blks_per_cluster;
		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
	}

	/*
	 * Figure out what version number to use in the inodes we create.  If
	 * the superblock version has caught up to the one that supports the
	 * new inode format, then use the new inode version.  Otherwise use
	 * the old version so that old kernels will continue to be able to
	 * use the file system.
	 *
	 * For v3 inodes, we also need to write the inode number into the
	 * inode, so calculate the first inode number of the chunk here as
	 * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not
	 * across multiple filesystem blocks (such as a cluster) and so
	 * cannot be used in the cluster buffer loop below.
	 *
	 * Further, because we are writing the inode directly into the buffer
	 * and calculating a CRC on the entire inode, we have to log the
	 * entire inode so that the entire range the CRC covers is present
	 * in the log.  That means for v3 inodes we log the entire buffer
	 * rather than just the inode cores.
	 */
	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		version = 3;
		ino = XFS_AGINO_TO_INO(mp, agno,
				       XFS_OFFBNO_TO_AGINO(mp, agbno, 0));

		/*
		 * Log the initialisation that is about to take place as a
		 * logical operation.  This means the transaction does not
		 * need to log the physical changes to the inode buffers as
		 * log recovery will know what initialisation is actually
		 * needed.  Hence we only need to log the buffers as
		 * "ordered" buffers so they track in the AIL as if they were
		 * physically logged.
		 */
		if (tp)
			xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp),
					mp->m_sb.sb_inodesize, length, gen);
	} else if (xfs_sb_version_hasnlink(&mp->m_sb))
		version = 2;
	else
		version = 1;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno +
				     (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					 mp->m_bsize * blks_per_cluster,
					 XBF_UNMAPPED);
		if (!fbuf)
			return ENOMEM;

		/*
		 * Initialize the inode buffers and log them appropriately.
		 */
		fbuf->b_ops = &xfs_inode_buf_ops;
		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
		for (i = 0; i < ninodes; i++) {
			int	ioffset = i << mp->m_sb.sb_inodelog;
			uint	isize = xfs_dinode_size(version);

			free = xfs_make_iptr(mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);

			if (version == 3) {
				free->di_ino = cpu_to_be64(ino);
				ino++;
				uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
				xfs_dinode_calc_crc(mp, free);
			} else if (tp) {
				/* just log the inode core */
				xfs_trans_log_buf(tp, fbuf, ioffset,
						  ioffset + isize - 1);
			}
		}

		if (tp) {
			/*
			 * Mark the buffer as an inode allocation buffer so it
			 * sticks in the AIL at the point of this allocation
			 * transaction.  This ensures they are on disk before
			 * the tail of the log can be moved past this
			 * transaction (i.e. by preventing relogging from
			 * moving it forward in the log).
			 */
			xfs_trans_inode_alloc_buf(tp, fbuf);
			if (version == 3) {
				/*
				 * Mark the buffer as ordered so that it is
				 * not physically logged in the transaction
				 * but is still tracked in the AIL as part of
				 * the transaction and pins the log
				 * appropriately.
				 */
				xfs_trans_ordered_buf(tp, fbuf);
				xfs_trans_log_buf(tp, fbuf, 0,
						  BBTOB(fbuf->b_length) - 1);
			}
		} else {
			fbuf->b_flags |= XBF_DONE;
			xfs_buf_delwri_queue(fbuf, buffer_list);
			xfs_buf_relse(fbuf);
		}
	}
	return 0;
}
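/*
 * Hedged usage sketch for the tp == NULL path above: a recovery-style
 * caller passes a local delwri list and submits the queued inode cluster
 * buffers in one batch afterwards.  The wrapper name and its call site
 * are assumptions for illustration; only xfs_ialloc_inode_init() and
 * xfs_buf_delwri_submit() are taken from the surrounding code base.
 */
static int
example_init_chunk_no_trans(struct xfs_mount *mp, xfs_agnumber_t agno,
			    xfs_agblock_t agbno, xfs_agblock_t length,
			    unsigned int gen)
{
	LIST_HEAD(buffer_list);		/* collects delwri-queued buffers */
	int		error;

	error = xfs_ialloc_inode_init(mp, NULL, &buffer_list, agno,
				      agbno, length, gen);
	if (error)
		return error;
	/* write all queued inode cluster buffers in one batch */
	return xfs_buf_delwri_submit(&buffer_list);
}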
/*
 * Check that the holemask and freemask of a hypothetical inode cluster
 * match what's actually on disk.  If sparse inodes are enabled, the
 * cluster does not actually have to map to inodes if the corresponding
 * holemask bit is set.
 *
 * @cluster_base is the first inode in the cluster within the @irec.
 */
STATIC int
xchk_iallocbt_check_cluster(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			cluster_base)
{
	struct xfs_imap			imap;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_dinode		*dip;
	struct xfs_buf			*cluster_bp;
	unsigned int			nr_inodes;
	xfs_agnumber_t			agno = bs->cur->bc_private.a.agno;
	xfs_agblock_t			agbno;
	unsigned int			cluster_index;
	uint16_t			cluster_mask = 0;
	uint16_t			ir_holemask;
	int				error = 0;

	nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
			mp->m_inodes_per_cluster);

	/* Map this inode cluster */
	agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);

	/* Compute a bitmask for this cluster that can be used for holemask. */
	for (cluster_index = 0;
	     cluster_index < nr_inodes;
	     cluster_index += XFS_INODES_PER_HOLEMASK_BIT)
		cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) /
				XFS_INODES_PER_HOLEMASK_BIT);

	/*
	 * Map the first inode of this cluster to a buffer and offset.
	 * Be careful about inobt records that don't align with the start of
	 * the inode buffer when block sizes are large enough to hold
	 * multiple inode chunks.  When this happens, cluster_base will be
	 * zero but ir_startino can be large enough to make im_boffset
	 * nonzero.
	 */
	ir_holemask = (irec->ir_holemask & cluster_mask);
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
	imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
	imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino);

	if (imap.im_boffset != 0 && cluster_base != 0) {
		ASSERT(imap.im_boffset == 0 || cluster_base == 0);
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
			imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
			cluster_mask, ir_holemask,
			XFS_INO_TO_OFFSET(mp, irec->ir_startino +
					  cluster_base));

	/* The whole cluster must be a hole or not a hole. */
	if (ir_holemask != cluster_mask && ir_holemask != 0) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	/* If any part of this is a hole, skip it. */
	if (ir_holemask) {
		xchk_xref_is_not_owned_by(bs->sc, agbno,
				mp->m_blocks_per_cluster,
				&XFS_RMAP_OINFO_INODES);
		return 0;
	}

	xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
			&XFS_RMAP_OINFO_INODES);

	/* Grab the inode cluster buffer. */
	error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp,
			0, 0);
	if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
		return error;

	/* Check free status of each inode within this cluster. */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		struct xfs_dinode	*dip;

		if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) {
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
			break;
		}

		dip = xfs_buf_offset(cluster_bp, imap.im_boffset);
		error = xchk_iallocbt_check_cluster_ifree(bs, irec,
				cluster_base + cluster_index, dip);
		if (error)
			break;
		imap.im_boffset += mp->m_sb.sb_inodesize;
	}

	xfs_trans_brelse(bs->cur->bc_tp, cluster_bp);
	return error;
}
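/*
 * Worked sketch of the cluster_mask computation above, assuming the usual
 * constants: XFS_INODES_PER_CHUNK = 64 and a 16-bit holemask, so each
 * holemask bit covers XFS_INODES_PER_HOLEMASK_BIT = 4 inodes.  For a
 * 32-inode cluster starting at cluster_base 32, the loop sets bits 8..15
 * (mask 0xFF00).  Hypothetical standalone helper, for illustration only.
 */
static uint16_t
example_cluster_mask(unsigned int cluster_base, unsigned int nr_inodes)
{
	uint16_t	mask = 0;
	unsigned int	i;

	for (i = 0; i < nr_inodes; i += 4)	/* 4 inodes per holemask bit */
		mask |= (uint16_t)1 << ((cluster_base + i) / 4);
	return mask;
}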
static void
prop_ino_cursor(xfs_mount_t *mp, xfs_agnumber_t agno, bt_status_t *btree_curs,
	xfs_agino_t startino, int level)
{
	struct xfs_btree_block	*bt_hdr;
	xfs_inobt_key_t		*bt_key;
	xfs_inobt_ptr_t		*bt_ptr;
	xfs_agblock_t		agbno;
	bt_stat_level_t		*lptr;

	level++;

	if (level >= btree_curs->num_levels)
		return;

	lptr = &btree_curs->level[level];
	bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);

	if (be16_to_cpu(bt_hdr->bb_numrecs) == 0)  {
		/*
		 * this only happens once to initialize the
		 * first path up the left side of the tree
		 * where the agbno's are already set up
		 */
		prop_ino_cursor(mp, agno, btree_curs, startino, level);
	}

	if (be16_to_cpu(bt_hdr->bb_numrecs) ==
				lptr->num_recs_pb + (lptr->modulo > 0))  {
		/*
		 * write out current prev block, grab us a new block,
		 * and set the rightsib pointer of current block
		 */
#ifdef XR_BLD_INO_TRACE
		fprintf(stderr, " ino prop agbno %d ", lptr->prev_agbno);
#endif
		if (lptr->prev_agbno != NULLAGBLOCK)  {
			ASSERT(lptr->prev_buf_p != NULL);
			libxfs_writebuf(lptr->prev_buf_p, 0);
		}
		lptr->prev_agbno = lptr->agbno;
		lptr->prev_buf_p = lptr->buf_p;
		agbno = get_next_blockaddr(agno, level, btree_curs);

		bt_hdr->bb_u.s.bb_rightsib = cpu_to_be32(agbno);

		lptr->buf_p = libxfs_getbuf(mp->m_dev,
				XFS_AGB_TO_DADDR(mp, agno, agbno),
				XFS_FSB_TO_BB(mp, 1));
		lptr->agbno = agbno;

		if (lptr->modulo)
			lptr->modulo--;

		/*
		 * initialize block header
		 */
		lptr->buf_p->b_ops = &xfs_inobt_buf_ops;
		bt_hdr = XFS_BUF_TO_BLOCK(lptr->buf_p);
		memset(bt_hdr, 0, mp->m_sb.sb_blocksize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, lptr->buf_p, XFS_IBT_CRC_MAGIC,
						level, 0, agno,
						XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, lptr->buf_p, XFS_IBT_MAGIC,
						level, 0, agno, 0);

		bt_hdr->bb_u.s.bb_leftsib = cpu_to_be32(lptr->prev_agbno);

		/*
		 * propagate extent record for first extent in new block up
		 */
		prop_ino_cursor(mp, agno, btree_curs, startino, level);
	}
	/*
	 * add inode info to current block
	 */
	be16_add_cpu(&bt_hdr->bb_numrecs, 1);

	bt_key = XFS_INOBT_KEY_ADDR(mp, bt_hdr,
				    be16_to_cpu(bt_hdr->bb_numrecs));
	bt_ptr = XFS_INOBT_PTR_ADDR(mp, bt_hdr,
				    be16_to_cpu(bt_hdr->bb_numrecs),
				    mp->m_inobt_mxr[1]);

	bt_key->ir_startino = cpu_to_be32(startino);
	*bt_ptr = cpu_to_be32(btree_curs->level[level-1].agbno);
}
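/*
 * Sketch of the num_recs_pb/modulo convention relied on above (a
 * hypothetical helper, not repair source): spreading nrecs records over
 * nblocks btree blocks gives num_recs_pb = nrecs / nblocks records in
 * every block, with the first modulo = nrecs % nblocks blocks carrying
 * one extra record.  That is why the "block full" test above compares
 * bb_numrecs against num_recs_pb + (modulo > 0).
 */
static void
example_spread_records(unsigned int nrecs, unsigned int nblocks,
		       unsigned int *num_recs_pb, unsigned int *modulo)
{
	*num_recs_pb = nrecs / nblocks;	/* records in every block */
	*modulo = nrecs % nblocks;	/* first "modulo" blocks get +1 */
}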