static void xfs_dquot_buf_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; struct xfs_disk_dquot *ddq; xfs_dqid_t id = 0; int i; /* * On the first read of the buffer, verify that each dquot is valid. * We don't know what the id of the dquot is supposed to be, just that * they should be increasing monotonically within the buffer. If the * first id is corrupt, then it will fail on the second dquot in the * buffer so corruptions could point to the wrong dquot in this case. */ for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) { int error; ddq = &d[i].dd_diskdq; if (i == 0) id = be32_to_cpu(ddq->d_id); error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, "xfs_dquot_read_verify"); if (error) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d); xfs_buf_ioerror(bp, EFSCORRUPTED); break; } } }
static void xfs_allocbt_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; unsigned int level; int sblock_ok; /* block passes checks */ /* * magic number and level verification * * During growfs operations, we can't verify the exact level as the * perag is not fully initialised and hence not attached to the buffer. * In this case, check against the maximum tree depth. */ level = be16_to_cpu(block->bb_level); switch (block->bb_magic) { case cpu_to_be32(XFS_ABTB_MAGIC): if (pag) sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi]; else sblock_ok = level < mp->m_ag_maxlevels; break; case cpu_to_be32(XFS_ABTC_MAGIC): if (pag) sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi]; else sblock_ok = level < mp->m_ag_maxlevels; break; default: sblock_ok = 0; break; } /* numrecs verification */ sblock_ok = sblock_ok && be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0]; /* sibling pointer verification */ sblock_ok = sblock_ok && (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) && block->bb_u.s.bb_leftsib && (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) && block->bb_u.s.bb_rightsib; if (!sblock_ok) { trace_xfs_btree_corrupt(bp, _RET_IP_); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block); xfs_buf_ioerror(bp, EFSCORRUPTED); } }
static void xfs_dquot_buf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); } }
static void xfs_dir3_block_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; if ((xfs_sb_version_hascrc(&mp->m_sb) && !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF)) || !xfs_dir3_block_verify(bp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); } }
static void xfs_dir2_block_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; int block_ok = 0; block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0; if (!block_ok) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); xfs_buf_ioerror(bp, EFSCORRUPTED); } }
static void xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; char *ptr; int len; xfs_daddr_t bno; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); ASSERT(len >= XFS_LBSIZE(mp)); while (len > 0) { if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); return; } if (bip) { struct xfs_attr3_rmt_hdr *rmt; rmt = (struct xfs_attr3_rmt_hdr *)ptr; rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); } xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF); len -= XFS_LBSIZE(mp); ptr += XFS_LBSIZE(mp); bno += mp->m_bsize; } ASSERT(len == 0); }
static void xfs_attr3_rmt_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; int len; bool corrupt = false; xfs_daddr_t bno; /* no verification of non-crc buffers */ if (!xfs_sb_version_hascrc(&mp->m_sb)) return; ptr = bp->b_addr; bno = bp->b_bn; len = BBTOB(bp->b_length); ASSERT(len >= XFS_LBSIZE(mp)); while (len > 0) { if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF)) { corrupt = true; break; } if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { corrupt = true; break; } len -= XFS_LBSIZE(mp); ptr += XFS_LBSIZE(mp); bno += mp->m_bsize; } if (corrupt) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); } else ASSERT(len == 0); }
static void xfs_dir3_block_write_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_buf_log_item *bip = bp->b_fspriv; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_block_verify(bp)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); return; } if (!xfs_sb_version_hascrc(&mp->m_sb)) return; if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); }
/* * Copy out entries of shortform attribute lists for attr_list(). * Shortform attribute lists are not stored in hashval sorted order. * If the output buffer is not large enough to hold them all, then we * we have to calculate each entries' hashvalue and sort them before * we can begin returning them to the user. */ static int xfs_attr_shortform_list(xfs_attr_list_context_t *context) { attrlist_cursor_kern_t *cursor; xfs_attr_sf_sort_t *sbuf, *sbp; xfs_attr_shortform_t *sf; xfs_attr_sf_entry_t *sfe; xfs_inode_t *dp; int sbsize, nsbuf, count, i; int error; ASSERT(context != NULL); dp = context->dp; ASSERT(dp != NULL); ASSERT(dp->i_afp != NULL); sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data; ASSERT(sf != NULL); if (!sf->hdr.count) return 0; cursor = context->cursor; ASSERT(cursor != NULL); trace_xfs_attr_list_sf(context); /* * If the buffer is large enough and the cursor is at the start, * do not bother with sorting since we will return everything in * one buffer and another call using the cursor won't need to be * made. * Note the generous fudge factor of 16 overhead bytes per entry. * If bufsize is zero then put_listent must be a search function * and can just scan through what we have. */ if (context->bufsize == 0 || (XFS_ISRESET_CURSOR(cursor) && (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { error = context->put_listent(context, sfe->flags, sfe->nameval, (int)sfe->namelen, (int)sfe->valuelen); if (error) return error; /* * Either search callback finished early or * didn't fit it all in the buffer after all. */ if (context->seen_enough) break; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); } trace_xfs_attr_list_sf_all(context); return 0; } /* do no more for a search callback */ if (context->bufsize == 0) return 0; /* * It didn't all fit, so we have to sort everything on hashval. */ sbsize = sf->hdr.count * sizeof(*sbuf); sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS); /* * Scan the attribute list for the rest of the entries, storing * the relevant info from only those that match into a buffer. */ nsbuf = 0; for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { if (unlikely( ((char *)sfe < (char *)sf) || ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) { XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); kmem_free(sbuf); return -EFSCORRUPTED; } sbp->entno = i; sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen); sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sfe = XFS_ATTR_SF_NEXTENTRY(sfe); sbp++; nsbuf++; } /* * Sort the entries on hash then entno. */ xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare); /* * Re-find our place IN THE SORTED LIST. */ count = 0; cursor->initted = 1; cursor->blkno = 0; for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) { if (sbp->hash == cursor->hashval) { if (cursor->offset == count) { break; } count++; } else if (sbp->hash > cursor->hashval) { break; } } if (i == nsbuf) { kmem_free(sbuf); return 0; } /* * Loop putting entries into the user buffer. */ for ( ; i < nsbuf; i++, sbp++) { if (cursor->hashval != sbp->hash) { cursor->hashval = sbp->hash; cursor->offset = 0; } error = context->put_listent(context, sbp->flags, sbp->name, sbp->namelen, sbp->valuelen); if (error) { kmem_free(sbuf); return error; } if (context->seen_enough) break; cursor->offset++; } kmem_free(sbuf); return 0; }
STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context) { attrlist_cursor_kern_t *cursor; xfs_attr_leafblock_t *leaf; xfs_da_intnode_t *node; struct xfs_attr3_icleaf_hdr leafhdr; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_node_entry *btree; int error, i; struct xfs_buf *bp; struct xfs_inode *dp = context->dp; struct xfs_mount *mp = dp->i_mount; trace_xfs_attr_node_list(context); cursor = context->cursor; cursor->initted = 1; /* * Do all sorts of validation on the passed-in cursor structure. * If anything is amiss, ignore the cursor and look up the hashval * starting from the btree root. */ bp = NULL; if (cursor->blkno > 0) { error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if ((error != 0) && (error != -EFSCORRUPTED)) return error; if (bp) { struct xfs_attr_leaf_entry *entries; node = bp->b_addr; switch (be16_to_cpu(node->hdr.info.magic)) { case XFS_DA_NODE_MAGIC: case XFS_DA3_NODE_MAGIC: trace_xfs_attr_list_wrong_blk(context); xfs_trans_brelse(NULL, bp); bp = NULL; break; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); if (cursor->hashval > be32_to_cpu( entries[leafhdr.count - 1].hashval)) { trace_xfs_attr_list_wrong_blk(context); xfs_trans_brelse(NULL, bp); bp = NULL; } else if (cursor->hashval <= be32_to_cpu( entries[0].hashval)) { trace_xfs_attr_list_wrong_blk(context); xfs_trans_brelse(NULL, bp); bp = NULL; } break; default: trace_xfs_attr_list_wrong_blk(context); xfs_trans_brelse(NULL, bp); bp = NULL; } } } /* * We did not find what we expected given the cursor's contents, * so we start from the top and work down based on the hash value. * Note that start of node block is same as start of leaf block. */ if (bp == NULL) { cursor->blkno = 0; for (;;) { __uint16_t magic; error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1, &bp, XFS_ATTR_FORK); if (error) return error; node = bp->b_addr; magic = be16_to_cpu(node->hdr.info.magic); if (magic == XFS_ATTR_LEAF_MAGIC || magic == XFS_ATTR3_LEAF_MAGIC) break; if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", XFS_ERRLEVEL_LOW, context->dp->i_mount, node); xfs_trans_brelse(NULL, bp); return -EFSCORRUPTED; } dp->d_ops->node_hdr_from_disk(&nodehdr, node); btree = dp->d_ops->node_tree_p(node); for (i = 0; i < nodehdr.count; btree++, i++) { if (cursor->hashval <= be32_to_cpu(btree->hashval)) { cursor->blkno = be32_to_cpu(btree->before); trace_xfs_attr_list_node_descend(context, btree); break; } } if (i == nodehdr.count) { xfs_trans_brelse(NULL, bp); return 0; } xfs_trans_brelse(NULL, bp); } } ASSERT(bp != NULL); /* * Roll upward through the blocks, processing each leaf block in * order. As long as there is space in the result buffer, keep * adding the information. */ for (;;) { leaf = bp->b_addr; error = xfs_attr3_leaf_list_int(bp, context); if (error) { xfs_trans_brelse(NULL, bp); return error; } xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); if (context->seen_enough || leafhdr.forw == 0) break; cursor->blkno = leafhdr.forw; xfs_trans_brelse(NULL, bp); error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp); if (error) return error; } xfs_trans_brelse(NULL, bp); return 0; }
/* * Read the disk inode attributes into the in-core inode structure. * * For version 5 superblocks, if we are initialising a new inode and we are not * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new * inode core with a random generation number. If we are keeping inodes around, * we need to read the inode cluster to get the existing generation number off * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode * format) then log recovery is dependent on the di_flushiter field being * initialised from the current on-disk value and hence we must also read the * inode off disk. */ int xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *ip, uint iget_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; int error; /* * Fill in the location information in the in-core inode. */ error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) return error; /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); ip->i_d.di_magic = XFS_DINODE_MAGIC; ip->i_d.di_gen = prandom_u32(); if (xfs_sb_version_hascrc(&mp->m_sb)) { ip->i_d.di_version = 3; ip->i_d.di_ino = ip->i_ino; uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); } else ip->i_d.di_version = 2; return 0; } /* * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); if (error) return error; /* even unallocated inodes are verified */ if (!xfs_dinode_verify(mp, ip, dip)) { xfs_alert(mp, "%s: validation failed for inode %lld failed", __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); error = -EFSCORRUPTED; goto out_brelse; } /* * If the on-disk inode is already linked to a directory * entry, copy all of the inode into the in-core inode. * xfs_iformat_fork() handles copying in the inode format * specific information. * Otherwise, just get the truly permanent information. */ if (dip->di_mode) { xfs_dinode_from_disk(&ip->i_d, dip); error = xfs_iformat_fork(ip, dip); if (error) { #ifdef DEBUG xfs_alert(mp, "%s: xfs_iformat() returned error %d", __func__, error); #endif /* DEBUG */ goto out_brelse; } } else { /* * Partial initialisation of the in-core inode. Just the bits * that xfs_ialloc won't overwrite or relies on being correct. */ ip->i_d.di_magic = be16_to_cpu(dip->di_magic); ip->i_d.di_version = dip->di_version; ip->i_d.di_gen = be32_to_cpu(dip->di_gen); ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); if (dip->di_version == 3) { ip->i_d.di_ino = be64_to_cpu(dip->di_ino); uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); } /* * Make sure to pull in the mode here as well in * case the inode is released without being used. * This ensures that xfs_inactive() will see that * the inode is already free and not try to mess * with the uninitialized part of it. */ ip->i_d.di_mode = 0; } /* * Automatically convert version 1 inode formats in memory to version 2 * inode format. If the inode is modified, it will get logged and * rewritten as a version 2 inode. We can do this because we set the * superblock feature bit for v2 inodes unconditionally during mount * and it means the reast of the code can assume the inode version is 2 * or higher. */ if (ip->i_d.di_version == 1) { ip->i_d.di_version = 2; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); } ip->i_delayed_blks = 0; /* * Mark the buffer containing the inode as something to keep * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ xfs_buf_set_ref(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the on-disk * inode, because it was acquired with xfs_trans_read_buf() in * xfs_imap_to_bp() above. If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, * because inodes on disk are never destroyed and we will be locking the * new in-core inode before putting it in the cache where other * processes can find it. Thus we don't have to worry about the inode * being changed just because we released the buffer. */ out_brelse: xfs_trans_brelse(tp, bp); return error; }
/* * Add an entry to a block directory. */ int /* error */ xfs_dir2_block_addname( xfs_da_args_t *args) /* directory op arguments */ { xfs_dir2_data_free_t *bf; /* bestfree table in block */ xfs_dir2_block_t *block; /* directory block structure */ xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ xfs_dabuf_t *bp; /* buffer for block */ xfs_dir2_block_tail_t *btp; /* block tail */ int compact; /* need to compact leaf ents */ xfs_dir2_data_entry_t *dep; /* block data entry */ xfs_inode_t *dp; /* directory inode */ xfs_dir2_data_unused_t *dup; /* block unused entry */ int error; /* error return value */ xfs_dir2_data_unused_t *enddup=NULL; /* unused at end of data */ xfs_dahash_t hash; /* hash value of found entry */ int high; /* high index for binary srch */ int highstale; /* high stale index */ int lfloghigh=0; /* last final leaf to log */ int lfloglow=0; /* first final leaf to log */ int len; /* length of the new entry */ int low; /* low index for binary srch */ int lowstale; /* low stale index */ int mid=0; /* midpoint for binary srch */ xfs_mount_t *mp; /* filesystem mount point */ int needlog; /* need to log header */ int needscan; /* need to rescan freespace */ __be16 *tagp; /* pointer to tag value */ xfs_trans_t *tp; /* transaction structure */ trace_xfs_dir2_block_addname(args); dp = args->dp; tp = args->trans; mp = dp->i_mount; /* * Read the (one and only) directory block into dabuf bp. */ if ((error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) { return error; } ASSERT(bp != NULL); block = bp->data; /* * Check the magic number, corrupted if wrong. */ if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", XFS_ERRLEVEL_LOW, mp, block); xfs_da_brelse(tp, bp); return XFS_ERROR(EFSCORRUPTED); } len = xfs_dir2_data_entsize(args->namelen); /* * Set up pointers to parts of the block. */ bf = block->hdr.bestfree; btp = xfs_dir2_block_tail_p(mp, block); blp = xfs_dir2_block_leaf_p(btp); /* * No stale entries? Need space for entry and new leaf. */ if (!btp->stale) { /* * Tag just before the first leaf entry. */ tagp = (__be16 *)blp - 1; /* * Data object just before the first leaf entry. */ enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then can't do this add without cleaning up: * the space before the first leaf entry needs to be free so it * can be expanded to hold the pointer to the new entry. */ if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG) dup = enddup = NULL; /* * Check out the biggest freespace and see if it's the same one. */ else { dup = (xfs_dir2_data_unused_t *) ((char *)block + be16_to_cpu(bf[0].offset)); if (dup == enddup) { /* * It is the biggest freespace, is it too small * to hold the new leaf too? */ if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) { /* * Yes, we use the second-largest * entry instead if it works. */ if (be16_to_cpu(bf[1].length) >= len) dup = (xfs_dir2_data_unused_t *) ((char *)block + be16_to_cpu(bf[1].offset)); else dup = NULL; } } else { /* * Not the same free entry, * just check its length. */ if (be16_to_cpu(dup->length) < len) { dup = NULL; } } } compact = 0; } /* * If there are stale entries we'll use one for the leaf. * Is the biggest entry enough to avoid compaction? */ else if (be16_to_cpu(bf[0].length) >= len) { dup = (xfs_dir2_data_unused_t *) ((char *)block + be16_to_cpu(bf[0].offset)); compact = 0; } /* * Will need to compact to make this work. */ else { /* * Tag just before the first leaf entry. */ tagp = (__be16 *)blp - 1; /* * Data object just before the first leaf entry. */ dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); /* * If it's not free then the data will go where the * leaf data starts now, if it works at all. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) dup = NULL; } else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len) dup = NULL; else dup = (xfs_dir2_data_unused_t *)blp; compact = 1; } /* * If this isn't a real add, we're done with the buffer. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) xfs_da_brelse(tp, bp); /* * If we don't have space for the new entry & leaf ... */ if (!dup) { /* * Not trying to actually do anything, or don't have * a space reservation: return no-space. */ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) return XFS_ERROR(ENOSPC); /* * Convert to the next larger format. * Then add the new entry in that format. */ error = xfs_dir2_block_to_leaf(args, bp); xfs_da_buf_done(bp); if (error) return error; return xfs_dir2_leaf_addname(args); } /* * Just checking, and it would work, so say so. */ if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; needlog = needscan = 0; /* * If need to compact the leaf entries, do it now. * Leave the highest-numbered stale entry stale. * XXX should be the one closest to mid but mid is not yet computed. */ if (compact) { int fromidx; /* source leaf index */ int toidx; /* target leaf index */ for (fromidx = toidx = be32_to_cpu(btp->count) - 1, highstale = lfloghigh = -1; fromidx >= 0; fromidx--) { if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { if (highstale == -1) highstale = toidx; else { if (lfloghigh == -1) lfloghigh = toidx; continue; } } if (fromidx < toidx) blp[toidx] = blp[fromidx]; toidx--; } lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); lfloghigh -= be32_to_cpu(btp->stale) - 1; be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), &needlog, &needscan); blp += be32_to_cpu(btp->stale) - 1; btp->stale = cpu_to_be32(1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. */ if (needscan) { xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); needscan = 0; } } /* * Set leaf logging boundaries to impossible state. * For the no-stale case they're set explicitly. */ else if (btp->stale) { lfloglow = be32_to_cpu(btp->count); lfloghigh = -1; } /* * Find the slot that's first lower than our hash value, -1 if none. */ for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) { mid = (low + high) >> 1; if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval) break; if (hash < args->hashval) low = mid + 1; else high = mid - 1; } while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) { mid--; } /* * No stale entries, will use enddup space to hold new leaf. */ if (!btp->stale) { /* * Mark the space needed for the new leaf entry, now in use. */ xfs_dir2_data_use_free(tp, bp, enddup, (xfs_dir2_data_aoff_t) ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - sizeof(*blp)), (xfs_dir2_data_aoff_t)sizeof(*blp), &needlog, &needscan); /* * Update the tail (entry count). */ be32_add_cpu(&btp->count, 1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. */ if (needscan) { xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); needscan = 0; } /* * Adjust pointer to the first leaf entry, we're about to move * the table up one to open up space for the new leaf entry. * Then adjust our index to match. */ blp--; mid++; if (mid) memmove(blp, &blp[1], mid * sizeof(*blp)); lfloglow = 0; lfloghigh = mid; } /* * Use a stale leaf for our new entry. */ else { for (lowstale = mid; lowstale >= 0 && be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; lowstale--) continue; for (highstale = mid + 1; highstale < be32_to_cpu(btp->count) && be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && (lowstale < 0 || mid - lowstale > highstale - mid); highstale++) continue; /* * Move entries toward the low-numbered stale entry. */ if (lowstale >= 0 && (highstale == be32_to_cpu(btp->count) || mid - lowstale <= highstale - mid)) { if (mid - lowstale) memmove(&blp[lowstale], &blp[lowstale + 1], (mid - lowstale) * sizeof(*blp)); lfloglow = MIN(lowstale, lfloglow); lfloghigh = MAX(mid, lfloghigh); } /* * Move entries toward the high-numbered stale entry. */ else { ASSERT(highstale < be32_to_cpu(btp->count)); mid++; if (highstale - mid) memmove(&blp[mid + 1], &blp[mid], (highstale - mid) * sizeof(*blp)); lfloglow = MIN(mid, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } be32_add_cpu(&btp->stale, -1); } /* * Point to the new data entry. */ dep = (xfs_dir2_data_entry_t *)dup; /* * Fill in the leaf entry. */ blp[mid].hashval = cpu_to_be32(args->hashval); blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); /* * Mark space for the data entry used. */ xfs_dir2_data_use_free(tp, bp, dup, (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), (xfs_dir2_data_aoff_t)len, &needlog, &needscan); /* * Create the new data entry. */ dep->inumber = cpu_to_be64(args->inumber); dep->namelen = args->namelen; memcpy(dep->name, args->name, args->namelen); tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); /* * Clean up the bestfree array and log the header, tail, and entry. */ if (needscan) xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); if (needlog) xfs_dir2_data_log_header(tp, bp); xfs_dir2_block_log_tail(tp, bp); xfs_dir2_data_log_entry(tp, bp, dep); xfs_dir2_data_check(dp, bp); xfs_da_buf_done(bp); return 0; }