/*
 * Fetch the value of a named attribute from a single-block (leaf) attribute
 * fork.
 *
 * The leaf is always read from attribute block 0.  The name described by
 * @args is looked up in it; any lookup result other than -EEXIST is handed
 * straight back to the caller.  When the name exists its value is fetched,
 * the leaf buffer is released, and - if the entry turned out to reference a
 * remote value (args->rmtblkno > 0) and the caller did not pass
 * ATTR_KERNOVAL - the remote value is read as well.
 *
 * Returns 0 on success or the error produced by the failing step.
 */
STATIC int
xfs_attr_leaf_get(xfs_da_args_t *args)
{
	struct xfs_buf	*leaf_bp;
	int		rc;
	int		found;

	trace_xfs_attr_leaf_get(args);

	args->blkno = 0;
	rc = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1,
				 &leaf_bp);
	if (rc)
		return rc;

	/* -EEXIST is the "name is present" answer from the lookup. */
	rc = xfs_attr3_leaf_lookup_int(leaf_bp, args);
	found = (rc == -EEXIST);
	if (found)
		rc = xfs_attr3_leaf_getvalue(leaf_bp, args);

	/* The leaf buffer is no longer needed on any path from here on. */
	xfs_trans_brelse(args->trans, leaf_bp);

	if (!found || rc)
		return rc;

	if (args->rmtblkno <= 0 || (args->flags & ATTR_KERNOVAL))
		return 0;

	return xfs_attr_rmtval_get(args);
}
/*
 * Locate the lowest-numbered run of shared blocks inside the AG extent
 * [agbno, agbno + aglen) of allocation group @agno.
 *
 * The result is reported through @fbno / @flen by
 * xfs_refcount_find_shared(); @find_end_of_shared is passed through to it
 * unchanged.  Per the contract of that helper, *fbno / *flen are set to
 * NULLAGBLOCK / 0 when nothing in the range is shared.
 *
 * Returns 0 on success, -ENOMEM if the AGF read succeeded without handing
 * back a buffer, or the error from the AGF read / refcount lookup.
 */
int
xfs_reflink_find_shared(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_extlen_t		aglen,
	xfs_agblock_t		*fbno,
	xfs_extlen_t		*flen,
	bool			find_end_of_shared)
{
	struct xfs_buf		*agf_bp;
	struct xfs_btree_cur	*rc_cur;
	int			rc;

	rc = xfs_alloc_read_agf(mp, tp, agno, 0, &agf_bp);
	if (rc)
		return rc;
	if (agf_bp == NULL)
		return -ENOMEM;

	rc_cur = xfs_refcountbt_init_cursor(mp, tp, agf_bp, agno, NULL);
	rc = xfs_refcount_find_shared(rc_cur, agbno, aglen, fbno, flen,
				      find_end_of_shared);

	/* Tear the cursor down in the mode matching the lookup outcome. */
	if (rc)
		xfs_btree_del_cursor(rc_cur, XFS_BTREE_ERROR);
	else
		xfs_btree_del_cursor(rc_cur, XFS_BTREE_NOERROR);

	xfs_trans_brelse(tp, agf_bp);
	return rc;
}
/*
 * Fill in the incore dquot @dqp from its on-disk copy.
 *
 * xfs_qm_dqtobp() supplies both a pointer to the on-disk dquot and the
 * buffer that contains it (and may replace *tpp).  The disk dquot is copied
 * into dqp->q_core, the log item is initialised, the reservation counters
 * are seeded from the on-disk usage counters, and the buffer is released
 * again before returning.
 *
 * @id is only consulted by the sanity ASSERT below.
 *
 * Returns 0 on success or the error from xfs_qm_dqtobp().
 */
/* ARGSUSED */
STATIC int
xfs_qm_dqread(
	xfs_trans_t	**tpp,
	xfs_dqid_t	id,
	xfs_dquot_t	*dqp,		/* dquot to get filled in */
	uint		flags)
{
	xfs_disk_dquot_t	*disk_dq;
	xfs_buf_t		*dq_bp;
	xfs_trans_t		*cur_tp;
	int			error;

	ASSERT(tpp);

	xfs_dqtrace_entry(dqp, "DQREAD");

	/* dqp already carries its own type; fetch disk dquot + buffer. */
	error = xfs_qm_dqtobp(tpp, dqp, &disk_dq, &dq_bp, flags);
	if (error)
		return error;

	/* Sample the transaction only now, after xfs_qm_dqtobp() has run. */
	cur_tp = *tpp;

	/* Copy the whole on-disk dquot into the incore one. */
	memcpy(&dqp->q_core, disk_dq, sizeof(xfs_disk_dquot_t));
	ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
	xfs_qm_dquot_logitem_init(dqp);

	/*
	 * Reservation counters hold "reservation plus current usage", so
	 * they start out equal to the on-disk usage counts.
	 */
	dqp->q_res_bcount = be64_to_cpu(disk_dq->d_bcount);
	dqp->q_res_icount = be64_to_cpu(disk_dq->d_icount);
	dqp->q_res_rtbcount = be64_to_cpu(disk_dq->d_rtbcount);

	/* Mark the buf so that this will stay incore a little longer */
	XFS_BUF_SET_VTYPE_REF(dq_bp, B_FS_DQUOT, XFS_DQUOT_REF);

	/*
	 * Release the buffer through the transaction interface.  The incore
	 * dquot now holds everything we copied, so the buffer need not stay
	 * held for later dquot accesses.
	 */
	ASSERT(XFS_BUF_ISBUSY(dq_bp));
	ASSERT(XFS_BUF_VALUSEMA(dq_bp) <= 0);
	xfs_trans_brelse(cur_tp, dq_bp);

	return error;
}
/* * Indiscriminately delete the entire attribute fork * * Recurse (gasp!) through the attribute nodes until we find leaves. * We're doing a depth-first traversal in order to invalidate everything. */ int xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { struct xfs_da_blkinfo *info; struct xfs_buf *bp; xfs_daddr_t blkno; int error; /* * Read block 0 to see what we have to work with. * We only get here if we have extents, since we remove * the extents in reverse order the extent containing * block 0 must still be there. */ error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK); if (error) return error; blkno = bp->b_bn; /* * Invalidate the tree, even if the "tree" is only a single leaf block. * This is a depth-first traversal! */ info = bp->b_addr; switch (info->magic) { case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): error = xfs_attr3_leaf_inactive(trans, dp, bp); break; default: error = -EIO; xfs_trans_brelse(*trans, bp); break; } if (error) return error; /* * Invalidate the incore copy of the root block. */ error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK); if (error) return error; xfs_trans_binval(*trans, bp); /* remove from cache */ /* * Commit the invalidate and start the next transaction. */ error = xfs_trans_roll(trans, dp); return error; }
/*
 * Remove a name from a single-block (leaf) attribute fork.
 *
 * The leaf is read from attribute block 0 and the name in @args is looked
 * up.  -ENOATTR from the lookup releases the buffer and is returned to the
 * caller; otherwise the entry is removed.  If the remaining attributes fit
 * into the inode (xfs_attr_shortform_allfit() returns a non-zero fork
 * offset) the leaf is converted to shortform and the pending block frees
 * are finished via xfs_bmap_finish().
 *
 * On a conversion/finish failure args->trans is cleared, the free list is
 * cancelled and the error is returned.  Returns 0 on success.
 */
STATIC int
xfs_attr_leaf_removename(xfs_da_args_t *args)
{
	xfs_inode_t	*dp;
	struct xfs_buf	*bp;
	int		error, forkoff;
	/*
	 * Start at 0: xfs_bmap_finish() is only called when the shortform
	 * conversion succeeded, so without an initialiser the checks below
	 * could read an indeterminate value.
	 */
	int		committed = 0;

	trace_xfs_attr_leaf_removename(args);

	/*
	 * Remove the attribute.
	 */
	dp = args->dp;
	args->blkno = 0;
	error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1,
				    &bp);
	if (error)
		return error;

	error = xfs_attr3_leaf_lookup_int(bp, args);
	if (error == -ENOATTR) {
		xfs_trans_brelse(args->trans, bp);
		return error;
	}

	xfs_attr3_leaf_remove(bp, args);

	/*
	 * If the result is small enough, shrink it all into the inode.
	 */
	forkoff = xfs_attr_shortform_allfit(bp, dp);
	if (forkoff) {
		xfs_bmap_init(args->flist, args->firstblock);
		error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
		/* bp is gone due to xfs_da_shrink_inode */
		if (!error) {
			error = xfs_bmap_finish(&args->trans, args->flist,
						&committed);
			/*
			 * Only meaningful once xfs_bmap_finish() has run;
			 * a failed shortform conversion never sets it.
			 */
			ASSERT(!error || committed);
		}
		if (error) {
			args->trans = NULL;
			xfs_bmap_cancel(args->flist);
			return error;
		}

		/*
		 * bmap_finish() may have committed the last trans and started
		 * a new one.  We need the inode to be in all transactions.
		 */
		if (committed)
			xfs_trans_ijoin(args->trans, dp, 0);
	}
	return 0;
}
/*
 * Clean up after calling xfs_rmap_finish_one.
 *
 * Does nothing when @rcur is NULL.  Otherwise the cursor is deleted (in
 * error mode when @error is non-zero) and, on error only, the AGF buffer
 * the cursor was holding is released from @tp.
 */
void
xfs_rmap_finish_one_cleanup(
	struct xfs_trans	*tp,
	struct xfs_btree_cur	*rcur,
	int			error)
{
	struct xfs_buf		*agf_bp;

	if (!rcur)
		return;

	/* Grab the buffer pointer before the cursor goes away. */
	agf_bp = rcur->bc_private.a.agbp;

	if (!error) {
		xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
		return;
	}

	xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
	xfs_trans_brelse(tp, agf_bp);
}
/*
 * List the attributes of a single-leaf attribute fork for attr_list().
 *
 * Resets the listing cursor to block 0, reads that leaf without a
 * transaction, lets xfs_attr3_leaf_list_int() copy the entries out into
 * @context, and releases the buffer.
 *
 * Returns the read error, or otherwise the result of the listing helper.
 */
STATIC int
xfs_attr_leaf_list(xfs_attr_list_context_t *context)
{
	struct xfs_buf	*leaf_bp;
	int		rc;

	trace_xfs_attr_leaf_list(context);

	context->cursor->blkno = 0;
	rc = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &leaf_bp);
	if (!rc) {
		rc = xfs_attr3_leaf_list_int(leaf_bp, context);
		xfs_trans_brelse(NULL, leaf_bp);
	}
	return rc;
}
/*
 * Fetch the value of a named attribute from a node-format attribute fork.
 *
 * A da-btree state is allocated and xfs_da3_node_lookup_int() walks the
 * tree.  A hard lookup error is returned as is.  When the lookup reports
 * -EEXIST, the value is fetched from the last block of the lookup path
 * and, if it is stored remotely (args->rmtblkno > 0) and ATTR_KERNOVAL is
 * not set, the remote value is read as well.  Any other lookup result is
 * returned unchanged.
 *
 * Every buffer held in the lookup path is released before returning.
 */
STATIC int
xfs_attr_node_get(xfs_da_args_t *args)
{
	xfs_da_state_t		*state;
	xfs_da_state_blk_t	*leaf_blk;
	int			lookup_res;
	int			rc;
	int			idx;

	trace_xfs_attr_node_get(args);

	state = xfs_da_state_alloc();
	state->args = args;
	state->mp = args->dp->i_mount;

	/* Search to see if name exists, and get back a pointer to it. */
	rc = xfs_da3_node_lookup_int(state, &lookup_res);
	if (rc)
		goto release_path;

	rc = lookup_res;
	if (lookup_res != -EEXIST)
		goto release_path;

	leaf_blk = &state->path.blk[state->path.active - 1];
	ASSERT(leaf_blk->bp != NULL);
	ASSERT(leaf_blk->magic == XFS_ATTR_LEAF_MAGIC);

	/* Get the value, local or "remote". */
	rc = xfs_attr3_leaf_getvalue(leaf_blk->bp, args);
	if (rc == 0 && args->rmtblkno > 0 &&
	    !(args->flags & ATTR_KERNOVAL))
		rc = xfs_attr_rmtval_get(args);

release_path:
	/* Drop every buffer the lookup left attached to the path. */
	for (idx = 0; idx < state->path.active; idx++) {
		xfs_trans_brelse(args->trans, state->path.blk[idx].bp);
		state->path.blk[idx].bp = NULL;
	}

	xfs_da_state_free(state);
	return rc;
}
/*
 * Re-read the dquot chunk containing @dqp without a read verifier and
 * repair every dquot in it.
 *
 * The buffer is read at dqp->q_blkno with no buffer ops, so a corrupted
 * block is still returned; xfs_dquot_buf_ops is attached afterwards so the
 * buffer is verified on write.  Each of the qi_dqperchunk dquots in the
 * chunk is then passed to xfs_dqcheck() in XFS_QMOPT_DQREPAIR mode, with
 * ids counting up from @firstid.
 *
 * On success the held buffer is returned through @bpp and 0 is returned.
 * On failure *bpp is NULL: either the read failed, or a repair failed, in
 * which case the buffer is released and EIO is returned.
 */
STATIC int
xfs_qm_dqrepair(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_dquot	*dqp,
	xfs_dqid_t		firstid,
	struct xfs_buf		**bpp)
{
	int			error;
	struct xfs_disk_dquot	*ddq;
	struct xfs_dqblk	*d;
	int			i;

	/*
	 * Read the buffer without verification so we get the corrupted
	 * buffer returned to us. make sure we verify it on write, though.
	 */
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno,
				   mp->m_quotainfo->qi_dqchunklen,
				   0, bpp, NULL);
	if (error) {
		ASSERT(*bpp == NULL);
		return XFS_ERROR(error);
	}
	(*bpp)->b_ops = &xfs_dquot_buf_ops;

	ASSERT(xfs_buf_islocked(*bpp));
	d = (struct xfs_dqblk *)(*bpp)->b_addr;

	/* Do the actual repair of dquots in this buffer */
	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
		ddq = &d[i].dd_diskdq;
		error = xfs_dqcheck(mp, ddq, firstid + i,
				    dqp->dq_flags & XFS_DQ_ALLTYPES,
				    XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");
		if (error) {
			/* repair failed, we're screwed */
			xfs_trans_brelse(tp, *bpp);
			/*
			 * Do not leave the caller with a pointer to a buffer
			 * we no longer hold; this keeps both error exits
			 * consistent (*bpp == NULL on failure).
			 */
			*bpp = NULL;
			return XFS_ERROR(EIO);
		}
	}

	return 0;
}
/*
 * Delete a named attribute from a single-block (leaf) attribute fork.
 *
 * Reads the leaf at attribute block 0 and looks the name up.  A lookup
 * result of -ENOATTR releases the buffer and is returned.  Otherwise the
 * entry is removed and, when xfs_attr_shortform_allfit() says the rest
 * fits in the inode, the leaf is converted to shortform and the deferred
 * operations are finished.
 *
 * If the conversion or the deferred-op finish fails, args->trans is
 * cleared, the deferred ops are cancelled and the error is returned.
 * Returns 0 otherwise.
 */
STATIC int
xfs_attr_leaf_removename(xfs_da_args_t *args)
{
	xfs_inode_t	*ip;
	struct xfs_buf	*leaf_bp;
	int		rc;
	int		sf_forkoff;

	trace_xfs_attr_leaf_removename(args);

	ip = args->dp;
	args->blkno = 0;
	rc = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1,
				 &leaf_bp);
	if (rc)
		return rc;

	rc = xfs_attr3_leaf_lookup_int(leaf_bp, args);
	if (rc == -ENOATTR) {
		xfs_trans_brelse(args->trans, leaf_bp);
		return rc;
	}

	xfs_attr3_leaf_remove(leaf_bp, args);

	/* Nothing more to do unless the remainder fits into the inode. */
	sf_forkoff = xfs_attr_shortform_allfit(leaf_bp, ip);
	if (!sf_forkoff)
		return 0;

	xfs_defer_init(args->dfops, args->firstblock);
	rc = xfs_attr3_leaf_to_shortform(leaf_bp, args, sf_forkoff);
	/* leaf_bp is gone due to xfs_da_shrink_inode */
	if (!rc)
		rc = xfs_defer_finish(&args->trans, args->dfops, ip);
	if (!rc)
		return 0;

	args->trans = NULL;
	xfs_defer_cancel(args->dfops);
	return rc;
}
/*
 * External entry point for looking up a name in a block-format directory;
 * xfs_dir2_block_lookup_int() performs the actual search.
 *
 * On a successful search the matching data entry is located through the
 * block's leaf array, args->inumber and args->filetype are filled in from
 * it, and xfs_dir_cilookup_result() produces the return value.  The block
 * buffer is released before returning.  If the search helper fails its
 * error is returned immediately (no buffer is held in that case).
 */
int						/* error */
xfs_dir2_block_lookup(
	xfs_da_args_t	*args)			/* dir lookup arguments */
{
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
	xfs_dir2_leaf_entry_t	*leaf_ents;	/* block leaf entries */
	struct xfs_buf		*blk_bp;	/* block buffer */
	xfs_dir2_block_tail_t	*tail;		/* block tail */
	xfs_dir2_data_entry_t	*entry;		/* matching data entry */
	xfs_inode_t		*dp;		/* incore inode */
	int			slot;		/* leaf entry index */
	int			rc;		/* return value */
	xfs_mount_t		*mp;		/* filesystem mount point */

	trace_xfs_dir2_block_lookup(args);

	/* Get the buffer and the index of the matching leaf entry. */
	rc = xfs_dir2_block_lookup_int(args, &blk_bp, &slot);
	if (rc)
		return rc;

	dp = args->dp;
	/*
	 * NOTE(review): mp is assigned but not referenced by name anywhere
	 * below - confirm no macro depends on it before removing.
	 */
	mp = dp->i_mount;
	hdr = blk_bp->b_addr;
	xfs_dir3_data_check(dp, blk_bp);

	tail = xfs_dir2_block_tail_p(args->geo, hdr);
	leaf_ents = xfs_dir2_block_leaf_p(tail);

	/* The leaf entry's address gives the data entry's byte offset. */
	entry = (xfs_dir2_data_entry_t *)((char *)hdr +
		xfs_dir2_dataptr_to_off(args->geo,
					be32_to_cpu(leaf_ents[slot].address)));

	/* Fill in inode number, CI name if appropriate, release the block. */
	args->inumber = be64_to_cpu(entry->inumber);
	args->filetype = dp->d_ops->data_get_ftype(entry);
	rc = xfs_dir_cilookup_result(args, entry->name, entry->namelen);

	xfs_trans_brelse(args->trans, blk_bp);
	return rc;
}
/*
 * Choose the allocation group in which to look for a free inode, based on
 * the parent inode number and the new file's mode, and return that AG's
 * AGI buffer.
 *
 * Directories start at the AG handed out by xfs_ialloc_next_ag(); all other
 * files start at the parent's AG (or AG 0 if that is out of range).  The
 * AGs are then scanned in increasing order with wrap-around.  The first
 * full pass passes XFS_ALLOC_FLAG_TRYLOCK to xfs_alloc_pagf_init(); if it
 * finds nothing, a second pass runs with flags == 0.
 *
 * An AG is accepted when it is marked inode-capable and either already has
 * free inodes, or has enough free blocks to allocate a new inode chunk
 * (plus one block for the file itself when the mode implies data) and
 * @okalloc is set.
 *
 * Returns the AGI buffer of the selected AG, or NULL if no AG qualified or
 * the filesystem is shutting down.  m_peraglock is held for reading during
 * the scan and dropped on every return.
 */
STATIC xfs_buf_t *				/* allocation group buffer */
xfs_ialloc_ag_select(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent directory inode number */
	mode_t		mode,		/* bits set to indicate file type */
	int		okalloc)	/* ok to allocate more space */
{
	xfs_buf_t	*agi_bp;	/* AGI buffer of the AG under test */
	xfs_agnumber_t	ag_limit;	/* number of AGs to consider */
	xfs_agnumber_t	cur_ag;		/* AG currently being examined */
	int		pagf_flags;	/* flags for xfs_alloc_pagf_init() */
	xfs_extlen_t	blks_wanted;	/* blocks needed for new inodes */
	xfs_extlen_t	longest = 0;	/* longest extent available */
	xfs_mount_t	*mp;		/* mount point structure */
	int		needspace;	/* file mode implies space allocated */
	xfs_perag_t	*pag;		/* per allocation group data */
	xfs_agnumber_t	start_ag;	/* AG the scan starts (and ends) at */

	/*
	 * Files of these types need at least one block if length > 0
	 * (and they won't fit in the inode, but that's hard to figure out).
	 */
	needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
	mp = tp->t_mountp;
	ag_limit = mp->m_maxagi;

	if (!S_ISDIR(mode)) {
		start_ag = XFS_INO_TO_AGNO(mp, parent);
		if (start_ag >= ag_limit)
			start_ag = 0;
	} else {
		start_ag = xfs_ialloc_next_ag(mp);
	}
	ASSERT(start_ag < ag_limit);

	/*
	 * Walk the AGs looking for one with a little free space.  We do not
	 * look for free inodes as such: needing to allocate inodes simply
	 * means blocks must be available for them.
	 */
	cur_ag = start_ag;
	pagf_flags = XFS_ALLOC_FLAG_TRYLOCK;
	down_read(&mp->m_peraglock);
	for (;;) {
		pag = &mp->m_perag[cur_ag];

		/* Per-AG inode data not set up yet: reading the AGI is required. */
		agi_bp = NULL;
		if (!pag->pagi_init &&
		    xfs_ialloc_read_agi(mp, tp, cur_ag, &agi_bp)) {
			agi_bp = NULL;
			goto nextag;
		}

		if (!pag->pagi_inodeok) {
			xfs_ialloc_next_ag(mp);
			goto release_nextag;
		}

		/*
		 * With free inodes on hand no blocks are needed; otherwise a
		 * whole inode chunk has to be allocated.
		 */
		blks_wanted = 0;
		if (!pag->pagi_freecount)
			blks_wanted = XFS_IALLOC_BLOCKS(mp);

		if (blks_wanted && !pag->pagf_init) {
			if (agi_bp == NULL &&
			    xfs_ialloc_read_agi(mp, tp, cur_ag, &agi_bp)) {
				agi_bp = NULL;
				goto nextag;
			}
			(void)xfs_alloc_pagf_init(mp, tp, cur_ag, pagf_flags);
		}

		/* Free-space data still unavailable: cannot judge this AG. */
		if (blks_wanted && !pag->pagf_init)
			goto release_nextag;

		if (blks_wanted) {
			longest = pag->pagf_longest;
			if (!longest)
				longest = pag->pagf_flcount > 0;

			if (pag->pagf_freeblks < needspace + blks_wanted ||
			    longest < blks_wanted ||
			    !okalloc)
				goto release_nextag;
		}

		/* This AG will do; make sure we hand back its AGI buffer. */
		if (agi_bp == NULL &&
		    xfs_ialloc_read_agi(mp, tp, cur_ag, &agi_bp)) {
			agi_bp = NULL;
			goto nextag;
		}
		up_read(&mp->m_peraglock);
		return agi_bp;

release_nextag:
		if (agi_bp)
			xfs_trans_brelse(tp, agi_bp);
nextag:
		/*
		 * No point in iterating over the rest, if we're shutting
		 * down.
		 */
		if (XFS_FORCED_SHUTDOWN(mp)) {
			up_read(&mp->m_peraglock);
			return NULL;
		}

		cur_ag++;
		if (cur_ag >= ag_limit)
			cur_ag = 0;

		/* Back at the start: retry once without TRYLOCK, then quit. */
		if (cur_ag == start_ag) {
			if (pagf_flags == 0) {
				up_read(&mp->m_peraglock);
				return NULL;
			}
			pagf_flags = 0;
		}
	}
}
/*
 * Allocate an inode on disk.
 * Mode is used to tell whether the new inode will need space, and whether
 * it is a directory.
 *
 * The arguments IO_agbp and alloc_done are defined to work within
 * the constraint of one allocation per transaction.
 * xfs_dialloc() is designed to be called twice if it has to do an
 * allocation to make more free inodes.  On the first call,
 * *IO_agbp should be NULL.  If an inode is available,
 * i.e., xfs_dialloc() did not need to do an allocation, an inode
 * number is returned in *inop.  In this case *IO_agbp is set to NULL
 * and *alloc_done to B_FALSE.
 * If an allocation needed to be done, xfs_dialloc() returns
 * the current AG header buffer in *IO_agbp, sets *alloc_done to B_TRUE
 * and sets *inop to NULLFSINO.
 * The caller should then commit the current transaction, allocate a new
 * transaction, and call xfs_dialloc() again, passing in the previous
 * value of IO_agbp.  IO_agbp should be held across the transactions.
 * Since the agbp is locked across the two calls, the second call is
 * guaranteed to have a free inode available.
 *
 * Once we successfully pick an inode its number is returned and the
 * on-disk data structures are updated.  The inode itself is not read
 * in, since doing so would break ordering constraints with xfs_reclaim.
 *
 * Return value: 0 on success, including the cases where no inode could be
 * found (*inop == NULLFSINO); ENOSPC when every AG was scanned after the
 * inode-count ceiling (m_maxicount) had been reached; otherwise the error
 * from the failing helper.  Errors in this version are positive errnos.
 */
int
xfs_dialloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent inode (directory) */
	mode_t		mode,		/* mode bits for new inode */
	int		okalloc,	/* ok to allocate more space */
	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
	boolean_t	*alloc_done,	/* true if we needed to replenish
					   inode freelist */
	xfs_ino_t	*inop)		/* inode number allocated */
{
	xfs_agnumber_t	agcount;	/* number of allocation groups */
	xfs_buf_t	*agbp;		/* allocation group header's buffer */
	xfs_agnumber_t	agno;		/* allocation group number */
	xfs_agi_t	*agi;		/* allocation group header structure */
	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
	int		error;		/* error return value */
	int		i;		/* result code */
	int		ialloced;	/* inode allocation status */
	int		noroom = 0;	/* no space for inode blk allocation */
	xfs_ino_t	ino;		/* fs-relative inode to be returned */
	/* REFERENCED */
	int		j;		/* result code */
	xfs_mount_t	*mp;		/* file system mount structure */
	int		offset;		/* index of inode in chunk */
	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
	xfs_agnumber_t	pagno;		/* parent's allocation group number */
	xfs_inobt_rec_incore_t rec;	/* inode allocation record */
	xfs_agnumber_t	tagno;		/* testing allocation group number */
	xfs_btree_cur_t	*tcur;		/* temp cursor */
	xfs_inobt_rec_incore_t trec;	/* temp inode allocation record */

	if (*IO_agbp == NULL) {
		/*
		 * We do not have an agbp, so select an initial allocation
		 * group for inode allocation.
		 */
		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
		/*
		 * Couldn't find an allocation group satisfying the
		 * criteria, give up.  This is reported as success with
		 * *inop == NULLFSINO.
		 */
		if (!agbp) {
			*inop = NULLFSINO;
			return 0;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	} else {
		/*
		 * Continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 */
		agbp = *IO_agbp;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
	}
	mp = tp->t_mountp;
	agcount = mp->m_sb.sb_agcount;
	agno = be32_to_cpu(agi->agi_seqno);
	/* tagno walks the AGs; agno remembers where the walk started. */
	tagno = agno;
	pagno = XFS_INO_TO_AGNO(mp, parent);
	pagino = XFS_INO_TO_AGINO(mp, parent);

	/*
	 * If we have already hit the ceiling of inode blocks then clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */

	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 */
	*alloc_done = B_FALSE;
	while (!agi->agi_freecount) {
		/*
		 * Don't do anything if we're not supposed to allocate
		 * any blocks, just go on to the next ag.
		 */
		if (okalloc) {
			/*
			 * Try to allocate some new inodes in the allocation
			 * group.  ENOSPC from the allocator is reported to
			 * the caller as "no inode found" (return 0 with
			 * *inop == NULLFSINO); any other error is returned.
			 */
			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
				xfs_trans_brelse(tp, agbp);
				if (error == ENOSPC) {
					*inop = NULLFSINO;
					return 0;
				} else
					return error;
			}
			if (ialloced) {
				/*
				 * We successfully allocated some inodes, return
				 * the current context to the caller so that it
				 * can commit the current transaction and call
				 * us again where we left off.
				 */
				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
				*alloc_done = B_TRUE;
				*IO_agbp = agbp;
				*inop = NULLFSINO;
				return 0;
			}
		}
		/*
		 * If it failed, give up on this ag.
		 */
		xfs_trans_brelse(tp, agbp);
		/*
		 * Go on to the next ag: get its ag header.
		 */
nextag:
		if (++tagno == agcount)
			tagno = 0;
		/* Wrapped all the way round to the starting AG: nothing found. */
		if (tagno == agno) {
			*inop = NULLFSINO;
			return noroom ? ENOSPC : 0;
		}
		down_read(&mp->m_peraglock);
		/* Skip AGs that are not allowed to hold inodes. */
		if (mp->m_perag[tagno].pagi_inodeok == 0) {
			up_read(&mp->m_peraglock);
			goto nextag;
		}
		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
		up_read(&mp->m_peraglock);
		/* An unreadable AGI just means we try the next AG. */
		if (error)
			goto nextag;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	}
	/*
	 * Here with an allocation group that has a free inode.
	 * Reset agno since we may have chosen a new ag in the
	 * loop above.
	 */
	agno = tagno;
	*IO_agbp = NULL;
	cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);
#ifdef DEBUG
	/*
	 * Debug-only consistency check for single-level btrees: the sum of
	 * the per-record free counts must match the AGI free count.
	 */
	if (cur->bc_nlevels == 1) {
		int	freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);

		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	/*
	 * If in the same a.g. as the parent, try to get near the parent.
	 */
	if (pagno == agno) {
		if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
			goto error0;
		/*
		 * Note: error is assigned inside this condition, so the else
		 * branch below must re-check it before doing anything else.
		 */
		if (i != 0 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * Found a free inode in the same chunk
			 * as parent, done.
			 */
		}
		/*
		 * In the same a.g. as parent, but parent's chunk is full.
		 */
		else {
			int	doneleft;	/* done, to the left */
			int	doneright;	/* done, to the right */

			if (error)
				goto error0;
			ASSERT(i == 1);
			ASSERT(j == 1);
			/*
			 * Duplicate the cursor, search left & right
			 * simultaneously.  From here on both cursors are
			 * live, so failures go to error1 (frees tcur, then
			 * falls through to free cur).
			 */
			if ((error = xfs_btree_dup_cursor(cur, &tcur)))
				goto error0;
			/*
			 * Search left with tcur, back up 1 record.
			 */
			if ((error = xfs_inobt_decrement(tcur, 0, &i)))
				goto error1;
			doneleft = !i;
			if (!doneleft) {
				if ((error = xfs_inobt_get_rec(tcur,
						&trec.ir_startino,
						&trec.ir_freecount,
						&trec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Search right with cur, go forward 1 record.
			 */
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error1;
			doneright = !i;
			if (!doneright) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount,
						&rec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Loop until we find the closest inode chunk
			 * with a free one.
			 */
			while (!doneleft || !doneright) {
				int	useleft;  /* using left inode
						     chunk this time */

				/*
				 * Figure out which block is closer,
				 * if both are valid.  Distance is measured
				 * from pagino to the last inode of the left
				 * chunk vs. the first inode of the right one.
				 */
				if (!doneleft && !doneright)
					useleft =
						pagino -
						(trec.ir_startino +
						 XFS_INODES_PER_CHUNK - 1) <
						 rec.ir_startino - pagino;
				else
					useleft = !doneleft;
				/*
				 * If checking the left, does it have
				 * free inodes?
				 */
				if (useleft && trec.ir_freecount) {
					/*
					 * Yes, set it up as the chunk to use:
					 * tcur replaces cur.
					 */
					rec = trec;
					xfs_btree_del_cursor(cur,
						XFS_BTREE_NOERROR);
					cur = tcur;
					break;
				}
				/*
				 * If checking the right, does it have
				 * free inodes?
				 */
				if (!useleft && rec.ir_freecount) {
					/*
					 * Yes, it's already set up.
					 */
					xfs_btree_del_cursor(tcur,
						XFS_BTREE_NOERROR);
					break;
				}
				/*
				 * If used the left, get another one
				 * further left.
				 */
				if (useleft) {
					if ((error = xfs_inobt_decrement(tcur, 0,
							&i)))
						goto error1;
					doneleft = !i;
					if (!doneleft) {
						if ((error = xfs_inobt_get_rec(
							    tcur,
							    &trec.ir_startino,
							    &trec.ir_freecount,
							    &trec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
				/*
				 * If used the right, get another one
				 * further right.
				 */
				else {
					if ((error = xfs_inobt_increment(cur, 0,
							&i)))
						goto error1;
					doneright = !i;
					if (!doneright) {
						if ((error = xfs_inobt_get_rec(
							    cur,
							    &rec.ir_startino,
							    &rec.ir_freecount,
							    &rec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
			}
			/*
			 * NOTE(review): if the loop ends because both
			 * directions were exhausted, only this ASSERT guards
			 * the path; tcur is not deleted here and rec holds a
			 * chunk with no free inodes - confirm this cannot
			 * happen while agi_freecount > 0.
			 */
			ASSERT(!doneleft || !doneright);
		}
	}
	/*
	 * In a different a.g. from the parent.
	 * See if the most recently allocated block has any free.
	 *
	 * NOTE(review): there is no final else; if agi_newino is NULLAGINO
	 * here, rec is used below without having been filled in - confirm
	 * callers/on-disk state rule that out.
	 */
	else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
		if ((error = xfs_inobt_lookup_eq(cur,
				be32_to_cpu(agi->agi_newino), 0, 0, &i)))
			goto error0;
		if (i == 1 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * The last chunk allocated in the group still has
			 * a free inode.
			 */
		}
		/*
		 * None left in the last group, search the whole a.g.
		 */
		else {
			if (error)
				goto error0;
			if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
				goto error0;
			ASSERT(i == 1);
			/* Linear scan from the first record for a free inode. */
			for (;;) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount, &rec.ir_free,
						&i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
				if (rec.ir_freecount > 0)
					break;
				if ((error = xfs_inobt_increment(cur, 0, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			}
		}
	}
	/*
	 * rec now describes the chosen chunk: pick a free slot in it, mark
	 * it allocated in the btree record, and update the AGI, the incore
	 * per-AG counter and the superblock free-inode count.
	 */
	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
	XFS_INOBT_CLR_FREE(&rec, offset);
	rec.ir_freecount--;
	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
			rec.ir_free)))
		goto error0;
	be32_add_cpu(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	down_read(&mp->m_peraglock);
	mp->m_perag[tagno].pagi_freecount--;
	up_read(&mp->m_peraglock);
#ifdef DEBUG
	/* Debug-only: re-verify the free count after the update. */
	if (cur->bc_nlevels == 1) {
		int	freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);
		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
	*inop = ino;
	return 0;
	/* error1: both cursors live; error0: only cur live. */
error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
/*
 * Allocate an incore dquot for (@id, @type), populate it from the on-disk
 * dquot located by xfs_qm_dqtobp(), and release the dquot buffer again.
 *
 * When XFS_QMOPT_DQALLOC is set in @flags a transaction is allocated and
 * reserved first, handed to xfs_qm_dqtobp(), and committed on success.
 *
 * On success the new dquot is returned through @O_dqpp and 0 is returned.
 * On any failure the transaction (if one exists) is cancelled unless the
 * failure came from the commit itself, the dquot is destroyed, *O_dqpp is
 * set to NULL and the error is returned.
 */
int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	uint			flags,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_dquot	*new_dq;
	struct xfs_disk_dquot	*disk_dq;
	struct xfs_buf		*dq_bp;
	struct xfs_trans	*tp = NULL;
	int			error;
	int			cancelflags = 0;

	new_dq = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

	new_dq->dq_flags = type;
	new_dq->q_core.d_id = cpu_to_be32(id);
	new_dq->q_mount = mp;
	INIT_LIST_HEAD(&new_dq->q_lru);
	mutex_init(&new_dq->q_qlock);
	init_waitqueue_head(&new_dq->q_pinwait);

	/*
	 * The flush completion is used as a counting completion: complete
	 * it once up front so the first waiter gets through without
	 * blocking.
	 */
	init_completion(&new_dq->q_flush);
	complete(&new_dq->q_flush);

	/*
	 * Group and project dquot locks get their own lock classes; user
	 * dquots keep the default one.
	 */
	switch (type) {
	case XFS_DQ_GROUP:
		lockdep_set_class(&new_dq->q_qlock, &xfs_dquot_group_class);
		break;
	case XFS_DQ_PROJ:
		lockdep_set_class(&new_dq->q_qlock, &xfs_dquot_project_class);
		break;
	case XFS_DQ_USER:
		/* uses the default lock class */
		break;
	default:
		ASSERT(0);
		break;
	}

	XFS_STATS_INC(xs_qm_dquot);

	trace_xfs_dqread(new_dq);

	if (flags & XFS_QMOPT_DQALLOC) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
					  XFS_QM_DQALLOC_SPACE_RES(mp), 0);
		if (error)
			goto out_cancel;
		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
	}

	/*
	 * Get a pointer to the on-disk dquot and the buffer containing it;
	 * new_dq already knows its own type.
	 */
	error = xfs_qm_dqtobp(&tp, new_dq, &disk_dq, &dq_bp, flags);
	if (error) {
		/*
		 * This can happen if quotas got turned off (ESRCH),
		 * or if the dquot didn't exist on disk and we ask to
		 * allocate (ENOENT).
		 */
		trace_xfs_dqread_fail(new_dq);
		cancelflags |= XFS_TRANS_ABORT;
		goto out_cancel;
	}

	/* Copy everything from the disk dquot to the incore dquot. */
	memcpy(&new_dq->q_core, disk_dq, sizeof(xfs_disk_dquot_t));
	xfs_qm_dquot_logitem_init(new_dq);

	/*
	 * Reservation counters are "reservation plus current usage", so
	 * they start out equal to the on-disk usage counts.
	 */
	new_dq->q_res_bcount = be64_to_cpu(disk_dq->d_bcount);
	new_dq->q_res_icount = be64_to_cpu(disk_dq->d_icount);
	new_dq->q_res_rtbcount = be64_to_cpu(disk_dq->d_rtbcount);

	/* initialize the dquot speculative prealloc thresholds */
	xfs_dquot_set_prealloc_limits(new_dq);

	/* Mark the buf so that this will stay incore a little longer */
	xfs_buf_set_ref(dq_bp, XFS_DQUOT_REF);

	/*
	 * Release the buffer through the transaction interface.  Everything
	 * we need has been copied into the incore dquot, so the buffer does
	 * not have to stay held for later dquot accesses.
	 */
	ASSERT(xfs_buf_islocked(dq_bp));
	xfs_trans_brelse(tp, dq_bp);

	if (tp) {
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		/* A failed commit must not be followed by a cancel. */
		if (error)
			goto out_destroy;
	}

	*O_dqpp = new_dq;
	return error;

out_cancel:
	if (tp)
		xfs_trans_cancel(tp, cancelflags);
out_destroy:
	xfs_qm_dqdestroy(new_dq);
	*O_dqpp = NULL;
	return error;
}
/* * Allocate an inode on disk. * Mode is used to tell whether the new inode will need space, and whether * it is a directory. * * The arguments IO_agbp and alloc_done are defined to work within * the constraint of one allocation per transaction. * xfs_dialloc() is designed to be called twice if it has to do an * allocation to make more free inodes. On the first call, * IO_agbp should be set to NULL. If an inode is available, * i.e., xfs_dialloc() did not need to do an allocation, an inode * number is returned. In this case, IO_agbp would be set to the * current ag_buf and alloc_done set to false. * If an allocation needed to be done, xfs_dialloc would return * the current ag_buf in IO_agbp and set alloc_done to true. * The caller should then commit the current transaction, allocate a new * transaction, and call xfs_dialloc() again, passing in the previous * value of IO_agbp. IO_agbp should be held across the transactions. * Since the agbp is locked across the two calls, the second call is * guaranteed to have a free inode available. * * Once we successfully pick an inode its number is returned and the * on-disk data structures are updated. The inode itself is not read * in, since doing so would break ordering constraints with xfs_reclaim. 
*/ int xfs_dialloc( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ int okalloc, /* ok to allocate more space */ xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ boolean_t *alloc_done, /* true if we needed to replenish inode freelist */ xfs_ino_t *inop) /* inode number allocated */ { xfs_agnumber_t agcount; /* number of allocation groups */ xfs_buf_t *agbp; /* allocation group header's buffer */ xfs_agnumber_t agno; /* allocation group number */ xfs_agi_t *agi; /* allocation group header structure */ xfs_btree_cur_t *cur; /* inode allocation btree cursor */ int error; /* error return value */ int i; /* result code */ int ialloced; /* inode allocation status */ int noroom = 0; /* no space for inode blk allocation */ xfs_ino_t ino; /* fs-relative inode to be returned */ /* REFERENCED */ int j; /* result code */ xfs_mount_t *mp; /* file system mount structure */ int offset; /* index of inode in chunk */ xfs_agino_t pagino; /* parent's AG relative inode # */ xfs_agnumber_t pagno; /* parent's AG number */ xfs_inobt_rec_incore_t rec; /* inode allocation record */ xfs_agnumber_t tagno; /* testing allocation group number */ xfs_btree_cur_t *tcur; /* temp cursor */ xfs_inobt_rec_incore_t trec; /* temp inode allocation record */ struct xfs_perag *pag; if (*IO_agbp == NULL) { /* * We do not have an agbp, so select an initial allocation * group for inode allocation. */ agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc); /* * Couldn't find an allocation group satisfying the * criteria, give up. */ if (!agbp) { *inop = NULLFSINO; return 0; } agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); } else { /* * Continue where we left off before. In this case, we * know that the allocation group has free inodes. 
*/ agbp = *IO_agbp; agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); ASSERT(be32_to_cpu(agi->agi_freecount) > 0); } mp = tp->t_mountp; agcount = mp->m_sb.sb_agcount; agno = be32_to_cpu(agi->agi_seqno); tagno = agno; pagno = XFS_INO_TO_AGNO(mp, parent); pagino = XFS_INO_TO_AGINO(mp, parent); /* * If we have already hit the ceiling of inode blocks then clear * okalloc so we scan all available agi structures for a free * inode. */ if (mp->m_maxicount && mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { noroom = 1; okalloc = 0; } /* * Loop until we find an allocation group that either has free inodes * or in which we can allocate some inodes. Iterate through the * allocation groups upward, wrapping at the end. */ *alloc_done = B_FALSE; while (!agi->agi_freecount) { /* * Don't do anything if we're not supposed to allocate * any blocks, just go on to the next ag. */ if (okalloc) { /* * Try to allocate some new inodes in the allocation * group. */ if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) { xfs_trans_brelse(tp, agbp); if (error == ENOSPC) { *inop = NULLFSINO; return 0; } else return error; } if (ialloced) { /* * We successfully allocated some inodes, return * the current context to the caller so that it * can commit the current transaction and call * us again where we left off. */ ASSERT(be32_to_cpu(agi->agi_freecount) > 0); *alloc_done = B_TRUE; *IO_agbp = agbp; *inop = NULLFSINO; return 0; } } /* * If it failed, give up on this ag. */ xfs_trans_brelse(tp, agbp); /* * Go on to the next ag: get its ag header. */ nextag: if (++tagno == agcount) tagno = 0; if (tagno == agno) { *inop = NULLFSINO; return noroom ? 
ENOSPC : 0; } pag = xfs_perag_get(mp, tagno); if (pag->pagi_inodeok == 0) { xfs_perag_put(pag); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); xfs_perag_put(pag); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); } /* * Here with an allocation group that has a free inode. * Reset agno since we may have chosen a new ag in the * loop above. */ agno = tagno; *IO_agbp = NULL; pag = xfs_perag_get(mp, agno); restart_pagno: cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno)); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. */ if (!pagino) pagino = be32_to_cpu(agi->agi_newino); error = xfs_check_agi_freecount(cur, agi); if (error) goto error0; /* * If in the same AG as the parent, try to get near the parent. */ if (pagno == agno) { int doneleft; /* done, to the left */ int doneright; /* done, to the right */ int searchdistance = 10; error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i); if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (rec.ir_freecount > 0) { /* * Found a free inode in the same chunk * as the parent, done. */ goto alloc_inode; } /* * In the same AG as parent, but parent's chunk is full. */ /* duplicate the cursor, search left & right simultaneously */ error = xfs_btree_dup_cursor(cur, &tcur); if (error) goto error0; /* * Skip to last blocks looked up if same parent inode. 
*/ if (pagino != NULLAGINO && pag->pagl_pagino == pagino && pag->pagl_leftrec != NULLAGINO && pag->pagl_rightrec != NULLAGINO) { error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec, &trec, &doneleft, 1); if (error) goto error1; error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec, &rec, &doneright, 0); if (error) goto error1; } else { /* search left with tcur, back up 1 record */ error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); if (error) goto error1; /* search right with cur, go forward 1 record. */ error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); if (error) goto error1; } /* * Loop until we find an inode chunk with a free inode. */ while (!doneleft || !doneright) { int useleft; /* using left inode chunk this time */ if (!--searchdistance) { /* * Not in range - save last search * location and allocate a new inode */ xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; goto newino; } /* figure out the closer block if both are valid. */ if (!doneleft && !doneright) { useleft = pagino - (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) < rec.ir_startino - pagino; } else { useleft = !doneleft; } /* free inodes to the left? */ if (useleft && trec.ir_freecount) { rec = trec; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); cur = tcur; pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; goto alloc_inode; } /* free inodes to the right? */ if (!useleft && rec.ir_freecount) { xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); pag->pagl_leftrec = trec.ir_startino; pag->pagl_rightrec = rec.ir_startino; pag->pagl_pagino = pagino; goto alloc_inode; } /* get next record to check */ if (useleft) { error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1); } else { error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0); } if (error) goto error1; } /* * We've reached the end of the btree. 
because * we are only searching a small chunk of the * btree each search, there is obviously free * inodes closer to the parent inode than we * are now. restart the search again. */ pag->pagl_pagino = NULLAGINO; pag->pagl_leftrec = NULLAGINO; pag->pagl_rightrec = NULLAGINO; xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); goto restart_pagno; } /* * In a different AG from the parent. * See if the most recently allocated block has any free. */ newino: if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), XFS_LOOKUP_EQ, &i); if (error) goto error0; if (i == 1) { error = xfs_inobt_get_rec(cur, &rec, &j); if (error) goto error0; if (j == 1 && rec.ir_freecount > 0) { /* * The last chunk allocated in the group * still has a free inode. */ goto alloc_inode; } } } /* * None left in the last group, search the whole AG */ error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); for (;;) { error = xfs_inobt_get_rec(cur, &rec, &i); if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (rec.ir_freecount > 0) break; error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } alloc_inode: offset = xfs_ialloc_find_free(&rec.ir_free); ASSERT(offset >= 0); ASSERT(offset < XFS_INODES_PER_CHUNK); ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); rec.ir_free &= ~XFS_INOBT_MASK(offset); rec.ir_freecount--; error = xfs_inobt_update(cur, &rec); if (error) goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); pag->pagi_freecount--; error = xfs_check_agi_freecount(cur, agi); if (error) goto error0; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); xfs_perag_put(pag); *inop = ino; return 0; error1: 
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); error0: xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); xfs_perag_put(pag); return error; }
int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_trans_res tres; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, local; XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.value = value; args.valuelen = valuelen; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); error = xfs_qm_dqattach(dp); if (error) return error; /* * If the inode doesn't have an attribute fork, add one. * (inode must not be locked when we call this routine) */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); if (error) return error; } tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ error = xfs_trans_alloc(mp, &tres, args.total, 0, rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); if (error) return error; xfs_ilock(dp, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans); return error; } xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). 
*/ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(&args); /* * Try to add the attr to the attribute list in * the inode. */ error = xfs_attr_shortform_addname(&args); if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ ASSERT(args.trans != NULL); /* * If this is a synchronous mount, make sure that * the transaction goes to disk before returning * to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; } /* * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out; /* * Prevent the leaf buffer from being unlocked so that a * concurrent AIL push cannot grab the half-baked leaf * buffer and run into problems with the write verifier. */ xfs_trans_bhold(args.trans, leaf_bp); error = xfs_defer_finish(&args.trans); if (error) goto out; /* * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf, which * means that we have to hold & join the leaf buffer here too. */ error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; xfs_trans_bjoin(args.trans, leaf_bp); leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); else error = xfs_attr_node_addname(&args); if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. 
*/ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (leaf_bp) xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
/* * Add a name to the leaf attribute list structure * * This leaf block cannot have a "remote" value, we only call this routine * if bmap_one_block() says there is only one block (ie: no remote blks). */ STATIC int xfs_attr_leaf_addname( struct xfs_da_args *args) { struct xfs_inode *dp; struct xfs_buf *bp; int retval, error, forkoff; trace_xfs_attr_leaf_addname(args); /* * Read the (only) block in the attribute list in. */ dp = args->dp; args->blkno = 0; error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) return error; /* * Look up the given attribute in the leaf block. Figure out if * the given flags produce an error or call for an atomic rename. */ retval = xfs_attr3_leaf_lookup_int(bp, args); if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { xfs_trans_brelse(args->trans, bp); return retval; } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) { /* pure create op */ xfs_trans_brelse(args->trans, bp); return retval; } trace_xfs_attr_leaf_replace(args); /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; args->rmtvaluelen2 = args->rmtvaluelen; /* * clear the remote attr state now that it is saved so that the * values reflect the state of the attribute we are about to * add, not the attribute we just found and will remove later. */ args->rmtblkno = 0; args->rmtblkcnt = 0; args->rmtvaluelen = 0; } /* * Add the attribute to the leaf block, transitioning to a Btree * if required. */ retval = xfs_attr3_leaf_add(bp, args); if (retval == -ENOSPC) { /* * Promote the attribute list to the Btree format, then * Commit that transaction so that the node_addname() call * can manage its own transactions. 
*/ error = xfs_attr3_leaf_to_node(args); if (error) return error; error = xfs_defer_finish(&args->trans); if (error) return error; /* * Commit the current trans (including the inode) and start * a new one. */ error = xfs_trans_roll_inode(&args->trans, dp); if (error) return error; /* * Fob the whole rest of the problem off on the Btree code. */ error = xfs_attr_node_addname(args); return error; } /* * Commit the transaction that added the attr name so that * later routines can manage their own transactions. */ error = xfs_trans_roll_inode(&args->trans, dp); if (error) return error; /* * If there was an out-of-line value, allocate the blocks we * identified for its storage and copy the value. This is done * after we create the attribute so that we don't overflow the * maximum size of a transaction and/or hit a deadlock. */ if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) return error; } /* * If this is an atomic rename operation, we must "flip" the * incomplete flags on the "new" and "old" attribute/value pairs * so that one disappears and one appears atomically. Then we * must remove the "old" attribute/value pair. */ if (args->op_flags & XFS_DA_OP_RENAME) { /* * In a separate transaction, set the incomplete flag on the * "old" attr and clear the incomplete flag on the "new" attr. */ error = xfs_attr3_leaf_flipflags(args); if (error) return error; /* * Dismantle the "old" attribute/value pair by removing * a "remote" value (if it exists). */ args->index = args->index2; args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) return error; } /* * Read in the block containing the "old" attr, then * remove the "old" attr from that block (neat, huh!) 
*/ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp); if (error) return error; xfs_attr3_leaf_remove(bp, args); /* * If the result is small enough, shrink it all into the inode. */ if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) return error; error = xfs_defer_finish(&args->trans); if (error) return error; } /* * Commit the remove and start the next trans in series. */ error = xfs_trans_roll_inode(&args->trans, dp); } else if (args->rmtblkno > 0) { /* * Added a "remote" value, just clear the incomplete flag. */ error = xfs_attr3_leaf_clearflag(args); } return error; }
/* * Remove a name from a B-tree attribute list. * * This will involve walking down the Btree, and may involve joining * leaf nodes and even joining intermediate nodes up to and including * the root node (a special case of an intermediate node). */ STATIC int xfs_attr_node_removename( struct xfs_da_args *args) { struct xfs_da_state *state; struct xfs_da_state_blk *blk; struct xfs_inode *dp; struct xfs_buf *bp; int retval, error, forkoff; trace_xfs_attr_node_removename(args); /* * Tie a string around our finger to remind us where we are. */ dp = args->dp; state = xfs_da_state_alloc(); state->args = args; state->mp = dp->i_mount; /* * Search to see if name exists, and get back a pointer to it. */ error = xfs_da3_node_lookup_int(state, &retval); if (error || (retval != -EEXIST)) { if (error == 0) error = retval; goto out; } /* * If there is an out-of-line value, de-allocate the blocks. * This is done before we remove the attribute so that we don't * overflow the maximum size of a transaction and/or hit a deadlock. */ blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->bp != NULL); ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); if (args->rmtblkno > 0) { /* * Fill in disk block numbers in the state structure * so that we can get the buffers back after we commit * several transactions in the following calls. */ error = xfs_attr_fillstate(state); if (error) goto out; /* * Mark the attribute as INCOMPLETE, then bunmapi() the * remote value. */ error = xfs_attr3_leaf_setflag(args); if (error) goto out; error = xfs_attr_rmtval_remove(args); if (error) goto out; /* * Refill the state structure with buffers, the prior calls * released our buffers. */ error = xfs_attr_refillstate(state); if (error) goto out; } /* * Remove the name and update the hashvals in the tree. 
*/ blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); retval = xfs_attr3_leaf_remove(blk->bp, args); xfs_da3_fixhashpath(state, &state->path); /* * Check to see if the tree needs to be collapsed. */ if (retval && (state->path.active > 1)) { error = xfs_da3_join(state); if (error) goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; /* * Commit the Btree join operation and start a new trans. */ error = xfs_trans_roll_inode(&args->trans, dp); if (error) goto out; } /* * If the result is small enough, push it all into the inode. */ if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { /* * Have to get rid of the copy of this dabuf in the state. */ ASSERT(state->path.active == 1); ASSERT(state->path.blk[0].bp); state->path.blk[0].bp = NULL; error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp); if (error) goto out; if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); /* bp is gone due to xfs_da_shrink_inode */ if (error) goto out; error = xfs_defer_finish(&args->trans); if (error) goto out; } else xfs_trans_brelse(args->trans, bp); } error = 0; out: xfs_da_state_free(state); return error; }
/*
 * List the attributes of a node-format (Btree) attribute fork.
 *
 * The cursor in the context records the block and hash value to resume
 * from.  The cursor's block is validated first; if it cannot be trusted the
 * tree is walked from block 0 down to the leaf that covers the cursor's
 * hash value.  Leaf blocks are then processed in forward order until the
 * context has seen enough or the chain of leaves ends.
 *
 * Returns 0 on success (including when no leaf covers the hash value),
 * -EFSCORRUPTED if a block with an unexpected magic number is met during
 * the descent, or an error from reading or listing a block.
 */
STATIC int
xfs_attr_node_list(xfs_attr_list_context_t *context)
{
	attrlist_cursor_kern_t		*cursor;
	xfs_attr_leafblock_t		*leaf;
	xfs_da_intnode_t		*node;
	struct xfs_attr3_icleaf_hdr	leafhdr;
	struct xfs_da3_icnode_hdr	nodehdr;
	struct xfs_da_node_entry	*btree;
	int				error, i;
	struct xfs_buf			*bp;
	struct xfs_inode		*dp = context->dp;
	struct xfs_mount		*mp = dp->i_mount;

	trace_xfs_attr_node_list(context);

	cursor = context->cursor;
	cursor->initted = 1;

	/*
	 * Do all sorts of validation on the passed-in cursor structure.
	 * If anything is amiss, ignore the cursor and look up the hashval
	 * starting from the btree root.
	 */
	bp = NULL;
	if (cursor->blkno > 0) {
		error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,
					  &bp, XFS_ATTR_FORK);
		if (error != 0 && error != -EFSCORRUPTED)
			return error;
		if (bp) {
			struct xfs_attr_leaf_entry	*entries;
			__uint16_t			blk_magic;
			bool				stale = true;

			node = bp->b_addr;
			blk_magic = be16_to_cpu(node->hdr.info.magic);

			/*
			 * Only a leaf block whose hash range covers the
			 * cursor's hashval is usable; node blocks and any
			 * other magic are treated as the wrong block.
			 */
			if (blk_magic == XFS_ATTR_LEAF_MAGIC ||
			    blk_magic == XFS_ATTR3_LEAF_MAGIC) {
				leaf = bp->b_addr;
				xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
							     &leafhdr, leaf);
				entries = xfs_attr3_leaf_entryp(leaf);
				if (cursor->hashval <= be32_to_cpu(
					entries[leafhdr.count - 1].hashval) &&
				    cursor->hashval > be32_to_cpu(
					entries[0].hashval))
					stale = false;
			}

			if (stale) {
				trace_xfs_attr_list_wrong_blk(context);
				xfs_trans_brelse(NULL, bp);
				bp = NULL;
			}
		}
	}

	/*
	 * We did not find what we expected given the cursor's contents,
	 * so we start from the top and work down based on the hash value.
	 * Note that start of node block is same as start of leaf block.
	 */
	if (bp == NULL) {
		cursor->blkno = 0;
		for (;;) {
			__uint16_t	magic;

			error = xfs_da3_node_read(NULL, dp,
						  cursor->blkno, -1, &bp,
						  XFS_ATTR_FORK);
			if (error)
				return error;
			node = bp->b_addr;
			magic = be16_to_cpu(node->hdr.info.magic);

			/* Reached a leaf: the descent is finished. */
			if (magic == XFS_ATTR_LEAF_MAGIC ||
			    magic == XFS_ATTR3_LEAF_MAGIC)
				break;

			if (magic != XFS_DA_NODE_MAGIC &&
			    magic != XFS_DA3_NODE_MAGIC) {
				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
						     XFS_ERRLEVEL_LOW,
						     context->dp->i_mount,
						     node);
				xfs_trans_brelse(NULL, bp);
				return -EFSCORRUPTED;
			}

			/* Pick the first child whose hashval covers ours. */
			dp->d_ops->node_hdr_from_disk(&nodehdr, node);
			btree = dp->d_ops->node_tree_p(node);
			for (i = 0; i < nodehdr.count; btree++, i++) {
				if (cursor->hashval
						<= be32_to_cpu(btree->hashval)) {
					cursor->blkno = be32_to_cpu(btree->before);
					trace_xfs_attr_list_node_descend(context,
									 btree);
					break;
				}
			}

			/* Done with this node whether or not we descend. */
			xfs_trans_brelse(NULL, bp);
			if (i == nodehdr.count)
				return 0;
		}
	}
	ASSERT(bp != NULL);

	/*
	 * Roll upward through the blocks, processing each leaf block in
	 * order.  As long as there is space in the result buffer, keep
	 * adding the information.
	 */
	for (;;) {
		leaf = bp->b_addr;
		error = xfs_attr3_leaf_list_int(bp, context);
		if (error) {
			xfs_trans_brelse(NULL, bp);
			return error;
		}
		xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
		if (context->seen_enough || leafhdr.forw == 0)
			break;

		/* Step to the next leaf in the forward chain. */
		cursor->blkno = leafhdr.forw;
		xfs_trans_brelse(NULL, bp);
		error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, &bp);
		if (error)
			return error;
	}
	xfs_trans_brelse(NULL, bp);
	return 0;
}
/*
 * Internal block lookup routine.
 *
 * Reads the directory block, binary-searches its leaf entries for
 * args->hashval, then scans every entry carrying that hash for the name.
 *
 * On a match, 0 is returned with *bpp set to the (still held) block buffer
 * and *entno set to the index of the matching leaf entry; an exact match
 * returns immediately, a case-insensitive match is returned only if no
 * exact match follows.  If nothing matches, the buffer is released and
 * -ENOENT is returned.  A read failure returns that error.
 */
static int				/* error */
xfs_dir2_block_lookup_int(
	xfs_da_args_t		*args,	/* dir lookup arguments */
	struct xfs_buf		**bpp,	/* returned block buffer */
	int			*entno)	/* returned entry number */
{
	xfs_dir2_dataptr_t	addr;	/* data entry address */
	xfs_dir2_data_hdr_t	*hdr;	/* block header */
	xfs_dir2_leaf_entry_t	*blp;	/* block leaf entries */
	struct xfs_buf		*bp;	/* block buffer */
	xfs_dir2_block_tail_t	*btp;	/* block tail */
	xfs_dir2_data_entry_t	*dep;	/* block data entry */
	xfs_inode_t		*dp;	/* incore inode */
	int			error;	/* error return value */
	xfs_dahash_t		hash;	/* found hash value */
	int			high;	/* binary search high index */
	int			low;	/* binary search low index */
	int			mid;	/* binary search current idx */
	xfs_mount_t		*mp;	/* filesystem mount point */
	xfs_trans_t		*tp;	/* transaction pointer */
	enum xfs_dacmp		cmp;	/* comparison result */

	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;

	error = xfs_dir3_block_read(tp, dp, &bp);
	if (error)
		return error;

	hdr = bp->b_addr;
	xfs_dir3_data_check(dp, bp);
	btp = xfs_dir2_block_tail_p(args->geo, hdr);
	blp = xfs_dir2_block_leaf_p(btp);
	/*
	 * Loop doing a binary search for our hash value.
	 * Find our entry, ENOENT if it's not there.
	 *
	 * The empty-range test sits at the top of the loop so that a block
	 * whose tail reports zero leaf entries (high == -1) is answered with
	 * ENOENT instead of indexing blp[] with a negative mid.
	 */
	low = 0;
	high = be32_to_cpu(btp->count) - 1;
	for (;;) {
		if (low > high) {
			ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
			xfs_trans_brelse(tp, bp);
			return -ENOENT;
		}
		mid = (low + high) >> 1;
		hash = be32_to_cpu(blp[mid].hashval);
		if (hash == args->hashval)
			break;
		if (hash < args->hashval)
			low = mid + 1;
		else
			high = mid - 1;
	}
	/*
	 * Back up to the first one with the right hash value.
	 */
	while (mid > 0 && be32_to_cpu(blp[mid - 1].hashval) == args->hashval) {
		mid--;
	}
	/*
	 * Now loop forward through all the entries with the
	 * right hash value looking for our name.
	 */
	do {
		/* Skip leaf entries that carry the null data pointer. */
		if ((addr = be32_to_cpu(blp[mid].address)) == XFS_DIR2_NULL_DATAPTR)
			continue;
		/*
		 * Get pointer to the entry from the leaf.
		 */
		dep = (xfs_dir2_data_entry_t *)
			((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));
		/*
		 * Compare name and if it's an exact match, return the index
		 * and buffer.  If it's the first case-insensitive match, store
		 * the index and buffer and continue looking for an exact match.
		 */
		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
			args->cmpresult = cmp;
			*bpp = bp;
			*entno = mid;
			if (cmp == XFS_CMP_EXACT)
				return 0;
		}
	} while (++mid < be32_to_cpu(btp->count) &&
			be32_to_cpu(blp[mid].hashval) == hash);

	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
	/*
	 * Here, we can only be doing a lookup (not a rename or replace).
	 * If a case-insensitive match was found earlier, return success.
	 */
	if (args->cmpresult == XFS_CMP_CASE)
		return 0;
	/*
	 * No match, release the buffer and return ENOENT.
	 */
	xfs_trans_brelse(tp, bp);
	return -ENOENT;
}
/* Execute a getfsmap query against the regular data device. */ STATIC int __xfs_getfsmap_datadev( struct xfs_trans *tp, struct xfs_fsmap *keys, struct xfs_getfsmap_info *info, int (*query_fn)(struct xfs_trans *, struct xfs_getfsmap_info *, struct xfs_btree_cur **, void *), void *priv) { struct xfs_mount *mp = tp->t_mountp; struct xfs_btree_cur *bt_cur = NULL; xfs_fsblock_t start_fsb; xfs_fsblock_t end_fsb; xfs_agnumber_t start_ag; xfs_agnumber_t end_ag; xfs_daddr_t eofs; int error = 0; eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (keys[0].fmr_physical >= eofs) return 0; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical); end_fsb = XFS_DADDR_TO_FSB(mp, keys[1].fmr_physical); /* * Convert the fsmap low/high keys to AG based keys. Initialize * low to the fsmap low key and max out the high key to the end * of the AG. */ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); if (error) return error; info->low.rm_blockcount = 0; xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); info->high.rm_startblock = -1U; info->high.rm_owner = ULLONG_MAX; info->high.rm_offset = ULLONG_MAX; info->high.rm_blockcount = 0; info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; start_ag = XFS_FSB_TO_AGNO(mp, start_fsb); end_ag = XFS_FSB_TO_AGNO(mp, end_fsb); /* Query each AG */ for (info->agno = start_ag; info->agno <= end_ag; info->agno++) { /* * Set the AG high key from the fsmap high key if this * is the last AG that we're querying. 
*/ if (info->agno == end_ag) { info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsb); info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); if (error) goto err; xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); } if (bt_cur) { xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR); bt_cur = NULL; xfs_trans_brelse(tp, info->agf_bp); info->agf_bp = NULL; } error = xfs_alloc_read_agf(mp, tp, info->agno, 0, &info->agf_bp); if (error) goto err; trace_xfs_fsmap_low_key(mp, info->dev, info->agno, &info->low); trace_xfs_fsmap_high_key(mp, info->dev, info->agno, &info->high); error = query_fn(tp, info, &bt_cur, priv); if (error) goto err; /* * Set the AG low key to the start of the AG prior to * moving on to the next AG. */ if (info->agno == start_ag) { info->low.rm_startblock = 0; info->low.rm_owner = 0; info->low.rm_offset = 0; info->low.rm_flags = 0; } } /* Report any gap at the end of the AG */ info->last = true; error = query_fn(tp, info, &bt_cur, priv); if (error) goto err; err: if (bt_cur) xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); if (info->agf_bp) { xfs_trans_brelse(tp, info->agf_bp); info->agf_bp = NULL; } return error; }
/* * Read the disk inode attributes into the in-core inode structure. * * For version 5 superblocks, if we are initialising a new inode and we are not * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new * inode core with a random generation number. If we are keeping inodes around, * we need to read the inode cluster to get the existing generation number off * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode * format) then log recovery is dependent on the di_flushiter field being * initialised from the current on-disk value and hence we must also read the * inode off disk. */ int xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *ip, uint iget_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; int error; /* * Fill in the location information in the in-core inode. */ error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) return error; /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); ip->i_d.di_magic = XFS_DINODE_MAGIC; ip->i_d.di_gen = prandom_u32(); if (xfs_sb_version_hascrc(&mp->m_sb)) { ip->i_d.di_version = 3; ip->i_d.di_ino = ip->i_ino; uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); } else ip->i_d.di_version = 2; return 0; } /* * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); if (error) return error; /* even unallocated inodes are verified */ if (!xfs_dinode_verify(mp, ip, dip)) { xfs_alert(mp, "%s: validation failed for inode %lld failed", __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); error = -EFSCORRUPTED; goto out_brelse; } /* * If the on-disk inode is already linked to a directory * entry, copy all of the inode into the in-core inode. 
* xfs_iformat_fork() handles copying in the inode format * specific information. * Otherwise, just get the truly permanent information. */ if (dip->di_mode) { xfs_dinode_from_disk(&ip->i_d, dip); error = xfs_iformat_fork(ip, dip); if (error) { #ifdef DEBUG xfs_alert(mp, "%s: xfs_iformat() returned error %d", __func__, error); #endif /* DEBUG */ goto out_brelse; } } else { /* * Partial initialisation of the in-core inode. Just the bits * that xfs_ialloc won't overwrite or relies on being correct. */ ip->i_d.di_magic = be16_to_cpu(dip->di_magic); ip->i_d.di_version = dip->di_version; ip->i_d.di_gen = be32_to_cpu(dip->di_gen); ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); if (dip->di_version == 3) { ip->i_d.di_ino = be64_to_cpu(dip->di_ino); uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); } /* * Make sure to pull in the mode here as well in * case the inode is released without being used. * This ensures that xfs_inactive() will see that * the inode is already free and not try to mess * with the uninitialized part of it. */ ip->i_d.di_mode = 0; } /* * Automatically convert version 1 inode formats in memory to version 2 * inode format. If the inode is modified, it will get logged and * rewritten as a version 2 inode. We can do this because we set the * superblock feature bit for v2 inodes unconditionally during mount * and it means the reast of the code can assume the inode version is 2 * or higher. */ if (ip->i_d.di_version == 1) { ip->i_d.di_version = 2; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); } ip->i_delayed_blks = 0; /* * Mark the buffer containing the inode as something to keep * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ xfs_buf_set_ref(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the on-disk * inode, because it was acquired with xfs_trans_read_buf() in * xfs_imap_to_bp() above. 
If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, * because inodes on disk are never destroyed and we will be locking the * new in-core inode before putting it in the cache where other * processes can find it. Thus we don't have to worry about the inode * being changed just because we released the buffer. */ out_brelse: xfs_trans_brelse(tp, bp); return error; }
/*
 * Searching forward from start to limit, find the first block whose
 * allocated/free state is different from start's.
 *
 * The scan covers len = limit - start + 1 bits.  On success 0 is returned
 * and *rtblock is set to start + i - 1, where i is the offset (relative to
 * start) of the first bit that differs from the bit at start, or len when
 * no bit in the range differs.  In other words *rtblock is the last block
 * of the run that shares start's state.
 *
 * A failure from xfs_rtbuf_get() is returned unchanged; in that case
 * *rtblock is not written.  Every buffer obtained here is released with
 * xfs_trans_brelse() before returning.
 */
int
xfs_rtfind_forw(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_rtblock_t	start,		/* starting block to look at */
	xfs_rtblock_t	limit,		/* last block to look at */
	xfs_rtblock_t	*rtblock)	/* out: last block matching start */
{
	xfs_rtword_t	*b;		/* current word in buffer */
	int		bit;		/* bit number in the word */
	xfs_rtblock_t	block;		/* bitmap block number */
	xfs_buf_t	*bp;		/* buf for the block */
	xfs_rtword_t	*bufp;		/* starting word in buffer */
	int		error;		/* error value */
	xfs_rtblock_t	i;		/* current bit number rel. to start */
	xfs_rtblock_t	lastbit;	/* last useful bit in the word */
	xfs_rtblock_t	len;		/* length of inspected area */
	xfs_rtword_t	mask;		/* mask of relevant bits for value */
	xfs_rtword_t	want;		/* mask for "good" values */
	xfs_rtword_t	wdiff;		/* difference from wanted value */
	int		word;		/* word number in the buffer */

	/*
	 * Compute and read in starting bitmap block for starting block.
	 */
	block = XFS_BITTOBLOCK(mp, start);
	error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
	if (error) {
		return error;
	}
	bufp = bp->b_addr;
	/*
	 * Get the first word's index & point to it.
	 */
	word = XFS_BITTOWORD(mp, start);
	b = &bufp[word];
	bit = (int)(start & (XFS_NBWORD - 1));
	len = limit - start + 1;
	/*
	 * Compute match value, based on the bit at start: if 1 (free)
	 * then all-ones, else all-zeroes.
	 */
	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
	/*
	 * If the starting position is not word-aligned, deal with the
	 * partial word.
	 */
	if (bit) {
		/*
		 * Calculate last (rightmost) bit number to look at,
		 * and mask for all the relevant bits in this word.
		 */
		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD);
		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
		/*
		 * Calculate the difference between the value there
		 * and what we're looking for.
		 */
		if ((wdiff = (*b ^ want) & mask)) {
			/*
			 * Different.  Mark where we are and return.
			 * XFS_RTLOBIT() presumably yields the index of the
			 * lowest set bit; subtracting bit makes it relative
			 * to start.
			 */
			xfs_trans_brelse(tp, bp);
			i = XFS_RTLOBIT(wdiff) - bit;
			*rtblock = start + i - 1;
			return 0;
		}
		i = lastbit - bit;
		/*
		 * Go on to next block if that's where the next word is
		 * and we need the next word.
		 */
		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
			/*
			 * If done with this block, get the next one.
			 */
			xfs_trans_brelse(tp, bp);
			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
			if (error) {
				return error;
			}
			b = bufp = bp->b_addr;
			word = 0;
		} else {
			/*
			 * Go on to the next word in the buffer.
			 */
			b++;
		}
	} else {
		/*
		 * Starting on a word boundary, no partial word.
		 */
		i = 0;
	}
	/*
	 * Loop over whole words in buffers.  When we use up one buffer
	 * we move on to the next one.
	 */
	while (len - i >= XFS_NBWORD) {
		/*
		 * Compute difference between actual and desired value.
		 */
		if ((wdiff = *b ^ want)) {
			/*
			 * Different, mark where we are and return.
			 */
			xfs_trans_brelse(tp, bp);
			i += XFS_RTLOBIT(wdiff);
			*rtblock = start + i - 1;
			return 0;
		}
		i += XFS_NBWORD;
		/*
		 * Go on to next block if that's where the next word is
		 * and we need the next word.
		 */
		if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
			/*
			 * If done with this block, get the next one.
			 */
			xfs_trans_brelse(tp, bp);
			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
			if (error) {
				return error;
			}
			b = bufp = bp->b_addr;
			word = 0;
		} else {
			/*
			 * Go on to the next word in the buffer.
			 */
			b++;
		}
	}
	/*
	 * If not ending on a word boundary, deal with the last
	 * (partial) word.
	 */
	if ((lastbit = len - i)) {
		/*
		 * Calculate mask for all the relevant bits in this word.
		 */
		mask = ((xfs_rtword_t)1 << lastbit) - 1;
		/*
		 * Compute difference between actual and desired value.
		 */
		if ((wdiff = (*b ^ want) & mask)) {
			/*
			 * Different, mark where we are and return.
			 */
			xfs_trans_brelse(tp, bp);
			i += XFS_RTLOBIT(wdiff);
			*rtblock = start + i - 1;
			return 0;
		} else
			i = len;
	}
	/*
	 * No match, return that we scanned the whole area.
	 */
	xfs_trans_brelse(tp, bp);
	*rtblock = start + i - 1;
	return 0;
}
/* * Maps a dquot to the buffer containing its on-disk version. * This returns a ptr to the buffer containing the on-disk dquot * in the bpp param, and a ptr to the on-disk dquot within that buffer */ STATIC int xfs_qm_dqtobp( xfs_trans_t **tpp, xfs_dquot_t *dqp, xfs_disk_dquot_t **O_ddpp, xfs_buf_t **O_bpp, uint flags) { xfs_bmbt_irec_t map; int nmaps, error; xfs_buf_t *bp; xfs_inode_t *quotip; xfs_mount_t *mp; xfs_disk_dquot_t *ddq; xfs_dqid_t id; boolean_t newdquot; xfs_trans_t *tp = (tpp ? *tpp : NULL); mp = dqp->q_mount; id = be32_to_cpu(dqp->q_core.d_id); nmaps = 1; newdquot = B_FALSE; /* * If we don't know where the dquot lives, find out. */ if (dqp->q_blkno == (xfs_daddr_t) 0) { /* We use the id as an index */ dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp); nmaps = 1; quotip = XFS_DQ_TO_QIP(dqp); xfs_ilock(quotip, XFS_ILOCK_SHARED); /* * Return if this type of quotas is turned off while we didn't * have an inode lock */ if (XFS_IS_THIS_QUOTA_OFF(dqp)) { xfs_iunlock(quotip, XFS_ILOCK_SHARED); return (ESRCH); } /* * Find the block map; no allocations yet */ error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, NULL, 0, &map, &nmaps, NULL); xfs_iunlock(quotip, XFS_ILOCK_SHARED); if (error) return (error); ASSERT(nmaps == 1); ASSERT(map.br_blockcount == 1); /* * offset of dquot in the (fixed sized) dquot chunk. 
*/ dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) * sizeof(xfs_dqblk_t); if (map.br_startblock == HOLESTARTBLOCK) { /* * We don't allocate unless we're asked to */ if (!(flags & XFS_QMOPT_DQALLOC)) return (ENOENT); ASSERT(tp); if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, dqp->q_fileoffset, &bp))) return (error); tp = *tpp; newdquot = B_TRUE; } else { /* * store the blkno etc so that we don't have to do the * mapping all the time */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); } } ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); ASSERT(dqp->q_blkno != HOLESTARTBLOCK); /* * Read in the buffer, unless we've just done the allocation * (in which case we already have the buf). */ if (! newdquot) { xfs_dqtrace_entry(dqp, "DQTOBP READBUF"); if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(mp), 0, &bp))) { return (error); } if (error || !bp) return XFS_ERROR(error); } ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * calculate the location of the dquot inside the buffer. */ ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); /* * A simple sanity check in case we got a corrupted dquot... */ if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), "dqtobp")) { if (!(flags & XFS_QMOPT_DQREPAIR)) { xfs_trans_brelse(tp, bp); return XFS_ERROR(EIO); } XFS_BUF_BUSY(bp); /* We dirtied this */ } *O_bpp = bp; *O_ddpp = ddq; return (0); }
/* * Recursively walks each level of a btree * to count total fsblocks in use. */ STATIC int /* error */ xfs_bmap_count_tree( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ xfs_ifork_t *ifp, /* inode fork pointer */ xfs_fsblock_t blockno, /* file system block number */ int levelin, /* level in btree */ int *count) /* Count of blocks */ { int error; xfs_buf_t *bp, *nbp; int level = levelin; __be64 *pp; xfs_fsblock_t bno = blockno; xfs_fsblock_t nextbno; struct xfs_btree_block *block, *nextblock; int numrecs; error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; *count += 1; block = XFS_BUF_TO_BLOCK(bp); if (--level) { /* Not at node above leaves, count this level of nodes */ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; *count += 1; nextblock = XFS_BUF_TO_BLOCK(nbp); nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib); xfs_trans_brelse(tp, nbp); } /* Dive to the next level */ pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (unlikely((error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) { xfs_trans_brelse(tp, bp); XFS_ERROR_REPORT("xfs_bmap_count_tree(1)", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); } xfs_trans_brelse(tp, bp); } else { /* count all level 1 nodes and their leaves */ for (;;) { nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); xfs_bmap_disk_count_leaves(mp, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) break; bno = nextbno; error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; *count += 1; block = XFS_BUF_TO_BLOCK(bp); } } return 0; }
/*
 * Readdir for block directories.
 *
 * Walks the data area of the single directory block and hands every live
 * entry at or beyond the position encoded in *offset to filldir().  When
 * filldir() returns non-zero, *offset is set to that entry's cookie so a
 * later call resumes there.  When the whole block has been walked, *offset
 * is set to the start of the (non-existent) block after m_dirdatablk.
 *
 * Returns 0, or the error from xfs_dir2_block_read().
 */
int						/* error */
xfs_dir2_block_getdents(
	xfs_inode_t		*dp,		/* incore inode */
	void			*dirent,
	xfs_off_t		*offset,
	filldir_t		filldir)
{
	xfs_mount_t		*mp = dp->i_mount; /* filesystem mount point */
	struct xfs_buf		*bp;		/* buffer for block */
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
	xfs_dir2_block_tail_t	*btp;		/* block tail */
	xfs_dir2_data_unused_t	*unused;	/* current slot seen as free */
	xfs_dir2_data_entry_t	*entry;		/* current slot seen as entry */
	char			*pos;		/* walk position in the block */
	char			*limit;		/* end of the data entries */
	int			startoff;	/* byte offset to resume at */
	int			error;		/* error return value */
	xfs_off_t		cookie;		/* position cookie of an entry */

	/* A seek pointer that lies beyond the data block: nothing to do. */
	if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk)
		return 0;

	error = xfs_dir2_block_read(NULL, dp, &bp);
	if (error)
		return error;

	/* Entries that start before this byte offset are skipped. */
	startoff = xfs_dir2_dataptr_to_off(mp, *offset);
	hdr = bp->b_addr;
	xfs_dir2_data_check(dp, bp);

	/* The data area runs from just past the header to the leaf table. */
	btp = xfs_dir2_block_tail_p(mp, hdr);
	pos = (char *)(hdr + 1);
	limit = (char *)xfs_dir2_block_leaf_p(btp);

	/* Each slot is either an unused region or a real entry. */
	while (pos < limit) {
		unused = (xfs_dir2_data_unused_t *)pos;
		if (be16_to_cpu(unused->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			/* Free space, step over it. */
			pos += be16_to_cpu(unused->length);
			continue;
		}

		/* A real entry; advance past it before deciding what to do. */
		entry = (xfs_dir2_data_entry_t *)pos;
		pos += xfs_dir2_data_entsize(entry->namelen);

		/* Only entries at or after the resume point are reported. */
		if ((char *)entry - (char *)hdr >= startoff) {
			cookie = xfs_dir2_db_off_to_dataptr(mp,
						mp->m_dirdatablk,
						(char *)entry - (char *)hdr);

			/* Caller's buffer is full: remember where we stopped. */
			if (filldir(dirent, (char *)entry->name,
				    entry->namelen, cookie & 0x7fffffff,
				    be64_to_cpu(entry->inumber), DT_UNKNOWN)) {
				*offset = cookie & 0x7fffffff;
				goto out_rele;
			}
		}
	}

	/*
	 * Walked off the end of the block: point the offset at the start of
	 * the block after this one, which does not exist.
	 */
	*offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
			0x7fffffff;
out_rele:
	xfs_trans_brelse(NULL, bp);
	return 0;
}
/* * Check that the holemask and freemask of a hypothetical inode cluster match * what's actually on disk. If sparse inodes are enabled, the cluster does * not actually have to map to inodes if the corresponding holemask bit is set. * * @cluster_base is the first inode in the cluster within the @irec. */ STATIC int xchk_iallocbt_check_cluster( struct xchk_btree *bs, struct xfs_inobt_rec_incore *irec, unsigned int cluster_base) { struct xfs_imap imap; struct xfs_mount *mp = bs->cur->bc_mp; struct xfs_dinode *dip; struct xfs_buf *cluster_bp; unsigned int nr_inodes; xfs_agnumber_t agno = bs->cur->bc_private.a.agno; xfs_agblock_t agbno; unsigned int cluster_index; uint16_t cluster_mask = 0; uint16_t ir_holemask; int error = 0; nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, mp->m_inodes_per_cluster); /* Map this inode cluster */ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); /* Compute a bitmask for this cluster that can be used for holemask. */ for (cluster_index = 0; cluster_index < nr_inodes; cluster_index += XFS_INODES_PER_HOLEMASK_BIT) cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) / XFS_INODES_PER_HOLEMASK_BIT); /* * Map the first inode of this cluster to a buffer and offset. * Be careful about inobt records that don't align with the start of * the inode buffer when block sizes are large enough to hold multiple * inode chunks. When this happens, cluster_base will be zero but * ir_startino can be large enough to make im_boffset nonzero. 
*/ ir_holemask = (irec->ir_holemask & cluster_mask); imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino); if (imap.im_boffset != 0 && cluster_base != 0) { ASSERT(imap.im_boffset == 0 || cluster_base == 0); xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return 0; } trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino, imap.im_blkno, imap.im_len, cluster_base, nr_inodes, cluster_mask, ir_holemask, XFS_INO_TO_OFFSET(mp, irec->ir_startino + cluster_base)); /* The whole cluster must be a hole or not a hole. */ if (ir_holemask != cluster_mask && ir_holemask != 0) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return 0; } /* If any part of this is a hole, skip it. */ if (ir_holemask) { xchk_xref_is_not_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, &XFS_RMAP_OINFO_INODES); return 0; } xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, &XFS_RMAP_OINFO_INODES); /* Grab the inode cluster buffer. */ error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &dip, &cluster_bp, 0, 0); if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error)) return error; /* Check free status of each inode within this cluster. */ for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) { struct xfs_dinode *dip; if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); break; } dip = xfs_buf_offset(cluster_bp, imap.im_boffset); error = xchk_iallocbt_check_cluster_ifree(bs, irec, cluster_base + cluster_index, dip); if (error) break; imap.im_boffset += mp->m_sb.sb_inodesize; } xfs_trans_brelse(bs->cur->bc_tp, cluster_bp); return error; }
/*
 * Populate the in-core inode from its on-disk counterpart.
 *
 * On a version 5 (CRC enabled) filesystem, a newly created inode on a
 * mount that does not use XFS_MOUNT_IKEEP needs no disk read at all: the
 * core is zeroed and given a random generation number.  In every other
 * case the inode cluster is read, because the existing generation number
 * (IKEEP) or the on-disk di_flushiter that log recovery depends on
 * (version 4 superblocks) has to come from disk.
 *
 * Returns 0 on success, -EFSCORRUPTED when xfs_dinode_verify() flags the
 * on-disk inode, or the error from xfs_imap(), xfs_imap_to_bp() or
 * xfs_iformat_fork().
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		iget_flags)
{
	xfs_dinode_t	*dip;
	xfs_buf_t	*bp;
	xfs_failaddr_t	bad_addr;
	int		error;

	/* Work out where on disk this inode lives. */
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
	if (error)
		return error;

	/* Fast path for inode creation: no IO is needed. */
	if ((iget_flags & XFS_IGET_CREATE) &&
	    xfs_sb_version_hascrc(&mp->m_sb) &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		/* Start from an all-zero inode core. */
		memset(&ip->i_d, 0, sizeof(ip->i_d));
		VFS_I(ip)->i_generation = prandom_u32();
		ip->i_d.di_version = 3;
		return 0;
	}

	/* Read the cluster buffer and locate the on-disk inode in it. */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
	if (error)
		return error;

	/* Verification applies to unallocated inodes as well. */
	bad_addr = xfs_dinode_verify(mp, ip->i_ino, dip);
	if (bad_addr) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dip,
				sizeof(*dip), bad_addr);
		error = -EFSCORRUPTED;
		goto out_release;
	}

	if (!dip->di_mode) {
		/*
		 * Free on disk: only pick up the fields that xfs_ialloc
		 * either leaves alone or needs to be correct.
		 */
		ip->i_d.di_version = dip->di_version;
		VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);

		/*
		 * Record the zero mode too.  Should the inode be dropped
		 * without ever being used, xfs_inactive() then sees it is
		 * already free and stays away from the parts that were
		 * never initialised.
		 */
		VFS_I(ip)->i_mode = 0;
	} else {
		/*
		 * In use on disk: copy the whole inode core, then let
		 * xfs_iformat_fork() bring in the format specific data.
		 */
		xfs_inode_from_disk(ip, dip);
		error = xfs_iformat_fork(ip, dip);
		if (error) {
#ifdef DEBUG
			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
				__func__, error);
#endif /* DEBUG */
			goto out_release;
		}
	}

	ASSERT(ip->i_d.di_version >= 2);
	ip->i_delayed_blks = 0;

	/*
	 * Ask for the inode buffer to be kept around for a while so that
	 * recently used metadata stays in core longer.
	 */
	xfs_buf_set_ref(bp, XFS_INO_REF);

	/*
	 * The buffer came from xfs_trans_read_buf() via xfs_imap_to_bp(),
	 * so it goes back through xfs_trans_brelse().  With a NULL tp that
	 * is a plain brelse(); inside a transaction the buffer is only let
	 * go if the transaction has not dirtied it.  Releasing is safe:
	 * on-disk inodes are never destroyed, and the new in-core inode is
	 * locked before it becomes visible in the cache, so nobody can
	 * change it behind our back once the buffer is gone.
	 */
 out_release:
	xfs_trans_brelse(tp, bp);
	return error;
}
/* * Read and/or modify the summary information for a given extent size, * bitmap block combination. * Keeps track of a current summary block, so we don't keep reading * it from the buffer cache. * * Summary information is returned in *sum if specified. * If no delta is specified, returns summary only. */ int xfs_rtmodify_summary_int( xfs_mount_t *mp, /* file system mount structure */ xfs_trans_t *tp, /* transaction pointer */ int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ xfs_buf_t **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { xfs_buf_t *bp; /* buffer for the summary block */ int error; /* error value */ xfs_fsblock_t sb; /* summary fsblock */ int so; /* index into the summary file */ xfs_suminfo_t *sp; /* pointer to returned data */ /* * Compute entry number in the summary file. */ so = XFS_SUMOFFS(mp, log, bbno); /* * Compute the block number in the summary file. */ sb = XFS_SUMOFFSTOBLOCK(mp, so); /* * If we have an old buffer, and the block number matches, use that. */ if (*rbpp && *rsb == sb) bp = *rbpp; /* * Otherwise we have to get the buffer. */ else { /* * If there was an old one, get rid of it first. */ if (*rbpp) xfs_trans_brelse(tp, *rbpp); error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); if (error) { return error; } /* * Remember this buffer and block for the next call. */ *rbpp = bp; *rsb = sb; } /* * Point to the summary information, modify/log it, and/or copy it out. */ sp = XFS_SUMPTR(mp, bp, so); if (delta) { uint first = (uint)((char *)sp - (char *)bp->b_addr); *sp += delta; xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1); } if (sum) *sum = *sp; return 0; }
/*
 * Add an entry to a block directory.
 *
 * The single directory block is read, a free region large enough for the
 * new data entry (and, when no stale leaf slot can be reused, for one more
 * leaf entry) is located by xfs_dir2_block_need_space(), and then both the
 * leaf table and the data area are updated and logged.
 *
 * Returns 0 on success, -ENOSPC when there is no room and either only a
 * check was requested (XFS_DA_OP_JUSTCHECK) or args->total is zero, or the
 * error from xfs_dir3_block_read(), xfs_dir2_block_to_leaf() or
 * xfs_dir2_leaf_addname().  When the block has no room and a reservation
 * exists, the directory is converted to leaf form and the add is retried
 * in that format.
 */
int						/* error */
xfs_dir2_block_addname(
	xfs_da_args_t		*args)		/* directory op arguments */
{
	xfs_dir2_data_hdr_t	*hdr;		/* block header */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
	struct xfs_buf		*bp;		/* buffer for block */
	xfs_dir2_block_tail_t	*btp;		/* block tail */
	int			compact;	/* need to compact leaf ents */
	xfs_dir2_data_entry_t	*dep;		/* block data entry */
	xfs_inode_t		*dp;		/* directory inode */
	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
	int			error;		/* error return value */
	xfs_dir2_data_unused_t	*enddup=NULL;	/* unused at end of data */
	xfs_dahash_t		hash;		/* hash value of found entry */
	int			high;		/* high index for binary srch */
	int			highstale;	/* high stale index */
	int			lfloghigh=0;	/* last final leaf to log */
	int			lfloglow=0;	/* first final leaf to log */
	int			len;		/* length of the new entry */
	int			low;		/* low index for binary srch */
	int			lowstale;	/* low stale index */
	int			mid=0;		/* midpoint for binary srch */
	int			needlog;	/* need to log header */
	int			needscan;	/* need to rescan freespace */
	__be16			*tagp;		/* pointer to tag value */
	xfs_trans_t		*tp;		/* transaction structure */

	trace_xfs_dir2_block_addname(args);

	dp = args->dp;
	tp = args->trans;

	/* Read the (one and only) directory block into bp. */
	error = xfs_dir3_block_read(tp, dp, &bp);
	if (error)
		return error;

	len = dp->d_ops->data_entsize(args->namelen);

	/*
	 * Set up pointers to parts of the block.
	 */
	hdr = bp->b_addr;
	btp = xfs_dir2_block_tail_p(args->geo, hdr);
	blp = xfs_dir2_block_leaf_p(btp);

	/*
	 * Find out if we can reuse stale entries or whether we need extra
	 * space for entry and new leaf.  A NULL dup afterwards means no
	 * suitable free space was found.
	 */
	xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup,
				  &enddup, &compact, len);

	/*
	 * Done everything we need for a space check now.
	 */
	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
		xfs_trans_brelse(tp, bp);
		if (!dup)
			return -ENOSPC;
		return 0;
	}

	/*
	 * If we don't have space for the new entry & leaf ...
	 */
	if (!dup) {
		/*
		 * Don't have a space reservation: return no-space.
		 * NOTE(review): unlike the JUSTCHECK path above, bp is not
		 * released explicitly here; presumably it stays attached to
		 * the transaction and is dealt with by the caller - confirm.
		 */
		if (args->total == 0)
			return -ENOSPC;
		/*
		 * Convert to the next larger format.
		 * Then add the new entry in that format.
		 */
		error = xfs_dir2_block_to_leaf(args, bp);
		if (error)
			return error;
		return xfs_dir2_leaf_addname(args);
	}

	needlog = needscan = 0;

	/*
	 * If need to compact the leaf entries, do it now.
	 */
	if (compact) {
		xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,
				      &lfloghigh, &lfloglow);
		/* recalculate blp post-compaction */
		blp = xfs_dir2_block_leaf_p(btp);
	} else if (btp->stale) {
		/*
		 * Set leaf logging boundaries to impossible state.
		 * For the no-stale case they're set explicitly.
		 * The MIN()/MAX() updates in the stale branch below then
		 * narrow them to the range that was actually touched.
		 */
		lfloglow = be32_to_cpu(btp->count);
		lfloghigh = -1;
	}

	/*
	 * Find the slot that's first lower than our hash value, -1 if none.
	 * The binary search lands on some entry near the target hash; the
	 * while loop then steps left past every entry whose hash is greater
	 * than or equal to ours.
	 */
	for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
		mid = (low + high) >> 1;
		if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
			break;
		if (hash < args->hashval)
			low = mid + 1;
		else
			high = mid - 1;
	}
	while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) {
		mid--;
	}
	/*
	 * No stale entries, will use enddup space to hold new leaf.
	 */
	if (!btp->stale) {
		/*
		 * Mark the space needed for the new leaf entry, now in use.
		 * The last sizeof(*blp) bytes of enddup are the ones taken.
		 */
		xfs_dir2_data_use_free(args, bp, enddup,
			(xfs_dir2_data_aoff_t)
			((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
			 sizeof(*blp)),
			(xfs_dir2_data_aoff_t)sizeof(*blp),
			&needlog, &needscan);
		/*
		 * Update the tail (entry count).
		 */
		be32_add_cpu(&btp->count, 1);
		/*
		 * If we now need to rebuild the bestfree map, do so.
		 * This needs to happen before the next call to use_free.
		 */
		if (needscan) {
			xfs_dir2_data_freescan(dp, hdr, &needlog);
			needscan = 0;
		}
		/*
		 * Adjust pointer to the first leaf entry, we're about to move
		 * the table up one to open up space for the new leaf entry.
		 * Then adjust our index to match.
		 */
		blp--;
		mid++;
		if (mid)
			memmove(blp, &blp[1], mid * sizeof(*blp));
		lfloglow = 0;
		lfloghigh = mid;
	}
	/*
	 * Use a stale leaf for our new entry.
	 */
	else {
		/*
		 * Scan down from mid for the nearest stale slot (address is
		 * XFS_DIR2_NULL_DATAPTR); lowstale ends at -1 if none.
		 */
		for (lowstale = mid;
		     lowstale >= 0 &&
			blp[lowstale].address !=
			cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
		     lowstale--)
			continue;
		/*
		 * Scan up from mid + 1 for a stale slot, but give up as soon
		 * as the low one found above is at least as close.
		 */
		for (highstale = mid + 1;
		     highstale < be32_to_cpu(btp->count) &&
			blp[highstale].address !=
			cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
			(lowstale < 0 || mid - lowstale > highstale - mid);
		     highstale++)
			continue;
		/*
		 * Move entries toward the low-numbered stale entry.
		 */
		if (lowstale >= 0 &&
		    (highstale == be32_to_cpu(btp->count) ||
		     mid - lowstale <= highstale - mid)) {
			if (mid - lowstale)
				memmove(&blp[lowstale], &blp[lowstale + 1],
					(mid - lowstale) * sizeof(*blp));
			lfloglow = MIN(lowstale, lfloglow);
			lfloghigh = MAX(mid, lfloghigh);
		}
		/*
		 * Move entries toward the high-numbered stale entry.
		 */
		else {
			ASSERT(highstale < be32_to_cpu(btp->count));
			mid++;
			if (highstale - mid)
				memmove(&blp[mid + 1], &blp[mid],
					(highstale - mid) * sizeof(*blp));
			lfloglow = MIN(mid, lfloglow);
			lfloghigh = MAX(highstale, lfloghigh);
		}
		/* One stale slot has been consumed. */
		be32_add_cpu(&btp->stale, -1);
	}
	/*
	 * Point to the new data entry.
	 */
	dep = (xfs_dir2_data_entry_t *)dup;
	/*
	 * Fill in the leaf entry.
	 */
	blp[mid].hashval = cpu_to_be32(args->hashval);
	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
				(char *)dep - (char *)hdr));
	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
	/*
	 * Mark space for the data entry used.
	 */
	xfs_dir2_data_use_free(args, bp, dup,
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
	/*
	 * Create the new data entry.  The tag at the end of the entry holds
	 * the entry's own byte offset from the start of the block.
	 */
	dep->inumber = cpu_to_be64(args->inumber);
	dep->namelen = args->namelen;
	memcpy(dep->name, args->name, args->namelen);
	dp->d_ops->data_put_ftype(dep, args->filetype);
	tagp = dp->d_ops->data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
	/*
	 * Clean up the bestfree array and log the header, tail, and entry.
	 */
	if (needscan)
		xfs_dir2_data_freescan(dp, hdr, &needlog);
	if (needlog)
		xfs_dir2_data_log_header(args, bp);
	xfs_dir2_block_log_tail(tp, bp);
	xfs_dir2_data_log_entry(args, bp, dep);
	xfs_dir3_data_check(dp, bp);
	return 0;
}