/*
 * Allocate an inode on disk.
 * Mode is used to tell whether the new inode will need space, and whether
 * it is a directory.
 *
 * The arguments IO_agbp and alloc_done are defined to work within
 * the constraint of one allocation per transaction.
 * xfs_dialloc() is designed to be called twice if it has to do an
 * allocation to make more free inodes.  On the first call,
 * IO_agbp should be set to NULL. If an inode is available,
 * i.e., xfs_dialloc() did not need to do an allocation, an inode
 * number is returned.  In this case, IO_agbp is set to NULL and
 * alloc_done is set to false.
 * If an allocation needed to be done, xfs_dialloc would return
 * the current ag_buf in IO_agbp and set alloc_done to true.
 * The caller should then commit the current transaction, allocate a new
 * transaction, and call xfs_dialloc() again, passing in the previous
 * value of IO_agbp.  IO_agbp should be held across the transactions.
 * Since the agbp is locked across the two calls, the second call is
 * guaranteed to have a free inode available.
 *
 * Once we successfully pick an inode, its number is returned and the
 * on-disk data structures are updated.  The inode itself is not read
 * in, since doing so would break ordering constraints with xfs_reclaim.
 */
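/*
 * A minimal caller-side sketch of the two-call protocol described
 * above (illustrative only, loosely modeled on xfs_ialloc(); the
 * transaction commit/reallocation and error handling are elided):
 *
 *	xfs_buf_t	*ialloc_context = NULL;
 *	boolean_t	call_again = B_FALSE;
 *	xfs_ino_t	ino;
 *
 *	error = xfs_dialloc(tp, parent, mode, okalloc,
 *			    &ialloc_context, &call_again, &ino);
 *	if (!error && call_again) {
 *		(commit tp, allocate a fresh transaction, then:)
 *		error = xfs_dialloc(tp, parent, mode, okalloc,
 *				    &ialloc_context, &call_again, &ino);
 *	}
 */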
int
xfs_dialloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_ino_t	parent,		/* parent inode (directory) */
	mode_t		mode,		/* mode bits for new inode */
	int		okalloc,	/* ok to allocate more space */
	xfs_buf_t	**IO_agbp,	/* in/out ag header's buffer */
	boolean_t	*alloc_done,	/* true if we needed to replenish
					   inode freelist */
	xfs_ino_t	*inop)		/* inode number allocated */
{
	xfs_agnumber_t	agcount;	/* number of allocation groups */
	xfs_buf_t	*agbp;		/* allocation group header's buffer */
	xfs_agnumber_t	agno;		/* allocation group number */
	xfs_agi_t	*agi;		/* allocation group header structure */
	xfs_btree_cur_t	*cur;		/* inode allocation btree cursor */
	int		error;		/* error return value */
	int		i;		/* result code */
	int		ialloced;	/* inode allocation status */
	int		noroom = 0;	/* no space for inode blk allocation */
	xfs_ino_t	ino;		/* fs-relative inode to be returned */
	/* REFERENCED */
	int		j;		/* result code */
	xfs_mount_t	*mp;		/* file system mount structure */
	int		offset;		/* index of inode in chunk */
	xfs_agino_t	pagino;		/* parent's a.g. relative inode # */
	xfs_agnumber_t	pagno;		/* parent's allocation group number */
	xfs_inobt_rec_t	rec;		/* inode allocation record */
	xfs_agnumber_t	tagno;		/* testing allocation group number */
	xfs_btree_cur_t	*tcur;		/* temp cursor */
	xfs_inobt_rec_t	trec;		/* temp inode allocation record */

	if (*IO_agbp == NULL) {
		/*
		 * We do not have an agbp, so select an initial allocation
		 * group for inode allocation.
		 */
		agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
		/*
		 * Couldn't find an allocation group satisfying the
		 * criteria, give up.
		 */
		if (!agbp) {
			*inop = NULLFSINO;
			return 0;
		}
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	} else {
		/*
		 * Continue where we left off before.  In this case, we
		 * know that the allocation group has free inodes.
		 */
		agbp = *IO_agbp;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
		ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
	}
	mp = tp->t_mountp;
	agcount = mp->m_sb.sb_agcount;
	agno = be32_to_cpu(agi->agi_seqno);
	tagno = agno;
	pagno = XFS_INO_TO_AGNO(mp, parent);
	pagino = XFS_INO_TO_AGINO(mp, parent);

	/*
	 * If we have already hit the maximum allowed inode count, clear
	 * okalloc so we scan all available agi structures for a free
	 * inode.
	 */
	if (mp->m_maxicount &&
	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
		noroom = 1;
		okalloc = 0;
	}
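	/*
	 * For example, if m_maxicount were 1000000 and sb_icount were
	 * 999980, allocating another chunk (typically 64 inodes) would
	 * overshoot the limit; we then only hunt for an existing free
	 * inode, and return ENOSPC below if the scan comes up empty.
	 */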

	/*
	 * Loop until we find an allocation group that either has free inodes
	 * or in which we can allocate some inodes.  Iterate through the
	 * allocation groups upward, wrapping at the end.
	 */
	*alloc_done = B_FALSE;
	while (!agi->agi_freecount) {
		/*
		 * Don't do anything if we're not supposed to allocate
		 * any blocks, just go on to the next ag.
		 */
		if (okalloc) {
			/*
			 * Try to allocate some new inodes in the allocation
			 * group.
			 */
			if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
				xfs_trans_brelse(tp, agbp);
				if (error == ENOSPC) {
					*inop = NULLFSINO;
					return 0;
				} else
					return error;
			}
			if (ialloced) {
				/*
				 * We successfully allocated some inodes, return
				 * the current context to the caller so that it
				 * can commit the current transaction and call
				 * us again where we left off.
				 */
				ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
				*alloc_done = B_TRUE;
				*IO_agbp = agbp;
				*inop = NULLFSINO;
				return 0;
			}
		}
		/*
		 * If it failed, give up on this ag.
		 */
		xfs_trans_brelse(tp, agbp);
		/*
		 * Go on to the next ag: get its ag header.
		 */
nextag:
		if (++tagno == agcount)
			tagno = 0;
		if (tagno == agno) {
			*inop = NULLFSINO;
			return noroom ? ENOSPC : 0;
		}
		down_read(&mp->m_peraglock);
		if (mp->m_perag[tagno].pagi_inodeok == 0) {
			up_read(&mp->m_peraglock);
			goto nextag;
		}
		error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
		up_read(&mp->m_peraglock);
		if (error)
			goto nextag;
		agi = XFS_BUF_TO_AGI(agbp);
		ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
	}
	/*
	 * Here with an allocation group that has a free inode.
	 * Reset agno since we may have chosen a new ag in the
	 * loop above.
	 */
	agno = tagno;
	*IO_agbp = NULL;
	cur = xfs_btree_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno),
				    XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
	/*
	 * If pagino is 0 (this is the root inode allocation) use newino.
	 * This must work because we've just allocated some.
	 */
	if (!pagino)
		pagino = be32_to_cpu(agi->agi_newino);
#ifdef DEBUG
	if (cur->bc_nlevels == 1) {
		int	freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);

		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	/*
	 * If in the same a.g. as the parent, try to get near the parent.
	 */
	if (pagno == agno) {
		if ((error = xfs_inobt_lookup_le(cur, pagino, 0, 0, &i)))
			goto error0;
		if (i != 0 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * Found a free inode in the same chunk
			 * as parent, done.
			 */
		}
		/*
		 * In the same a.g. as parent, but parent's chunk is full.
		 */
		else {
			int	doneleft;	/* done, to the left */
			int	doneright;	/* done, to the right */

			if (error)
				goto error0;
			ASSERT(i == 1);
			ASSERT(j == 1);
			/*
			 * Duplicate the cursor, search left & right
			 * simultaneously.
			 */
			if ((error = xfs_btree_dup_cursor(cur, &tcur)))
				goto error0;
			/*
			 * Search left with tcur, back up 1 record.
			 */
			if ((error = xfs_inobt_decrement(tcur, 0, &i)))
				goto error1;
			doneleft = !i;
			if (!doneleft) {
				if ((error = xfs_inobt_get_rec(tcur,
						&trec.ir_startino,
						&trec.ir_freecount,
						&trec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Search right with cur, go forward 1 record.
			 */
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error1;
			doneright = !i;
			if (!doneright) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount,
						&rec.ir_free, &i)))
					goto error1;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error1);
			}
			/*
			 * Loop until we find the closest inode chunk
			 * with a free one.
			 */
			while (!doneleft || !doneright) {
				int	useleft;  /* using left inode
						     chunk this time */

				/*
				 * Figure out which block is closer,
				 * if both are valid.
				 */
				if (!doneleft && !doneright)
					useleft =
						pagino -
						(trec.ir_startino +
						 XFS_INODES_PER_CHUNK - 1) <
						 rec.ir_startino - pagino;
				else
					useleft = !doneleft;
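				/*
				 * Worked example with made-up numbers:
				 * if XFS_INODES_PER_CHUNK is 64,
				 * pagino is 200, the left chunk starts
				 * at 128 (last inode 191, distance 9)
				 * and the right chunk starts at 256
				 * (distance 56), the left chunk is
				 * closer and useleft is set.
				 */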
				/*
				 * If checking the left, does it have
				 * free inodes?
				 */
				if (useleft && trec.ir_freecount) {
					/*
					 * Yes, set it up as the chunk to use.
					 */
					rec = trec;
					xfs_btree_del_cursor(cur,
						XFS_BTREE_NOERROR);
					cur = tcur;
					break;
				}
				/*
				 * If checking the right, does it have
				 * free inodes?
				 */
				if (!useleft && rec.ir_freecount) {
					/*
					 * Yes, it's already set up.
					 */
					xfs_btree_del_cursor(tcur,
						XFS_BTREE_NOERROR);
					break;
				}
				/*
				 * If used the left, get another one
				 * further left.
				 */
				if (useleft) {
					if ((error = xfs_inobt_decrement(tcur, 0,
							&i)))
						goto error1;
					doneleft = !i;
					if (!doneleft) {
						if ((error = xfs_inobt_get_rec(
							    tcur,
							    &trec.ir_startino,
							    &trec.ir_freecount,
							    &trec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
				/*
				 * If used the right, get another one
				 * further right.
				 */
				else {
					if ((error = xfs_inobt_increment(cur, 0,
							&i)))
						goto error1;
					doneright = !i;
					if (!doneright) {
						if ((error = xfs_inobt_get_rec(
							    cur,
							    &rec.ir_startino,
							    &rec.ir_freecount,
							    &rec.ir_free, &i)))
							goto error1;
						XFS_WANT_CORRUPTED_GOTO(i == 1,
							error1);
					}
				}
			}
			ASSERT(!doneleft || !doneright);
		}
	}
	/*
	 * In a different a.g. from the parent.
	 * See if the most recently allocated block has any free.
	 */
	else if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
		if ((error = xfs_inobt_lookup_eq(cur,
				be32_to_cpu(agi->agi_newino), 0, 0, &i)))
			goto error0;
		if (i == 1 &&
		    (error = xfs_inobt_get_rec(cur, &rec.ir_startino,
			    &rec.ir_freecount, &rec.ir_free, &j)) == 0 &&
		    j == 1 &&
		    rec.ir_freecount > 0) {
			/*
			 * The last chunk allocated in the group still has
			 * a free inode.
			 */
		}
		/*
		 * None left in the last group, search the whole a.g.
		 */
		else {
			if (error)
				goto error0;
			if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
				goto error0;
			ASSERT(i == 1);
			for (;;) {
				if ((error = xfs_inobt_get_rec(cur,
						&rec.ir_startino,
						&rec.ir_freecount, &rec.ir_free,
						&i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
				if (rec.ir_freecount > 0)
					break;
				if ((error = xfs_inobt_increment(cur, 0, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			}
		}
	}
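	/*
	 * At this point rec describes an inode chunk with at least one
	 * free inode: ir_free is a bitmask with one bit per inode in
	 * the chunk, set bits marking free inodes.  For example, if
	 * ir_free were 0xc, inodes 2 and 3 of the chunk would be free
	 * and (assuming the macro picks the lowest set bit) the offset
	 * found below would be 2.
	 */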
	offset = XFS_IALLOC_FIND_FREE(&rec.ir_free);
	ASSERT(offset >= 0);
	ASSERT(offset < XFS_INODES_PER_CHUNK);
	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
				   XFS_INODES_PER_CHUNK) == 0);
	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset);
	XFS_INOBT_CLR_FREE(&rec, offset);
	rec.ir_freecount--;
	if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount,
			rec.ir_free)))
		goto error0;
	be32_add(&agi->agi_freecount, -1);
	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
	down_read(&mp->m_peraglock);
	mp->m_perag[tagno].pagi_freecount--;
	up_read(&mp->m_peraglock);
#ifdef DEBUG
	if (cur->bc_nlevels == 1) {
		int	freecount = 0;

		if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		do {
			if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
					&rec.ir_freecount, &rec.ir_free, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			freecount += rec.ir_freecount;
			if ((error = xfs_inobt_increment(cur, 0, &i)))
				goto error0;
		} while (i == 1);
		ASSERT(freecount == be32_to_cpu(agi->agi_freecount) ||
		       XFS_FORCED_SHUTDOWN(mp));
	}
#endif
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
	*inop = ino;
	return 0;
error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
/*
 * Remove an entry from a block format directory.
 * If that makes the block small enough to fit in shortform, transform it.
 */
int						/* error */
xfs_dir2_block_removename(
	xfs_da_args_t		*args)		/* directory operation args */
{
	xfs_dir2_block_t	*block;		/* block structure */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf pointer */
	xfs_dabuf_t		*bp;		/* block buffer */
	xfs_dir2_block_tail_t	*btp;		/* block tail */
	xfs_dir2_data_entry_t	*dep;		/* block data entry */
	xfs_inode_t		*dp;		/* incore inode */
	int			ent;		/* block leaf entry index */
	int			error;		/* error return value */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log block header */
	int			needscan;	/* need to fixup bestfree */
	xfs_dir2_sf_hdr_t	sfh;		/* shortform header */
	int			size;		/* shortform size */
	xfs_trans_t		*tp;		/* transaction pointer */

	xfs_dir2_trace_args("block_removename", args);
	/*
	 * Look up the entry in the block.  Gets the buffer and entry index.
	 * It will always be there; the vnodeops level does a lookup first.
	 */
	if ((error = xfs_dir2_block_lookup_int(args, &bp, &ent))) {
		return error;
	}
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
	block = bp->data;
	btp = xfs_dir2_block_tail_p(mp, block);
	blp = xfs_dir2_block_leaf_p(btp);
	/*
	 * Point to the data entry using the leaf entry.
	 */
	dep = (xfs_dir2_data_entry_t *)
	      ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
	/*
	 * Mark the data entry's space free.
	 */
	needlog = needscan = 0;
	xfs_dir2_data_make_free(tp, bp,
		(xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
	/*
	 * Fix up the block tail.
	 */
	be32_add(&btp->stale, 1);
	xfs_dir2_block_log_tail(tp, bp);
	/*
	 * Remove the leaf entry by marking it stale.
	 */
	blp[ent].address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
	xfs_dir2_block_log_leaf(tp, bp, ent, ent);
	/*
	 * Fix up bestfree, log the header if necessary.
	 */
	if (needscan)
		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
	if (needlog)
		xfs_dir2_data_log_header(tp, bp);
	xfs_dir2_data_check(dp, bp);
	/*
	 * See if the size as a shortform is good enough.
	 */
	if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
	    XFS_IFORK_DSIZE(dp)) {
		xfs_da_buf_done(bp);
		return 0;
	}
	/*
	 * If it works, do the conversion.
	 */
	return xfs_dir2_block_to_sf(args, bp, size, &sfh);
}
/*
 * Allocate new inodes in the allocation group specified by agbp.
 * Return 0 for success, else error code.
 */
STATIC int				/* error code or 0 */
xfs_ialloc_ag_alloc(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_buf_t	*agbp,		/* alloc group buffer */
	int		*alloc)
{
	xfs_agi_t	*agi;		/* allocation group header */
	xfs_alloc_arg_t	args;		/* allocation argument structure */
	int		blks_per_cluster;  /* fs blocks per inode cluster */
	xfs_btree_cur_t	*cur;		/* inode btree cursor */
	xfs_daddr_t	d;		/* disk addr of buffer */
	int		error;
	xfs_buf_t	*fbuf;		/* new free inodes' buffer */
	xfs_dinode_t	*free;		/* new free inode structure */
	int		i;		/* inode counter */
	int		j;		/* block counter */
	int		nbufs;		/* num bufs of new inodes */
	xfs_agino_t	newino;		/* new first inode's number */
	xfs_agino_t	newlen;		/* new number of inodes */
	int		ninodes;	/* num inodes per buf */
	xfs_agino_t	thisino;	/* current inode number, for loop */
	int		version;	/* inode version number to use */
	int		isaligned = 0;	/* inode allocation at stripe unit */
					/* boundary */

	args.tp = tp;
	args.mp = tp->t_mountp;

	/*
	 * Locking will ensure that we don't have two callers in here
	 * at one time.
	 */
	newlen = XFS_IALLOC_INODES(args.mp);
	if (args.mp->m_maxicount &&
	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
		return XFS_ERROR(ENOSPC);
	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
	/*
	 * First try to allocate inodes contiguous with the last-allocated
	 * chunk of inodes.  If the filesystem is striped, this will fill
	 * an entire stripe unit with inodes.
	 */
	agi = XFS_BUF_TO_AGI(agbp);
	newino = be32_to_cpu(agi->agi_newino);
	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
			XFS_IALLOC_BLOCKS(args.mp);
	if (likely(newino != NULLAGINO &&
		  (args.agbno < be32_to_cpu(agi->agi_length)))) {
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		args.type = XFS_ALLOCTYPE_THIS_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;
		args.alignment = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	} else
		args.fsbno = NULLFSBLOCK;

	if (unlikely(args.fsbno == NULLFSBLOCK)) {
		/*
		 * Set the alignment for the allocation.
		 * If stripe alignment is turned on then align at stripe unit
		 * boundary.
		 * If the cluster size is smaller than a filesystem block
		 * then we're doing I/O for inodes in filesystem block size
		 * pieces, so don't need alignment anyway.
		 */
		isaligned = 0;
		if (args.mp->m_sinoalign) {
			ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
			args.alignment = args.mp->m_dalign;
			isaligned = 1;
		} else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
			   args.mp->m_sb.sb_inoalignmt >=
			   XFS_B_TO_FSBT(args.mp,
					 XFS_INODE_CLUSTER_SIZE(args.mp)))
			args.alignment = args.mp->m_sb.sb_inoalignmt;
		else
			args.alignment = 1;
		/*
		 * Need to figure out where to allocate the inode blocks.
		 * Ideally they should be spaced out through the a.g.
		 * For now, just allocate blocks up front.
		 */
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		/*
		 * Allocate a fixed-size extent of inodes.
		 */
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.mod = args.total = args.wasdel = args.isfl =
			args.userdata = args.minalignslop = 0;
		args.prod = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	/*
	 * If stripe alignment is turned on, then try again with cluster
	 * alignment.
	 */
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.agbno = be32_to_cpu(agi->agi_root);
		args.fsbno = XFS_AGB_TO_FSB(args.mp,
				be32_to_cpu(agi->agi_seqno), args.agbno);
		if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
			args.mp->m_sb.sb_inoalignmt >=
			XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
				args.alignment = args.mp->m_sb.sb_inoalignmt;
		else
			args.alignment = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	if (args.fsbno == NULLFSBLOCK) {
		*alloc = 0;
		return 0;
	}
	ASSERT(args.len == args.minlen);
	/*
	 * Convert the results.
	 */
	newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0);
	/*
	 * Loop over the new block(s), filling in the inodes.
	 * For small block sizes, manipulate the inodes in buffers
	 * which are multiples of the block size.
	 */
	if (args.mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(args.mp)) {
		blks_per_cluster = 1;
		nbufs = (int)args.len;
		ninodes = args.mp->m_sb.sb_inopblock;
	} else {
		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(args.mp) /
				   args.mp->m_sb.sb_blocksize;
		nbufs = (int)args.len / blks_per_cluster;
		ninodes = blks_per_cluster * args.mp->m_sb.sb_inopblock;
	}
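	/*
	 * Worked example with a hypothetical geometry: 4 KB blocks, an
	 * 8 KB inode cluster and 256-byte inodes (sb_inopblock == 16)
	 * give blks_per_cluster == 2 and ninodes == 32, so a 64-inode
	 * chunk (args.len == 4) is initialized in nbufs == 2 buffers.
	 */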
	/*
	 * Figure out what version number to use in the inodes we create.
	 * If the superblock version has caught up to the one that supports
	 * the new inode format, then use the new inode version.  Otherwise
	 * use the old version so that old kernels will continue to be
	 * able to use the file system.
	 */
	if (XFS_SB_VERSION_HASNLINK(&args.mp->m_sb))
		version = XFS_DINODE_VERSION_2;
	else
		version = XFS_DINODE_VERSION_1;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(args.mp, be32_to_cpu(agi->agi_seqno),
				     args.agbno + (j * blks_per_cluster));
		fbuf = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, d,
					 args.mp->m_bsize * blks_per_cluster,
					 XFS_BUF_LOCK);
		ASSERT(fbuf);
		ASSERT(!XFS_BUF_GETERROR(fbuf));
		/*
		 * Set initial values for the inodes in this buffer.
		 */
		xfs_biozero(fbuf, 0, ninodes << args.mp->m_sb.sb_inodelog);
		for (i = 0; i < ninodes; i++) {
			free = XFS_MAKE_IPTR(args.mp, fbuf, i);
			INT_SET(free->di_core.di_magic, ARCH_CONVERT, XFS_DINODE_MAGIC);
			INT_SET(free->di_core.di_version, ARCH_CONVERT, version);
			INT_SET(free->di_next_unlinked, ARCH_CONVERT, NULLAGINO);
			xfs_ialloc_log_di(tp, fbuf, i,
				XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
		}
		xfs_trans_inode_alloc_buf(tp, fbuf);
	}
	be32_add(&agi->agi_count, newlen);
	be32_add(&agi->agi_freecount, newlen);
	down_read(&args.mp->m_peraglock);
	args.mp->m_perag[be32_to_cpu(agi->agi_seqno)].pagi_freecount += newlen;
	up_read(&args.mp->m_peraglock);
	agi->agi_newino = cpu_to_be32(newino);
	/*
	 * Insert records describing the new inode chunk into the btree.
	 */
	cur = xfs_btree_init_cursor(args.mp, tp, agbp,
			be32_to_cpu(agi->agi_seqno),
			XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
	for (thisino = newino;
	     thisino < newino + newlen;
	     thisino += XFS_INODES_PER_CHUNK) {
		if ((error = xfs_inobt_lookup_eq(cur, thisino,
				XFS_INODES_PER_CHUNK, XFS_INOBT_ALL_FREE, &i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 0);
		if ((error = xfs_inobt_insert(cur, &i))) {
			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
			return error;
		}
		ASSERT(i == 1);
	}
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	/*
	 * Log allocation group header fields
	 */
	xfs_ialloc_log_agi(tp, agbp,
		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
	/*
	 * Modify/log superblock values for inode count and inode free count.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
	*alloc = 1;
	return 0;
}
/*
 * Add an entry to a block directory.
 */
int						/* error */
xfs_dir2_block_addname(
	xfs_da_args_t		*args)		/* directory op arguments */
{
	xfs_dir2_data_free_t	*bf;		/* bestfree table in block */
	xfs_dir2_block_t	*block;		/* directory block structure */
	xfs_dir2_leaf_entry_t	*blp;		/* block leaf entries */
	xfs_dabuf_t		*bp;		/* buffer for block */
	xfs_dir2_block_tail_t	*btp;		/* block tail */
	int			compact;	/* need to compact leaf ents */
	xfs_dir2_data_entry_t	*dep;		/* block data entry */
	xfs_inode_t		*dp;		/* directory inode */
	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
	int			error;		/* error return value */
	xfs_dir2_data_unused_t	*enddup=NULL;	/* unused at end of data */
	xfs_dahash_t		hash;		/* hash value of found entry */
	int			high;		/* high index for binary srch */
	int			highstale;	/* high stale index */
	int			lfloghigh=0;	/* last final leaf to log */
	int			lfloglow=0;	/* first final leaf to log */
	int			len;		/* length of the new entry */
	int			low;		/* low index for binary srch */
	int			lowstale;	/* low stale index */
	int			mid=0;		/* midpoint for binary srch */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			needlog;	/* need to log header */
	int			needscan;	/* need to rescan freespace */
	__be16			*tagp;		/* pointer to tag value */
	xfs_trans_t		*tp;		/* transaction structure */

	xfs_dir2_trace_args("block_addname", args);
	dp = args->dp;
	tp = args->trans;
	mp = dp->i_mount;
	/*
	 * Read the (one and only) directory block into dabuf bp.
	 */
	if ((error =
	    xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &bp, XFS_DATA_FORK))) {
		return error;
	}
	ASSERT(bp != NULL);
	block = bp->data;
	/*
	 * Check the magic number, corrupted if wrong.
	 */
	if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
		XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
				     XFS_ERRLEVEL_LOW, mp, block);
		xfs_da_brelse(tp, bp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	len = xfs_dir2_data_entsize(args->namelen);
	/*
	 * Set up pointers to parts of the block.
	 */
	bf = block->hdr.bestfree;
	btp = xfs_dir2_block_tail_p(mp, block);
	blp = xfs_dir2_block_leaf_p(btp);
	/*
	 * No stale entries?  Need space for entry and new leaf.
	 */
	if (!btp->stale) {
		/*
		 * Tag just before the first leaf entry.
		 */
		tagp = (__be16 *)blp - 1;
		/*
		 * Data object just before the first leaf entry.
		 */
		enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
		/*
		 * If it's not free then can't do this add without cleaning up:
		 * the space before the first leaf entry needs to be free so it
		 * can be expanded to hold the pointer to the new entry.
		 */
		if (be16_to_cpu(enddup->freetag) != XFS_DIR2_DATA_FREE_TAG)
			dup = enddup = NULL;
		/*
		 * Check out the biggest freespace and see if it's the same one.
		 */
		else {
			dup = (xfs_dir2_data_unused_t *)
			      ((char *)block + be16_to_cpu(bf[0].offset));
			if (dup == enddup) {
				/*
				 * It is the biggest freespace, is it too small
				 * to hold the new leaf too?
				 */
				if (be16_to_cpu(dup->length) < len + (uint)sizeof(*blp)) {
					/*
					 * Yes, we use the second-largest
					 * entry instead if it works.
					 */
					if (be16_to_cpu(bf[1].length) >= len)
						dup = (xfs_dir2_data_unused_t *)
						      ((char *)block +
						       be16_to_cpu(bf[1].offset));
					else
						dup = NULL;
				}
			} else {
				/*
				 * Not the same free entry,
				 * just check its length.
				 */
				if (be16_to_cpu(dup->length) < len) {
					dup = NULL;
				}
			}
		}
		compact = 0;
	}
	/*
	 * If there are stale entries we'll use one for the leaf.
	 * Is the biggest entry enough to avoid compaction?
	 */
	else if (be16_to_cpu(bf[0].length) >= len) {
		dup = (xfs_dir2_data_unused_t *)
		      ((char *)block + be16_to_cpu(bf[0].offset));
		compact = 0;
	}
	/*
	 * Will need to compact to make this work.
	 */
	else {
		/*
		 * Tag just before the first leaf entry.
		 */
		tagp = (__be16 *)blp - 1;
		/*
		 * Data object just before the first leaf entry.
		 */
		dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
		/*
		 * If it's not free then the data will go where the
		 * leaf data starts now, if it works at all.
		 */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			if (be16_to_cpu(dup->length) + (be32_to_cpu(btp->stale) - 1) *
			    (uint)sizeof(*blp) < len)
				dup = NULL;
		} else if ((be32_to_cpu(btp->stale) - 1) * (uint)sizeof(*blp) < len)
			dup = NULL;
		else
			dup = (xfs_dir2_data_unused_t *)blp;
		compact = 1;
	}
	/*
	 * If this isn't a real add, we're done with the buffer.
	 */
	if (args->justcheck)
		xfs_da_brelse(tp, bp);
	/*
	 * If we don't have space for the new entry & leaf ...
	 */
	if (!dup) {
		/*
		 * Not trying to actually do anything, or don't have
		 * a space reservation: return no-space.
		 */
		if (args->justcheck || args->total == 0)
			return XFS_ERROR(ENOSPC);
		/*
		 * Convert to the next larger format.
		 * Then add the new entry in that format.
		 */
		error = xfs_dir2_block_to_leaf(args, bp);
		xfs_da_buf_done(bp);
		if (error)
			return error;
		return xfs_dir2_leaf_addname(args);
	}
	/*
	 * Just checking, and it would work, so say so.
	 */
	if (args->justcheck)
		return 0;
	needlog = needscan = 0;
	/*
	 * If need to compact the leaf entries, do it now.
	 * Leave the highest-numbered stale entry stale.
	 * XXX should be the one closest to mid but mid is not yet computed.
	 */
	if (compact) {
		int	fromidx;		/* source leaf index */
		int	toidx;			/* target leaf index */

		for (fromidx = toidx = be32_to_cpu(btp->count) - 1,
			highstale = lfloghigh = -1;
		     fromidx >= 0;
		     fromidx--) {
			if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
				if (highstale == -1)
					highstale = toidx;
				else {
					if (lfloghigh == -1)
						lfloghigh = toidx;
					continue;
				}
			}
			if (fromidx < toidx)
				blp[toidx] = blp[fromidx];
			toidx--;
		}
		lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);
		lfloghigh -= be32_to_cpu(btp->stale) - 1;
		be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1));
		xfs_dir2_data_make_free(tp, bp,
			(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
			(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
			&needlog, &needscan);
		blp += be32_to_cpu(btp->stale) - 1;
		btp->stale = cpu_to_be32(1);
		/*
		 * If we now need to rebuild the bestfree map, do so.
		 * This needs to happen before the next call to use_free.
		 */
		if (needscan) {
			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
			needscan = 0;
		}
	}
	/*
	 * Set leaf logging boundaries to impossible state.
	 * For the no-stale case they're set explicitly.
	 */
	else if (btp->stale) {
		lfloglow = be32_to_cpu(btp->count);
		lfloghigh = -1;
	}
	/*
	 * Find the last slot whose hash value is lower than ours;
	 * mid is left at -1 if there is no such slot.
	 */
	for (low = 0, high = be32_to_cpu(btp->count) - 1; low <= high; ) {
		mid = (low + high) >> 1;
		if ((hash = be32_to_cpu(blp[mid].hashval)) == args->hashval)
			break;
		if (hash < args->hashval)
			low = mid + 1;
		else
			high = mid - 1;
	}
	while (mid >= 0 && be32_to_cpu(blp[mid].hashval) >= args->hashval) {
		mid--;
	}
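	/*
	 * For example, if the leaf hash values were { 10, 20, 20, 30 }
	 * and args->hashval were 20, the binary search would stop at
	 * index 1 and the loop above would back mid up to 0, the last
	 * slot whose hash value is less than ours (-1 if there were no
	 * such slot); the new entry is inserted just after it, keeping
	 * equal hash values contiguous.
	 */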
	/*
	 * No stale entries, will use enddup space to hold new leaf.
	 */
	if (!btp->stale) {
		/*
		 * Mark the space needed for the new leaf entry, now in use.
		 */
		xfs_dir2_data_use_free(tp, bp, enddup,
			(xfs_dir2_data_aoff_t)
			((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
			 sizeof(*blp)),
			(xfs_dir2_data_aoff_t)sizeof(*blp),
			&needlog, &needscan);
		/*
		 * Update the tail (entry count).
		 */
		be32_add(&btp->count, 1);
		/*
		 * If we now need to rebuild the bestfree map, do so.
		 * This needs to happen before the next call to use_free.
		 */
		if (needscan) {
			xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
				&needlog);
			needscan = 0;
		}
		/*
		 * Adjust pointer to the first leaf entry, we're about to move
		 * the table up one to open up space for the new leaf entry.
		 * Then adjust our index to match.
		 */
		blp--;
		mid++;
		if (mid)
			memmove(blp, &blp[1], mid * sizeof(*blp));
		lfloglow = 0;
		lfloghigh = mid;
	}
	/*
	 * Use a stale leaf for our new entry.
	 */
	else {
		for (lowstale = mid;
		     lowstale >= 0 &&
			be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
		     lowstale--)
			continue;
		for (highstale = mid + 1;
		     highstale < be32_to_cpu(btp->count) &&
			be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
			(lowstale < 0 || mid - lowstale > highstale - mid);
		     highstale++)
			continue;
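		/*
		 * lowstale is now the nearest stale slot at or below
		 * mid, or -1 if there is none; the upward scan stops
		 * as soon as it finds a stale slot or the left side is
		 * known to be at least as close.  Whichever stale slot
		 * is closer gets consumed below, minimizing the size
		 * of the memmove.
		 */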
		/*
		 * Move entries toward the low-numbered stale entry.
		 */
		if (lowstale >= 0 &&
		    (highstale == be32_to_cpu(btp->count) ||
		     mid - lowstale <= highstale - mid)) {
			if (mid - lowstale)
				memmove(&blp[lowstale], &blp[lowstale + 1],
					(mid - lowstale) * sizeof(*blp));
			lfloglow = MIN(lowstale, lfloglow);
			lfloghigh = MAX(mid, lfloghigh);
		}
		/*
		 * Move entries toward the high-numbered stale entry.
		 */
		else {
			ASSERT(highstale < be32_to_cpu(btp->count));
			mid++;
			if (highstale - mid)
				memmove(&blp[mid + 1], &blp[mid],
					(highstale - mid) * sizeof(*blp));
			lfloglow = MIN(mid, lfloglow);
			lfloghigh = MAX(highstale, lfloghigh);
		}
		be32_add(&btp->stale, -1);
	}
	/*
	 * Point to the new data entry.
	 */
	dep = (xfs_dir2_data_entry_t *)dup;
	/*
	 * Fill in the leaf entry.
	 */
	blp[mid].hashval = cpu_to_be32(args->hashval);
	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
				(char *)dep - (char *)block));
	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
	/*
	 * Mark space for the data entry used.
	 */
	xfs_dir2_data_use_free(tp, bp, dup,
		(xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
	/*
	 * Create the new data entry.
	 */
	dep->inumber = cpu_to_be64(args->inumber);
	dep->namelen = args->namelen;
	memcpy(dep->name, args->name, args->namelen);
	tagp = xfs_dir2_data_entry_tag_p(dep);
	*tagp = cpu_to_be16((char *)dep - (char *)block);
	/*
	 * Clean up the bestfree array and log the header, tail, and entry.
	 */
	if (needscan)
		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
	if (needlog)
		xfs_dir2_data_log_header(tp, bp);
	xfs_dir2_block_log_tail(tp, bp);
	xfs_dir2_data_log_entry(tp, bp, dep);
	xfs_dir2_data_check(dp, bp);
	xfs_da_buf_done(bp);
	return 0;
}