Example #1
int
fusefs_rmdir(void *v)
{
	struct vop_rmdir_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = cnp->cn_proc;
	struct fusefs_node *ip, *dp;
	struct fusefs_mnt *fmp;
	struct fusebuf *fbuf;
	int error;

	ip = VTOI(vp);
	dp = VTOI(dvp);
	fmp = (struct fusefs_mnt *)ip->ufs_ino.i_ump;

	if (!fmp->sess_init) {
		error = ENXIO;
		goto out;
	}

	if (fmp->undef_op & UNDEF_RMDIR) {
		error = ENOSYS;
		goto out;
	}

	/*
	 * No rmdir "." please.
	 */
	if (dp == ip) {
		vrele(dvp);
		vput(vp);
		return (EINVAL);
	}

	VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);

	fbuf = fb_setup(cnp->cn_namelen + 1, dp->ufs_ino.i_number,
	    FBT_RMDIR, p);
	memcpy(fbuf->fb_dat, cnp->cn_nameptr, cnp->cn_namelen);
	fbuf->fb_dat[cnp->cn_namelen] = '\0';

	error = fb_queue(fmp->dev, fbuf);

	if (error) {
		if (error == ENOSYS)
			fmp->undef_op |= UNDEF_RMDIR;
		if (error != ENOTEMPTY)
			VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK);

		fb_delete(fbuf);
		goto out;
	}

	cache_purge(dvp);
	vput(dvp);
	dvp = NULL;

	cache_purge(ITOV(ip));
	fb_delete(fbuf);
out:
	if (dvp)
		vput(dvp);
	VN_KNOTE(vp, NOTE_DELETE);
	vput(vp);
	return (error);
}
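
A note on the pattern above: the fusebuf round trip is the whole userspace handshake. fb_setup() sizes the message for the component name plus a terminating NUL, fb_queue() hands it to the FUSE daemon through the device and waits for the answer, and fb_delete() must run on every exit path. Below is a minimal userspace-style sketch of that allocate/fill/free discipline; the struct and helpers are illustrative stand-ins, not the real fusefs API.

#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for struct fusebuf; the real one also
 * carries an opcode, inode number, and reply fields. */
struct sketch_fbuf {
	size_t	fb_len;
	char	fb_dat[];		/* payload: NUL-terminated name */
};

static struct sketch_fbuf *
fb_setup_sketch(size_t datlen)
{
	struct sketch_fbuf *fbuf;

	fbuf = malloc(sizeof(*fbuf) + datlen);
	if (fbuf != NULL)
		fbuf->fb_len = datlen;
	return fbuf;
}

static int
rmdir_msg_sketch(const char *name, size_t namelen)
{
	struct sketch_fbuf *fbuf;

	/* namelen + 1 leaves room for the NUL, as in fusefs_rmdir(). */
	fbuf = fb_setup_sketch(namelen + 1);
	if (fbuf == NULL)
		return -1;
	memcpy(fbuf->fb_dat, name, namelen);
	fbuf->fb_dat[namelen] = '\0';

	/* ... fb_queue() would block here for the daemon's reply ... */

	free(fbuf);			/* fb_delete() on every path */
	return 0;
}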
Example #2
/*
 * Update disk usage, and take corrective action.
 */
int
chkdq(struct inode *ip, ufs2_daddr_t change, struct ucred *cred, int flags)
{
	struct dquot *dq;
	ufs2_daddr_t ncurblocks;
	struct vnode *vp = ITOV(ip);
	int i, error, warn, do_check;

	/*
	 * Disk quotas must be turned off for system files.  Currently
	 * snapshot and quota files.
	 */
	if ((vp->v_vflag & VV_SYSTEM) != 0)
		return (0);
	/*
	 * XXX: Turn off quotas for files with a negative UID or GID.
	 * This prevents the creation of 100GB+ quota files.
	 */
	if ((int)ip->i_uid < 0 || (int)ip->i_gid < 0)
		return (0);
#ifdef DIAGNOSTIC
	if ((flags & CHOWN) == 0)
		chkdquot(ip);
#endif
	if (change == 0)
		return (0);
	if (change < 0) {
		for (i = 0; i < MAXQUOTAS; i++) {
			if ((dq = ip->i_dquot[i]) == NODQUOT)
				continue;
			DQI_LOCK(dq);
			DQI_WAIT(dq, PINOD+1, "chkdq1");
			ncurblocks = dq->dq_curblocks + change;
			if (ncurblocks >= 0)
				dq->dq_curblocks = ncurblocks;
			else
				dq->dq_curblocks = 0;
			dq->dq_flags &= ~DQ_BLKS;
			dq->dq_flags |= DQ_MOD;
			DQI_UNLOCK(dq);
		}
		return (0);
	}
	if ((flags & FORCE) == 0 &&
	    priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0))
		do_check = 1;
	else
		do_check = 0;
	for (i = 0; i < MAXQUOTAS; i++) {
		if ((dq = ip->i_dquot[i]) == NODQUOT)
			continue;
		warn = 0;
		DQI_LOCK(dq);
		DQI_WAIT(dq, PINOD+1, "chkdq2");
		if (do_check) {
			error = chkdqchg(ip, change, cred, i, &warn);
			if (error) {
				/*
				 * Roll back user quota changes when
				 * group quota failed.
				 */
				while (i > 0) {
					--i;
					dq = ip->i_dquot[i];
					if (dq == NODQUOT)
						continue;
					DQI_LOCK(dq);
					DQI_WAIT(dq, PINOD+1, "chkdq3");
					ncurblocks = dq->dq_curblocks - change;
					if (ncurblocks >= 0)
						dq->dq_curblocks = ncurblocks;
					else
						dq->dq_curblocks = 0;
					dq->dq_flags &= ~DQ_BLKS;
					dq->dq_flags |= DQ_MOD;
					DQI_UNLOCK(dq);
				}
				return (error);
			}
		}
		/* Reset timer when crossing soft limit */
		if (dq->dq_curblocks + change >= dq->dq_bsoftlimit &&
		    dq->dq_curblocks < dq->dq_bsoftlimit)
			dq->dq_btime = time_second + ITOUMP(ip)->um_btime[i];
		dq->dq_curblocks += change;
		dq->dq_flags |= DQ_MOD;
		DQI_UNLOCK(dq);
		if (warn)
			uprintf("\n%s: warning, %s disk quota exceeded\n",
			    ITOVFS(ip)->mnt_stat.f_mntonname,
			    quotatypes[i]);
	}
	return (0);
}
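
The soft-limit handling above is edge triggered: the grace timer is armed only by the write that crosses dq_bsoftlimit (curblocks below the limit before, at or above it after), so later writes while over quota do not keep pushing the deadline out. A standalone sketch of that transition test, with made-up numbers:

#include <stdio.h>
#include <stdint.h>

/* Illustrative limits only: 100-block soft limit, 7-day grace. */
#define BSOFTLIMIT	100
#define BTIME		(7 * 24 * 60 * 60)

int
main(void)
{
	int64_t curblocks = 96;
	int64_t change = 8;		/* this write crosses the limit */
	int64_t now = 1000000;		/* stand-in for time_second */
	int64_t btime = 0;

	/* Same shape as the test in chkdq(): arm the timer only on
	 * the crossing write, never re-arm while already over. */
	if (curblocks + change >= BSOFTLIMIT && curblocks < BSOFTLIMIT)
		btime = now + BTIME;
	curblocks += change;
	printf("curblocks=%lld btime=%lld\n",
	    (long long)curblocks, (long long)btime);
	return 0;
}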
Example #3
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ffs_balloc(
	register struct inode *ip,
	register ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	struct buf **bpp,
	int flags,
	int * blk_alloc)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	int alloc_buffer = 1;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_fs;
	if (flags & B_NOBUFF) 
		alloc_buffer = 0;

	if (blk_alloc)
		*blk_alloc = 0;

	/*
	 * If the next write will extend the file into a new block,
	 * and the file currently ends in a fragment, that fragment
	 * has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/* The file size prior to this write fits in the direct
		 * blocks (i.e. the tail may be fragmented), and we are
		 * now extending the file beyond the block that held
		 * end-of-file before this write.
		 */
		osize = blksize(fs, ip, nb); 
		/* osize gives the disk space allocated in the last block:
		 * either some fragments or a full file system block */
		if (osize < fs->fs_bsize && osize > 0) {
			/* Some fragments are already allocated; since the
			 * current write extends beyond this block, allocate
			 * the complete block (fragments occur only in the
			 * last block).
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			/* adjust the inode size we just grew */
			/* it is in nb+1 as nb starts from 0 */
			ip->i_size = (nb + 1) * fs->fs_bsize;
			ubc_setsize(vp, (off_t)ip->i_size);

			ip->i_db[nb] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
			ip->i_flag |= IN_CHANGE | IN_UPDATE;

			if ((flags & B_SYNC) || (!alloc_buffer)) {
				if (!alloc_buffer) 
					buf_setflags(bp, B_NOCACHE);
				buf_bwrite(bp);
			} else
				buf_bdwrite(bp);
			/* note that bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
			if (alloc_buffer) {
				error = (int)buf_bread(vp,
				    (daddr64_t)((unsigned)lbn), fs->fs_bsize,
				    NOCRED, &bp);
				if (error) {
					buf_brelse(bp);
					return (error);
				}
				*bpp = bp;
			}
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				if (alloc_buffer) {
					error = (int)buf_bread(vp,
					    (daddr64_t)((unsigned)lbn), osize,
					    NOCRED, &bp);
					if (error) {
						buf_brelse(bp);
						return (error);
					}
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					*bpp = bp;
					return (0);
				} else {
					ip->i_flag |= IN_CHANGE | IN_UPDATE;
					return (0);
				}
			} else {
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				ip->i_db[lbn] = dbtofsb(fs, (ufs_daddr_t)buf_blkno(bp));
				ip->i_flag |= IN_CHANGE | IN_UPDATE;

				/* adjust the inode size we just grew */
				ip->i_size = (lbn * fs->fs_bsize) + size;
				ubc_setsize(vp, (off_t)ip->i_size);

				if (!alloc_buffer) {
					buf_setflags(bp, B_NOCACHE);
					if (flags & B_SYNC)
						buf_bwrite(bp);
					else
						buf_bdwrite(bp);
				 } else
					*bpp = bp;
				return (0);

			}
		} else {
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			if (alloc_buffer) {
				bp = buf_getblk(vp, (daddr64_t)((unsigned)lbn),
				    nsize, 0, 0, BLK_WRITE);
				buf_setblkno(bp,
				    (daddr64_t)((unsigned)fsbtodb(fs, newb)));

				if (flags & B_CLRBUF)
					buf_clear(bp);
			}
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (blk_alloc) {
				*blk_alloc = nsize;
			}
			if (alloc_buffer)
				*bpp = bp;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);
#if DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if ((error = buf_bwrite(bp)) != 0) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if REV_ENDIAN_FS
		if (rev_endian)
			nb = OSSwapInt32(bap[indirs[i].in_off]);
		else
#endif /* REV_ENDIAN_FS */
			nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if ((error = buf_bwrite(nbp)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
#if REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
		else
#endif /* REV_ENDIAN_FS */
			bap[indirs[i - 1].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if ((error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i].in_off] = OSSwapInt32(nb);
		else
#endif /* REV_ENDIAN_FS */
			bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		if (alloc_buffer) {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn),
			    fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp,
			    (daddr64_t)((unsigned)fsbtodb(fs, nb)));

			if (flags & B_CLRBUF)
				buf_clear(nbp);
		}
		if (blk_alloc) {
			*blk_alloc = fs->fs_bsize;
		}
		if (alloc_buffer)
			*bpp = nbp;

		return (0);
	}
	buf_brelse(bp);
	if (alloc_buffer) {
		if (flags & B_CLRBUF) {
			error = (int)buf_bread(vp, (daddr64_t)((unsigned)lbn),
			    (int)fs->fs_bsize, NOCRED, &nbp);
			if (error) {
				buf_brelse(nbp);
				goto fail;
			}
		} else {
			nbp = buf_getblk(vp, (daddr64_t)((unsigned)lbn),
			    fs->fs_bsize, 0, 0, BLK_WRITE);
			buf_setblkno(nbp,
			    (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		}
		*bpp = nbp;
	}
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
	        devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
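
ufs_getlbns(), whose result drives the indirect-block walk above, maps a logical block number to a chain of per-level offsets; the number of levels follows from the direct-slot count and the pointers-per-block fanout. A standalone sketch of just the level computation, using illustrative FFS-style geometry (12 direct slots, 8K blocks, 32-bit block pointers, so 2048 pointers per indirect block):

#include <stdio.h>

#define NDADDR	12		/* direct slots in the inode */
#define NINDIR	2048		/* pointers per 8K indirect block */
#define NIADDR	3		/* single, double, triple indirect */

/* Number of indirection levels needed to reach lbn (0 = direct),
 * in the spirit of ufs_getlbns(). */
static int
indir_levels(long long lbn)
{
	long long cap = NDADDR;		/* blocks below this level */
	long long span = NINDIR;	/* blocks this level covers */
	int level;

	if (lbn < cap)
		return 0;
	for (level = 1; level <= NIADDR; level++) {
		if (lbn < cap + span)
			return level;
		cap += span;
		span *= NINDIR;
	}
	return -1;			/* beyond triple indirect */
}

int
main(void)
{
	printf("%d %d %d\n",
	    indir_levels(11),		/* 0: still a direct block */
	    indir_levels(12),		/* 1: first single-indirect */
	    indir_levels(12 + 2048));	/* 2: first double-indirect */
	return 0;
}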
Example #4
/*
 * Update the access, modified, and inode change times as specified by the
 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED
 * flag is used to specify that the inode needs to be updated but that the
 * times have already been set. The access and modified times are taken from
 * the second and third parameters; the inode change time is always taken
 * from the current time. If waitfor is set, then wait for the disk write
 * of the inode to complete.
 */
int
ffs_update(struct inode *ip, struct timespec *atime, 
    struct timespec *mtime, int waitfor)
{
	struct vnode *vp;
	struct fs *fs;
	struct buf *bp;
	int error;
	struct timespec ts;

	vp = ITOV(ip);
	if (vp->v_mount->mnt_flag & MNT_RDONLY) {
		ip->i_flag &=
		    ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
		return (0);
	}

	if ((ip->i_flag &
	    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
	    waitfor != MNT_WAIT)
		return (0);

	getnanotime(&ts);

	if (ip->i_flag & IN_ACCESS) {
		DIP_ASSIGN(ip, atime, atime ? atime->tv_sec : ts.tv_sec);
		DIP_ASSIGN(ip, atimensec, atime ? atime->tv_nsec : ts.tv_nsec);
	}

	if (ip->i_flag & IN_UPDATE) {
		DIP_ASSIGN(ip, mtime, mtime ? mtime->tv_sec : ts.tv_sec);
		DIP_ASSIGN(ip, mtimensec, mtime ? mtime->tv_nsec : ts.tv_nsec);
		ip->i_modrev++;
	}

	if (ip->i_flag & IN_CHANGE) {
		DIP_ASSIGN(ip, ctime, ts.tv_sec);
		DIP_ASSIGN(ip, ctimensec, ts.tv_nsec);
	}

	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE);
	fs = ip->i_fs;

	/*
	 * Ensure that uid and gid are correct. This is a temporary
	 * fix until fsck has been changed to do the update.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) {
		ip->i_din1->di_ouid = ip->i_ffs1_uid;
		ip->i_din1->di_ogid = ip->i_ffs1_gid;
	}

	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	if (DOINGSOFTDEP(vp))
		softdep_update_inodeblock(ip, bp, waitfor);
	else if (ip->i_effnlink != DIP(ip, nlink))
		panic("ffs_update: bad link cnt");

#ifdef FFS2
	if (ip->i_ump->um_fstype == UM_UFS2)
		*((struct ufs2_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
	else
#endif
		*((struct ufs1_dinode *)bp->b_data +
		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;

	if (waitfor && !DOINGASYNC(vp)) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}
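
ffs_update() is driven entirely by the IN_* flags: callers set IN_ACCESS, IN_UPDATE, or IN_CHANGE when they dirty a timestamp, and the update pass materializes the times and clears the flags in one place. A minimal sketch of that flag protocol (flag values and struct are illustrative, not the kernel's):

#include <stdio.h>
#include <time.h>

#define IN_ACCESS	0x01	/* atime needs updating */
#define IN_UPDATE	0x02	/* mtime needs updating */
#define IN_CHANGE	0x04	/* ctime needs updating */

struct sketch_inode {
	int	i_flag;
	time_t	i_atime, i_mtime, i_ctime;
};

/* Analogue of the timestamp section of ffs_update(): apply only
 * the times the flags ask for, then clear the flags. */
static void
update_times(struct sketch_inode *ip, time_t now)
{
	if (ip->i_flag & IN_ACCESS)
		ip->i_atime = now;
	if (ip->i_flag & IN_UPDATE)
		ip->i_mtime = now;
	if (ip->i_flag & IN_CHANGE)
		ip->i_ctime = now;
	ip->i_flag &= ~(IN_ACCESS | IN_UPDATE | IN_CHANGE);
}

int
main(void)
{
	struct sketch_inode ino = { IN_UPDATE | IN_CHANGE, 0, 0, 0 };

	update_times(&ino, time(NULL));
	/* atime stays 0 because IN_ACCESS was never set. */
	printf("atime=%ld mtime=%ld\n",
	    (long)ino.i_atime, (long)ino.i_mtime);
	return 0;
}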
Example #5
/*
 * Check the inode limit, applying corrective action.
 */
int
chkiq(struct inode *ip, int change, struct ucred *cred, int flags)
{
	struct dquot *dq;
	ino_t ncurinodes;
	int i, error, warn, do_check;

#ifdef DIAGNOSTIC
	if ((flags & CHOWN) == 0)
		chkdquot(ip);
#endif
	if (change == 0)
		return (0);
	if (change < 0) {
		for (i = 0; i < MAXQUOTAS; i++) {
			if ((dq = ip->i_dquot[i]) == NODQUOT)
				continue;
			DQI_LOCK(dq);
			DQI_WAIT(dq, PINOD+1, "chkiq1");
			ncurinodes = dq->dq_curinodes + change;
			/* XXX: ncurinodes is unsigned */
			if (dq->dq_curinodes != 0 && ncurinodes >= 0)
				dq->dq_curinodes = ncurinodes;
			else
				dq->dq_curinodes = 0;
			dq->dq_flags &= ~DQ_INODS;
			dq->dq_flags |= DQ_MOD;
			DQI_UNLOCK(dq);
		}
		return (0);
	}
	if ((flags & FORCE) == 0 &&
	    priv_check_cred(cred, PRIV_VFS_EXCEEDQUOTA, 0))
		do_check = 1;
	else
		do_check = 0;
	for (i = 0; i < MAXQUOTAS; i++) {
		if ((dq = ip->i_dquot[i]) == NODQUOT)
			continue;
		warn = 0;
		DQI_LOCK(dq);
		DQI_WAIT(dq, PINOD+1, "chkiq2");
		if (do_check) {
			error = chkiqchg(ip, change, cred, i, &warn);
			if (error) {
				/*
				 * Roll back user quota changes when
				 * group quota failed.
				 */
				while (i > 0) {
					--i;
					dq = ip->i_dquot[i];
					if (dq == NODQUOT)
						continue;
					DQI_LOCK(dq);
					DQI_WAIT(dq, PINOD+1, "chkiq3");
					ncurinodes = dq->dq_curinodes - change;
					/* XXX: ncurinodes is unsigned */
					if (dq->dq_curinodes != 0 &&
					    ncurinodes >= 0)
						dq->dq_curinodes = ncurinodes;
					else
						dq->dq_curinodes = 0;
					dq->dq_flags &= ~DQ_INODS;
					dq->dq_flags |= DQ_MOD;
					DQI_UNLOCK(dq);
				}
				return (error);
			}
		}
		/* Reset timer when crossing soft limit */
		if (dq->dq_curinodes + change >= dq->dq_isoftlimit &&
		    dq->dq_curinodes < dq->dq_isoftlimit)
			dq->dq_itime = time_second + ip->i_ump->um_itime[i];
		dq->dq_curinodes += change;
		dq->dq_flags |= DQ_MOD;
		DQI_UNLOCK(dq);
		if (warn)
			uprintf("\n%s: warning, %s inode quota exceeded\n",
			    ITOV(ip)->v_mount->mnt_stat.f_mntonname,
			    quotatypes[i]);
	}
	return (0);
}
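
The two XXX comments above flag a real pitfall: ino_t is unsigned, so after adding a negative change the test "ncurinodes >= 0" is always true and cannot catch underflow on its own; the extra dq_curinodes != 0 check only papers over it. A standalone demonstration of the wraparound, plus one common guard that compares before subtracting:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint32_t curinodes = 3;	/* ino_t is unsigned, per the XXX */
	int change = -5;
	uint32_t wrapped = curinodes + change;

	/* "wrapped >= 0" holds for every unsigned value, so the
	 * chkiq()-style test cannot detect this underflow. */
	printf("wrapped = %u\n", wrapped);	/* huge, not -2 */

	/* A guard that works: check the magnitude first. */
	uint32_t safe = (curinodes >= (uint32_t)-change) ?
	    curinodes + change : 0;
	printf("safe = %u\n", safe);		/* clamped to 0 */
	return 0;
}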
Example #6
DENT_T *
vnode_iop_lookup(
    INODE_T *dir,
    struct dentry *dent,
    struct nameidata *nd
)
{
    char *name;
    mdki_boolean_t rele = FALSE;
    int err;
    VNODE_T *dvp;
    VNODE_T *rt_vnode;                  /* returned vnode */
    INODE_T *rt_inode = NULL;           /* returned inode ptr */
    DENT_T * real_dentry;
    DENT_T *found_dentry = dent;
    VATTR_T *vap;
    struct lookup_ctx ctx;
    CALL_DATA_T cd;

    ASSERT_I_SEM_MINE(dir);
    /* We can find our parent entry via the dentry provided to us. */
    ASSERT(dent->d_parent->d_inode == dir);

    if (dent->d_name.len > NAME_MAX)
        return ERR_PTR(-ENAMETOOLONG);
    name = /* drop the const */(char *) dent->d_name.name;
    mdki_linux_init_call_data(&cd);

    /* We pass along the dentry, as well as the parent inode so that
     * mvop_linux_lookup_* has everything it needs, even if it is passed in
     * the realvp, and it gets back a negative dentry.
     */
    dvp = ITOV(dir);
    ctx.dentrypp = &found_dentry;
    ctx.flags = LOOKUP_CTX_VALID;

    err = VOP_LOOKUP(dvp, name, &rt_vnode, (struct pathname *)NULL,
                     VNODE_LF_LOOKUP, NULL, &cd, &ctx);
    err = mdki_errno_unix_to_linux(err);

    if (!err) {
        ASSERT(rt_vnode != NULL);
        if (MDKI_INOISCLRVN(VTOI(rt_vnode))) {
            /* unwrap to the real object */
            ASSERT(CVN_TO_DENT(rt_vnode));
            rt_inode = CVN_TO_INO(rt_vnode);
            if (MDKI_INOISMVFS(rt_inode)) {
                VN_HOLD(ITOV(rt_inode));
                VN_RELE(rt_vnode);
                rt_vnode = ITOV(rt_inode);
            } else {
                igrab(rt_inode);
                VN_RELE(rt_vnode);
                rt_vnode = NULL;
            }
        } else
            rt_inode = VTOI(rt_vnode);
    }
    if (!err && (found_dentry != dent)) {
        mdki_linux_destroy_call_data(&cd);
        /* The hold was granted in makeloopnode() in the 'nocover' case. */
        if (rt_vnode != NULL)
            VN_RELE(rt_vnode);
        else
            iput(rt_inode);
        /*
         * found_dentry is the real socket/block/char device node's dentry.
         * See mvop_linux_lookup_component().
         *
         * For sockets, we use a dentry in our tree (we fill in the
         * provided dentry "dent") linked to the inode of the real
         * object.  This lets file name operations work in our
         * namespace, and lets socket connections all work (as they're
         * keyed off of the inode address) from inside to outside &
         * v.v.
         *
         * We also do this for VCHR, VBLK devices, and it seems to work OK
         * (e.g. make a node the same as /dev/tty, you can write to it)
         */
        switch (found_dentry->d_inode->i_mode & S_IFMT) {
          case S_IFSOCK:
          case S_IFCHR:
          case S_IFBLK:
            ASSERT(dent->d_inode == NULL);
            MDKI_SET_DOPS(dent, &vnode_shadow_dentry_ops);
            igrab(found_dentry->d_inode);
            VNODE_D_ADD(dent, found_dentry->d_inode);
            VNODE_DPUT(found_dentry);
            found_dentry = NULL; /* tell caller to use original dentry */
            break;
          default:
            /* use returned dentry */
            break;
        }
        return(found_dentry);
    }

    /* We need to pass back dentry ops even for negative dentries, I think.
     * Shadow inodes will have been taken care of in lookup_component.
     */
    if (dent->d_op != &vnode_shadow_dentry_ops) {
        if (dent->d_parent->d_op == &vnode_setview_dentry_ops)
            MDKI_SET_DOPS(dent, &vnode_setview_dentry_ops);
        else
            MDKI_SET_DOPS(dent, &vnode_dentry_ops);
    }
    vap = VATTR_ALLOC();
    if (vap == NULL) {
        err = -ENOMEM;
        goto alloc_err;
    }
    if (!err && MDKI_INOISMVFS(rt_inode)) {
        /* fetch attributes & place in inode */
        VATTR_SET_MASK(vap, AT_ALL);
        err = VOP_GETATTR(rt_vnode, vap, GETATTR_FLAG_UPDATE_ATTRS, &cd);
        err = mdki_errno_unix_to_linux(err);
        if (err == -EOPNOTSUPP)          /* ignore it */
            err = 0;
        else if (err)
            rele = TRUE;
        else if ((rt_vnode->v_flag & VLOOPROOT) != 0 &&
                 rt_inode == vnlayer_get_urdir_inode())
        {
            /* return the real root */
            VN_RELE(rt_vnode);
            VATTR_FREE(vap);
            mdki_linux_destroy_call_data(&cd);
            return VNODE_DGET(vnlayer_get_root_dentry());
        }
        else if (vnlayer_looproot_vp != NULL &&
                 rt_vnode == vnlayer_looproot_vp &&
                 (real_dentry = MVOP_DENT(rt_inode,
                                          &vnode_dentry_ops)) != NULL)
        {
            /* return the real /view */
            VN_RELE(rt_vnode);
            VATTR_FREE(vap);
            mdki_linux_destroy_call_data(&cd);
            return real_dentry;
        }
    }
    VATTR_FREE(vap);
alloc_err:
    mdki_linux_destroy_call_data(&cd);

    /* It's an mnode-based object, set up a dentry for it */

    /* We don't return ENOENT.  For Linux, the negative dentry is enough */
    switch (err) {
      case -ENOENT:
        err = 0;
        ASSERT(rt_inode == NULL);
        VNODE_D_ADD(dent, rt_inode);
        break;
      case 0:
        /* We will consume the count on rt_inode as a reference for dent */
        /*
         * For VOB vnodes, we maintain two separate dentry trees for
         * the vnodes.  One tree is for setview-mode names (process
         * sets to a view context, then looks directly at the VOB
         * mountpoint without any cover vnodes in the path).  The
         * other tree is for view-extended naming into a VOB, with
         * dentries starting at the view tag and covering non-VOB
         * objects until crossing a mount point into a VOB.
         *
         * Mostly the system doesn't care, as long as it goes down the
         * tree from parent to child, since it will be traversing only one
         * of the dentry trees.  But when the cache misses, the system calls
         * this lookup method and wants to get a dentry in return.
         * There are standard interfaces (d_splice_alias() in 2.6)
         * which can find a good dentry referencing the inode returned
         * by the file system's lookup method, but these methods don't
         * work right when we have VOB directory vnodes with both setview
         * and view-extended dentries.  We implement our own function
         * [vnlayer_inode2dentry_internal()] which knows the
         * distinctions and the rules for determining that an existing
         * attached dentry is valid for the lookup request.
         *
         * We have our own d_compare() function which forces all VOB
         * lookups to come to the inode lookup method (this function),
         * and then we get to choose the right dentry to return.  We
         * have our own lookup cache inside MVFS so we don't care that
         * the dentry cache is always missing on our names.
         *
         * If we have to make a new dentry, we may need to merge it
         * with an NFS-created temporary dentry using d_move()
         * (d_splice_alias() would do this for us, but we can't use it
         * for reasons listed above).
         */
        /*
         * We want to find the "right" dentry (if there is one), so
         * look for one that has a d_parent with the same dentry ops
         * (indicating it's in the same dentry tree).
         */
        if (S_ISDIR(rt_inode->i_mode)) {
            /*
             * It has been empirically shown that we have to check the 
             * parent of the dentry.  If the parent has been checked out
             * it is possible for the cache lookup to return an inode
             * from the tree below the old parent directory.  If this 
             * happens on a rename, the system will panic because the
             * Linux rename code checks the parent of the returned
             * dentry to see that it matches what it has for a parent.
             */
            found_dentry = vnlayer_inode2dentry_internal(rt_inode,
                                                         dent->d_parent, NULL,
                                                         dent->d_op);
        } else {
            /*
             * For non-directories, we also need to consider the
             * parent & the requested name so that
             * vnlayer_inode2dentry_internal() finds the right dentry.
             * (There may be multiple hard links; we want the one in
             * the same directory with the same name)
             */
            found_dentry = vnlayer_inode2dentry_internal(rt_inode,
                                                         dent->d_parent,
                                                         &dent->d_name,
                                                         dent->d_op);
        }
        if (found_dentry != NULL) {
            ASSERT(found_dentry->d_inode == rt_inode);
            /*
             * If the existing one is a disconnected dentry, we need
             * to move the old one to the new one (just like
             * d_splice_alias) to get the proper name/parent attached
             * in the dcache.
             */
            if ((found_dentry->d_flags & DCACHE_DISCONNECTED) != 0) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7)
                ASSERT((dent->d_flags & DCACHE_UNHASHED) != 0);
#else
                ASSERT((dent->d_vfs_flags & DCACHE_UNHASHED) != 0);
#endif
                d_rehash(dent);
                d_move(found_dentry, dent);
            } 
            /* Release our count.  found_dentry also references inode. */
            iput(rt_inode);
            return found_dentry;
        }
        /*
         * Nothing suitable, wire it up to the proposed dentry.
         */
        VNODE_D_ADD(dent, rt_inode);
        break;
      default:
        /* some other error case */
        if (rele)
            VN_RELE(rt_vnode);
        break;
    }
    if (err)
        return ERR_PTR(err);
    else
        return NULL;
}
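
A Linux inode lookup method like the one above has three distinct outcomes: ERR_PTR(err) on failure, NULL meaning "the dentry you handed me was wired up, keep using it", and a different dentry pointer to use instead (the found_dentry paths). A deliberately minimal userspace imitation of the ERR_PTR()/IS_ERR() encoding that makes the first case distinguishable from a real pointer:

#include <stdio.h>

/* Userspace imitation of the kernel's ERR_PTR()/IS_ERR() helpers:
 * errno values live in the top (unmappable) page of the address
 * space, so they can share the return type with real pointers. */
#define MAX_ERRNO	4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Sketch of the three-way return contract of an inode lookup. */
static void *
lookup_sketch(int err, void *found_dentry)
{
	if (err)
		return ERR_PTR(err);
	return found_dentry;		/* may be NULL: use original */
}

int
main(void)
{
	void *d = lookup_sketch(-2 /* an errno, e.g. -ENOENT */, NULL);

	if (IS_ERR(d))
		printf("error %ld\n", PTR_ERR(d));
	else if (d == NULL)
		printf("use the dentry passed in\n");
	else
		printf("use the returned dentry\n");
	return 0;
}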
Example #7
/*
 * Truncate the inode oip to at most length size, freeing the
 * disk blocks.
 */
int
ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
	struct vnode *ovp;
	daddr64_t lastblock, datablocks;
	daddr64_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
	daddr64_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
	struct fs *fs;
	struct buf *bp;
	int offset, size, level;
	long count, nblocks, vflags, blocksreleased = 0;
	int i, aflags, error, allerror, needextclean = 0;
	off_t osize;
#ifdef FFS2
	daddr64_t extblocks;
	int softdepslowdown;
#endif

	if (length < 0)
		return (EINVAL);

	ovp = ITOV(oip);
	fs = oip->i_fs;

	if (ovp->v_type != VREG &&
	    ovp->v_type != VDIR &&
	    ovp->v_type != VLNK)
		return (0);

	/*
	 * Historically clients did not have to specify which data they were
	 * truncating. So, if not specified, we assume traditional behavior,
	 * e.g., just the normal data.
	 */
	if ((flags & (IO_EXT | IO_NORMAL)) == 0)
		flags |= IO_NORMAL;

	if (DIP(oip, size) == length && !(flags & IO_EXT))
		return (0);

	datablocks = DIP(oip, blocks);

#ifdef FFS2
	/*
	 * If we are truncating the extended-attributes, and cannot do it with
	 * soft updates, then do it slowly here. If we are truncating both the
	 * extended attributes and the file contents (e.g., the file is being
	 * unlinked), then pick it off with soft updates below.
	 */
	needextclean = 0;
	softdepslowdown = DOINGSOFTDEP(ovp) && softdep_slowdown(ovp);
	extblocks = 0;
	if (fs->fs_magic == FS_UFS2_MAGIC && oip->i_ffs2_extsize > 0) {
		extblocks = btodb(fragroundup(fs, oip->i_ffs2_extsize));
		datablocks -= extblocks;
	}
	if ((flags & IO_EXT) && extblocks > 0) {
		if (DOINGSOFTDEP(ovp) && softdepslowdown == 0 && length == 0) {
			if ((flags & IO_NORMAL) == 0) {
				softdep_setup_freeblocks(oip, length, IO_EXT);
				return (0);
			}
			needextclean = 1;
		} else {
#ifdef DIAGNOSTIC
			if (length != 0)
				panic("ffs_truncate: partial truncation of "
				    "extended attributes");
#endif
			error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc);
			if (error)
				return (error);
			osize = oip->i_ffs2_extsize;
			oip->i_ffs2_blocks -= extblocks;
			(void)ufs_quota_free_blocks(oip, extblocks, NOCRED);
			(void) vinvalbuf(ovp, V_EXT, cred, curproc, 0, 0);
			oip->i_ffs2_extsize = 0;
			for (i = 0; i < NXADDR; i++) {
				oldblks[i] = oip->i_ffs2_extb[i];
				oip->i_ffs2_extb[i] = 0;
			}
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			error = UFS_UPDATE(oip, MNT_WAIT);
			if (error)
				return (error);
			for (i = 0; i < NXADDR; i++) {
				if (oldblks[i] == 0)
					continue;
				ffs_blkfree(oip, oldblks[i],
				    sblksize(fs, osize, i));
			}
		}
	}

	if (!(flags & IO_NORMAL))
		return (0); /* Nothing else to do. */
#endif /* FFS2 */

	if (ovp->v_type == VLNK &&
	    (DIP(oip, size) < ovp->v_mount->mnt_maxsymlinklen ||
	     (ovp->v_mount->mnt_maxsymlinklen == 0 &&
	      datablocks == 0))) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("ffs_truncate: partial truncate of symlink");
#endif
		memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size));
		DIP_ASSIGN(oip, size, 0);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
#ifdef FFS2
		if (needextclean)
			softdep_setup_freeblocks(oip, length, IO_EXT);
#endif
		return (UFS_UPDATE(oip, MNT_WAIT));
	}

	if ((error = getinoquota(oip)) != 0)
		return (error);

	uvm_vnp_setsize(ovp, length);
	oip->i_ci.ci_lasta = oip->i_ci.ci_clen 
	    = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0;

	if (DOINGSOFTDEP(ovp)) {
		if (length > 0 || softdep_slowdown(ovp)) {
			/*
			 * If a file is only partially truncated, then
			 * we have to clean up the data structures
			 * describing the allocation past the truncation
			 * point. Finding and deallocating those structures
			 * is a lot of work. Since partial truncation occurs
			 * rarely, we solve the problem by syncing the file
			 * so that it will have no data structures left.
			 */
			if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
					       curproc)) != 0)
				return (error);
		} else {
			(void)ufs_quota_free_blocks(oip, datablocks, NOCRED);
			softdep_setup_freeblocks(oip, length, needextclean ?
			    IO_EXT | IO_NORMAL : IO_NORMAL);
			(void) vinvalbuf(ovp, needextclean ? 0 : V_NORMAL,
			    cred, curproc, 0, 0);
			oip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (UFS_UPDATE(oip, 0));
		}
	}

	osize = DIP(oip, size);
	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of osize is 0, length will be at least 1.
	 */
	if (osize < length) {
		if (length > fs->fs_maxfilesize)
			return (EFBIG);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = UFS_BUF_ALLOC(oip, length - 1, 1, 
				   cred, aflags, &bp);
		if (error)
			return (error);
		DIP_ASSIGN(oip, size, length);
		uvm_vnp_setsize(ovp, length);
		(void) uvm_vnp_uncache(ovp);
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
		oip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (UFS_UPDATE(oip, MNT_WAIT));
	}
	uvm_vnp_setsize(ovp, length);

	/*
	 * Shorten the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth. Directories however are not
	 * zero'ed as they should grow back initialized to empty.
	 */
	offset = blkoff(fs, length);
	if (offset == 0) {
		DIP_ASSIGN(oip, size, length);
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
		error = UFS_BUF_ALLOC(oip, length - 1, 1,
				   cred, aflags, &bp);
		if (error)
			return (error);
		/*
		 * When we are doing soft updates and the UFS_BALLOC
		 * above fills in a direct block hole with a full sized
		 * block that will be truncated down to a fragment below,
		 * we must flush out the block dependency with an FSYNC
		 * so that we do not get a soft updates inconsistency
		 * when we create the fragment below.
		 */
		if (DOINGSOFTDEP(ovp) && lbn < NDADDR &&
		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
		    (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0)
			return (error);
		DIP_ASSIGN(oip, size, length);
		size = blksize(fs, oip, lbn);
		(void) uvm_vnp_uncache(ovp);
		if (ovp->v_type != VDIR)
			bzero((char *)bp->b_data + offset,
			      (u_int)(size - offset));
		bp->b_bcount = size;
		if (aflags & B_SYNC)
			bwrite(bp);
		else
			bawrite(bp);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);

	/*
	 * Update file and block pointers on disk before we start freeing
	 * blocks.  If we crash before free'ing blocks below, the blocks
	 * will be returned to the free list.  lastiblock values are also
	 * normalized to -1 for calls to ffs_indirtrunc below.
	 */
	for (level = TRIPLE; level >= SINGLE; level--) {
		oldblks[NDADDR + level] = DIP(oip, ib[level]);
		if (lastiblock[level] < 0) {
			DIP_ASSIGN(oip, ib[level], 0);
			lastiblock[level] = -1;
		}
	}

	for (i = 0; i < NDADDR; i++) {
		oldblks[i] = DIP(oip, db[i]);
		if (i > lastblock)
			DIP_ASSIGN(oip, db[i], 0);
	}

	oip->i_flag |= IN_CHANGE | IN_UPDATE;
	if ((error = UFS_UPDATE(oip, MNT_WAIT)) != 0)
		allerror = error;

	/*
	 * Having written the new inode to disk, save its new configuration
	 * and put back the old block pointers long enough to process them.
	 * Note that we save the new block configuration so we can check it
	 * when we are done.
	 */
	for (i = 0; i < NDADDR; i++) {
		newblks[i] = DIP(oip, db[i]);
		DIP_ASSIGN(oip, db[i], oldblks[i]);
	}

	for (i = 0; i < NIADDR; i++) {
		newblks[NDADDR + i] = DIP(oip, ib[i]);
		DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]);
	}

	DIP_ASSIGN(oip, size, osize);
	vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
	allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0);

	/*
	 * Indirect blocks first.
	 */
	indir_lbn[SINGLE] = -NDADDR;
	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = DIP(oip, ib[level]);
		if (bn != 0) {
			error = ffs_indirtrunc(oip, indir_lbn[level],
			    fsbtodb(fs, bn), lastiblock[level], level, &count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				DIP_ASSIGN(oip, ib[level], 0);
				ffs_blkfree(oip, bn, fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		long bsize;

		bn = DIP(oip, db[i]);
		if (bn == 0)
			continue;

		DIP_ASSIGN(oip, db[i], 0);
		bsize = blksize(fs, oip, i);
		ffs_blkfree(oip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = DIP(oip, db[lastblock]);
	if (bn != 0) {
		long oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, oip, lastblock);
		DIP_ASSIGN(oip, size, length);
		newspace = blksize(fs, oip, lastblock);
		if (newspace == 0)
			panic("ffs_truncate: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			ffs_blkfree(oip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
#ifdef DIAGNOSTIC
	for (level = SINGLE; level <= TRIPLE; level++)
		if (newblks[NDADDR + level] != DIP(oip, ib[level]))
			panic("ffs_truncate1");
	for (i = 0; i < NDADDR; i++)
		if (newblks[i] != DIP(oip, db[i]))
			panic("ffs_truncate2");
#endif /* DIAGNOSTIC */
	/*
	 * Put back the real size.
	 */
	DIP_ASSIGN(oip, size, length);
	DIP_ADD(oip, blocks, -blocksreleased);
	if (DIP(oip, blocks) < 0)	/* Sanity */
		DIP_ASSIGN(oip, blocks, 0);
	oip->i_flag |= IN_CHANGE;
	(void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED);
	return (allerror);
}
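
The index arithmetic above is easiest to see with concrete numbers: lastblock is the last logical block to keep, and each lastiblock[] entry shifts that down by the blocks reachable below that indirection level, going negative when the whole level can be freed. A standalone sketch with illustrative geometry (8K blocks, 12 direct slots, 2048 pointers per indirect block):

#include <stdio.h>

#define BSIZE	8192LL
#define NDADDR	12
#define NINDIR	2048LL

int
main(void)
{
	long long length = 100 * BSIZE;	/* truncate to 100 blocks */

	/* Same arithmetic as ffs_truncate(). */
	long long lastblock = (length + BSIZE - 1) / BSIZE - 1;
	long long lastsingle = lastblock - NDADDR;
	long long lastdouble = lastsingle - NINDIR;
	long long lasttriple = lastdouble - NINDIR * NINDIR;

	/* Prints 99 87 -1961 -4196265: blocks 0-87 of the single-
	 * indirect range stay, the double and triple levels go. */
	printf("%lld %lld %lld %lld\n",
	    lastblock, lastsingle, lastdouble, lasttriple);
	return 0;
}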
Example #8
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ext2_balloc(struct inode *ip, e2fs_lbn_t lbn, int size, struct ucred *cred,
    struct buf **bpp, int flags)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[EXT2_NIADDR + 2];
	e4fs_daddr_t nb, newb;
	e2fs_daddr_t *bap, pref;
	int osize, nsize, num, i, error;

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_e2fs;
	ump = ip->i_ump;

	/*
	 * check if this is a sequential block allocation.
	 * If so, increment next_alloc fields to allow ext2_blkpref
	 * to make a good guess
	 */
	if (lbn == ip->i_next_alloc_block + 1) {
		ip->i_next_alloc_block++;
		ip->i_next_alloc_goal++;
	}

	if (ip->i_flag & IN_E4EXTENTS)
		return (ext2_ext_balloc(ip, lbn, size, cred, bpp, flags));

	/*
	 * The first EXT2_NDADDR blocks are direct blocks
	 */
	if (lbn < EXT2_NDADDR) {
		nb = ip->i_db[lbn];
		/*
		 * no new block is to be allocated, and no need to expand
		 * the file
		 */
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->e2fs_bsize) {
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				/*
				 * Godmar thinks: this shouldn't happen w/o
				 * fragments
				 */
				printf("nsize %d(%d) > osize %d(%d) nb %d\n",
				    (int)nsize, (int)size, (int)osize,
				    (int)ip->i_size, (int)nb);
				panic(
				    "ext2_balloc: Something is terribly wrong");
/*
 * please note there haven't been any changes from here on -
 * FFS seems to work.
 */
			}
		} else {
			if (ip->i_size < (lbn + 1) * fs->e2fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->e2fs_bsize;
			EXT2_LOCK(ump);
			error = ext2_alloc(ip, lbn,
			    ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
			    nsize, cred, &newb);
			if (error)
				return (error);
			/*
			 * If the newly allocated block exceeds 32-bit limit,
			 * we can not use it in file block maps.
			 */
			if (newb > UINT_MAX)
				return (EFBIG);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
		}
		ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);
#ifdef INVARIANTS
	if (num < 1)
		panic("ext2_balloc: ext2_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		EXT2_LOCK(ump);
		pref = ext2_blkpref(ip, lbn, indirs[0].in_off +
		    EXT2_NDIR_BLOCKS, &ip->i_db[0], 0);
		if ((error = ext2_alloc(ip, lbn, pref, fs->e2fs_bsize, cred,
		    &newb)))
			return (error);
		if (newb > UINT_MAX)
			return (EFBIG);
		nb = newb;
		bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, newb);
		vfs_bio_clrbuf(bp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0) {
			ext2_blkfree(ip, nb, fs->e2fs_bsize);
			return (error);
		}
		ip->i_ib[indirs[0].in_off] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		bap = (e2fs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		EXT2_LOCK(ump);
		if (pref == 0)
			pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap,
			    bp->b_lblkno);
		error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			return (error);
		}
		if (newb > UINT_MAX)
			return (EFBIG);
		nb = newb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			ext2_blkfree(ip, nb, fs->e2fs_bsize);
			EXT2_UNLOCK(ump);
			brelse(bp);
			return (error);
		}
		bap[indirs[i - 1].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->e2fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		EXT2_LOCK(ump);
		pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0],
		    bp->b_lblkno);
		if ((error = ext2_alloc(ip,
		    lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			return (error);
		}
		if (newb > UINT_MAX)
			return (EFBIG);
		nb = newb;
		nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->e2fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;

		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->e2fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, 0, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			return (error);
		}
	} else {
		nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	*bpp = nbp;
	return (0);
}
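
The i_next_alloc_block/i_next_alloc_goal pair consulted at the top of ext2_balloc() is a simple sequential-write detector: when an allocation extends the previous one by exactly one logical block, both hints advance so ext2_blkpref() will aim at the physically following block. A standalone sketch of the heuristic (field names borrowed from the code, the struct itself hypothetical):

#include <stdio.h>

struct alloc_hint {
	long	next_alloc_block;	/* last logical block allocated */
	long	next_alloc_goal;	/* preferred next physical block */
};

static void
note_alloc(struct alloc_hint *h, long lbn)
{
	/* Sequential extension: advance both hints together. */
	if (lbn == h->next_alloc_block + 1) {
		h->next_alloc_block++;
		h->next_alloc_goal++;
	}
}

int
main(void)
{
	struct alloc_hint h = { 7, 5007 };

	note_alloc(&h, 8);	/* sequential: hint advances */
	printf("%ld %ld\n", h.next_alloc_block, h.next_alloc_goal);
	note_alloc(&h, 42);	/* random access: hint untouched */
	printf("%ld %ld\n", h.next_alloc_block, h.next_alloc_goal);
	return 0;
}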
Example #9
static int
ext2_ext_balloc(struct inode *ip, uint32_t lbn, int size,
    struct ucred *cred, struct buf **bpp, int flags)
{
	struct m_ext2fs *fs;
	struct buf *bp = NULL;
	struct vnode *vp = ITOV(ip);
	daddr_t newblk;
	int osize, nsize, blks, error, allocated;

	fs = ip->i_e2fs;
	blks = howmany(size, fs->e2fs_bsize);

	error = ext4_ext_get_blocks(ip, lbn, blks, cred, NULL, &allocated, &newblk);
	if (error)
		return (error);

	if (allocated) {
		if (ip->i_size < (lbn + 1) * fs->e2fs_bsize)
			nsize = fragroundup(fs, size);
		else
			nsize = fs->e2fs_bsize;

		bp = getblk(vp, lbn, nsize, 0, 0, 0);
		if (bp == NULL)
			return (EIO);

		bp->b_blkno = fsbtodb(fs, newblk);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(bp);
	} else {
		if (ip->i_size >= (lbn + 1) * fs->e2fs_bsize) {
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, newblk);
			*bpp = bp;
			return (0);
		}

		/*
		 * Consider need to reallocate a fragment.
		 */
		osize = fragroundup(fs, blkoff(fs, ip->i_size));
		nsize = fragroundup(fs, size);
		if (nsize <= osize)
			error = bread(vp, lbn, osize, NOCRED, &bp);
		else
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		bp->b_blkno = fsbtodb(fs, newblk);
	}

	*bpp = bp;

	return (error);
}
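
The extent path above first converts the byte count into whole blocks with howmany(), the classic round-up division macro from <sys/param.h>. A tiny standalone check of the rounding:

#include <stdio.h>

/* howmany() as defined in <sys/param.h>: divide, rounding up. */
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	int bsize = 4096;

	/* 1 byte still costs a block; one byte past a block costs two. */
	printf("%d %d %d\n", howmany(1, bsize),
	    howmany(bsize, bsize), howmany(bsize + 1, bsize));	/* 1 1 2 */
	return 0;
}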
Example #10
/*
 * Set the quota file up for a particular file system.
 * Called as the result of a quotaon (Q_QUOTAON) ioctl.
 */
static int
opendq(
	struct ufsvfs *ufsvfsp,
	struct vnode *vp,		/* quota file */
	struct cred *cr)
{
	struct inode *qip;
	struct dquot *dqp;
	int error;
	int quotaon = 0;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	VN_HOLD(vp);

	/*
	 * Check to be sure it's a regular file.
	 */
	if (vp->v_type != VREG) {
		VN_RELE(vp);
		return (EACCES);
	}

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * We have vfs_dqrwlock as writer, so if quotas are disabled,
	 * then vfs_qinod should be NULL or we have a race somewhere.
	 */
	ASSERT((ufsvfsp->vfs_qflags & MQ_ENABLED) || (ufsvfsp->vfs_qinod == 0));

	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) != 0) {
		/*
		 * Quotas are already enabled on this file system.
		 *
		 * If the "quotas" file was replaced (different inode)
		 * while quotas were enabled we don't want to re-enable
		 * them with a new "quotas" file. Simply print a warning
		 * message to the console, release the new vnode, and
		 * return.
		 * XXX - The right way to fix this is to return EBUSY
		 * for the ioctl() issued by 'quotaon'.
		 */
		if (VTOI(vp) != ufsvfsp->vfs_qinod) {
			cmn_err(CE_WARN, "Previous quota file still in use."
			    " Disable quotas on %s before enabling.\n",
			    VTOI(vp)->i_fs->fs_fsmnt);
			VN_RELE(vp);
			rw_exit(&ufsvfsp->vfs_dqrwlock);
			return (0);
		}
		(void) quotasync(ufsvfsp, /* do_lock */ 0);
		/* remove extra hold on quota file */
		VN_RELE(vp);
		quotaon++;
		qip = ufsvfsp->vfs_qinod;
	} else {
		int qlen;

		ufsvfsp->vfs_qinod = VTOI(vp);
		qip = ufsvfsp->vfs_qinod;
		/*
		 * Force the file to have no partially allocated blocks
		 * to prevent a realloc from changing the location of
		 * the data. We must do this even if not logging in
		 * case we later remount to logging.
		 */
		qlen = qip->i_fs->fs_bsize * NDADDR;

		/*
		 * Largefiles: i_size needs to be atomically accessed now.
		 */
		rw_enter(&qip->i_contents, RW_WRITER);
		if (qip->i_size < qlen) {
			if (ufs_itrunc(qip, (u_offset_t)qlen, (int)0, cr) != 0)
				cmn_err(CE_WARN, "opendq failed to remove frags"
				    " from quota file\n");
			rw_exit(&qip->i_contents);
			(void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)qip->i_size,
			    B_INVAL, kcred, NULL);
		} else {
			rw_exit(&qip->i_contents);
		}
		TRANS_MATA_IGET(ufsvfsp, qip);
	}

	/*
	 * The file system time limits are in the dquot for uid 0.
	 * The time limits set the relative time the other users
	 * can be over quota for this file system.
	 * If it is zero a default is used (see quota.h).
	 */
	error = getdiskquota((uid_t)0, ufsvfsp, 1, &dqp);
	if (error == 0) {
		mutex_enter(&dqp->dq_lock);
		ufsvfsp->vfs_btimelimit =
		    (dqp->dq_btimelimit? dqp->dq_btimelimit: DQ_BTIMELIMIT);
		ufsvfsp->vfs_ftimelimit =
		    (dqp->dq_ftimelimit? dqp->dq_ftimelimit: DQ_FTIMELIMIT);

		ufsvfsp->vfs_qflags = MQ_ENABLED;	/* enable quotas */
		vfs_setmntopt(ufsvfsp->vfs_vfs, MNTOPT_QUOTA, NULL, 0);
		dqput(dqp);
		mutex_exit(&dqp->dq_lock);
	} else if (!quotaon) {
		/*
		 * Some sort of I/O error on the quota file, and quotas were
		 * not already on when we got here so clean up.
		 */
		ufsvfsp->vfs_qflags = 0;
		ufsvfsp->vfs_qinod = NULL;
		VN_RELE(ITOV(qip));
	}

	/*
	 * If quotas are enabled update all valid inodes in the
	 * cache with quota information.
	 */
	if (ufsvfsp->vfs_qflags & MQ_ENABLED) {
		(void) ufs_scan_inodes(0, opendq_scan_inode, ufsvfsp, ufsvfsp);
	}

	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (error);
}
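
The qlen computation above is the key trick: padding the quota file to fs_bsize * NDADDR bytes fills every direct block, so the file can have no tail fragment and no later reallocation can move quota data on disk (which would invalidate cached offsets such as dq_mof). The arithmetic with illustrative numbers:

#include <stdio.h>

int
main(void)
{
	long bsize = 8192;	/* illustrative fs_bsize */
	int ndaddr = 12;	/* NDADDR direct slots */

	/* Minimum quota-file size enforced by opendq(): beyond this
	 * every direct block is a full block, never a fragment. */
	long qlen = bsize * ndaddr;
	printf("quota file padded to %ld bytes\n", qlen);	/* 98304 */
	return 0;
}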
Example #11
/*
 * Set various fields of the dqblk according to the command.
 * Q_SETQUOTA - assign an entire dqblk structure.
 * Q_SETQLIM - assign a dqblk structure except for the usage.
 */
static int
setquota(int cmd, uid_t uid, struct ufsvfs *ufsvfsp,
    caddr_t addr, struct cred *cr)
{
	struct dquot *dqp;
	struct inode	*qip;
	struct dquot *xdqp;
	struct dqblk newlim;
	int error;
	int scan_type = SQD_TYPE_NONE;
	daddr_t bn;
	int contig;

	if (secpolicy_fs_quota(cr, ufsvfsp->vfs_vfs) != 0)
		return (EPERM);

	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_WRITER);

	/*
	 * Quotas are not enabled on this file system so there is
	 * nothing more to do.
	 */
	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (ESRCH);
	}

	/*
	 * At this point, the quota subsystem is quiescent on this file
	 * system so we can do all the work necessary to modify the quota
	 * information for this user.
	 */

	if (copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)) != 0) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (EFAULT);
	}
	error = getdiskquota(uid, ufsvfsp, 0, &xdqp);
	if (error) {
		rw_exit(&ufsvfsp->vfs_dqrwlock);
		return (error);
	}
	dqp = xdqp;
	/*
	 * Don't change disk usage on Q_SETQLIM
	 */
	mutex_enter(&dqp->dq_lock);
	if (cmd == Q_SETQLIM) {
		newlim.dqb_curblocks = dqp->dq_curblocks;
		newlim.dqb_curfiles = dqp->dq_curfiles;
	}
	if (uid == 0) {
		/*
		 * Timelimits for uid 0 set the relative time
		 * the other users can be over quota for this file system.
		 * If it is zero a default is used (see quota.h).
		 */
		ufsvfsp->vfs_btimelimit =
		    newlim.dqb_btimelimit? newlim.dqb_btimelimit: DQ_BTIMELIMIT;
		ufsvfsp->vfs_ftimelimit =
		    newlim.dqb_ftimelimit? newlim.dqb_ftimelimit: DQ_FTIMELIMIT;
	} else {
		if (newlim.dqb_bsoftlimit &&
		    newlim.dqb_curblocks >= newlim.dqb_bsoftlimit) {
			if (dqp->dq_bsoftlimit == 0 ||
			    dqp->dq_curblocks < dqp->dq_bsoftlimit) {
				/* If we're suddenly over the limit(s),	*/
				/* start the timer(s)			*/
				newlim.dqb_btimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_btimelimit;
				dqp->dq_flags &= ~DQ_BLKS;
			} else {
				/* If we're currently over the soft	*/
				/* limit and were previously over the	*/
				/* soft limit then preserve the old	*/
				/* time limit but make sure the DQ_BLKS	*/
				/* flag is set since we must have been	*/
				/* previously warned.			*/
				newlim.dqb_btimelimit = dqp->dq_btimelimit;
				dqp->dq_flags |= DQ_BLKS;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_btimelimit = 0;
			dqp->dq_flags &= ~DQ_BLKS;
		}

		if (newlim.dqb_fsoftlimit &&
		    newlim.dqb_curfiles >= newlim.dqb_fsoftlimit) {
			if (dqp->dq_fsoftlimit == 0 ||
			    dqp->dq_curfiles < dqp->dq_fsoftlimit) {
				/* If we're suddenly over the limit(s),	*/
				/* start the timer(s)			*/
				newlim.dqb_ftimelimit =
				    (uint32_t)gethrestime_sec() +
				    ufsvfsp->vfs_ftimelimit;
				dqp->dq_flags &= ~DQ_FILES;
			} else {
				/* If we're currently over the soft	*/
				/* limit and were previously over the	*/
				/* soft limit then preserve the old	*/
				/* time limit but make sure the		*/
				/* DQ_FILES flag is set since we must	*/
				/* have been previously warned.		*/
				newlim.dqb_ftimelimit = dqp->dq_ftimelimit;
				dqp->dq_flags |= DQ_FILES;
			}
		} else {
			/* Either no quota or under quota, clear time limit */
			newlim.dqb_ftimelimit = 0;
			dqp->dq_flags &= ~DQ_FILES;
		}
	}

	/*
	 * If there was previously no limit and there is now at least
	 * one limit, then any inodes in the cache have NULL d_iquot
	 * fields (getinoquota() returns NULL when there are no limits).
	 */
	if ((dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) &&
	    (newlim.dqb_fhardlimit || newlim.dqb_fsoftlimit ||
	    newlim.dqb_bhardlimit || newlim.dqb_bsoftlimit)) {
		scan_type = SQD_TYPE_LIMIT;
	}

	/*
	 * If there was previously at least one limit and there is now
	 * no limit, then any inodes in the cache have non-NULL d_iquot
	 * fields need to be reset to NULL.
	 */
	else if ((dqp->dq_fhardlimit || dqp->dq_fsoftlimit ||
	    dqp->dq_bhardlimit || dqp->dq_bsoftlimit) &&
	    (newlim.dqb_fhardlimit == 0 && newlim.dqb_fsoftlimit == 0 &&
	    newlim.dqb_bhardlimit == 0 && newlim.dqb_bsoftlimit == 0)) {
		scan_type = SQD_TYPE_NO_LIMIT;
	}

	dqp->dq_dqb = newlim;
	dqp->dq_flags |= DQ_MOD;

	/*
	 * Push the new quota to disk now.  If this is a trans device
	 * then force the page out with ufs_putpage so it will be
	 * deltaed by ufs_startio.
	 */
	qip = ufsvfsp->vfs_qinod;
	rw_enter(&qip->i_contents, RW_WRITER);
	(void) ufs_rdwri(UIO_WRITE, FWRITE | FSYNC, qip, (caddr_t)&dqp->dq_dqb,
	    sizeof (struct dqblk), dqoff(uid), UIO_SYSSPACE,
	    (int *)NULL, kcred);
	rw_exit(&qip->i_contents);

	(void) VOP_PUTPAGE(ITOV(qip), dqoff(dqp->dq_uid) & ~qip->i_fs->fs_bmask,
	    qip->i_fs->fs_bsize, B_INVAL, kcred, NULL);

	/*
	 * We must set the dq_mof even if we are not logging, in case
	 * we later remount to logging.
	 */
	contig = 0;
	rw_enter(&qip->i_contents, RW_WRITER);
	error = bmap_read(qip, dqoff(dqp->dq_uid), &bn, &contig);
	rw_exit(&qip->i_contents);
	if (error || (bn == UFS_HOLE)) {
		dqp->dq_mof = UFS_HOLE;
	} else {
		dqp->dq_mof = ldbtob(bn) +
		    (offset_t)((dqoff(dqp->dq_uid)) & (DEV_BSIZE - 1));
	}

	dqp->dq_flags &= ~DQ_MOD;
	dqput(dqp);
	mutex_exit(&dqp->dq_lock);
	if (scan_type) {
		struct setquota_data sqd;

		sqd.sqd_type = scan_type;
		sqd.sqd_ufsvfsp = ufsvfsp;
		sqd.sqd_uid = uid;
		(void) ufs_scan_inodes(0, setquota_scan_inode, &sqd, ufsvfsp);
	}
	rw_exit(&ufsvfsp->vfs_dqrwlock);
	return (0);
}
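
The dq_mof stored above is the master file offset of this user's dqblk: the byte address of the disk block that bmap_read() resolved, plus the record's offset within its DEV_BSIZE sector. A standalone sketch of the same arithmetic (the 32-byte record size and the dqoff() here are illustrative):

#include <stdio.h>
#include <stdint.h>

#define DEV_BSIZE	512
#define DQBLK_SIZE	32			/* illustrative */
#define dqoff(uid)	((int64_t)(uid) * DQBLK_SIZE)

int
main(void)
{
	uint32_t uid = 1001;
	int64_t bn = 4096;	/* disk block from bmap_read() */

	/* ldbtob(bn) is bn * DEV_BSIZE; add the in-sector offset. */
	int64_t mof = bn * DEV_BSIZE + (dqoff(uid) & (DEV_BSIZE - 1));
	printf("dq_mof = %lld\n", (long long)mof);
	return 0;
}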
Example #12
/*
 * NFS access to vnode file systems.
 *
 * We provide dentry/inode_to_fh() and fh_to_dentry() methods so that the
 * vnode-based file system can hook up its VOP_FID() and VFS_VGET()
 * methods.  The Linux NFS server calls these methods when encoding an
 * object into a file handle to be passed to the client for future
 * use, and when decoding a file handle and looking for the file
 * system object it describes.
 *
 * VOP_FID() takes a vnode and provides a file ID (fid) that can later
 * be presented (in a pair with a VFS pointer) to VFS_VGET() to
 * reconstitute that vnode.  In a Sun ONC-NFS style kernel, VOP_FID()
 * is used twice per file handle, once for the exported directory and
 * once for the object itself.  In Linux, the NFS layer itself handles
 * the export tree checking (depending on the status of
 * NFSEXP_NOSUBTREECHECK), so the file system only needs to fill in
 * the file handle with details for the object itself.  We always
 * provide both object and parent in the file handle to be sure that
 * we don't end up short on file handle space in a future call that
 * requires both.
 *
 * On a call from the NFS client, the Linux NFS layer finds a
 * superblock pointer from the file handle passed by the NFS client,
 * then calls the fh_to_dentry() method to get a dentry.  Sun ONC-NFS
 * kernels call VFS_VGET() on a vfsp, passing the FID portion of the
 * file handle.  In this layer, we unpack the file handle, determine
 * whether the parent or the object is needed, and pass the info along
 * to a VFS_VGET() call.  Once that returns, we look for an attached
 * dentry and use it, or fabricate a new one which NFS will attempt to
 * reconnect to the namespace.
 */
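
Before the implementation, a minimal userspace sketch of the packing scheme described above: two same-length FIDs, object then parent, with the returned "type" equal to the total FID bytes. The total is guaranteed even, so a decoder recovers each FID's length as type / 2. The struct and sizes are illustrative, not the real MDKI_FID_T:

#include <stdio.h>
#include <string.h>

struct sketch_fid {
	unsigned short	fid_len;
	char		fid_data[10];	/* 10-byte vnode-style fid */
};

/* Pack object + parent fids into a file-handle buffer; return the
 * type (total fid bytes, even by construction) or -1 if the two
 * fids differ in length, mirroring the unbalanced-length bailout. */
static int
pack_fh(const struct sketch_fid *obj, const struct sketch_fid *par,
    char *fh)
{
	if (obj->fid_len != par->fid_len)
		return -1;
	memcpy(fh, obj->fid_data, obj->fid_len);
	memcpy(fh + obj->fid_len, par->fid_data, par->fid_len);
	return obj->fid_len * 2;
}

int
main(void)
{
	struct sketch_fid obj = { 10, "objectfid" };
	struct sketch_fid par = { 10, "parentfid" };
	char fh[64];
	int type = pack_fh(&obj, &par, fh);

	printf("type=%d, each fid is %d bytes\n", type, type / 2);
	return 0;
}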
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
int
vnlayer_inode_to_fh(
    struct inode *inode,
    __u32 *fh,
    int *lenp,
    struct inode *parent
)
#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */
int
vnlayer_dentry_to_fh(
    struct dentry *dent,
    __u32 *fh,
    int *lenp,
    int need_parent
)
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */
{
    int error;
    int type;
    int mylen;
    MDKI_FID_T *lfidp = NULL;
    MDKI_FID_T *parent_fidp = NULL;
    mdki_boolean_t bailout_needed = TRUE; /* Assume we'll fail. */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
    SUPER_T *sbp;
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
    struct inode *inode = dent->d_inode;
    struct inode *parent = dent->d_parent->d_inode;
#endif

    /*
     * We use the type byte (return value) to encode the FH length.  Since we
     * always include two FIDs of the same size, the type must be even, so
     * that's how we "encode" the length of each FID (i.e. it is half the total
     * length).
     *
     * Always include parent entry; this makes sure that we only work with NFS
     * protocols that have enough room for our file handles.  (Without this, we
     * may return a directory file handle OK yet be unable to return a plain
     * file handle.)  Currently, we can just barely squeeze two standard
     * 10-byte vnode FIDs into the NFS v2 file handle.  The NFS v3 handle has
     * plenty of room.
     */
    ASSERT(ITOV(inode));
    error = VOP_FID(ITOV(inode), &lfidp);
    if (error != 0) {
        ASSERT(lfidp == NULL);
        goto bailout;
    }

#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
    /* we may be called with a NULL parent */
    if (parent == NULL) {
        /* in this case, fabricate a fake parent */
        parent_fidp = (MDKI_FID_T *) KMEM_ALLOC(MDKI_FID_LEN(lfidp),
                                                KM_SLEEP);
        if (parent_fidp == NULL) {
            MDKI_VFS_LOG(VFS_LOG_ERR, "%s: can't allocate %d bytes\n",
                         __func__,
                         (int) MDKI_FID_LEN(lfidp));
            goto bailout;
        }
        memset(parent_fidp, 0xff, MDKI_FID_LEN(lfidp));
        parent_fidp->fid_len = lfidp->fid_len;
    } else
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0) */
    {
        error = VOP_FID(ITOV(parent), &parent_fidp);
        if (error != 0) {
            ASSERT(parent_fidp == NULL);
            goto bailout;
        }
    }

    /*
     * Our encoding scheme can't tolerate different length FIDs
     * (because otherwise the type wouldn't be guaranteed to be even).
     */
    if (parent_fidp->fid_len != lfidp->fid_len) {
        MDKI_VFS_LOG(VFS_LOG_ERR,
                     "%s: unbalanced parent/child fid lengths: %d, %d\n",
                     __func__, parent_fidp->fid_len, lfidp->fid_len);
        goto bailout;
    }

    /* 
     * The vnode layer needs to release the storage for a fid on
     * Linux.  The VOP_FID() function allocates its own fid in
     * non-error cases.  Other UNIX systems release this storage
     * in the caller of VOP_FID, so we have to do it here.  We
     * copy the vnode-style fid into the caller-allocated space,
     * then free our allocated version here.
     *
     * Remember: vnode lengths count bytes, Linux lengths count __u32
     * units.
     */
    type = parent_fidp->fid_len + lfidp->fid_len; /* Guaranteed even. */
    mylen = roundup(type + MDKI_FID_EXTRA_SIZE, sizeof(*fh));

    if (mylen == VNODE_NFS_FH_TYPE_RESERVED ||
        mylen >= VNODE_NFS_FH_TYPE_ERROR)
    {
        MDKI_VFS_LOG(VFS_LOG_ESTALE,
                     "%s: required length %d out of range (%d,%d)\n",
                     __func__, mylen,
                     VNODE_NFS_FH_TYPE_RESERVED, VNODE_NFS_FH_TYPE_ERROR);
        goto bailout;
    }
    if (((*lenp) * sizeof(*fh)) < mylen) {
        MDKI_VFS_LOG(VFS_LOG_ESTALE,
                     "%s: need %d bytes for FH, have %d\n",
                     __func__, mylen, (int) (sizeof(*fh) * (*lenp)));
        goto bailout;
    }
    /* Copy FIDs into file handle. */
    *lenp = mylen / sizeof(*fh); /* No remainder because of roundup above. */
    BZERO(fh, mylen);           /* Zero whole fh to round up to __u32 boundary */
    BCOPY(lfidp->fid_data, fh, lfidp->fid_len);
    BCOPY(parent_fidp->fid_data, ((caddr_t)fh) + (type / 2),
          parent_fidp->fid_len);

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
    /*
     * For a 64-bit OS, use a 32-bit hash of the SB pointer.
     * For a 32-bit OS, use the pointer itself.
     */
    if (ITOV(inode) == NULL || 
        ITOV(inode)->v_vfsmnt == NULL) {
        MDKI_VFS_LOG(VFS_LOG_ESTALE,
                     "%s: %p is this a MVFS inode?\n",
                     __func__, inode);
        goto bailout;
    } else {
        sbp = ((struct vfsmount *)ITOV(inode)->v_vfsmnt)->mnt_sb;
    }
    MDKI_FID_SET_SB_HASH(fh, type / 2, MDKI_FID_CALC_HASH(sbp));
#endif

    bailout_needed = FALSE;         /* We're home free now. */

    if (bailout_needed) {
  bailout:
        type = VNODE_NFS_FH_TYPE_ERROR;
        *lenp = 0;
    }
#ifdef KMEMDEBUG
    if (lfidp != NULL)
        REAL_KMEM_FREE(lfidp, MDKI_FID_LEN(lfidp));
    if (parent_fidp != NULL)
        REAL_KMEM_FREE(parent_fidp, MDKI_FID_LEN(parent_fidp));
#else
    if (lfidp != NULL)
        KMEM_FREE(lfidp, MDKI_FID_LEN(lfidp));
    if (parent_fidp != NULL)
        KMEM_FREE(parent_fidp, MDKI_FID_LEN(parent_fidp));
#endif
    return type;
}
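To make the encoding in vnlayer_inode_to_fh() concrete, here is a hedged user-space sketch of the layout the comments describe: the object FID at byte 0, the parent FID at byte type/2, the whole handle zero-padded up to a __u32 boundary, and the (even) combined FID length doubling as the returned type. All names are illustrative assumptions.

#include <stdint.h>
#include <string.h>

#define FID_LEN_DEMO 10		/* classic 10-byte vnode FID, as noted above */

/* Pack child and parent FIDs into a __u32-based handle buffer.
 * Returns the "type" (combined FID length, guaranteed even), or -1 if
 * the caller's buffer is too small. */
static int pack_fh_demo(uint32_t *fh, size_t fh_bytes,
                        const unsigned char child[FID_LEN_DEMO],
                        const unsigned char parent[FID_LEN_DEMO])
{
    int type = 2 * FID_LEN_DEMO;	/* two equal-size FIDs => even */
    size_t need = (type + sizeof(*fh) - 1) & ~(sizeof(*fh) - 1);

    if (fh_bytes < need)
        return -1;
    memset(fh, 0, need);		/* zero tail up to __u32 boundary */
    memcpy(fh, child, FID_LEN_DEMO);	/* object FID at offset 0 */
    memcpy((unsigned char *)fh + type / 2, parent, FID_LEN_DEMO);
    return type;			/* half of this is each FID's length */
}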
Example #13
0
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
extern void
mvfs_linux_umount_begin(
    SUPER_T *super_p
)
#else
extern void
mvfs_linux_umount_begin(
    struct vfsmount * mnt,
    int flags
)
#endif
{
    VNODE_T *vp;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
    struct vfsmount *mnt;
#else
    /*
     * From 2.6.18 up to (but not including) 2.6.27 we get mnt as a
     * parameter, but we still need super_p.
     */
    SUPER_T *super_p = mnt->mnt_sb;
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
    int mount_count = 0;
#endif

    ASSERT(super_p != NULL);
    ASSERT(super_p->s_root != NULL);
    vp = ITOV(super_p->s_root->d_inode);
    ASSERT(vp != NULL);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) || \
    LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
    mnt = VTOVFSMNT(vp);
#else
    /* Check that the mountpoint passed in matches the one
     * from the vp that we are going to clear.  Skip it otherwise.
     * We know from experience that this can happen when unmounting
     * loopback (bind) mounts.
     */
    if (mnt != VTOVFSMNT(vp))
        return;
#endif
    /* Note that there is no mechanism for restoring the mount pointer
     * in the vnode if an error happens later on in the umount.  This is
     * the only callback into the mvfs during umount.  So far this has not
     * been a problem and if we don't do this here, the umount will never
     * succeed because the Linux code expects the mnt_count to be 2.
     * The count is 3 at this point from the initial allocation of the 
     * vfsmnt structure, the path_lookup call in this umount call and 
     * from when we placed the pointer in the vp.  
     */
    if (mnt == NULL) {
        MDKI_VFS_LOG(VFS_LOG_ERR, "%s: mnt is NULL\n", __FUNCTION__);
        return;
    }
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
    mount_count = MDKI_READ_MNT_COUNT(mnt);
    if (mount_count == 3) {
        MDKI_MNTPUT(mnt);
        SET_VTOVFSMNT(vp, NULL);
    }
#else
    /*
     * may_umount() returns nonzero when the ref counter is 2 (among other
     * conditions).  We hold an extra ref, so drop it to test may_umount();
     * if the mount is not ready to be unmounted, the put is reverted.
     */
    MDKI_MNTPUT(mnt);
    if (may_umount(mnt)) {
        SET_VTOVFSMNT(vp, NULL);
    } else {
        /* not ready yet */
        MDKI_MNTGET(mnt);
    }
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) */
}
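Both version-dependent branches above implement the same idea: release our stashed reference, test whether the mount is otherwise idle, and revert the put if it is not. A hedged user-space analogy using a plain atomic counter (all names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

/* Analogue of the vfsmount refcount: allocation + path_lookup + the
 * pointer we stashed in the vnode, as the comment above describes. */
static atomic_int mnt_refs_demo = 3;

static bool release_stashed_ref_demo(void)
{
    atomic_fetch_sub(&mnt_refs_demo, 1);	/* MDKI_MNTPUT() analogue */
    if (atomic_load(&mnt_refs_demo) == 2) {	/* may_umount() analogue */
        /* Safe to forget the stashed pointer: SET_VTOVFSMNT(vp, NULL). */
        return true;
    }
    atomic_fetch_add(&mnt_refs_demo, 1);	/* not ready: revert the put */
    return false;
}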
Example #14
0
/*
 * Check if source directory is in the path of the target directory.
 * Target is supplied locked, source is unlocked.
 * The target is always vput before returning.
 */
int
ext2fs_checkpath(struct inode *source, struct inode *target,
   struct ucred *cred)
{
	struct vnode *vp;
	int error, rootino, namlen;
	struct ext2fs_dirtemplate dirbuf;
	u_int32_t ino;

	vp = ITOV(target);
	if (target->i_number == source->i_number) {
		error = EEXIST;
		goto out;
	}
	rootino = ROOTINO;
	error = 0;
	if (target->i_number == rootino)
		goto out;

	for (;;) {
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}
		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
			sizeof (struct ext2fs_dirtemplate), (off_t)0,
			UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
			curproc);
		if (error != 0)
			break;
		namlen = dirbuf.dotdot_namlen;
		if (namlen != 2 ||
			dirbuf.dotdot_name[0] != '.' ||
			dirbuf.dotdot_name[1] != '.') {
			error = ENOTDIR;
			break;
		}
		ino = fs2h32(dirbuf.dotdot_ino);
		if (ino == source->i_number) {
			error = EINVAL;
			break;
		}
		if (ino == rootino)
			break;
		vput(vp);
		error = VFS_VGET(vp->v_mount, ino, &vp);
		if (error != 0) {
			vp = NULL;
			break;
		}
	}

out:
	if (error == ENOTDIR) {
		printf("checkpath: .. not a directory\n");
		panic("checkpath");
	}
	if (vp != NULL)
		vput(vp);
	return (error);
}
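The loop in ext2fs_checkpath() is a classic ancestor walk: follow ".." from the target up to the root, failing if the source is ever encountered. A hedged sketch with a toy parent table standing in for the on-disk ".." reads:

#include <errno.h>

/* parent[i] is the inode number of i's parent; root is its own parent. */
static int checkpath_demo(const int *parent, int root, int source, int target)
{
	int ino = target;

	if (target == source)
		return EEXIST;		/* renaming a directory onto itself */
	while (ino != root) {
		ino = parent[ino];	/* the dotdot_ino read in the real code */
		if (ino == source)
			return EINVAL;	/* source is an ancestor of target */
	}
	return 0;
}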
Example #15
0
/* This is really VOP_SETATTR() in sheep's clothing */
int
vnode_iop_notify_change(
    DENT_T *dent_p,
    struct iattr * iattr_p
)
{
    VNODE_T *vp;
    VATTR_T *vap;
    VNODE_T *cvp;
    int err = 0;
    DENT_T *rdent;
    CALL_DATA_T cd;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
    mdki_boolean_t tooksem = FALSE;
#endif

    if (iattr_p->ia_valid & ATTR_SIZE) {
        ASSERT_I_SEM_MINE(dent_p->d_inode);
    }

    if (MDKI_INOISMVFS(dent_p->d_inode)) {
        vap = VATTR_ALLOC();
        if (vap != NULL) {
            vnode_iop_iattr2vattr(iattr_p, vap);

            /* reject attempts to use setattr to change object type */
            vap->va_mask &= ~AT_TYPE;
            mdki_linux_init_call_data(&cd);
            vp = ITOV(dent_p->d_inode);
            err = VOP_SETATTR(vp, vap, 0, &cd);
            err = mdki_errno_unix_to_linux(err);
            /* Any underlying cleartxt got its inode truncated via changeattr
             * if there's a need to change its size.
             */
            if (!err)
                mdki_linux_vattr_pullup(vp, vap, vap->va_mask);
            VATTR_FREE(vap);
            mdki_linux_destroy_call_data(&cd);
        } else {
            err = -ENOMEM;
        }
    } else {
        rdent = REALDENTRY_LOCKED(dent_p, &cvp);
        VNODE_DGET(rdent);
        if (rdent && rdent->d_inode) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
            err = inode_setattr(dent_p->d_inode, iattr_p);
            if (err == 0) {
                if (iattr_p->ia_valid & ATTR_SIZE) {
                    LOCK_INODE(rdent->d_inode);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined RHEL_UPDATE || RHEL_UPDATE < 5
                    down_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    /*
                     * be paranoid and record the 'taken'ness in case
                     * the called function squashes ia_valid (as is
                     * done in nfs_setattr).
                     */
                    tooksem = TRUE;
                }
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
                if (tooksem) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
#if !defined(RHEL_UPDATE) || RHEL_UPDATE < 5
                    up_write(&rdent->d_inode->i_alloc_sem);
#endif
#endif
                    UNLOCK_INODE(rdent->d_inode);
                }
            }
#else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
            err = simple_setattr(dent_p, iattr_p);
            if (err == 0)
                err = MDKI_NOTIFY_CHANGE(rdent, CVN_TO_VFSMNT(cvp), iattr_p);
#endif /* else LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) */
        } else {
            /* It looks as though someone removed the realdentry on us.
             * I am not sure why this should happen.
             */
            err = -ENOENT;
        }
        if (rdent) {
            VNODE_DPUT(rdent);
            REALDENTRY_UNLOCK(dent_p, cvp);
        }
    }
    return err;
}
Example #16
0
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
lfs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
	       daddr_t lastbn, int level, daddr_t *countp,
	       daddr_t *rcountp, long *lastsegp, size_t *bcp)
{
	int i;
	struct buf *bp;
	struct lfs *fs = ip->i_lfs;
	int32_t *bap;	/* XXX ondisk32 */
	struct vnode *vp;
	daddr_t nb, nlbn, last;
	int32_t *copy = NULL;	/* XXX ondisk32 */
	daddr_t blkcount, rblkcount, factor;
	int nblocks;
	daddr_t blocksreleased = 0, real_released = 0;
	int error = 0, allerror = 0;

	ASSERT_SEGLOCK(fs);
	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= LFS_NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = lfs_btofsb(fs, lfs_sb_getbsize(fs));
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the b_blkno field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, lfs_sb_getbsize(fs), 0, 0);
	if (bp->b_oflags & (BO_DONE | BO_DELWRI)) {
		/* Braces must be here in case trace evaluates to nothing. */
		trace(TR_BREADHIT, pack(vp, lfs_sb_getbsize(fs)), lbn);
	} else {
		trace(TR_BREADMISS, pack(vp, lfs_sb_getbsize(fs)), lbn);
		curlwp->l_ru.ru_inblock++; /* pay for read */
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("lfs_indirtrunc: bad buffer size");
		bp->b_blkno = LFS_FSBTODB(fs, dbn);
		VOP_STRATEGY(vp, bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp, 0);
		*countp = *rcountp = 0;
		return (error);
	}

	bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
	if (lastbn >= 0) {
		copy = lfs_malloc(fs, lfs_sb_getbsize(fs), LFS_NB_IBLOCK);
		memcpy((void *)copy, (void *)bap, lfs_sb_getbsize(fs));
		memset((void *)&bap[last + 1], 0,
		/* XXX ondisk32 */
		  (u_int)(LFS_NINDIR(fs) - (last + 1)) * sizeof (int32_t));
		error = VOP_BWRITE(bp->b_vp, bp);
		if (error)
			allerror = error;
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = LFS_NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       (daddr_t)-1, level - 1,
					       &blkcount, &rblkcount,
					       lastsegp, bcp);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
			real_released += rblkcount;
		}
		lfs_blkfree(fs, ip, nb, lfs_sb_getbsize(fs), lastsegp, bcp);
		if (bap[i] > 0)
			real_released += nblocks;
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = lfs_indirtrunc(ip, nlbn, nb,
					       last, level - 1, &blkcount,
					       &rblkcount, lastsegp, bcp);
			if (error)
				allerror = error;
			real_released += rblkcount;
			blocksreleased += blkcount;
		}
	}

	if (copy != NULL) {
		lfs_free(fs, copy, LFS_NB_IBLOCK);
	} else {
		mutex_enter(&bufcache_lock);
		if (bp->b_oflags & BO_DELWRI) {
			LFS_UNLOCK_BUF(bp);
			lfs_sb_addavail(fs, lfs_btofsb(fs, bp->b_bcount));
			wakeup(&fs->lfs_availsleep);
		}
		brelsel(bp, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}

	*countp = blocksreleased;
	*rcountp = real_released;
	return (allerror);
}
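The "factor" computed at the top of lfs_indirtrunc() is what maps a file-relative block number onto a slot index inside an indirect block: at indirection level L, each slot covers NINDIR^(L-1) data blocks. A hedged arithmetic sketch, with an assumed NINDIR value:

#define SINGLE_DEMO	1
#define NINDIR_DEMO	1024	/* e.g. 4K block / 4-byte disk pointers */

/* Index within the current indirect block of the last block to keep;
 * a negative lastbn means "the whole block", so no index is needed. */
static long last_kept_index_demo(long lastbn, int level)
{
	long factor = 1;
	int i;

	for (i = SINGLE_DEMO; i < level; i++)
		factor *= NINDIR_DEMO;
	return lastbn > 0 ? lastbn / factor : lastbn;
}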
Example #17
0
extern int
vnode_iop_create(
    INODE_T * parent,
    struct dentry * dentry,
    int mode,
    struct nameidata *nd
)
{
    int err = 0;
    VATTR_T *vap;
    VNODE_T *newvp;
    struct create_ctx ctx;
    CALL_DATA_T cd;

    ASSERT_I_SEM_MINE(parent);
    ASSERT(MDKI_INOISMVFS(parent));

    vap = VATTR_ALLOC();
    if (vap == NULL)
        return -ENOMEM;
    VATTR_NULL(vap);
    mdki_linux_init_call_data(&cd);
    /*
     * Solaris sends only type, mode, size, so we will too.
     */
    vap->va_type = VREG;
    vap->va_mode = mode & ~S_IFMT;
    vap->va_size = 0;
    vap->va_mask = AT_TYPE|AT_MODE|AT_SIZE;
    newvp = NULL;
    dentry->d_inode = NULL;
    ctx.dentry = dentry;
    ctx.parent = parent;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38)
    /* break any rcu-walk in progress */
# if defined(MRG)
    write_seqlock_barrier(&dentry->d_lock);
# else /* defined (MRG) */
    write_seqcount_barrier(&dentry->d_seq);
# endif /* else defined (MRG) */
#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,38) */
    err = VOP_CREATE(ITOV(parent),
                     (/* drop const */ char *) dentry->d_name.name,
                     vap,
                     NONEXCL, /* XXX handled by generic layer? */
                     mode, /* not used except for passthrough, see vap->va_mode */
                     &newvp,
                     &cd,
                     &ctx);
    err = mdki_errno_unix_to_linux(err);

    /* dentry reference uses the hold count from a successful create */
    if (!err) {
        if (dentry->d_inode == NULL) {
            /* Not a shadow object */
            ASSERT(newvp != NULL);
            ASSERT(VTOI(newvp) != NULL);
            VNODE_D_INSTANTIATE(dentry, VTOI(newvp));
            VATTR_SET_MASK(vap, AT_ALL);
            if (VOP_GETATTR(newvp, vap, 0, &cd) == 0)
                mdki_linux_vattr_pullup(newvp, vap, AT_ALL);
        } else {
            /* drop the extra ref returned in newvp */
            VN_RELE(newvp);
        }
        /* I nuked the code checking not VCHR, VREG--we are always VREG */
    } else {
        ASSERT(!dentry->d_inode);
        ASSERT(!newvp);
    }
    VATTR_FREE(vap);
    mdki_linux_destroy_call_data(&cd);
    return(err);
}
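The create path above sends exactly what Solaris sends: type, mode, and size, with va_mask naming which fields are valid. A hedged sketch of that idiom; the constants and struct below are illustrative stand-ins, not the real MVFS definitions.

#include <sys/stat.h>
#include <sys/types.h>

#define AT_TYPE_DEMO	0x0001
#define AT_MODE_DEMO	0x0002
#define AT_SIZE_DEMO	0x0008

struct vattr_demo {
    unsigned va_mask;		/* which fields below are valid */
    int      va_type;		/* VREG, VDIR, ... (0 stands in for VREG) */
    mode_t   va_mode;
    off_t    va_size;
};

static void vattr_for_create_demo(struct vattr_demo *vap, mode_t mode)
{
    vap->va_type = 0;			/* VREG */
    vap->va_mode = mode & ~S_IFMT;	/* strip file-type bits from mode */
    vap->va_size = 0;			/* a new regular file starts empty */
    vap->va_mask = AT_TYPE_DEMO | AT_MODE_DEMO | AT_SIZE_DEMO;
}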
Example #18
0
/*
 * Check if source directory is in the path of the target directory.
 * Target is supplied locked, source is unlocked.
 * The target is always vput before returning.
 */
int
ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
{
	struct vnode *vp;
	int error, rootino, namlen;
	struct dirtemplate dirbuf;

	vp = ITOV(target);
	if (target->i_number == source->i_number) {
		error = EEXIST;
		goto out;
	}
	rootino = ROOTINO;
	error = 0;
	if (target->i_number == rootino)
		goto out;

	for (;;) {
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}
		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
			sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
			IO_NODELOCKED, cred, NULL, (struct proc *)0);
		if (error != 0)
			break;
#		if (BYTE_ORDER == LITTLE_ENDIAN)
			if (vp->v_mount->mnt_maxsymlinklen > 0)
				namlen = dirbuf.dotdot_namlen;
			else
				namlen = dirbuf.dotdot_type;
#		else
			namlen = dirbuf.dotdot_namlen;
#		endif
		if (namlen != 2 ||
		    dirbuf.dotdot_name[0] != '.' ||
		    dirbuf.dotdot_name[1] != '.') {
			error = ENOTDIR;
			break;
		}
		if (dirbuf.dotdot_ino == source->i_number) {
			error = EINVAL;
			break;
		}
		if (dirbuf.dotdot_ino == rootino)
			break;
		vput(vp);
		error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp);
		if (error) {
			vp = NULL;
			break;
		}
	}

out:
	if (error == ENOTDIR)
		printf("checkpath: .. not a directory\n");
	if (vp != NULL)
		vput(vp);
	return (error);
}
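The BYTE_ORDER block above exists because the old UFS directory format stored d_namlen as a 16-bit field where the new format keeps a one-byte d_type followed by a one-byte d_namlen; on a little-endian machine the significant low byte of the old 16-bit namlen sits in the byte the new format calls d_type. A hedged sketch of the selection for the little-endian case, assuming mnt_maxsymlinklen > 0 marks the new format:

struct dirtemplate_demo {		/* new-format header layout */
	unsigned int   dotdot_ino;
	unsigned short dotdot_reclen;
	unsigned char  dotdot_type;	/* old format on LE: namlen low byte */
	unsigned char  dotdot_namlen;
	char           dotdot_name[4];
};

/* Pick the name length for "..": in the old format namlen was a 16-bit
 * field covering both bytes, so little-endian machines find its value in
 * the byte the new format calls d_type. */
static int dotdot_namlen_demo(const struct dirtemplate_demo *d, int newformat)
{
	return newformat ? d->dotdot_namlen : d->dotdot_type;
}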
Example #19
0
extern int
vnode_iop_link(
    DENT_T * olddent,
    INODE_T * parent,
    DENT_T * newdent
)
{
    int err = 0;
    struct link_ctx ctx;
    VATTR_T *vap;
    VNODE_T *parentvp;

    ASSERT_I_SEM_MINE(olddent->d_inode);
    ASSERT_I_SEM_MINE(parent);
    ASSERT(MDKI_INOISMVFS(parent));

    if (!vnlayer_link_eligible(olddent))
        return -EXDEV;

    /* VOP_REALVP will check that the parent is a loopback directory and
     * return EINVAL if it isn't.
     */
    if (VOP_REALVP(ITOV(parent), &parentvp) == 0) {
        /* We are creating a shadow link so bypass the mvfs for the rest */
        err = vnlayer_do_linux_link(parentvp, olddent, parent, newdent);
        err = mdki_errno_unix_to_linux(err);
    } else {
        /* This needs to be passed on to the mvfs to deal with */
        CALL_DATA_T cd;
        INODE_T *inode;
        if (!MDKI_INOISOURS(olddent->d_inode))
            return -EXDEV;
        ctx.parent = parent;
        ctx.newdent = newdent;
        ctx.olddent = olddent;
        ctx.done = FALSE;

        mdki_linux_init_call_data(&cd);
        if (MDKI_INOISMVFS(olddent->d_inode)) {
            err = VOP_LINK(ITOV(parent), ITOV(olddent->d_inode),
                           (char *)newdent->d_name.name, &cd, &ctx);
            err = mdki_errno_unix_to_linux(err);
            if (err == 0 && !ctx.done) {
                /* Again, a heavy handed way of bumping the inode count and
                 * handling the locking (This will use the inode lock)
                 */
                inode = igrab(olddent->d_inode);
                VNODE_D_INSTANTIATE(newdent, inode);
                if ((vap = VATTR_ALLOC()) != NULL) {
                    VATTR_SET_MASK(vap, AT_ALL);
                    if (VOP_GETATTR(ITOV(inode), vap, 0, &cd) == 0)
                        mdki_linux_vattr_pullup(ITOV(inode), vap, AT_ALL);
                    VATTR_FREE(vap);
                }
            }
        } else {
            err = -EXDEV;
        }
        mdki_linux_destroy_call_data(&cd);
    }
    return err;
}
Example #20
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ext2fs_balloc(struct inode *ip, daddr_t bn, int size,
    kauth_cred_t cred, struct buf **bpp, int flags)
{
	struct m_ext2fs *fs;
	daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[EXT2FS_NIADDR + 2];
	daddr_t newb, lbn, pref;
	int32_t *bap;	/* XXX ondisk32 */
	int num, i, error;
	u_int deallocated;
	daddr_t *blkp, *allocblk, allociblk[EXT2FS_NIADDR + 1];
	int32_t *allocib;	/* XXX ondisk32 */
	int unwindidx = -1;
	UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);

	if (bpp != NULL) {
		*bpp = NULL;
	}
	if (bn < 0)
		return (EFBIG);
	fs = ip->i_e2fs;
	lbn = bn;

	/*
	 * The first EXT2FS_NDADDR blocks are direct blocks
	 */
	if (bn < EXT2FS_NDADDR) {
		/* XXX ondisk32 */
		nb = fs2h32(ip->i_e2fs_blocks[bn]);
		if (nb != 0) {

			/*
			 * the block is already allocated, just read it.
			 */

			if (bpp != NULL) {
				error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
					      B_MODIFY, &bp);
				if (error) {
					return (error);
				}
				*bpp = bp;
			}
			return (0);
		}

		/*
		 * allocate a new direct block.
		 */

		error = ext2fs_alloc(ip, bn,
		    ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
		    cred, &newb);
		if (error)
			return (error);
		ip->i_e2fs_last_lblk = lbn;
		ip->i_e2fs_last_blk = newb;
		/* XXX ondisk32 */
		ip->i_e2fs_blocks[bn] = h2fs32((int32_t)newb);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (bpp != NULL) {
			bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
			bp->b_blkno = EXT2_FSBTODB(fs, newb);
			if (flags & B_CLRBUF)
				clrbuf(bp);
			*bpp = bp;
		}
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	/* XXX ondisk32 */
	nb = fs2h32(ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off]);
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0);
		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
		if (error)
			return (error);
		nb = newb;
		*allocblk++ = nb;
		ip->i_e2fs_last_blk = newb;
		bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0);
		bp->b_blkno = EXT2_FSBTODB(fs, newb);
		clrbuf(bp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_e2fs_blocks[EXT2FS_NDADDR + indirs[0].in_off];
		/* XXX ondisk32 */
		*allocib = h2fs32((int32_t)newb);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = fs2h32(bap[indirs[i].in_off]);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		pref = ext2fs_blkpref(ip, lbn, 0, (int32_t *)0);
		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		ip->i_e2fs_last_blk = newb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0);
		nbp->b_blkno = EXT2_FSBTODB(fs, nb);
		clrbuf(nbp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		/* XXX ondisk32 */
		bap[indirs[i - 1].in_off] = h2fs32((int32_t)nb);
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]);
		error = ext2fs_alloc(ip, lbn, pref, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		ip->i_e2fs_last_lblk = lbn;
		ip->i_e2fs_last_blk = newb;
		/* XXX ondisk32 */
		bap[indirs[num].in_off] = h2fs32((int32_t)nb);
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		if (bpp != NULL) {
			nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
			nbp->b_blkno = EXT2_FSBTODB(fs, nb);
			if (flags & B_CLRBUF)
				clrbuf(nbp);
			*bpp = nbp;
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
				      B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
			nbp->b_blkno = EXT2_FSBTODB(fs, nb);
		}
		*bpp = nbp;
	}
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ext2fs_blkfree(ip, *blkp);
		deallocated += fs->e2fs_bsize;
	}
	if (unwindidx >= 0) {
		if (unwindidx == 0) {
			*allocib = 0;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->e2fs_bsize, NOCRED, B_MODIFY, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				if (flags & B_SYNC)
					bwrite(bp);
				else
					bdwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize,
			    0, 0);
			brelse(bp, BC_INVAL);
		}
	}
	if (deallocated) {
		ext2fs_setnblock(ip, ext2fs_nblock(ip) - btodb(deallocated));
		ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE;
	}
	return error;
}
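A pattern worth isolating from ext2fs_balloc() is the failure unwind: every block allocated on the way down is recorded in allociblk[], so the fail: path can free exactly those blocks and subtract the space from the inode. A hedged sketch of that bookkeeping (names and the fixed array size are assumptions):

#include <stddef.h>

#define MAXALLOC_DEMO 8		/* EXT2FS_NIADDR + 1 in the real code */

struct alloc_track_demo {
	long blks[MAXALLOC_DEMO];
	size_t n;
};

/* "*allocblk++ = nb;" in the real code. */
static void remember_block_demo(struct alloc_track_demo *t, long blk)
{
	if (t->n < MAXALLOC_DEMO)
		t->blks[t->n++] = blk;
}

/* The fail: loop: free what we allocated, report how many to subtract. */
static size_t unwind_demo(struct alloc_track_demo *t, void (*blkfree)(long))
{
	size_t i, freed;

	for (i = 0; i < t->n; i++)
		blkfree(t->blks[i]);
	freed = t->n;
	t->n = 0;
	return freed;	/* caller scales by block size and fixes i_blocks */
}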
Example #21
0
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
int
ffs_indirtrunc(struct inode *ip, daddr64_t lbn, daddr64_t dbn,
    daddr64_t lastbn, int level, long *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	struct vnode *vp;
	void *copy = NULL;
	daddr64_t nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;
	int32_t *bap1 = NULL;
#ifdef FFS2
	int64_t *bap2 = NULL;
#endif

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the b_blkno field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0);
	if (!(bp->b_flags & (B_DONE | B_DELWRI))) {
		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
		bcstats.pendingreads++;
		bcstats.numreads++;
		bp->b_flags |= B_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ffs_indirtrunc: bad buffer size");
		bp->b_blkno = dbn;
		VOP_STRATEGY(bp);
		error = biowait(bp);
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

#ifdef FFS2
	if (ip->i_ump->um_fstype == UM_UFS2)
		bap2 = (int64_t *)bp->b_data;
	else
#endif
		bap1 = (int32_t *)bp->b_data;

	if (lastbn != -1) {
		copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
		bcopy(bp->b_data, copy, (u_int) fs->fs_bsize);

		for (i = last + 1; i < NINDIR(fs); i++)
			BAP_ASSIGN(ip, i, 0);

		if (!DOINGASYNC(vp)) {
			error = bwrite(bp);
			if (error)
				allerror = error;
		} else {
			bawrite(bp);
		}

#ifdef FFS2
		if (ip->i_ump->um_fstype == UM_UFS2)
			bap2 = (int64_t *)copy;
		else
#endif
			bap1 = (int32_t *)copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = BAP(ip, i);
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
					       (daddr64_t)-1, level - 1,
					       &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = BAP(ip, i);
		if (nb != 0) {
			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
					       last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	if (copy != NULL) {
		free(copy, M_TEMP);
	} else {
		bp->b_flags |= B_INVAL;
		brelse(bp);
	}
		
	*countp = blocksreleased;
	return (allerror);
}
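The subtle step in ffs_indirtrunc() (and its LFS and ext2 cousins above) is updating the on-disk copy first: the buffer written back has the doomed slots zeroed, while the freeing walk reads from a private snapshot of the old pointers. A hedged sketch, assuming 0 <= last < nindir:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Snapshot the 32-bit pointer array, then zero entries past `last` in the
 * buffer that will be written back to disk.  Returns the snapshot the
 * caller should free blocks from (NULL on allocation failure). */
static int32_t *
snapshot_and_zero_demo(int32_t *bap, int nindir, int last)
{
	int32_t *copy = malloc(nindir * sizeof(*bap));

	if (copy == NULL)
		return NULL;
	memcpy(copy, bap, nindir * sizeof(*bap));	/* keep old pointers */
	memset(&bap[last + 1], 0,
	    (nindir - (last + 1)) * sizeof(*bap));	/* disk copy first */
	return copy;
}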
Example #22
0
/*
 * Rename system call.
 * 	rename("foo", "bar");
 * is essentially
 *	unlink("bar");
 *	link("foo", "bar");
 *	unlink("foo");
 * but ``atomically''.  Can't do full commit without saving state in the
 * inode on disk which isn't feasible at this time.  Best we can do is
 * always guarantee the target exists.
 *
 * Basic algorithm is:
 *
 * 1) Bump link count on source while we're linking it to the
 *    target.  This also ensure the inode won't be deleted out
 *    from underneath us while we work (it may be truncated by
 *    a concurrent `trunc' or `open' for creation).
 * 2) Link source to destination.  If destination already exists,
 *    delete it first.
 * 3) Unlink source reference to inode if still around. If a
 *    directory was moved and the parent of the destination
 *    is different from the source, patch the ".." entry in the
 *    directory.
 */
int
ext2fs_rename(void *v)
{
	struct vop_rename_args  *ap = v;
	struct vnode *tvp = ap->a_tvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct inode *ip, *xp, *dp;
	struct proc *p = fcnp->cn_proc;
	struct ext2fs_dirtemplate dirbuf;
	/* struct timespec ts; */
	int doingdirectory = 0, oldparent = 0, newparent = 0;
	int error = 0;
	u_char namlen;

#ifdef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("ext2fs_rename: no name");
#endif
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
abortit:
		VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	/*
	 * Check if just deleting a link name.
	 */
	if (tvp && ((VTOI(tvp)->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND)) ||
	    (VTOI(tdvp)->i_e2fs_flags & EXT2_APPEND))) {
		error = EPERM;
		goto abortit;
	}
	if (fvp == tvp) {
		if (fvp->v_type == VDIR) {
			error = EINVAL;
			goto abortit;
		}

		/* Release destination completely. */
		VOP_ABORTOP(tdvp, tcnp);
		vput(tdvp);
		vput(tvp);

		/* Delete source. */
		vrele(fdvp);
		vrele(fvp);
		fcnp->cn_flags &= ~MODMASK;
		fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
		if ((fcnp->cn_flags & SAVESTART) == 0)
			panic("ext2fs_rename: lost from startdir");
		fcnp->cn_nameiop = DELETE;
		(void) vfs_relookup(fdvp, &fvp, fcnp);
		return (VOP_REMOVE(fdvp, fvp, fcnp));
	}
	if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
		goto abortit;
	dp = VTOI(fdvp);
	ip = VTOI(fvp);
	if ((nlink_t)ip->i_e2fs_nlink >= LINK_MAX) {
		VOP_UNLOCK(fvp, 0);
		error = EMLINK;
		goto abortit;
	}
	if ((ip->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND)) ||
		(dp->i_e2fs_flags & EXT2_APPEND)) {
		VOP_UNLOCK(fvp, 0);
		error = EPERM;
		goto abortit;
	}
	if ((ip->i_e2fs_mode & IFMT) == IFDIR) {
		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
		if (!error && tvp)
			error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred);
		if (error) {
			VOP_UNLOCK(fvp, 0);
			error = EACCES;
			goto abortit;
		}
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    dp == ip ||
			(fcnp->cn_flags&ISDOTDOT) ||
			(tcnp->cn_flags & ISDOTDOT) ||
		    (ip->i_flag & IN_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		ip->i_flag |= IN_RENAME;
		oldparent = dp->i_number;
		doingdirectory++;
	}
	vrele(fdvp);

	/*
	 * When the target exists, both the directory
	 * and target vnodes are returned locked.
	 */
	dp = VTOI(tdvp);
	xp = NULL;
	if (tvp)
		xp = VTOI(tvp);

	/*
	 * 1) Bump link count while we're moving stuff
	 *    around.  If we crash somewhere before
	 *    completing our work, the link count
	 *    may be wrong, but correctable.
	 */
	ip->i_e2fs_nlink++;
	ip->i_flag |= IN_CHANGE;
	if ((error = ext2fs_update(ip, NULL, NULL, 1)) != 0) {
		VOP_UNLOCK(fvp, 0);
		goto bad;
	}

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) then the source directory must not be in the
	 * directory hierarchy above the target, as this would
	 * orphan everything below the source directory. Also
	 * the user must have write permission in the source so
	 * as to be able to change "..". We must repeat the call 
	 * to namei, as the parent directory is unlocked by the
	 * call to checkpath().
	 */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
	VOP_UNLOCK(fvp, 0);
	if (oldparent != dp->i_number)
		newparent = dp->i_number;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (xp != NULL)
			vput(tvp);
		error = ext2fs_checkpath(ip, dp, tcnp->cn_cred);
		if (error != 0)
			goto out;
		if ((tcnp->cn_flags & SAVESTART) == 0)
			panic("ext2fs_rename: lost to startdir");
		if ((error = vfs_relookup(tdvp, &tvp, tcnp)) != 0)
			goto out;
		dp = VTOI(tdvp);
		xp = NULL;
		if (tvp)
			xp = VTOI(tvp);
	}
	/*
	 * 2) If target doesn't exist, link the target
	 *    to the source and unlink the source. 
	 *    Otherwise, rewrite the target directory
	 *    entry to reference the source inode and
	 *    expunge the original entry's existence.
	 */
	if (xp == NULL) {
		if (dp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Account for ".." in new directory.
		 * When source and destination have the same
		 * parent we don't fool with the link count.
		 */
		if (doingdirectory && newparent) {
			if ((nlink_t)dp->i_e2fs_nlink >= LINK_MAX) {
				error = EMLINK;
				goto bad;
			}
			dp->i_e2fs_nlink++;
			dp->i_flag |= IN_CHANGE;
			if ((error = ext2fs_update(dp, NULL, NULL, 1)) != 0)
				goto bad;
		}
		error = ext2fs_direnter(ip, tdvp, tcnp);
		if (error != 0) {
			if (doingdirectory && newparent) {
				dp->i_e2fs_nlink--;
				dp->i_flag |= IN_CHANGE;
				(void)ext2fs_update(dp, NULL, NULL, 1);
			}
			goto bad;
		}
		vput(tdvp);
	} else {
		if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev)
			panic("rename: EXDEV");
		/*
		 * Short circuit rename(foo, foo).
		 */
		if (xp->i_number == ip->i_number)
			panic("rename: same file");
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((dp->i_e2fs_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
		    tcnp->cn_cred->cr_uid != dp->i_e2fs_uid &&
		    xp->i_e2fs_uid != tcnp->cn_cred->cr_uid) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if ((xp->i_e2fs_mode & IFMT) == IFDIR) {
			if (!ext2fs_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
				xp->i_e2fs_nlink > 2) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		error = ext2fs_dirrewrite(dp, ip, tcnp);
		if (error != 0)
			goto bad;
		/*
		 * If the target directory is in the same
		 * directory as the source directory,
		 * decrement the link count on the parent
		 * of the target directory.
		 */
		if (doingdirectory && !newparent) {
			dp->i_e2fs_nlink--;
			dp->i_flag |= IN_CHANGE;
		}
		vput(tdvp);
		/*
		 * Adjust the link count of the target to
		 * reflect the dirrewrite above.  If this is
		 * a directory it is empty and there are
		 * no links to it, so we can squash the inode and
		 * any space associated with it.  We disallowed
		 * renaming over top of a directory with links to
		 * it above, as the remaining link would point to
		 * a directory without "." or ".." entries.
		 */
		xp->i_e2fs_nlink--;
		if (doingdirectory) {
			if (--xp->i_e2fs_nlink != 0)
				panic("rename: linked directory");
			error = ext2fs_truncate(xp, (off_t)0, IO_SYNC,
			    tcnp->cn_cred);
		}
		xp->i_flag |= IN_CHANGE;
		vput(tvp);
		xp = NULL;
	}

	/*
	 * 3) Unlink the source.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	if ((fcnp->cn_flags & SAVESTART) == 0)
		panic("ext2fs_rename: lost from startdir");
	(void) vfs_relookup(fdvp, &fvp, fcnp);
	if (fvp != NULL) {
		xp = VTOI(fvp);
		dp = VTOI(fdvp);
	} else {
		/*
		 * From name has disappeared.
		 */
		if (doingdirectory)
			panic("ext2fs_rename: lost dir entry");
		vrele(ap->a_fvp);
		return (0);
	}
	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; its link
	 * count of three would cause a rmdir to fail with ENOTEMPTY.
	 * The IRENAME flag ensures that it cannot be moved by another
	 * rename.
	 */
	if (xp != ip) {
		if (doingdirectory)
			panic("ext2fs_rename: lost dir entry");
	} else {
		/*
		 * If the source is a directory with a
		 * new parent, the link count of the old
		 * parent directory must be decremented
		 * and ".." set to point to the new parent.
		 */
		if (doingdirectory && newparent) {
			dp->i_e2fs_nlink--;
			dp->i_flag |= IN_CHANGE;
			error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
				sizeof (struct ext2fs_dirtemplate), (off_t)0,
				UIO_SYSSPACE, IO_NODELOCKED, 
				tcnp->cn_cred, NULL, curproc);
			if (error == 0) {
				namlen = dirbuf.dotdot_namlen;
				if (namlen != 2 ||
				    dirbuf.dotdot_name[0] != '.' ||
				    dirbuf.dotdot_name[1] != '.') {
					ufs_dirbad(xp, (doff_t)12,
					    "ext2fs_rename: mangled dir");
				} else {
					dirbuf.dotdot_ino = h2fs32(newparent);
					(void) vn_rdwr(UIO_WRITE, fvp,
					    (caddr_t)&dirbuf,
					    sizeof (struct ext2fs_dirtemplate),
					    (off_t)0, UIO_SYSSPACE,
					    IO_NODELOCKED|IO_SYNC,
					    tcnp->cn_cred, NULL, curproc);
					cache_purge(fdvp);
				}
			}
		}
		error = ext2fs_dirremove(fdvp, fcnp);
		if (!error) {
			xp->i_e2fs_nlink--;
			xp->i_flag |= IN_CHANGE;
		}
		xp->i_flag &= ~IN_RENAME;
	}
	if (dp)
		vput(fdvp);
	if (xp)
		vput(fvp);
	vrele(ap->a_fvp);
	return (error);

bad:
	if (xp)
		vput(ITOV(xp));
	vput(ITOV(dp));
out:
	if (doingdirectory)
		ip->i_flag &= ~IN_RENAME;
	if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
		ip->i_e2fs_nlink--;
		ip->i_flag |= IN_CHANGE;
		vput(fvp);
	} else
		vrele(fvp);
	return (error);
}
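The numbered algorithm in the comment at the top of ext2fs_rename() is mostly link-count choreography. For the directory-moved-to-a-new-parent case it reduces to the hedged sketch below: pin the source early (a crash then leaves a recoverable, too-high count), move the ".." reference between parents, and drop the pin once the old name is gone. Purely illustrative.

struct dino_demo { int nlink; };

static void rename_dir_linkcounts_demo(struct dino_demo *src,
    struct dino_demo *oldparent, struct dino_demo *newparent)
{
	src->nlink++;		/* step 1: pin the inode while we work */
	newparent->nlink++;	/* new parent gains src's ".." reference */
	oldparent->nlink--;	/* old parent loses src's ".." reference */
	src->nlink--;		/* step 3: unpin after the old name is gone */
}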
Example #23
0
File: osi_file.c  Project: hwr/openafs
void *
osi_UfsOpen(afs_dcache_id_t *ainode)
{
#ifdef AFS_CACHE_VNODE_PATH
    struct vnode *vp;
#else
    struct inode *ip;
#endif
    struct osi_file *afile = NULL;
    afs_int32 code = 0;
    int dummy;
#ifdef AFS_CACHE_VNODE_PATH
    char namebuf[1024];
    struct pathname lookpn;
#endif
    struct osi_stat tstat;
    afile = osi_AllocSmallSpace(sizeof(struct osi_file));
    AFS_GUNLOCK();

/*
 * AFS_CACHE_VNODE_PATH can be used with any file system, including ZFS or tmpfs.
 * The ainode is not an inode number but a path.
 */
#ifdef AFS_CACHE_VNODE_PATH
	/* Cannot use vn_open or lookupname; they use the user's CRED().
	 * We need to run as root, so we must use the low-level lookuppnvp
	 * and assume fname starts with /.
	 */

	code = pn_get_buf(ainode->ufs, AFS_UIOSYS, &lookpn, namebuf, sizeof(namebuf));
    if (code != 0) 
        osi_Panic("UfsOpen: pn_get_buf failed %ld %s", code, ainode->ufs);
 
	VN_HOLD(rootdir); /* released in lookuppnvp */
	code = lookuppnvp(&lookpn, NULL, FOLLOW, NULL, &vp, 
           rootdir, rootdir, afs_osi_credp);
    if (code != 0)  
        osi_Panic("UfsOpen: lookuppnvp failed %ld %s", code, ainode->ufs);
	
#ifdef AFS_SUN511_ENV
    code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp, NULL);
#else
    code = VOP_OPEN(&vp, FREAD|FWRITE, afs_osi_credp);
#endif

    if (code != 0)
        osi_Panic("UfsOpen: VOP_OPEN failed %ld %s", code, ainode->ufs);

#else
    code =
	igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, ainode->ufs, &ip,
		  CRED(), &dummy);
#endif
    AFS_GLOCK();
    if (code) {
	osi_FreeSmallSpace(afile);
	osi_Panic("UfsOpen: igetinode failed %ld %s", code, ainode->ufs);
    }
#ifdef AFS_CACHE_VNODE_PATH
    afile->vnode = vp;
    code = afs_osi_Stat(afile, &tstat);
    afile->size = tstat.size;
#else
    afile->vnode = ITOV(ip);
    afile->size = VTOI(afile->vnode)->i_size;
#endif
    afile->offset = 0;
    afile->proc = (int (*)())0;
    return (void *)afile;
}
Example #24
0
/*
 * Rmdir system call.
 */
int
ext2fs_rmdir(void *v)
{
	struct vop_rmdir_args *ap = v;
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct inode *ip, *dp;
	int error;

	ip = VTOI(vp);
	dp = VTOI(dvp);
	/*
	 * No rmdir "." please.
	 */
	if (dp == ip) {
		vrele(dvp);
		vput(vp);
		return (EINVAL);
	}
	/*
	 * Verify the directory is empty (and valid).
	 * (Rmdir ".." won't be valid since
	 *  ".." will contain a reference to
	 *  the current directory and thus be
	 *  non-empty.)
	 */
	error = 0;
	if (ip->i_e2fs_nlink != 2 ||
	    !ext2fs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
		error = ENOTEMPTY;
		goto out;
	}
	if ((dp->i_e2fs_flags & EXT2_APPEND) ||
				 (ip->i_e2fs_flags & (EXT2_IMMUTABLE | EXT2_APPEND))) {
		error = EPERM;
		goto out;
	}
	/*
	 * Delete reference to directory before purging
	 * inode.  If we crash in between, the directory
	 * will be reattached to lost+found.
	 */
	error = ext2fs_dirremove(dvp, cnp);
	if (error != 0)
		goto out;
	dp->i_e2fs_nlink--;
	dp->i_flag |= IN_CHANGE;
	cache_purge(dvp);
	vput(dvp);
	dvp = NULL;
	/*
	 * Truncate inode.  The only stuff left
	 * in the directory is "." and "..".  The
	 * "." reference is inconsequential since
	 * we're quashing it.  The ".." reference
	 * has already been adjusted above.  We've
	 * removed the "." reference and the reference
	 * in the parent directory, but there may be
	 * other hard links so decrement by 2 and
	 * worry about them later.
	 */
	ip->i_e2fs_nlink -= 2;
	error = ext2fs_truncate(ip, (off_t)0, IO_SYNC, cnp->cn_cred);
	cache_purge(ITOV(ip));
out:
	if (dvp)
		vput(dvp);
	vput(vp);
	return (error);
}
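The arithmetic in ext2fs_rmdir() follows from what an empty directory's link count means: exactly 2 links, one from its own "." and one from the parent's entry, plus the ".." it contributes to the parent's count. A hedged sketch of just that accounting:

struct rmnode_demo { int nlink; };

static int rmdir_counts_demo(struct rmnode_demo *dir, struct rmnode_demo *parent)
{
	if (dir->nlink != 2)
		return -1;	/* ENOTEMPTY in the real code */
	parent->nlink--;	/* dir's ".." no longer references parent */
	dir->nlink -= 2;	/* "." plus the parent's entry for dir */
	return 0;
}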
Example #25
0
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn,
    daddr_t lastbn, int level, e4fs_daddr_t *countp)
{
	struct buf *bp;
	struct m_ext2fs *fs = ip->i_e2fs;
	struct vnode *vp;
	e2fs_daddr_t *bap, *copy;
	int i, nblocks, error = 0, allerror = 0;
	e2fs_lbn_t nb, nlbn, last;
	e4fs_daddr_t blkcount, factor, blocksreleased = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->e2fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update on disk copy first.  Since
	 * double(triple) indirect before single(double) indirect, calls
	 * to bmap on these blocks will fail.  However, we already have
	 * the on disk address, so we have to set the b_blkno field
	 * explicitly instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0, 0);
	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
		bp->b_iocmd = BIO_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ext2_indirtrunc: bad buffer size");
		bp->b_blkno = dbn;
		vfs_busy_pages(bp, 0);
		bp->b_iooffset = dbtob(bp->b_blkno);
		bstrategy(bp);
		error = bufwait(bp);
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = (e2fs_daddr_t *)bp->b_data;
	copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	    (NINDIR(fs) - (last + 1)) * sizeof(e2fs_daddr_t));
	if (last == -1)
		bp->b_flags |= B_INVAL;
	if (DOINGASYNC(vp)) {
		bdwrite(bp);
	} else {
		error = bwrite(bp);
		if (error)
			allerror = error;
	}
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ext2_indirtrunc(ip, nlbn,
			    fsbtodb(fs, nb), (int32_t)-1, level - 1, &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ext2_blkfree(ip, nb, fs->e2fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    last, level - 1, &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	free(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
Example #26
0
/*
 * Find a cylinder to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 *
 */
static u_long
ext2_dirpref(struct inode *pip)
{
    struct m_ext2fs *fs;
    int cg, prefcg, cgsize;
    u_int avgifree, avgbfree, avgndir, curdirsize;
    u_int minifree, minbfree, maxndir;
    u_int mincg, minndir;
    u_int dirsize, maxcontigdirs;

    mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
    fs = pip->i_e2fs;

    avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount;
    avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount;
    avgndir  = fs->e2fs_total_dir / fs->e2fs_gcount;

    /*
     * Force allocation in another cg if creating a first level dir.
     */
    ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
    if (ITOV(pip)->v_vflag & VV_ROOT) {
        prefcg = arc4random() % fs->e2fs_gcount;
        mincg = prefcg;
        minndir = fs->e2fs_ipg;
        for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
            if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
                    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
                    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
                mincg = cg;
                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
            }
        for (cg = 0; cg < prefcg; cg++)
            if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir &&
                    fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree &&
                    fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) {
                mincg = cg;
                minndir = fs->e2fs_gd[cg].ext2bgd_ndirs;
            }

        return (mincg);
    }

    /*
     * Count various limits which used for
     * optimal allocation of a directory inode.
     */
    maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
    minifree = avgifree - avgifree / 4;
    if (minifree < 1)
        minifree = 1;
    minbfree = avgbfree - avgbfree / 4;
    if (minbfree < 1)
        minbfree = 1;
    cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
    dirsize = AVGDIRSIZE;
    curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
    if (dirsize < curdirsize)
        dirsize = curdirsize;
    maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
    maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
    if (maxcontigdirs == 0)
        maxcontigdirs = 1;

    /*
     * Limit number of dirs in one cg and reserve space for
     * regular files, but only if we have no deficit in
     * inodes or space.
     */
    prefcg = ino_to_cg(fs, pip->i_number);
    for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
        if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
                fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
                fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
            if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
                return (cg);
        }
    for (cg = 0; cg < prefcg; cg++)
        if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir &&
                fs->e2fs_gd[cg].ext2bgd_nifree >= minifree &&
                fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) {
            if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
                return (cg);
        }
    /*
     * This is a backstop when we have deficit in space.
     */
    for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
        if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
            return (cg);
    for (cg = 0; cg < prefcg; cg++)
        if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree)
            break;
    return (cg);
}
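Stripped of the first-level-directory special case, ext2_dirpref() is a wrap-around scan for the first cylinder group that beats the per-group averages. A hedged sketch of the core selection, with plain arrays standing in for the group-descriptor table:

/* Scan groups starting at prefcg, wrapping around, and return the first
 * one with at least the average number of free inodes and blocks. */
static int dirpref_core_demo(const unsigned *ifree, const unsigned *bfree,
    int ngroups, int prefcg)
{
    unsigned avgifree = 0, avgbfree = 0;
    int i, cg;

    for (i = 0; i < ngroups; i++) {
        avgifree += ifree[i];
        avgbfree += bfree[i];
    }
    avgifree /= ngroups;
    avgbfree /= ngroups;

    for (i = 0; i < ngroups; i++) {
        cg = (prefcg + i) % ngroups;
        if (ifree[cg] >= avgifree && bfree[cg] >= avgbfree)
            return cg;
    }
    return prefcg;		/* backstop when no group qualifies */
}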
Example #27
0
/*
 * ffs_blkalloc allocates a disk block for ffs_pageout(); as a consequence
 * it does no buf_breads (those could lead to deadlock, since the page may
 * already be marked busy while it is being paged out).  Also important to
 * note is that we are not growing the file in pageouts, so ip->i_size
 * cannot increase through this call due to the way UBC works.
 * This code is derived from ffs_balloc, but many cases dealt with in
 * ffs_balloc are not applicable here.
 * Do not call with the B_CLRBUF flag, as this should only be called
 * from pageouts.
 */
int
ffs_blkalloc(
	struct inode *ip,
	ufs_daddr_t lbn,
	int size,
	kauth_cred_t cred,
	int flags)
{
	register struct fs *fs;
	register ufs_daddr_t nb;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	int devBlockSize=0;
	struct mount *mp=vp->v_mount;
#if REV_ENDIAN_FS
	int rev_endian=(mp->mnt_flag & MNT_REVEND);
#endif /* REV_ENDIAN_FS */

	fs = ip->i_fs;

	if(size > fs->fs_bsize)
		panic("ffs_blkalloc: too large for allocation");

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		panic("ffs_blkalloc():cannot extend file: i_size %d, lbn %d", ip->i_size, lbn);
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->fs_bsize) {
		/* TBD: trivial case; the block  is already allocated */
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize > osize) {
				panic("ffs_allocblk: trying to extend a fragment");
			}
			return(0);
		} else {
			if (ip->i_size < (lbn + 1) * fs->fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			ip->i_db[lbn] = newb;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			return (0);
		}
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if (error = ufs_getlbns(vp, lbn, indirs, &num))
		return(error);

	if(num == 0) {
		panic("ffs_blkalloc: file with direct blocks only"); 
	}

	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
	        if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb))
			return (error);
		nb = newb;
		*allocblk++ = nb;
		bp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[1].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(bp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(bp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(bp);
		} else if (error = buf_bwrite(bp)) {
			goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = (int)buf_meta_bread(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			buf_brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)buf_dataptr(bp);
#if	REV_ENDIAN_FS
	if (rev_endian)
		nb = OSSwapInt32(bap[indirs[i].in_off]);
	else {
#endif	/* REV_ENDIAN_FS */
		nb = bap[indirs[i].in_off];
#if REV_ENDIAN_FS
	}
#endif /* REV_ENDIAN_FS */
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			buf_brelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0);
		if (error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		nbp = buf_getblk(vp, (daddr64_t)((unsigned)(indirs[i].in_lbn)), fs->fs_bsize, 0, 0, BLK_META);
		buf_setblkno(nbp, (daddr64_t)((unsigned)fsbtodb(fs, nb)));
		buf_clear(nbp);
		/*
		 * Write synchronously conditional on mount flags.
		 */
		if ((vp)->v_mount->mnt_flag & MNT_ASYNC) {
			error = 0;
			buf_bdwrite(nbp);
		} else if (error = buf_bwrite(nbp)) {
			buf_brelse(bp);
			goto fail;
		}
#if	REV_ENDIAN_FS
	if (rev_endian)
		bap[indirs[i - 1].in_off] = OSSwapInt32(nb);
	else {
#endif	/* REV_ENDIAN_FS */
		bap[indirs[i - 1].in_off] = nb;
#if	REV_ENDIAN_FS
	}
#endif	/* REV_ENDIAN_FS */
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    cred, &newb)) != 0) {
			buf_brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
#if REV_ENDIAN_FS
		if (rev_endian)
			bap[indirs[i].in_off] = OSSwapInt32(nb);
		else
#endif /* REV_ENDIAN_FS */
			bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			buf_bwrite(bp);
		} else {
			buf_bdwrite(bp);
		}
		return (0);
	}
	buf_brelse(bp);
	return (0);
fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(ip, *blkp, fs->fs_bsize);
		deallocated += fs->fs_bsize;
	}
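	/*
	 * Detach the partially built chain: allocib, if set, points at
	 * the in-core indirect pointer that referenced the first new
	 * block.
	 */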
	if (allocib != NULL)
		*allocib = 0;
	if (deallocated) {
		devBlockSize = vfs_devblocksize(mp);
#if QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, (int64_t)-deallocated, cred, FORCE);
#endif /* QUOTA */
		ip->i_blocks -= btodb(deallocated, devBlockSize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}
Example #28
0
/*
 * Allocate a block in the file system.
 *
 * This takes the framework from ffs_alloc.  To implement the
 * actual allocation, it calls ext2_new_block, the ported version
 * of the same Linux routine.
 *
 * Note that this is always called in connection with ext2_blkpref.
 *
 * Preallocation is done as Linux does it.
 */
int
ext2_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size,
	   struct ucred *cred, daddr_t *bnp)
{
	struct ext2_sb_info *fs;
	daddr_t bno;
#if QUOTA
	int error;
#endif

	*bnp = 0;
	fs = ip->i_e2fs;
#if DIAGNOSTIC
	if ((u_int)size > fs->s_blocksize || blkoff(fs, size) != 0) {
		kprintf("dev = %s, bsize = %lu, size = %d, fs = %s\n",
		    devtoname(ip->i_dev), fs->s_blocksize, size, fs->fs_fsmnt);
		panic("ext2_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ext2_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->s_blocksize && fs->s_es->s_free_blocks_count == 0)
		goto nospace;
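	/* Only root may allocate from the reserved block pool. */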
	if (cred->cr_uid != 0 &&
		fs->s_es->s_free_blocks_count < fs->s_es->s_r_blocks_count)
		goto nospace;
#if QUOTA
	if ((error = ext2_chkdq(ip, (long)btodb(size), cred, 0)) != 0)
		return (error);
#endif
	if (bpref >= fs->s_es->s_blocks_count)
		bpref = 0;
	/* call the Linux code */
#ifdef EXT2_PREALLOCATE
	/*
	 * To have a preallocation hit, we must
	 * - have at least one block preallocated
	 * - and our preferred block must have that block number or one below
	 */
	if (ip->i_prealloc_count &&
	    (bpref == ip->i_prealloc_block ||
	     bpref + 1 == ip->i_prealloc_block)) {
		bno = ip->i_prealloc_block++;
		ip->i_prealloc_count--;
		/* ext2_debug ("preallocation hit (%lu/%lu).\n",
			    ++alloc_hits, ++alloc_attempts); */

		/*
		 * Linux gets, clears, and releases the buffer at this
		 * point; we don't have to do that, we leave it to the
		 * caller.
		 */
	} else {
		ext2_discard_prealloc(ip);
		/* ext2_debug ("preallocation miss (%lu/%lu).\n",
			    alloc_hits, ++alloc_attempts); */
		if (S_ISREG(ip->i_mode))
			bno = ext2_new_block(ITOV(ip)->v_mount, bpref,
			    &ip->i_prealloc_count, &ip->i_prealloc_block);
		else
			bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount,
			    bpref, 0, 0);
	}
#else
	bno = (daddr_t)ext2_new_block(ITOV(ip)->v_mount, bpref, 0, 0);
#endif

	if (bno > 0) {
		/* set next_alloc fields as done in block_getblk */
		ip->i_next_alloc_block = lbn;
		ip->i_next_alloc_goal = bno;

		ip->i_blocks += btodb(size);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#if QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	ext2_chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	ext2_fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
Example #29
0
/*
 * This function has been reduced to the actual "find the inode number" part.
 */
ino_t
ext2_new_inode(const struct inode *dir, int mode)
{
	struct ext2_sb_info * sb;
	struct buffer_head * bh;
	struct buffer_head * bh2;
	int i, j, avefreei;
	int bitmap_nr;
	struct ext2_group_desc * gdp;
	struct ext2_group_desc * tmp;
	struct ext2_super_block * es;

	if (!dir)
		return 0;
	sb = dir->i_e2fs;

	lock_super(DEVVP(dir));
	es = sb->s_es;
repeat:
	gdp = NULL;
	i = 0;

	if (S_ISDIR(mode)) {
		avefreei = es->s_free_inodes_count / sb->s_groups_count;
/* I am not yet convinced that this next bit is necessary.
		i = dir->u.ext2_i.i_block_group;
		for (j = 0; j < sb->u.ext2_sb.s_groups_count; j++) {
			tmp = get_group_desc (sb, i, &bh2);
			if ((tmp->bg_used_dirs_count << 8) <
			    tmp->bg_free_inodes_count) {
				gdp = tmp;
				break;
			}
			else
			i = ++i % sb->u.ext2_sb.s_groups_count;
		}
*/
		if (!gdp) {
			for (j = 0; j < sb->s_groups_count; j++) {
				tmp = get_group_desc(ITOV(dir)->v_mount, j, &bh2);
				if (tmp->bg_free_inodes_count &&
					tmp->bg_free_inodes_count >= avefreei) {
					if (!gdp ||
					    (tmp->bg_free_blocks_count >
					     gdp->bg_free_blocks_count)) {
						i = j;
						gdp = tmp;
					}
				}
			}
		}
	} else {
		/*
		 * Try to place the inode in its parent directory
		 */
		i = dir->i_block_group;
		tmp = get_group_desc(ITOV(dir)->v_mount, i, &bh2);
		if (tmp->bg_free_inodes_count)
			gdp = tmp;
		else {
			/*
			 * Use a quadratic hash to find a group with a
			 * free inode
			 */
			for (j = 1; j < sb->s_groups_count; j <<= 1) {
				i += j;
				if (i >= sb->s_groups_count)
					i -= sb->s_groups_count;
				tmp = get_group_desc(ITOV(dir)->v_mount, i, &bh2);
				if (tmp->bg_free_inodes_count) {
					gdp = tmp;
					break;
				}
			}
		}
		if (!gdp) {
			/*
			 * That failed: try linear search for a free inode
			 */
			i = dir->i_block_group + 1;
			for (j = 2; j < sb->s_groups_count; j++) {
				if (++i >= sb->s_groups_count)
					i = 0;
				tmp = get_group_desc(ITOV(dir)->v_mount, i, &bh2);
				if (tmp->bg_free_inodes_count) {
					gdp = tmp;
					break;
				}
			}
		}
	}

	if (!gdp) {
		unlock_super(DEVVP(dir));
		return 0;
	}
	bitmap_nr = load_inode_bitmap(ITOV(dir)->v_mount, i);
	bh = sb->s_inode_bitmap[bitmap_nr];
	if ((j = find_first_zero_bit((unsigned long *)bh->b_data,
	    EXT2_INODES_PER_GROUP(sb))) < EXT2_INODES_PER_GROUP(sb)) {
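		/*
		 * set_bit is assumed to follow the Linux test-and-set
		 * convention of returning the previous bit value; nonzero
		 * means another allocator raced us to this inode, so
		 * rescan.
		 */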
		if (set_bit(j, bh->b_data)) {
			kprintf("ext2_new_inode: "
			    "bit already set for inode %d", j);
			goto repeat;
		}
/* Linux now does the following:
		mark_buffer_dirty(bh);
		if (sb->s_flags & MS_SYNCHRONOUS) {
			ll_rw_block (WRITE, 1, &bh);
			wait_on_buffer (bh);
		}
*/
		mark_buffer_dirty(bh);
	} else {
		if (gdp->bg_free_inodes_count != 0) {
			kprintf ( "ext2_new_inode:"
				    "Free inodes count corrupted in group %d",
				    i);
			unlock_super (DEVVP(dir));
			return 0;
		}
		goto repeat;
	}
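	/* Convert the group-relative bit index to a global 1-based inode number. */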
	j += i * EXT2_INODES_PER_GROUP(sb) + 1;
	if (j < EXT2_FIRST_INO(sb) || j > es->s_inodes_count) {
		kprintf ( "ext2_new_inode:"
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%d", i, j);
		unlock_super (DEVVP(dir));
		return 0;
	}
	gdp->bg_free_inodes_count--;
	if (S_ISDIR(mode))
		gdp->bg_used_dirs_count++;
	mark_buffer_dirty(bh2);
	es->s_free_inodes_count--;
	/* mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); */
	sb->s_dirt = 1;
	unlock_super(DEVVP(dir));
	return j;
}
Example #30
0
int
ud_dircheckforname(struct ud_inode *tdp,
	char *namep, int32_t namelen, struct slot *slotp,
	struct ud_inode **ipp, uint8_t *buf, struct cred *cr)
{
	struct udf_vfs *udf_vfsp;
	uint32_t dirsize, offset;
	struct fbuf *fbp;
	struct file_id *fid;
	int32_t sz, error = 0, sz_req, matched = 0;
	uint8_t *nm;

	uint8_t *dname;
	int32_t id_len;

	ud_printf("ud_dircheckforname\n");

	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
	fbp = NULL;

	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);

	udf_vfsp = tdp->i_udf;

	offset = 0;
	dirsize = tdp->i_size;

	if (slotp->status != FOUND) {
		int32_t temp;

		temp = 1024; /* set to size of dname allocated above */
		if ((error = ud_compress(namelen, &temp,
				(uint8_t *)namep, dname)) != 0) {
			goto end;
		}
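		/*
		 * Space needed for the new entry: the fixed file-id
		 * descriptor header plus the compressed name, rounded up
		 * to the 4-byte alignment UDF requires.
		 */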
		sz_req = F_LEN + temp;
		sz_req = (sz_req + 3) & ~3;
	}

	while (offset < dirsize) {
		if ((error = ud_get_next_fid(tdp, &fbp,
				offset, &fid, &nm, buf)) != 0) {
			break;
		}
		if ((error = ud_uncompress(fid->fid_idlen,
				&id_len, nm, dname)) != 0) {
			break;
		}
		if ((fid->fid_flags & FID_DELETED) == 0) {
			/*
			 * Check for a name match; a parent FID also
			 * matches "." and "..".
			 */
			if (((namelen == id_len) &&
				(strncmp(namep, (caddr_t)dname, namelen) ==
							0)) ||
				((fid->fid_flags & FID_PARENT) &&
				(namep[0] == '.' &&
					(namelen == 1 ||
					(namelen == 2 && namep[1] == '.'))))) {
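				/* Cache the match offset for later lookups. */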

				tdp->i_diroff = offset;
				if ((fid->fid_flags & FID_PARENT) &&
					(namelen == 1) && (namep[0] == '.')) {
					struct vnode *vp = ITOV(tdp);

					*ipp = tdp;
					VN_HOLD(vp);
				} else {
					uint16_t prn;
					uint32_t loc;

					prn = SWAP_16(fid->fid_icb.lad_ext_prn);
					loc = SWAP_32(fid->fid_icb.lad_ext_loc);
					if ((error = ud_iget(tdp->i_vfs, prn,
						loc, ipp, NULL, cr)) != 0) {

						fbrelse(fbp, S_OTHER);
						goto end;
					}
				}
				slotp->status = EXIST;
				slotp->offset = offset;
				slotp->size = FID_LEN(fid);
				slotp->fbp = fbp;
				slotp->ep = fid;
				slotp->endoff = 0;
				goto end;
			}
		} else {
			/*
			 * The entry is deleted: if we still need an empty
			 * slot, take one whose size fits the new entry, and
			 * prefer a slot that previously held the same name.
			 */
			if ((slotp->status != FOUND) ||
				(matched == 0)) {
				sz = FID_LEN(fid);
				if (sz == sz_req) {
					slotp->status = FOUND;
					slotp->offset = offset;
					slotp->size = sz;
				}
				if (matched == 0) {
					if ((namelen == id_len) &&
						(strncmp(namep, (caddr_t)dname,
						namelen) == 0)) {
						matched = 1;
						slotp->status = FOUND;
						slotp->offset = offset;
						slotp->size = sz;
					}
				}
			}
		}
		offset += FID_LEN(fid);
	}
	if (fbp) {
		fbrelse(fbp, S_OTHER);
	}
	if (slotp->status == NONE) {
		/*
		 * We didn't find a slot; the new directory entry should be put
		 * at the end of the directory.  Return an indication of where
		 * this is, and set "endoff" to zero; since we're going to have
		 * to extend the directory, we're certainly not going to
		 * truncate it.
		 */
		slotp->offset = dirsize;
		if (tdp->i_desc_type == ICB_FLAG_ONE_AD) {
			slotp->size = tdp->i_max_emb - tdp->i_size;
		} else {
			slotp->size = udf_vfsp->udf_lbsize -
			    (slotp->offset & udf_vfsp->udf_lbmask);
		}
		slotp->endoff = 0;
	}

	*ipp = NULL;
end:
	kmem_free((caddr_t)dname, 1024);
	return (error);
}