Example #1
int
ufs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred,
    int flags)
{
	off_t neweof;	/* file size after the operation */
	off_t neweob;	/* offset next to the last block after the operation */
	off_t pagestart; /* starting offset of range covered by pgs */
	off_t eob;	/* offset next to allocated blocks */
	struct uvm_object *uobj;
	int i, delta, error, npages;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
	int ppb = MAX(bsize >> PAGE_SHIFT, 1);
	struct vm_page **pgs;
	size_t pgssize;
	UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
		    vp, off, len, vp->v_size);

	neweof = MAX(vp->v_size, off + len);
	GOP_SIZE(vp, neweof, &neweob, 0);

	error = 0;
	uobj = &vp->v_uobj;

	/*
	 * read or create pages covering the range of the allocation and
	 * keep them locked until the new block is allocated, so there
	 * will be no window where the old contents of the new block are
	 * visible to racing threads.
	 */

	pagestart = trunc_page(off) & ~(bsize - 1);
	npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
	pgssize = npages * sizeof(struct vm_page *);
	pgs = kmem_zalloc(pgssize, KM_SLEEP);

	mutex_enter(&uobj->vmobjlock);
	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
	    VM_PROT_WRITE, 0,
	    PGO_SYNCIO|PGO_PASTEOF|PGO_NOBLOCKALLOC|PGO_NOTIMESTAMP);
	if (error) {
		goto out;
	}
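
	/*
	 * VOP_GETPAGES returns with uobj->vmobjlock released (as the
	 * error path above also assumes), so re-take the lock before
	 * touching the pages.
	 */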
	mutex_enter(&uobj->vmobjlock);
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
		pgs[i]->flags &= ~PG_CLEAN;
		uvm_pageactivate(pgs[i]);
	}
	mutex_exit(&uvm_pageqlock);
	mutex_exit(&uobj->vmobjlock);

	/*
	 * adjust off to be block-aligned.
	 */

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	/*
	 * now allocate the range.
	 */

	genfs_node_wrlock(vp);
	error = GOP_ALLOC(vp, off, len, flags, cred);
	genfs_node_unlock(vp);

	/*
	 * clear PG_RDONLY on any pages we are holding
	 * (since they now have backing store) and unbusy them.
	 */

	GOP_SIZE(vp, off + len, &eob, 0);
	mutex_enter(&uobj->vmobjlock);
	for (i = 0; i < npages; i++) {
		if (error) {
			pgs[i]->flags |= PG_RELEASED;
		} else if (off <= pagestart + (i << PAGE_SHIFT) &&
		    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
			pgs[i]->flags &= ~PG_RDONLY;
		}
	}
	if (error) {
		mutex_enter(&uvm_pageqlock);
		uvm_page_unbusy(pgs, npages);
		mutex_exit(&uvm_pageqlock);
	} else {
		uvm_page_unbusy(pgs, npages);
	}
	mutex_exit(&uobj->vmobjlock);

 out:
 	kmem_free(pgs, pgssize);
	return error;
}
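
As a rough illustration of the alignment arithmetic above, the standalone sketch below (not kernel code) reruns the pagestart/npages and delta computations with assumed constants: 4 KB pages, a hypothetical 16 KB file-system block, and a sample unaligned request.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1 << PAGE_SHIFT)
#define trunc_page(x)	((x) & ~(int64_t)(PAGE_SIZE - 1))
#define round_page(x)	(((x) + PAGE_SIZE - 1) & ~(int64_t)(PAGE_SIZE - 1))
#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	int bshift = 14;			/* assumed: 16 KB fs blocks */
	int64_t bsize = 1 << bshift;
	int ppb = (int)MAX(bsize >> PAGE_SHIFT, 1);	/* pages per block: 4 */
	int64_t off = 0x6100, len = 0x300;	/* sample unaligned request */
	int64_t neweob = round_page(off + len);	/* stand-in for GOP_SIZE() */

	/* page range covering the blocks touched by [off, off + len) */
	int64_t pagestart = trunc_page(off) & ~(bsize - 1);
	int npages = MIN(ppb,
	    (int)((round_page(neweob) - pagestart) >> PAGE_SHIFT));

	/* block-align the allocation request itself */
	int64_t delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	printf("pagestart 0x%llx npages %d, aligned off 0x%llx len 0x%llx\n",
	    (long long)pagestart, npages, (long long)off, (long long)len);
	return 0;
}
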
Example #2
/*
 * miscfs/genfs getpages routine.  This is a fair bit simpler than the
 * kernel counterpart since we're not being executed from a fault handler
 * and generally don't need to care about PGO_LOCKED or other cruft.
 * We do, however, need to care about page locking and we keep trying until
 * we get all the pages within the range.  The object locking protocol
 * is the same as for the kernel: enter with the object lock held,
 * return with it released.
 */
int
genfs_getpages(void *v)
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = (struct uvm_object *)vp;
	struct vm_page *pg;
	voff_t curoff, endoff;
	off_t diskeof;
	size_t bufsize, remain, bufoff, xfersize;
	uint8_t *tmpbuf;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1<<bshift;
	int count = *ap->a_count;
	int async;
	int i, error;

	/*
	 * Ignore async for now, the structure of this routine
	 * doesn't exactly allow for it ...
	 */
	async = 0;

	if (ap->a_centeridx != 0)
		panic("%s: centeridx != not supported", __func__);

	if (ap->a_access_type & VM_PROT_WRITE)
		vp->v_iflag |= VI_ONWORKLST;

	curoff = ap->a_offset & ~PAGE_MASK;
	for (i = 0; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup:
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* page is busy?  we need to wait until it's released */
		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0, "getpg",0);
			mutex_enter(&uobj->vmobjlock);
			goto retrylookup;
		}
		pg->flags |= PG_BUSY;
		if (pg->flags & PG_FAKE)
			break;
		ap->a_m[i] = pg;
	}

	/* got everything?  if so, just return */
	if (i == count) {
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * didn't?  Ok, allocate backing pages.  Start from the first
	 * one we missed.
	 */
	for (; i < count; i++, curoff += PAGE_SIZE) {
 retrylookup2:
		pg = uvm_pagelookup(uobj, curoff);

		/* found?  busy it and be happy */
		if (pg) {
			if (pg->flags & PG_BUSY) {
				pg->flags |= PG_WANTED;
				UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
				    "getpg2", 0);
				mutex_enter(&uobj->vmobjlock);
				goto retrylookup2;
			} else {
				pg->flags |= PG_BUSY;
			}

		/* not found?  make a new page */
		} else {
			pg = rumpvm_makepage(uobj, curoff);
		}
		ap->a_m[i] = pg;
	}

	/*
	 * We have done all the clerical work and have all pages busied.
	 * Release the vm object for other consumers.
	 */
	mutex_exit(&uobj->vmobjlock);

	/*
	 * Now, we have all the pages here & busy.  Transfer the range
	 * starting from the missing offset and transfer into the
	 * page buffers.
	 */
	GOP_SIZE(vp, vp->v_size, &diskeof, 0);

	/* align to boundaries */
	endoff = trunc_page(ap->a_offset) + (count << PAGE_SHIFT);
	endoff = MIN(endoff, ((vp->v_writesize+bsize-1) & ~(bsize-1)));
	curoff = ap->a_offset & ~(MAX(bsize,PAGE_SIZE)-1);
	remain = endoff - curoff;
	if (diskeof > curoff)
		remain = MIN(remain, diskeof - curoff);

	DPRINTF(("a_offset: %llx, startoff: 0x%llx, endoff 0x%llx\n",
	    (unsigned long long)ap->a_offset, (unsigned long long)curoff,
	    (unsigned long long)endoff));

	/* read everything into a buffer */
	bufsize = round_page(remain);
	tmpbuf = kmem_zalloc(bufsize, KM_SLEEP);
	for (bufoff = 0; remain; remain -= xfersize, bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t lbn, bn;
		int run;

		lbn = (curoff + bufoff) >> bshift;
		/* XXX: assume eof */
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP & lazy bum: %d", __func__, error);

		DPRINTF(("lbn %d (off %d) -> bn %d run %d\n", (int)lbn,
		    (int)(curoff+bufoff), (int)bn, run));
		xfersize = MIN(((lbn+1+run)<<bshift)-(curoff+bufoff), remain);

		/* hole? */
		if (bn == -1) {
			memset(tmpbuf + bufoff, 0, xfersize);
			continue;
		}

		bp = getiobuf(vp, true);

		bp->b_data = tmpbuf + bufoff;
		bp->b_bcount = xfersize;
		bp->b_blkno = bn;
		bp->b_lblkno = 0;
		bp->b_flags = B_READ;
		bp->b_cflags = BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY, lazy bum", __func__);
		
		if (!async)
			putiobuf(bp);
	}

	/* skip to beginning of pages we're interested in */
	bufoff = 0;
	while (round_page(curoff + bufoff) < trunc_page(ap->a_offset))
		bufoff += PAGE_SIZE;

	DPRINTF(("first page offset 0x%x\n", (int)(curoff + bufoff)));

	for (i = 0; i < count; i++, bufoff += PAGE_SIZE) {
		/* past our prime? */
		if (curoff + bufoff >= endoff)
			break;

		pg = uvm_pagelookup(&vp->v_uobj, curoff + bufoff);
		KASSERT(pg);
		DPRINTF(("got page %p (off 0x%x)\n", pg, (int)(curoff+bufoff)));
		if (pg->flags & PG_FAKE) {
			memcpy((void *)pg->uanon, tmpbuf+bufoff, PAGE_SIZE);
			pg->flags &= ~PG_FAKE;
			pg->flags |= PG_CLEAN;
		}
		ap->a_m[i] = pg;
	}
	*ap->a_count = i;

	kmem_free(tmpbuf, bufsize);

	return 0;
}
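
The transfer loop above is driven entirely by VOP_BMAP: each logical block is mapped to a device block, the transfer size is limited to the end of the mapped run (or to what remains), and holes (bn == -1) are zero-filled instead of read. Below is a rough standalone sketch of just that arithmetic; toy_bmap, the 4 KB block size and the offsets are made up for illustration.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

/* made-up logical-to-physical block map: index is lbn, -1 means a hole */
static const int64_t toy_bmap[] = { 1000, 1008, -1, 1024 };

int
main(void)
{
	int bshift = 12;			/* assumed 4 KB fs blocks */
	int64_t curoff = 0;			/* aligned start of the transfer */
	int64_t remain = 4 * (1 << bshift);	/* read four blocks' worth */
	static uint8_t buf[4 * 4096];		/* stands in for tmpbuf */
	size_t bufoff, xfersize;

	for (bufoff = 0; remain; remain -= xfersize, bufoff += xfersize) {
		int64_t lbn = (int64_t)(curoff + bufoff) >> bshift;
		int64_t bn = toy_bmap[lbn];
		int run = 0;			/* no extra contiguity here */

		/* up to the end of the mapped run, but no more than remains */
		xfersize = (size_t)MIN(
		    ((lbn + 1 + run) << bshift) - (int64_t)(curoff + bufoff),
		    remain);

		if (bn == -1) {			/* hole: zero-fill, no I/O */
			memset(buf + bufoff, 0, xfersize);
			printf("lbn %lld: hole, zero %zu bytes\n",
			    (long long)lbn, xfersize);
			continue;
		}
		printf("lbn %lld -> bn %lld, read %zu bytes at bufoff %zu\n",
		    (long long)lbn, (long long)bn, xfersize, bufoff);
	}
	return 0;
}
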
Example #3
/*
 * Write a directory entry after a call to namei, using the parameters
 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * TVP is not used. (XXX: why is it here? remove it)
 * DIRP is the new directory entry contents.
 * CNP is the componentname from the final lookup step.
 * NEWDIRBP is not used and (XXX) should be removed. The previous
 * comment here said it was used by the now-removed softupdates code.
 *
 * The link count of the target inode is *not* incremented; the
 * caller does that.
 *
 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
 * directory entry. ulr_offset, which is the place to put the entry,
 * should be on a block boundary (and should be at the end of the
 * directory AFAIK) and a fresh block is allocated to put the new
 * directory entry in.
 *
 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
 * the entry into. This slot ranges from ulr_offset to ulr_offset +
 * ulr_count. However, this slot may already be partially populated
 * requiring compaction. See notes below.
 *
 * Furthermore, if ulr_count is not zero and ulr_endoff is not the
 * same as i_size, the directory is truncated to size ulr_endoff.
 */
int
ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
    struct vnode *tvp, struct lfs_direct *dirp,
    struct componentname *cnp, struct buf *newdirbp)
{
	kauth_cred_t cr;
	int newentrysize;
	struct inode *dp;
	struct buf *bp;
	u_int dsize;
	struct lfs_direct *ep, *nep;
	int error, ret, lfs_blkoff, loc, spacefree;
	char *dirbuf;
	struct timespec ts;
	struct ulfsmount *ump = VFSTOULFS(dvp->v_mount);
	struct lfs *fs = ump->um_lfs;
	const int needswap = ULFS_MPNEEDSWAP(fs);
	int dirblksiz = fs->um_dirblksiz;

	error = 0;
	cr = cnp->cn_cred;

	dp = VTOI(dvp);
	newentrysize = LFS_DIRSIZ(0, dirp, 0);

	if (ulr->ulr_count == 0) {
		/*
		 * If ulr_count is 0, then namei could find no
		 * space in the directory. Here, ulr_offset will
		 * be on a directory block boundary and we will write the
		 * new entry into a fresh block.
		 */
		if (ulr->ulr_offset & (dirblksiz - 1))
			panic("ulfs_direnter: newblk");
		if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset, dirblksiz,
		    cr, B_CLRBUF | B_SYNC, &bp)) != 0) {
			return (error);
		}
		dp->i_size = ulr->ulr_offset + dirblksiz;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		uvm_vnp_setsize(dvp, dp->i_size);
		dirp->d_reclen = ulfs_rw16(dirblksiz, needswap);
		dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
		if (FSFMT(dvp)) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
			if (needswap == 0) {
#else
			if (needswap != 0) {
#endif
				u_char tmp = dirp->d_namlen;
				dirp->d_namlen = dirp->d_type;
				dirp->d_type = tmp;
			}
		}
		lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1);
		memcpy((char *)bp->b_data + lfs_blkoff, dirp, newentrysize);
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL) {
			ulfsdirhash_newblk(dp, ulr->ulr_offset);
			ulfsdirhash_add(dp, dirp, ulr->ulr_offset);
			ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff,
			    ulr->ulr_offset);
		}
#endif
		error = VOP_BWRITE(bp->b_vp, bp);
		vfs_timestamp(&ts);
		ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP);
		if (error == 0)
			return (ret);
		return (error);
	}

	/*
	 * If ulr_count is non-zero, then namei found space for the new
	 * entry in the range ulr_offset to ulr_offset + ulr_count
	 * in the directory. To use this space, we may have to compact
	 * the entries located there, by copying them together towards the
	 * beginning of the block, leaving the free space in one usable
	 * chunk at the end.
	 */

	/*
	 * Increase size of directory if entry eats into new space.
	 * This should never push the size past a new multiple of
	 * DIRBLKSIZ.
	 *
	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
	 */
	if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) {
#ifdef DIAGNOSTIC
		printf("ulfs_direnter: reached 4.2-only block, "
		       "not supposed to happen\n");
#endif
		dp->i_size = ulr->ulr_offset + ulr->ulr_count;
		DIP_ASSIGN(dp, size, dp->i_size);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Get the block containing the space for the new directory entry.
	 */
	error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true);
	if (error) {
		return (error);
	}
	/*
	 * Find space for the new entry. In the simple case, the entry at
	 * offset base will have the space. If it does not, then namei
	 * arranged that compacting the region ulr_offset to
	 * ulr_offset + ulr_count would yield the space.
	 */
	ep = (struct lfs_direct *)dirbuf;
	dsize = (ep->d_ino != 0) ? LFS_DIRSIZ(FSFMT(dvp), ep, needswap) : 0;
	spacefree = ulfs_rw16(ep->d_reclen, needswap) - dsize;
	for (loc = ulfs_rw16(ep->d_reclen, needswap); loc < ulr->ulr_count; ) {
		uint16_t reclen;

		nep = (struct lfs_direct *)(dirbuf + loc);

		/* Trim the existing slot (NB: dsize may be zero). */
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);

		reclen = ulfs_rw16(nep->d_reclen, needswap);
		loc += reclen;
		if (nep->d_ino == 0) {
			/*
			 * A mid-block unused entry. Such entries are
			 * never created by the kernel, but fsck_ffs
			 * can create them (and it doesn't fix them).
			 *
			 * Add up the free space, and initialise the
			 * relocated entry since we don't memcpy it.
			 */
			spacefree += reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = LFS_DIRSIZ(FSFMT(dvp), nep, needswap);
		spacefree += reclen - dsize;
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_move(dp, nep,
			    ulr->ulr_offset + ((char *)nep - dirbuf),
			    ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
		memcpy((void *)ep, (void *)nep, dsize);
	}
	/*
	 * Here, `ep' points to a directory entry containing `dsize' in-use
	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
	 * then the entry is completely unused (dsize == 0). The value
	 * of ep->d_reclen is always indeterminate.
	 *
	 * Update the pointer fields in the previous entry (if any),
	 * copy in the new entry, and write out the block.
	 */
	if (ep->d_ino == 0 ||
	    (ulfs_rw32(ep->d_ino, needswap) == ULFS_WINO &&
	     memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
		if (spacefree + dsize < newentrysize)
			panic("ulfs_direnter: compact1");
		dirp->d_reclen = spacefree + dsize;
	} else {
		if (spacefree < newentrysize)
			panic("ulfs_direnter: compact2");
		dirp->d_reclen = spacefree;
		ep->d_reclen = ulfs_rw16(dsize, needswap);
		ep = (struct lfs_direct *)((char *)ep + dsize);
	}
	dirp->d_reclen = ulfs_rw16(dirp->d_reclen, needswap);
	dirp->d_ino = ulfs_rw32(dirp->d_ino, needswap);
	if (FSFMT(dvp)) {
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (needswap == 0) {
#else
		if (needswap != 0) {
#endif
			u_char tmp = dirp->d_namlen;
			dirp->d_namlen = dirp->d_type;
			dirp->d_type = tmp;
		}
	}
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
	    dirp->d_reclen == spacefree))
		ulfsdirhash_add(dp, dirp, ulr->ulr_offset + ((char *)ep - dirbuf));
#endif
	memcpy((void *)ep, (void *)dirp, (u_int)newentrysize);
#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL)
		ulfsdirhash_checkblock(dp, dirbuf -
		    (ulr->ulr_offset & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
#endif
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If all went well, and the directory can be shortened, proceed
	 * with the truncation. Note that we have to unlock the inode for
	 * the entry that we just entered, as the truncation may need to
	 * lock other inodes which can lead to deadlock if we also hold a
	 * lock on the newly entered node.
	 */
	if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) {
#ifdef LFS_DIRHASH
		if (dp->i_dirhash != NULL)
			ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff);
#endif
		(void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr);
	}
	return (error);
}
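
The compaction loop above is terse. The standalone sketch below replays it over a made-up three-entry slot, using a simplified toy_dirent type and a hypothetical TOY_DIRSIZ macro in place of LFS_DIRSIZ (no byte swapping, no dirhash); it only demonstrates the accounting: live entries slide towards the front, unused records are absorbed, and at the end ep is left with dsize in-use bytes followed by spacefree free bytes.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* toy stand-in for struct lfs_direct (fixed-size name, no byte swapping) */
struct toy_dirent {
	uint32_t d_ino;		/* 0 means the slot is unused */
	uint16_t d_reclen;	/* bytes the record occupies on disk */
	uint8_t  d_type;
	uint8_t  d_namlen;
	char	 d_name[24];
};
/* bytes an entry really needs; a rough stand-in for LFS_DIRSIZ */
#define TOY_DIRSIZ(dp)	(8 + (((dp)->d_namlen + 1 + 3) & ~3))

int
main(void)
{
	/* a 96-byte slot (ulr_count) holding three padded 32-byte records */
	static union {
		uint8_t b[96];
		struct toy_dirent align;
	} u;
	uint8_t *dirbuf = u.b;
	struct toy_dirent e0 = { 10, 32, 0, 3, "foo" };
	struct toy_dirent e1 = {  0, 32, 0, 0, "" };	/* deleted entry */
	struct toy_dirent e2 = { 11, 32, 0, 4, "quux" };
	int count = 96, loc, dsize, spacefree;
	struct toy_dirent *ep, *nep;

	memcpy(dirbuf +  0, &e0, sizeof(e0));
	memcpy(dirbuf + 32, &e1, sizeof(e1));
	memcpy(dirbuf + 64, &e2, sizeof(e2));

	ep = (struct toy_dirent *)dirbuf;
	dsize = ep->d_ino != 0 ? TOY_DIRSIZ(ep) : 0;
	spacefree = ep->d_reclen - dsize;
	for (loc = ep->d_reclen; loc < count; ) {
		uint16_t reclen;

		nep = (struct toy_dirent *)(dirbuf + loc);
		ep->d_reclen = dsize;		/* trim the previous entry */
		ep = (struct toy_dirent *)((uint8_t *)ep + dsize);
		reclen = nep->d_reclen;
		loc += reclen;
		if (nep->d_ino == 0) {		/* mid-block unused entry */
			spacefree += reclen;
			ep->d_ino = 0;
			dsize = 0;
			continue;
		}
		dsize = TOY_DIRSIZ(nep);
		spacefree += reclen - dsize;
		memmove(ep, nep, dsize);	/* slide the live entry down */
	}
	printf("last live entry \"%s\": %d bytes in use, %d bytes free\n",
	    ep->d_name, dsize, spacefree);
	return 0;
}
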

/*
 * Remove a directory entry after a call to namei, using the
 * parameters that ulfs_lookup left in nameidata and in the
 * ulfs_lookup_results.
 *
 * DVP is the directory to be updated. It must be locked.
 * ULR is the ulfs_lookup_results structure from the final lookup step.
 * IP, if not null, is the inode being unlinked.
 * FLAGS may contain DOWHITEOUT.
 * ISRMDIR is not used and (XXX) should be removed.
 *
 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout
 * instead of being cleared.
 *
 * ulr->ulr_offset contains the position of the directory entry
 * to be removed.
 *
 * ulr->ulr_reclen contains the size of the directory entry to be
 * removed.
 *
 * ulr->ulr_count contains the size of the *previous* directory
 * entry. This allows finding it, for free space management. If
 * ulr_count is 0, the target entry is at the beginning of the
 * directory. (Does this ever happen? The first entry should be ".",
 * which should only be removed at rmdir time. Does rmdir come here
 * to clear out the "." and ".." entries? Perhaps, but I doubt it.)
 *
 * The space is marked free by adding it to the record length (not
 * name length) of the preceding entry. If the first entry becomes
 * free, it is marked free by setting the inode number to 0.
 *
 * The link count of IP is decremented. Note that this is not the
 * inverse behavior of ulfs_direnter, which does not adjust link
 * counts. Sigh.
 */
int
ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
	      struct inode *ip, int flags, int isrmdir)
{
	struct inode *dp = VTOI(dvp);
	struct lfs_direct *ep;
	struct buf *bp;
	int error;
	const int needswap = ULFS_MPNEEDSWAP(dp->i_lfs);

	if (flags & DOWHITEOUT) {
		/*
		 * Whiteout entry: set d_ino to ULFS_WINO.
		 */
		error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep,
				     &bp, true);
		if (error)
			return (error);
		ep->d_ino = ulfs_rw32(ULFS_WINO, needswap);
		ep->d_type = LFS_DT_WHT;
		goto out;
	}

	if ((error = ulfs_blkatoff(dvp,
	    (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0)
		return (error);

#ifdef LFS_DIRHASH
	/*
	 * Remove the dirhash entry. This is complicated by the fact
	 * that `ep' is the previous entry when ulr_count != 0.
	 */
	if (dp->i_dirhash != NULL)
		ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep :
		   (struct lfs_direct *)((char *)ep +
		   ulfs_rw16(ep->d_reclen, needswap)), ulr->ulr_offset);
#endif

	if (ulr->ulr_count == 0) {
		/*
		 * First entry in block: set d_ino to zero.
		 */
		ep->d_ino = 0;
	} else {
		/*
		 * Collapse new free space into previous entry.
		 */
		ep->d_reclen =
		    ulfs_rw16(ulfs_rw16(ep->d_reclen, needswap) + ulr->ulr_reclen,
			needswap);
	}

#ifdef LFS_DIRHASH
	if (dp->i_dirhash != NULL) {
		int dirblksiz = ip->i_lfs->um_dirblksiz;
		ulfsdirhash_checkblock(dp, (char *)ep -
		    ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)),
		    ulr->ulr_offset & ~(dirblksiz - 1));
	}
#endif

out:
	if (ip) {
		ip->i_nlink--;
		DIP_ASSIGN(ip, nlink, ip->i_nlink);
		ip->i_flag |= IN_CHANGE;
	}
	/*
	 * XXX did it ever occur to anyone that it might be a good
	 * idea to restore ip->i_nlink if this fails? Or something?
	 * Currently on error return from this function the state of
	 * ip->i_nlink depends on what happened, and callers
	 * definitely do not take this into account.
	 */
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 &&
	    ip->i_nlink == 0)
		ulfs_snapgone(ip);
	return (error);
}
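
The space-reclamation rule described above amounts to one addition (or one store when the victim is first in its block). A minimal sketch with a toy entry type and made-up sizes:

#include <stdio.h>
#include <stdint.h>

struct toy_dirent {		/* toy stand-in, header fields only */
	uint32_t d_ino;
	uint16_t d_reclen;
};

/*
 * ulr_count == 0: the victim is first in its block, so just clear d_ino.
 * ulr_count != 0: 'prev' is the preceding entry; absorbing the victim's
 * record length into prev->d_reclen makes the space reusable.
 */
static void
toy_dirremove(struct toy_dirent *prev, int ulr_count, uint16_t ulr_reclen)
{
	if (ulr_count == 0)
		prev->d_ino = 0;	/* 'prev' is the victim itself here */
	else
		prev->d_reclen += ulr_reclen;
}

int
main(void)
{
	struct toy_dirent prev = { 7, 16 };	/* previous entry spans 16 bytes */

	toy_dirremove(&prev, 16, 24);		/* removed entry spanned 24 */
	printf("previous entry now spans %u bytes\n", prev.d_reclen);
	return 0;
}
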

/*
 * Rewrite an existing directory entry to point at the inode supplied.
 *
 * DP is the directory to update.
 * OFFSET is the position of the entry in question. It may come
 * from ulr_offset of a ulfs_lookup_results.
 * OIP is the old inode the directory previously pointed to.
 * NEWINUM is the number of the new inode.
 * NEWTYPE is the new value for the type field of the directory entry.
 * (This is ignored if the fs doesn't support that.)
 * ISRMDIR is not used and (XXX) should be removed.
 * IFLAGS are added to DP's inode flags.
 *
 * The link count of OIP is decremented. Note that the link count of
 * the new inode is *not* incremented. Yay for symmetry.
 */
int
ulfs_dirrewrite(struct inode *dp, off_t offset,
    struct inode *oip, ino_t newinum, int newtype,
    int isrmdir, int iflags)
{
	struct buf *bp;
	struct lfs_direct *ep;
	struct vnode *vdp = ITOV(dp);
	int error;

	error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true);
	if (error)
		return (error);
	ep->d_ino = ulfs_rw32(newinum, ULFS_IPNEEDSWAP(dp));
	if (!FSFMT(vdp))
		ep->d_type = newtype;
	oip->i_nlink--;
	DIP_ASSIGN(oip, nlink, oip->i_nlink);
	oip->i_flag |= IN_CHANGE;
	error = VOP_BWRITE(bp->b_vp, bp);
	dp->i_flag |= iflags;
	/*
	 * If the last named reference to a snapshot goes away,
	 * drop its snapshot reference so that it will be reclaimed
	 * when last open reference goes away.
	 */
	if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0)
		ulfs_snapgone(oip);
	return (error);
}

/*
 * Check if a directory is empty or not.
 * Inode supplied must be locked.
 *
 * Using a struct lfs_dirtemplate here is not precisely
 * what we want, but better than using a struct lfs_direct.
 *
 * NB: does not handle corrupted directories.
 */
int
ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
{
	doff_t off;
	struct lfs_dirtemplate dbuf;
	struct lfs_direct *dp = (struct lfs_direct *)&dbuf;
	int error, namlen;
	size_t count;
	const int needswap = ULFS_IPNEEDSWAP(ip);
#define	MINDIRSIZ (sizeof (struct lfs_dirtemplate) / 2)

	for (off = 0; off < ip->i_size;
	    off += ulfs_rw16(dp->d_reclen, needswap)) {
		error = vn_rdwr(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ, off,
		   UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL);
		/*
		 * Since we read MINDIRSIZ, residual must
		 * be 0 unless we're at end of file.
		 */
		if (error || count != 0)
			return (0);
		/* avoid infinite loops */
		if (dp->d_reclen == 0)
			return (0);
		/* skip empty entries */
		if (dp->d_ino == 0 || ulfs_rw32(dp->d_ino, needswap) == ULFS_WINO)
			continue;
		/* accept only "." and ".." */
#if (BYTE_ORDER == LITTLE_ENDIAN)
		if (FSFMT(ITOV(ip)) && needswap == 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#else
		if (FSFMT(ITOV(ip)) && needswap != 0)
			namlen = dp->d_type;
		else
			namlen = dp->d_namlen;
#endif
		if (namlen > 2)
			return (0);
		if (dp->d_name[0] != '.')
			return (0);
		/*
		 * At this point namlen must be 1 or 2.
		 * 1 implies ".", 2 implies ".." if second
		 * char is also "."
		 */
		if (namlen == 1 &&
		    ulfs_rw32(dp->d_ino, needswap) == ip->i_number)
			continue;
		if (dp->d_name[1] == '.' &&
		    ulfs_rw32(dp->d_ino, needswap) == parentino)
			continue;
		return (0);
	}
	return (1);
}
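
As a rough sketch of the name test above (ignoring the inode-number comparisons and the old-format namlen/type handling), a directory can only be empty if every live entry is "." or "..":

#include <stdio.h>
#include <string.h>

/* toy version of the "accept only "." and ".."" check in ulfs_dirempty */
static int
is_dot_or_dotdot(const char *name, int namlen)
{
	if (namlen > 2)
		return 0;
	if (name[0] != '.')
		return 0;
	return namlen == 1 || name[1] == '.';
}

int
main(void)
{
	const char *names[] = { ".", "..", ".x", "a" };
	int i;

	for (i = 0; i < 4; i++)
		printf("\"%s\" -> %s\n", names[i],
		    is_dot_or_dotdot(names[i], (int)strlen(names[i])) ?
		    "allowed in an empty directory" : "directory not empty");
	return 0;
}
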

#define	ULFS_DIRRABLKS 0
int ulfs_dirrablks = ULFS_DIRRABLKS;

/*
 * ulfs_blkatoff: Return buffer with the contents of block "offset" from
 * the beginning of directory "vp".  If "res" is non-NULL, fill it in with
 * a pointer to the remaining space in the directory.  If the caller intends
 * to modify the buffer returned, "modify" must be true.
 */

int
ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
    bool modify)
{
	struct inode *ip __diagused;
	struct buf *bp;
	daddr_t lbn;
	const int dirrablks = ulfs_dirrablks;
	daddr_t *blks;
	int *blksizes;
	int run, error;
	struct mount *mp = vp->v_mount;
	const int bshift = mp->mnt_fs_bshift;
	const int bsize = 1 << bshift;
	off_t eof;

	blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
	blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
	ip = VTOI(vp);
	KASSERT(vp->v_size == ip->i_size);
	GOP_SIZE(vp, vp->v_size, &eof, 0);
	lbn = offset >> bshift;

	for (run = 0; run <= dirrablks;) {
		const off_t curoff = lbn << bshift;
		const int size = MIN(eof - curoff, bsize);

		if (size == 0) {
			break;
		}
		KASSERT(curoff < eof);
		blks[run] = lbn;
		blksizes[run] = size;
		lbn++;
		run++;
		if (size != bsize) {
			break;
		}
	}
	KASSERT(run >= 1);
	error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1],
	    run - 1, NOCRED, (modify ? B_MODIFY : 0), &bp);
	if (error != 0) {
		*bpp = NULL;
		goto out;
	}
	if (res) {
		*res = (char *)bp->b_data + (offset & (bsize - 1));
	}
	*bpp = bp;

 out:
	kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
	kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
	return error;
}
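
A standalone sketch of the block-list construction in ulfs_blkatoff, with an assumed 4 KB block size, a read-ahead of two blocks and a sample file that ends mid-block; the final partial block terminates the run exactly as in the loop above:

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	int bshift = 12;			/* assumed 4 KB blocks */
	int64_t bsize = 1 << bshift;
	int64_t eof = 3 * bsize + 100;		/* sample file ends mid-block */
	int64_t offset = 0x1800;		/* byte offset asked for */
	int dirrablks = 2;			/* read-ahead of two blocks */
	int64_t lbn = offset >> bshift;
	int run;

	/* build the block list the same way as the loop in ulfs_blkatoff */
	for (run = 0; run <= dirrablks; ) {
		int64_t curoff = lbn << bshift;
		int64_t size = MIN(eof - curoff, bsize);

		if (size == 0)
			break;
		printf("blks[%d] = lbn %lld, size %lld\n",
		    run, (long long)lbn, (long long)size);
		lbn++;
		run++;
		if (size != bsize)		/* partial (last) block */
			break;
	}
	printf("breadn would read block %lld plus %d read-ahead block(s)\n",
	    (long long)(offset >> bshift), run - 1);
	return 0;
}
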
Example #4
/*
 * This is a slightly strangely structured routine.  It always puts
 * all the pages for a vnode.  It starts by releasing pages which
 * are clean and simultaneously looks up the smallest offset for a
 * dirty page belonging to the object.  If there is no smallest offset,
 * all pages have been cleaned.  Otherwise, it finds a contiguous range
 * of dirty pages starting from the smallest offset and writes them out.
 * After this the scan is restarted.
 */
int
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
	struct vm_page **busypg)
{
	char databuf[MAXPHYS];
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pg_next;
	voff_t smallest;
	voff_t curoff, bufoff;
	off_t eof;
	size_t xfersize;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
#if 0
	int async = (flags & PGO_SYNCIO) == 0;
#else
	int async = 0;
#endif

 restart:
	/* check if all pages are clean */
	smallest = -1;
	for (pg = TAILQ_FIRST(&uobj->memq); pg; pg = pg_next) {
		pg_next = TAILQ_NEXT(pg, listq.queue);

		/*
		 * XXX: this is not correct at all.  But it's based on
		 * assumptions we can make when accessing the pages
		 * only through the file system and not through the
		 * virtual memory subsystem.  Well, at least I hope
		 * so ;)
		 */
		KASSERT((pg->flags & PG_BUSY) == 0);

		/* If we can just dump the page, do so */
		if (pg->flags & PG_CLEAN || flags & PGO_FREE) {
			uvm_pagefree(pg);
			continue;
		}

		if (pg->offset < smallest || smallest == -1)
			smallest = pg->offset;
	}

	/* all done? */
	if (TAILQ_EMPTY(&uobj->memq)) {
		vp->v_iflag &= ~VI_ONWORKLST;
		mutex_exit(&uobj->vmobjlock);
		return 0;
	}

	/* we need to flush */
	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
	for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
		void *curva;

		if (curoff - smallest >= MAXPHYS)
			break;
		pg = uvm_pagelookup(uobj, curoff);
		if (pg == NULL)
			break;

		/* XXX: see comment about above KASSERT */
		KASSERT((pg->flags & PG_BUSY) == 0);

		curva = databuf + (curoff-smallest);
		memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
		rumpvm_enterva((vaddr_t)curva, pg);

		pg->flags |= PG_CLEAN;
	}
	KASSERT(curoff > smallest);

	mutex_exit(&uobj->vmobjlock);

	/* then we write */
	for (bufoff = 0; bufoff < MIN(curoff-smallest,eof); bufoff+=xfersize) {
		struct buf *bp;
		struct vnode *devvp;
		daddr_t bn, lbn;
		int run, error;

		lbn = (smallest + bufoff) >> bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &bn, &run);
		if (error)
			panic("%s: VOP_BMAP failed: %d", __func__, error);

		xfersize = MIN(((lbn+1+run) << bshift) - (smallest+bufoff),
		     curoff - (smallest+bufoff));

		/*
		 * We might run across blocks which aren't allocated yet.
		 * A reason might be e.g. the write operation being still
		 * in the kernel page cache while truncate has already
		 * enlarged the file.  So just ignore those ranges.
		 */
		if (bn == -1)
			continue;

		bp = getiobuf(vp, true);

		/* only write max what we are allowed to write */
		bp->b_bcount = xfersize;
		if (smallest + bufoff + xfersize > eof)
			bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
		bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);

		KASSERT(bp->b_bcount > 0);
		KASSERT(smallest >= 0);

		DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
		    (int)(smallest + bufoff),
		    (int)(smallest + bufoff + bp->b_bcount),
		    (int)eof));

		bp->b_bufsize = round_page(bp->b_bcount);
		bp->b_lblkno = 0;
		bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
		bp->b_data = databuf + bufoff;
		bp->b_flags = B_WRITE;
		bp->b_cflags |= BC_BUSY;

		if (async) {
			bp->b_flags |= B_ASYNC;
			bp->b_iodone = uvm_aio_biodone;
		}

		vp->v_numoutput++;
		VOP_STRATEGY(devvp, bp);
		if (bp->b_error)
			panic("%s: VOP_STRATEGY lazy bum %d",
			    __func__, bp->b_error);
		if (!async)
			putiobuf(bp);
	}
	rumpvm_flushva();

	mutex_enter(&uobj->vmobjlock);
	goto restart;
}
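
The size and block-number arithmetic in the write loop above is the least obvious part: the transfer is clamped at EOF, rounded up to a whole device block, and the device block number is the mapped bn plus the offset of the write within its file-system block. A small standalone sketch with assumed sizes (16 KB file-system blocks, 512-byte device blocks) and made-up offsets:

#include <stdio.h>
#include <stdint.h>

#define DEV_BSHIFT	9		/* assumed 512-byte device blocks */
#define DEV_BSIZE	(1 << DEV_BSHIFT)

int
main(void)
{
	int bshift = 14;			/* assumed 16 KB fs blocks */
	int64_t bsize = 1 << bshift;
	int64_t eof = 0x5123;			/* sample: file ends mid-page */
	int64_t smallest = 0x4000, bufoff = 0x1000, xfersize = 0x1000;
	int64_t bn = 800;			/* disk address of the fs block */
	int64_t bcount, blkno;

	/* clamp the write at EOF, then round up to a whole device block */
	bcount = xfersize;
	if (smallest + bufoff + xfersize > eof)
		bcount -= (smallest + bufoff + xfersize) - eof;
	bcount = (bcount + DEV_BSIZE - 1) & ~(int64_t)(DEV_BSIZE - 1);

	/* device block: bn plus the offset of the write within the fs block */
	blkno = bn + (((smallest + bufoff) & (bsize - 1)) >> DEV_BSHIFT);

	printf("write %lld bytes at device block %lld (file offset 0x%llx)\n",
	    (long long)bcount, (long long)blkno,
	    (long long)(smallest + bufoff));
	return 0;
}
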
Example #5
int
ulfs_balloc_range(struct vnode *vp, off_t off, off_t len, kauth_cred_t cred,
    int flags)
{
	off_t neweof;	/* file size after the operation */
	off_t neweob;	/* offset next to the last block after the operation */
	off_t pagestart; /* starting offset of range covered by pgs */
	off_t eob;	/* offset next to allocated blocks */
	struct uvm_object *uobj;
	int i, delta, error, npages;
	int bshift = vp->v_mount->mnt_fs_bshift;
	int bsize = 1 << bshift;
	int ppb = MAX(bsize >> PAGE_SHIFT, 1);
	struct vm_page **pgs;
	size_t pgssize;
	UVMHIST_FUNC("ulfs_balloc_range"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
		    vp, off, len, vp->v_size);

	neweof = MAX(vp->v_size, off + len);
	GOP_SIZE(vp, neweof, &neweob, 0);

	error = 0;
	uobj = &vp->v_uobj;

	/*
	 * read or create pages covering the range of the allocation and
	 * keep them locked until the new block is allocated, so there
	 * will be no window where the old contents of the new block are
	 * visible to racing threads.
	 */

	pagestart = trunc_page(off) & ~(bsize - 1);
	npages = MIN(ppb, (round_page(neweob) - pagestart) >> PAGE_SHIFT);
	pgssize = npages * sizeof(struct vm_page *);
	pgs = kmem_zalloc(pgssize, KM_SLEEP);

	/*
	 * adjust off to be block-aligned.
	 */

	delta = off & (bsize - 1);
	off -= delta;
	len += delta;

	genfs_node_wrlock(vp);
	mutex_enter(uobj->vmobjlock);
	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
	    VM_PROT_WRITE, 0, PGO_SYNCIO | PGO_PASTEOF | PGO_NOBLOCKALLOC |
	    PGO_NOTIMESTAMP | PGO_GLOCKHELD);
	if (error) {
		goto out;
	}

	/*
	 * now allocate the range.
	 */

	error = GOP_ALLOC(vp, off, len, flags, cred);
	genfs_node_unlock(vp);

	/*
	 * if the allocation succeeded, clear PG_CLEAN on all the pages
	 * and clear PG_RDONLY on any pages that are now fully backed
	 * by disk blocks.  if the allocation failed, we do not invalidate
	 * the pages since they might have already existed and been dirty,
	 * in which case we need to keep them around.  if we created the pages,
	 * they will be clean and read-only, and leaving such pages
	 * in the cache won't cause any problems.
	 */

	GOP_SIZE(vp, off + len, &eob, 0);
	mutex_enter(uobj->vmobjlock);
	mutex_enter(&uvm_pageqlock);
	for (i = 0; i < npages; i++) {
		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
		if (!error) {
			if (off <= pagestart + (i << PAGE_SHIFT) &&
			    pagestart + ((i + 1) << PAGE_SHIFT) <= eob) {
				pgs[i]->flags &= ~PG_RDONLY;
			}
			pgs[i]->flags &= ~PG_CLEAN;
		}
		uvm_pageactivate(pgs[i]);
	}
	mutex_exit(&uvm_pageqlock);
	uvm_page_unbusy(pgs, npages);
	mutex_exit(uobj->vmobjlock);

 out:
 	kmem_free(pgs, pgssize);
	return error;
}
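
The PG_RDONLY test above clears the flag only for pages completely covered by the newly allocated range [off, eob). A minimal standalone sketch with assumed 4 KB pages and sample offsets:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12			/* assumed 4 KB pages */

int
main(void)
{
	int64_t pagestart = 0x4000;		/* sample values */
	int64_t off = 0x4000, eob = 0x6800;	/* allocated range [off, eob) */
	int npages = 4, i;

	for (i = 0; i < npages; i++) {
		int64_t pg_start = pagestart + ((int64_t)i << PAGE_SHIFT);
		int64_t pg_end = pagestart + ((int64_t)(i + 1) << PAGE_SHIFT);
		int fully_backed = off <= pg_start && pg_end <= eob;

		printf("page %d [0x%llx, 0x%llx): %s\n", i,
		    (long long)pg_start, (long long)pg_end,
		    fully_backed ? "clear PG_RDONLY" : "keep PG_RDONLY");
	}
	return 0;
}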