Example #1
0
int
ext2_bmaparray(struct vnode *vp, daddr_t bn, daddr_t *bnp, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2mount *ump;
	struct mount *mp;
	struct vnode *devvp;
	struct indir a[NIADDR+1], *ap;
	daddr_t daddr;
	e2fs_lbn_t metalbn;
	int error, num, maxrun = 0, bsize;
	int *nump;

	ap = NULL;
	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);
	devvp = ump->um_devvp;

	bsize = EXT2_BLOCK_SIZE(ump->um_e2fs);

	if (runp) {
		maxrun = mp->mnt_iosize_max / bsize - 1;
		*runp = 0;
	}

	if (runb) {
		*runb = 0;
	}

	ap = a;
	nump = &num;
	error = ext2_getlbns(vp, bn, ap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0) {
			*bnp = -1;
		} else if (runp) {
			daddr_t bnb = bn;
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn + 1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}

	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[ap->in_off];

	for (bp = NULL, ++ap; --num; ++ap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = ap->in_lbn;
		if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn)
			break;
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		bp = getblk(vp, metalbn, bsize, 0, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef INVARIANTS
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			bp->b_blkno = blkptrtodb(ump, daddr);
			bp->b_iocmd = BIO_READ;
			bp->b_flags &= ~B_INVAL;
			bp->b_ioflags &= ~BIO_ERROR;
			vfs_busy_pages(bp, 0);
			bp->b_iooffset = dbtob(bp->b_blkno);
			bstrategy(bp);
			curthread->td_ru.ru_inblock++;
			error = bufwait(bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((e2fs_daddr_t *)bp->b_data)[ap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = ap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((e2fs_daddr_t *)bp->b_data)[bn - 1],
			    ((e2fs_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = ap->in_off;
			if (runb && bn) {
				for (--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump,
					((e2fs_daddr_t *)bp->b_data)[bn],
					((e2fs_daddr_t *)bp->b_data)[bn + 1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	/*
	 * Since this is FFS independent code, we are out of scope for the
	 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they
	 * will fall in the range 1..um_seqinc, so we use that test and
	 * return a request for a zeroed out buffer if attempts are made
	 * to read a BLK_NOCOPY or BLK_SNAP block.
	 */
	if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){
		*bnp = -1;
		return (0);
	}
	*bnp = blkptrtodb(ump, daddr);
	if (*bnp == 0) {
		*bnp = -1;
	}
	return (0);
}
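/*
 * Usage sketch (hypothetical, not part of the original file): in FreeBSD,
 * ext2_bmaparray() is normally reached through the VOP_BMAP vnode
 * operation.  A caller that only needs the logical-to-physical
 * translation and the contiguity hints could look like this; on return,
 * *bnp == -1 means the logical block is a hole with no disk address.
 */
static int
ext2_bmap_sketch(struct vnode *vp, daddr_t bn, daddr_t *bnp)
{
	int runp, runb;	/* contiguous blocks after/before bn */

	return (ext2_bmaparray(vp, bn, bnp, &runp, &runb));
}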
Example #2
0
/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
static struct buf *
cluster_rbuild(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
    daddr_t blkno, long size, int run, int gbflags, struct buf *fbp)
{
	struct buf *bp, *tbp;
	daddr_t bn;
	off_t off;
	long tinc, tsize;
	int i, inc, j, k, toff;

	KASSERT(size == vp->v_mount->mnt_stat.f_iosize,
	    ("cluster_rbuild: size %ld != f_iosize %jd\n",
	    size, (intmax_t)vp->v_mount->mnt_stat.f_iosize));

	/*
	 * avoid a division
	 */
	while ((u_quad_t) size * (lbn + run) > filesize) {
		--run;
	}

	if (fbp) {
		tbp = fbp;
		tbp->b_iocmd = BIO_READ; 
	} else {
		tbp = getblk(vp, lbn, size, 0, 0, gbflags);
		if (tbp->b_flags & B_CACHE)
			return tbp;
		tbp->b_flags |= B_ASYNC | B_RAM;
		tbp->b_iocmd = BIO_READ;
	}
	tbp->b_blkno = blkno;
	if ((tbp->b_flags & B_MALLOC) ||
	    ((tbp->b_flags & B_VMIO) == 0) || (run <= 1))
		return tbp;

	bp = trypbuf(&cluster_pbuf_freecnt);
	if (bp == NULL)
		return tbp;

	/*
	 * We are synthesizing a buffer out of vm_page_t's, but
	 * if the block size is not page aligned then the starting
	 * address may not be either.  Inherit the b_data offset
	 * from the original buffer.
	 */
	bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
	if ((gbflags & GB_UNMAPPED) != 0) {
		bp->b_data = unmapped_buf;
	} else {
		bp->b_data = (char *)((vm_offset_t)bp->b_data |
		    ((vm_offset_t)tbp->b_data & PAGE_MASK));
	}
	bp->b_iocmd = BIO_READ;
	bp->b_iodone = cluster_callback;
	bp->b_blkno = blkno;
	bp->b_lblkno = lbn;
	bp->b_offset = tbp->b_offset;
	KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset"));
	pbgetvp(vp, bp);

	TAILQ_INIT(&bp->b_cluster.cluster_head);

	bp->b_bcount = 0;
	bp->b_bufsize = 0;
	bp->b_npages = 0;

	inc = btodb(size);
	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
		if (i == 0) {
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			vfs_drain_busy_pages(tbp);
			vm_object_pip_add(tbp->b_bufobj->bo_object,
			    tbp->b_npages);
			for (k = 0; k < tbp->b_npages; k++)
				vm_page_sbusy(tbp->b_pages[k]);
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		} else {
			if ((bp->b_npages * PAGE_SIZE) +
			    round_page(size) > vp->v_mount->mnt_iosize_max) {
				break;
			}

			tbp = getblk(vp, lbn + i, size, 0, 0, GB_LOCK_NOWAIT |
			    (gbflags & GB_UNMAPPED));

			/* Don't wait around for locked bufs. */
			if (tbp == NULL)
				break;

			/*
			 * Stop scanning if the buffer is fully valid
			 * (marked B_CACHE), or locked (may be doing a
			 * background write), or if the buffer is not
			 * VMIO backed.  The clustering code can only deal
			 * with VMIO-backed buffers.  The bo lock is not
			 * required for the BKGRDINPROG check since it
			 * can not be set without the buf lock.
			 */
			if ((tbp->b_vflags & BV_BKGRDINPROG) ||
			    (tbp->b_flags & B_CACHE) ||
			    (tbp->b_flags & B_VMIO) == 0) {
				bqrelse(tbp);
				break;
			}

			/*
			 * The buffer must be completely invalid in order to
			 * take part in the cluster.  If it is partially valid
			 * then we stop.
			 */
			off = tbp->b_offset;
			tsize = size;
			VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
			for (j = 0; tsize > 0; j++) {
				toff = off & PAGE_MASK;
				tinc = tsize;
				if (toff + tinc > PAGE_SIZE)
					tinc = PAGE_SIZE - toff;
				VM_OBJECT_ASSERT_WLOCKED(tbp->b_pages[j]->object);
				if ((tbp->b_pages[j]->valid &
				    vm_page_bits(toff, tinc)) != 0)
					break;
				if (vm_page_xbusied(tbp->b_pages[j]))
					break;
				vm_object_pip_add(tbp->b_bufobj->bo_object, 1);
				vm_page_sbusy(tbp->b_pages[j]);
				off += tinc;
				tsize -= tinc;
			}
			if (tsize > 0) {
clean_sbusy:
				vm_object_pip_add(tbp->b_bufobj->bo_object, -j);
				for (k = 0; k < j; k++)
					vm_page_sunbusy(tbp->b_pages[k]);
				VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
				bqrelse(tbp);
				break;
			}
			VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);

			/*
			 * Set a read-ahead mark as appropriate
			 */
			if ((fbp && (i == 1)) || (i == (run - 1)))
				tbp->b_flags |= B_RAM;

			/*
			 * Set the buffer up for an async read (XXX should
			 * we do this only if we do not wind up brelse()ing?).
			 * Set the block number if it isn't set, otherwise
			 * if it is make sure it matches the block number we
			 * expect.
			 */
			tbp->b_flags |= B_ASYNC;
			tbp->b_iocmd = BIO_READ;
			if (tbp->b_blkno == tbp->b_lblkno) {
				tbp->b_blkno = bn;
			} else if (tbp->b_blkno != bn) {
				VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
				goto clean_sbusy;
			}
		}
		/*
		 * XXX fbp from caller may not be B_ASYNC, but we are going
		 * to biodone() it in cluster_callback() anyway
		 */
		BUF_KERNPROC(tbp);
		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
			tbp, b_cluster.cluster_entry);
		VM_OBJECT_WLOCK(tbp->b_bufobj->bo_object);
		for (j = 0; j < tbp->b_npages; j += 1) {
			vm_page_t m;
			m = tbp->b_pages[j];
			if ((bp->b_npages == 0) ||
			    (bp->b_pages[bp->b_npages-1] != m)) {
				bp->b_pages[bp->b_npages] = m;
				bp->b_npages++;
			}
			if (m->valid == VM_PAGE_BITS_ALL)
				tbp->b_pages[j] = bogus_page;
		}
		VM_OBJECT_WUNLOCK(tbp->b_bufobj->bo_object);
		/*
		 * Don't inherit tbp->b_bufsize as it may be larger due to
		 * a non-page-aligned size.  Instead just aggregate using
		 * 'size'.
		 */
		if (tbp->b_bcount != size)
			printf("warning: tbp->b_bcount wrong %ld vs %ld\n", tbp->b_bcount, size);
		if (tbp->b_bufsize != size)
			printf("warning: tbp->b_bufsize wrong %ld vs %ld\n", tbp->b_bufsize, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;
	}

	/*
	 * Fully valid pages in the cluster are already good and do not need
	 * to be re-read from disk.  Replace the page with bogus_page
	 */
	VM_OBJECT_WLOCK(bp->b_bufobj->bo_object);
	for (j = 0; j < bp->b_npages; j++) {
		VM_OBJECT_ASSERT_WLOCKED(bp->b_pages[j]->object);
		if (bp->b_pages[j]->valid == VM_PAGE_BITS_ALL)
			bp->b_pages[j] = bogus_page;
	}
	VM_OBJECT_WUNLOCK(bp->b_bufobj->bo_object);
	if (bp->b_bufsize > bp->b_kvasize)
		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
		    bp->b_bufsize, bp->b_kvasize);

	if (buf_mapped(bp)) {
		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
		    (vm_page_t *)bp->b_pages, bp->b_npages);
	}
	return (bp);
}
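/*
 * Side note (hedged sketch, not in the original source): the
 * "avoid a division" loop at the top of cluster_rbuild() trims 'run'
 * so that the cluster never extends past end of file.  It computes
 * the same bound as the division below, while avoiding a 64-bit
 * divide on the common path where no trimming is needed.
 */
static int
clamp_run_sketch(u_quad_t filesize, long size, daddr_t lbn, int run)
{
	while ((u_quad_t)size * (lbn + run) > filesize)
		--run;
	/* equivalently: run = min(run, filesize / size - lbn) */
	return (run);
}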
Example #3
0
/*
 * Read data to a buf, including read-ahead if we find this to be beneficial.
 * cluster_read replaces bread.
 */
int
cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
    struct ucred *cred, long totread, int seqcount, int gbflags,
    struct buf **bpp)
{
	struct buf *bp, *rbp, *reqbp;
	struct bufobj *bo;
	daddr_t blkno, origblkno;
	int maxra, racluster;
	int error, ncontig;
	int i;

	error = 0;
	bo = &vp->v_bufobj;
	if (!unmapped_buf_allowed)
		gbflags &= ~GB_UNMAPPED;

	/*
	 * Try to limit the amount of read-ahead by a few
	 * ad-hoc parameters.  This needs work!!!
	 */
	racluster = vp->v_mount->mnt_iosize_max / size;
	maxra = seqcount;
	maxra = min(read_max, maxra);
	maxra = min(nbuf/8, maxra);
	if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize)
		maxra = (filesize / size) - lblkno;

	/*
	 * get the requested block
	 */
	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
	if (bp == NULL)
		return (EBUSY);
	origblkno = lblkno;

	/*
	 * if it is in the cache, then check to see if the reads have been
	 * sequential.  If they have, then try some read-ahead, otherwise
	 * back-off on prospective read-aheads.
	 */
	if (bp->b_flags & B_CACHE) {
		if (!seqcount) {
			return 0;
		} else if ((bp->b_flags & B_RAM) == 0) {
			return 0;
		} else {
			bp->b_flags &= ~B_RAM;
			BO_RLOCK(bo);
			for (i = 1; i < maxra; i++) {
				/*
				 * Stop if the buffer does not exist or it
				 * is invalid (about to go away?)
				 */
				rbp = gbincore(&vp->v_bufobj, lblkno+i);
				if (rbp == NULL || (rbp->b_flags & B_INVAL))
					break;

				/*
				 * Set another read-ahead mark so we know 
				 * to check again. (If we can lock the
				 * buffer without waiting)
				 */
				if ((((i % racluster) == (racluster - 1)) ||
				    (i == (maxra - 1))) &&
				    (0 == BUF_LOCK(rbp,
				    LK_EXCLUSIVE | LK_NOWAIT, NULL))) {
					rbp->b_flags |= B_RAM;
					BUF_UNLOCK(rbp);
				}
			}
			BO_RUNLOCK(bo);
			if (i >= maxra) {
				return 0;
			}
			lblkno += i;
		}
		reqbp = bp = NULL;
	/*
	 * If it isn't in the cache, then get a chunk from
	 * disk if sequential, otherwise just get the block.
	 */
	} else {
		off_t firstread = bp->b_offset;
		int nblks;
		long minread;

		KASSERT(bp->b_offset != NOOFFSET,
		    ("cluster_read: no buffer offset"));

		ncontig = 0;

		/*
		 * Adjust totread if needed
		 */
		minread = read_min * size;
		if (minread > totread)
			totread = minread;

		/*
		 * Compute the total number of blocks that we should read
		 * synchronously.
		 */
		if (firstread + totread > filesize)
			totread = filesize - firstread;
		nblks = howmany(totread, size);
		if (nblks > racluster)
			nblks = racluster;

		/*
		 * Now compute the number of contiguous blocks.
		 */
		if (nblks > 1) {
			error = VOP_BMAP(vp, lblkno, NULL,
			    &blkno, &ncontig, NULL);
			/*
			 * If this failed to map just do the original block.
			 */
			if (error || blkno == -1)
				ncontig = 0;
		}

		/*
		 * If we have contiguous data available do a cluster
		 * otherwise just read the requested block.
		 */
		if (ncontig) {
			/* Account for our first block. */
			ncontig = min(ncontig + 1, nblks);
			if (ncontig < nblks)
				nblks = ncontig;
			bp = cluster_rbuild(vp, filesize, lblkno,
			    blkno, size, nblks, gbflags, bp);
			lblkno += (bp->b_bufsize / size);
		} else {
			bp->b_flags |= B_RAM;
			bp->b_iocmd = BIO_READ;
			lblkno += 1;
		}
	}

	/*
	 * handle the synchronous read so that it is available ASAP.
	 */
	if (bp) {
		if ((bp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(bp, 0);
		}
		bp->b_flags &= ~B_INVAL;
		bp->b_ioflags &= ~BIO_ERROR;
		if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
			BUF_KERNPROC(bp);
		bp->b_iooffset = dbtob(bp->b_blkno);
		bstrategy(bp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, bp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	/*
	 * If we have been doing sequential I/O, then do some read-ahead.
	 */
	while (lblkno < (origblkno + maxra)) {
		error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL);
		if (error)
			break;

		if (blkno == -1)
			break;

		/*
		 * We could throttle ncontig here by maxra but we might as
		 * well read the data if it is contiguous.  We're throttled
		 * by racluster anyway.
		 */
		if (ncontig) {
			ncontig = min(ncontig + 1, racluster);
			rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
			    size, ncontig, gbflags, NULL);
			lblkno += (rbp->b_bufsize / size);
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
		} else {
			rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
			lblkno += 1;
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
			rbp->b_flags |= B_ASYNC | B_RAM;
			rbp->b_iocmd = BIO_READ;
			rbp->b_blkno = blkno;
		}
		if (rbp->b_flags & B_CACHE) {
			rbp->b_flags &= ~B_ASYNC;
			bqrelse(rbp);
			continue;
		}
		if ((rbp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(rbp, 0);
		}
		rbp->b_flags &= ~B_INVAL;
		rbp->b_ioflags &= ~BIO_ERROR;
		if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
			BUF_KERNPROC(rbp);
		rbp->b_iooffset = dbtob(rbp->b_blkno);
		bstrategy(rbp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, rbp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	if (reqbp) {
		/*
		 * Like bread, always brelse() the buffer when
		 * returning an error.
		 */
		error = bufwait(reqbp);
		if (error != 0) {
			brelse(reqbp);
			*bpp = NULL;
		}
	}
	return (error);
}
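/*
 * Usage sketch (hypothetical helper, not in the original source): a
 * filesystem read path calls cluster_read() where it would otherwise
 * call bread(), passing the file size, the bytes still wanted by the
 * caller (totread) and the sequential-access hint (seqcount).
 * Example #8 below shows the same pattern in a real read loop.
 */
static int
read_one_block_sketch(struct vnode *vp, u_quad_t filesize, daddr_t lbn,
    long bsize, long resid, int seqcount, struct buf **bpp)
{

	if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
		return (cluster_read(vp, filesize, lbn, bsize, NOCRED,
		    resid, seqcount, 0, bpp));
	return (bread(vp, lbn, bsize, NOCRED, bpp));
}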
Example #4
0
static int
tmpfs_read (struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	int error;

	error = 0;
	if (uio->uio_resid == 0) {
		return error;
	}

	node = VP_TO_TMPFS_NODE(vp);

	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & BMASK;
		base_offset = (off_t)uio->uio_offset - offset;
		bp = getcacheblk(vp, base_offset, BSIZE, 0);
		if (bp == NULL) {
			lwkt_gettoken(&vp->v_mount->mnt_token);
			error = bread(vp, base_offset, BSIZE, &bp);
			if (error) {
				brelse(bp);
				lwkt_reltoken(&vp->v_mount->mnt_token);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}
			lwkt_reltoken(&vp->v_mount->mnt_token);
		}

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = BSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_ACCESSED;
	TMPFS_NODE_UNLOCK(node);

	return(error);
}
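/*
 * Hedged sketch of the offset arithmetic above (BSIZE/BMASK are the
 * DragonFly buffer-size constants; the concrete value below is only an
 * illustration).  The uio offset is split into a buffer-aligned base,
 * used as the getcacheblk()/bread() key, and an intra-buffer offset
 * where the copy starts.  E.g. assuming BSIZE = 16384, offset 20000
 * yields base_offset = 16384 and offset = 3616.
 */
static void
tmpfs_split_offset_sketch(off_t uio_offset, off_t *basep, size_t *offp)
{
	size_t offset;

	offset = (size_t)uio_offset & BMASK;	/* within-buffer offset */
	*basep = (off_t)uio_offset - offset;	/* buffer-aligned base */
	*offp = offset;
}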
Example #5
0
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
int
ext2_balloc(struct inode *ip, e2fs_lbn_t lbn, int size, struct ucred *cred,
    struct buf **bpp, int flags)
{
	struct m_ext2fs *fs;
	struct ext2mount *ump;
	struct buf *bp, *nbp;
	struct vnode *vp = ITOV(ip);
	struct indir indirs[NIADDR + 2];
	e4fs_daddr_t nb, newb;
	e2fs_daddr_t *bap, pref;
	int osize, nsize, num, i, error;

	*bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	fs = ip->i_e2fs;
	ump = ip->i_ump;

	/*
	 * Check if this is a sequential block allocation.
	 * If so, increment next_alloc fields to allow ext2_blkpref
	 * to make a good guess.
	 */
	if (lbn == ip->i_next_alloc_block + 1) {
		ip->i_next_alloc_block++;
		ip->i_next_alloc_goal++;
	}

	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		/*
		 * No new block needs to be allocated, and there is no
		 * need to expand the file.
		 */
		if (nb != 0 && ip->i_size >= (lbn + 1) * fs->e2fs_bsize) {
			error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
			/* Godmar thinks: this shouldn't happen w/o fragments */
				printf("nsize %d(%d) > osize %d(%d) nb %d\n", 
					(int)nsize, (int)size, (int)osize, 
					(int)ip->i_size, (int)nb);
				panic(
				    "ext2_balloc: Something is terribly wrong");
/*
 * please note there haven't been any changes from here on -
 * FFS seems to work.
 */
			}
		} else {
			if (ip->i_size < (lbn + 1) * fs->e2fs_bsize)
				nsize = fragroundup(fs, size);
			else
				nsize = fs->e2fs_bsize;
			EXT2_LOCK(ump);
			error = ext2_alloc(ip, lbn,
			    ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
		}
		ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ext2_balloc: ext2_getlbns returned indirect block");
#endif
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		EXT2_LOCK(ump);
		pref = ext2_blkpref(ip, lbn, indirs[0].in_off + 
					     EXT2_NDIR_BLOCKS, &ip->i_db[0], 0);
		if ((error = ext2_alloc(ip, lbn, pref, fs->e2fs_bsize, cred,
			&newb)))
			return (error);
		nb = newb;
		bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, newb);
		vfs_bio_clrbuf(bp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0) {
			ext2_blkfree(ip, nb, fs->e2fs_bsize);
			return (error);
		}
		ip->i_ib[indirs[0].in_off] = newb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		bap = (e2fs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		EXT2_LOCK(ump);
		if (pref == 0)
			pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap,
						bp->b_lblkno);
		error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			return (error);
		}
		nb = newb;
		nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			ext2_blkfree(ip, nb, fs->e2fs_bsize);
			EXT2_UNLOCK(ump);
			brelse(bp);
			return (error);
		}
		bap[indirs[i - 1].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->e2fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		EXT2_LOCK(ump);
		pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], 
				bp->b_lblkno);
		if ((error = ext2_alloc(ip,
		    lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			return (error);
		}
		nb = newb;
		nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->e2fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->e2fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, 0, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			return (error);
		}
	} else {
Example #6
0
static int
bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left,
    int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp,
    nandfs_daddr_t *copy)
{
	struct buf *bp;
	nandfs_lbn_t i, lbn, nlbn, factor, tosub;
	struct nandfs_device *fsdev;
	int error, lcleaned, modified;

	DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__,
	    node, level, *left));

	fsdev = node->nn_nandfsdev;

	MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev));

	factor = lbn_offset(fsdev, level);
	lbn = ap->in_lbn;

	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	bcopy(bp->b_data, copy, fsdev->nd_blocksize);
	bqrelse(bp);

	modified = 0;

	i = ap->in_off;

	if (ap != fp)
		ap++;
	for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--,
	    nlbn += factor) {
		lcleaned = 0;

		DPRINTF(BMAP,
		    ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n",
		    __func__, node, i, nlbn, *left, ap, copy[i]));

		if (copy[i] == 0) {
			tosub = blocks_inside(fsdev, level - 1, ap);
			if (tosub > *left)
				tosub = 0;

			*left -= tosub;
		} else {
			if (level > SINGLE) {
				if (ap == fp)
					ap->in_lbn = nlbn;

				error = bmap_truncate_indirect(node, level - 1,
				    left, &lcleaned, ap, fp,
				    copy + MNINDIR(fsdev));
				if (error)
					return (error);
			} else {
				error = nandfs_bdestroy(node, copy[i]);
				if (error)
					return (error);
				lcleaned = 1;
				*left -= 1;
			}
		}

		if (lcleaned) {
			if (level > SINGLE) {
				error = nandfs_vblock_end(fsdev, copy[i]);
				if (error)
					return (error);
			}
			copy[i] = 0;
			modified++;
		}

		ap = fp;
	}

	if (i == -1)
		*cleaned = 1;

	error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (modified)
		bcopy(copy, bp->b_data, fsdev->nd_blocksize);

	error = nandfs_dirty_buf_meta(bp, 0);
	if (error)
		return (error);

	return (error);
}
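/*
 * Hedged sketch (assumption: lbn_offset(fsdev, level) evaluates to
 * MNINDIR(fsdev) raised to 'level'): every slot of a level-N indirect
 * block covers 'factor' logical blocks, which is why the loop above
 * advances nlbn by 'factor' per slot and why blocks_inside() can
 * account for a whole untouched subtree at once.
 */
static nandfs_lbn_t
lbn_offset_sketch(struct nandfs_device *fsdev, int level)
{
	nandfs_lbn_t factor;

	for (factor = 1; level > 0; level--)
		factor *= MNINDIR(fsdev);
	return (factor);
}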
Example #7
0
/*
 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
static int
ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{
	struct m_ext2fs *fs;
	struct inode *ip;
	struct ext2mount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct cdev *dev;
	struct thread *td;
	int i, error;
	int used_blocks;

	td = curthread;
	error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	ump = VFSTOEXT2(mp);
	dev = ump->um_dev;

	/*
	 * If this malloc() is performed after the getnewvnode()
	 * it might block, leaving a vnode with a NULL v_data to be
	 * found by ext2_sync() if a sync happens to fire right then,
	 * which will cause a panic because ext2_sync() blindly
	 * dereferences vp->v_data (as well it should).
	 */
	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_e2fs = fs = ump->um_e2fs;
	ip->i_ump = ump;
	ip->i_number = ino;

	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		free(ip, M_EXT2NODE);
		*vpp = NULL;
		return (error);
	}
	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* convert ext2 inode to dinode */
	ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) *
			ino_to_fsbo(fs, ino)), ip);
	ip->i_block_group = ino_to_cg(fs, ino);
	ip->i_next_alloc_block = 0;
	ip->i_next_alloc_goal = 0;

	/*
	 * Now we want to make sure that block pointers for unused
	 * blocks are zeroed out - ext2_balloc depends on this
	 * although for regular files and directories only
	 */
	if (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) {
		used_blocks = (ip->i_size + fs->e2fs_bsize - 1) / fs->e2fs_bsize;
		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
			ip->i_db[i] = 0;
	}
#ifdef EXT2FS_DEBUG
	ext2_print_inode(ip);
#endif
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		ip->i_gen = random() / 2 + 1;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IN_MODIFIED;
	}
	*vpp = vp;
	return (0);
}
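/*
 * Usage sketch (hypothetical caller): ext2_vget() is the VFS_VGET
 * backend, so code that has resolved an inode number, e.g. a directory
 * lookup, obtains the locked vnode as below.  The vfs_hash_get() /
 * vfs_hash_insert() pair above makes concurrent callers race safely:
 * the loser frees its freshly allocated inode and returns the
 * winner's vnode instead.
 */
static int
ext2_inode_to_vnode_sketch(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	return (VFS_VGET(mp, ino, LK_EXCLUSIVE, vpp));
}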
Example #8
0
int
xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag)
{
	xfs_fileoff_t lbn, nextlbn;
	xfs_fsize_t bytesinfile;
	long size, xfersize, blkoffset;
	struct buf *bp;
	struct vnode *vp;
	int error, orig_resid;
	int seqcount;

	seqcount = ioflag >> IO_SEQSHIFT;

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	vp = XFS_ITOV(ip)->v_vnode;

	/*
	 * Ok so we couldn't do it all in one vm trick...
	 * so cycle around trying smaller bites..
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_d.di_size - uio->uio_offset) <= 0)
			break;

		lbn = XFS_B_TO_FSBT(mp, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = mp->m_sb.sb_blocksize;
		blkoffset = XFS_B_FSB_OFFSET(mp, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = mp->m_sb.sb_blocksize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (XFS_FSB_TO_B(mp, nextlbn) >= ip->i_d.di_size ) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_d.di_size, lbn,
				size, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = mp->m_sb.sb_blocksize;
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread(vp, lbn, size, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		/*
		 * otherwise use the general form
		 */
		error = uiomove((char *)bp->b_data + blkoffset,
			    (int)xfersize, uio);

		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT) ) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. The VM has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the
	 * loop above resets bp to NULL on each iteration and, on
	 * normal completion, never sets a new value into it; a
	 * non-NULL bp here must have come from a 'break' statement.
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else
			bqrelse(bp);
	}

	return (error);
}
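/*
 * Hedged sketch: breadn() takes parallel arrays of read-ahead block
 * numbers and sizes plus their count, so the single-entry call above
 * is the minimal "one block of read-ahead" case.  A two-block
 * read-ahead (hypothetical values) would look like this.
 */
static int
breadn_sketch(struct vnode *vp, daddr_t lbn, int size, struct buf **bpp)
{
	daddr_t ralbns[2] = { lbn + 1, lbn + 2 };
	int rasizes[2] = { size, size };

	return (breadn(vp, lbn, size, ralbns, rasizes, 2, NOCRED, bpp));
}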
Example #9
0
/*
 * Balloc defines the structure of filesystem storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS1. Below is
 * the allocation strategy for UFS2.
 */
int
ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct inode *ip;
	struct ufs1_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	ufs1_daddr_t nb;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[NIADDR + 2];
	int deallocated, osize, nsize, num, i, error;
	ufs2_daddr_t newb;
	ufs1_daddr_t *bap, pref;
	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din1;
	fs = ip->i_fs;
	ump = ip->i_ump;
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs1: blk too big");
	*bpp = NULL;
	if (flags & IO_EXT)
		return (EOPNOTSUPP);
	if (lbn < 0)
		return (EFBIG);

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
			   cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lbn, osize, NOCRED, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, 0);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, 0);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs1_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			if (++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, &nbp);
		} else {
			error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
Example #10
0
/*
 * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it
 * in from disk.  If it is in core, wait for the lock bit to clear, then
 * return the inode locked.  Detection and handling of mount points must be
 * done by the calling routine.
 */
static int
ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{
	struct m_ext2fs *fs;
	struct inode *ip;
	struct ext2mount *ump;
	struct buf *bp;
	struct vnode *vp;
	struct thread *td;
	int i, error;
	int used_blocks;

	td = curthread;
	error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	ump = VFSTOEXT2(mp);
	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) {
		*vpp = NULL;
		free(ip, M_EXT2NODE);
		return (error);
	}
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_e2fs = fs = ump->um_e2fs;
	ip->i_ump = ump;
	ip->i_number = ino;

	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		free(ip, M_EXT2NODE);
		*vpp = NULL;
		return (error);
	}
	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/* Read in the disk contents for the inode, copy into the inode. */
	if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
	    (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain. With mode
		 * still zero, it will be unlinked and returned to the free
		 * list by vput().
		 */
		brelse(bp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}
	/* convert ext2 inode to dinode */
	ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + EXT2_INODE_SIZE(fs) *
	    ino_to_fsbo(fs, ino)), ip);
	ip->i_block_group = ino_to_cg(fs, ino);
	ip->i_next_alloc_block = 0;
	ip->i_next_alloc_goal = 0;

	/*
	 * Now we want to make sure that block pointers for unused
	 * blocks are zeroed out - ext2_balloc depends on this
	 * although for regular files and directories only
	 *
	 * If IN_E4EXTENTS is enabled, unused blocks are not zeroed
	 * out because we could corrupt the extent tree.
	 */
	if (!(ip->i_flag & IN_E4EXTENTS) &&
	    (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) {
		used_blocks = howmany(ip->i_size, fs->e2fs_bsize);
		for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++)
			ip->i_db[i] = 0;
	}
#ifdef EXT2FS_DEBUG
	ext2_print_inode(ip);
#endif
	bqrelse(bp);

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) {
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */

	*vpp = vp;
	return (0);
}
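/*
 * Hedged equivalence note: howmany() above is ceiling division, the
 * same computation Example #7 spells out by hand; both count how many
 * full blocks are needed to hold i_size bytes.
 */
static int
used_blocks_sketch(off_t isize, uint32_t bsize)
{
	/* howmany(isize, bsize) == (isize + bsize - 1) / bsize */
	return ((isize + bsize - 1) / bsize);
}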
Example #11
0
/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 * This is the allocation strategy for UFS2. Above is
 * the allocation strategy for UFS1.
 */
int
ffs_balloc_ufs2(vnode *vp, off_t startoffset, int size,
    Ucred *cred, int flags, Buf **bpp)
{
	int error = 0;
	print("HARVEY TODO: %s\n", __func__);
#if 0
	struct inode *ip;
	struct ufs2_dinode *dp;
	ufs_lbn_t lbn, lastlbn;
	struct fs *fs;
	struct buf *bp, *nbp;
	struct ufsmount *ump;
	struct indir indirs[UFS_NIADDR + 2];
	ufs2_daddr_t nb, newb, *bap, pref;
	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
	int deallocated, osize, nsize, num, i, error;
	int unwindidx = -1;
	int saved_inbdflush;
	static struct timeval lastfail;
	static int curfail;
	int gbflags, reclaimed;

	ip = VTOI(vp);
	dp = ip->i_din2;
	fs = ITOFS(ip);
	ump = ITOUMP(ip);
	lbn = lblkno(fs, startoffset);
	size = blkoff(fs, startoffset) + size;
	reclaimed = 0;
	if (size > fs->fs_bsize)
		panic("ffs_balloc_ufs2: blk too big");
	*bpp = nil;
	if (lbn < 0)
		return (EFBIG);
	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;

	if (DOINGSOFTDEP(vp))
		softdep_prealloc(vp, MNT_WAIT);

	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= UFS_NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
				    &dp->di_extb[0]), osize,
				    (int)fs->fs_bsize, flags, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, nb,
					    dbtofsb(fs, bp->b_blkno),
					    dp->di_extb[nb],
					    fs->fs_bsize, osize, bp);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_extb[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocext(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			bp->b_xflags |= BX_ALTDATA;
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocext(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE;
		*bpp = bp;
		return (0);
	}
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	lastlbn = lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			UFS_LOCK(ump);
			error = ffs_realloccg(ip, nb, dp->di_db[nb],
			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
			    flags, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dbtofsb(fs, bp->b_blkno),
				    dp->di_db[nb],
				    fs->fs_bsize, osize, bp);
			ip->i_size = smalllblktosize(fs, nb + 1);
			dp->di_size = ip->i_size;
			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
		}
	}
	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */
	if (lbn < UFS_NDADDR) {
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
		nb = dp->di_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
			    gbflags, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_blkno = fsbtodb(fs, nb);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread_gb(vp, lbn, osize, NOCRED,
				    gbflags, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_blkno = fsbtodb(fs, nb);
			} else {
				UFS_LOCK(ump);
				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				    &dp->di_db[0]), osize, nsize, flags,
				    cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dbtofsb(fs, bp->b_blkno), nb,
					    nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			UFS_LOCK(ump);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
				&dp->di_db[0]), nsize, flags, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
			bp->b_blkno = fsbtodb(fs, newb);
			if (flags & BA_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef INVARIANTS
	if (num < 1)
		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
#endif
	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
	/*
	 * Fetch the first indirect block allocating if necessary.
	 */
	--num;
	nb = dp->di_ib[indirs[0].in_off];
	allocib = nil;
	allocblk = allociblk;
	lbns_remfree = lbns;
	if (nb == 0) {
		UFS_LOCK(ump);
		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
		    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags, cred, &newb)) != 0) {
			curthread_pflags_restore(saved_inbdflush);
			return (error);
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		bp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip,
			    UFS_NDADDR + indirs[0].in_off, newb, 0,
			    fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		} else {
			if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &dp->di_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
retry:
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs2_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		/*
		 * If parent indirect has just been allocated, try to cluster
		 * immediately following it.
		 */
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
			    (ufs2_daddr_t *)0);
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		pref = newb + fs->fs_frag;
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
			if (nbp->b_bufsize == fs->fs_bsize)
				nbp->b_flags |= B_CLUSTEROK;
			bdwrite(nbp);
		} else {
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == nil && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		/*
		 * If allocating metadata at the front of the cylinder
		 * group and parent indirect block has just been allocated,
		 * then cluster next to it if it is the first indirect in
		 * the file. Otherwise it has been allocated in the metadata
		 * area, so we want to find our own place out in the data area.
		 */
		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				UFS_LOCK(ump);
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&lastfail, &curfail, 1)) {
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			}
			goto fail;
		}
		nb = newb;
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount != 0 &&
		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
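		/*
		 * The original listing breaks off in this branch.  What
		 * follows is a hedged sketch of the usual non-BA_CLRBUF
		 * path (an assumption, not the verbatim source): the
		 * caller intends to overwrite the entire block, so no
		 * read-before-write is needed and the buffer is simply
		 * instantiated and pointed at the newly allocated block.
		 * The remainder of the function (error unwinding) is not
		 * reproduced here.
		 */
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
	}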
Example #12
0
/*
 * This function handles ext4 extent block mapping.
 */
static int
ext4_ext_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	struct ext4_extent nex, *ep;
	struct ext4_extent_header *ehp;
	struct ext4_extent_path path;
	daddr_t lbn, nextlbn, newblk = 0;
	off_t bytesinfile;
	u_short mode;
	int cache_type;
	int orig_resid;
	int error = 0;
	int depth = 0;
	long size, xfersize, blkoffset;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	memset(&path, 0, sizeof(path));

	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/* Get the block from the ext4 extent cache. */
		cache_type = ext4_ext_in_cache(ip, lbn, &nex);
		if (cache_type != 0) {
			/* The block has not been allocated yet (a hole). */
			if (cache_type == EXT4_EXT_CACHE_GAP)
				return (error);
			else if (cache_type == EXT4_EXT_CACHE_IN)
				newblk = lbn - nex.e_blk +
				    (nex.e_start_lo | ((daddr_t)(nex.e_start_hi) << 31) << 1);
		} else {
			ext4_ext_find_extent(fs, ip, lbn, &path);
			depth = ((struct ext4_extent_header *)(ip->i_db))->eh_depth;
			if (path.ep_ext == NULL && depth != 0)
				return (EIO);

			ehp = path.ep_header;
			ep = path.ep_ext;
			if (ep == NULL)
				return (EIO);

			ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN);

			newblk = lbn - ep->e_blk +
			    (ep->e_start_lo | ((daddr_t)(ep->e_start_hi) << 31) << 1);

			if (path.ep_bp != NULL) {
				brelse(path.ep_bp);
				path.ep_bp = NULL;
			}
		}

		error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		bqrelse(bp);
	}

	if (bp != NULL)
		bqrelse(bp);

	return (error);
}
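Both extent paths in the function above rebuild a 48-bit physical block number from the split e_start_lo/e_start_hi on-disk fields, shifting the high half left by 31 and then by 1 rather than by 32 in one step. Below is a minimal standalone sketch of that reconstruction; the struct layout mirrors the on-disk split, but the names and values are illustrative, not taken from the kernel headers.

#include <stdint.h>
#include <stdio.h>

struct demo_extent {
	uint32_t e_start_lo;	/* low 32 bits of the physical block */
	uint16_t e_start_hi;	/* high 16 bits of the physical block */
};

static uint64_t
demo_extent_pblock(const struct demo_extent *ep)
{
	/*
	 * The two-step shift (<< 31, then << 1) mirrors the listing; it
	 * avoids a single full 32-bit shift in case the destination type
	 * is ever narrower than 64 bits.
	 */
	return (ep->e_start_lo |
	    (((uint64_t)ep->e_start_hi << 31) << 1));
}

int
main(void)
{
	struct demo_extent ex = { .e_start_lo = 0x89abcdefU, .e_start_hi = 0x4567 };

	/* Prints "pblock = 0x456789abcdef". */
	printf("pblock = 0x%012jx\n", (uintmax_t)demo_extent_pblock(&ex));
	return (0);
}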
Example #13
0
/*
 * This function handles traditional (indirect) block mapping.
 */
static int
ext2_ind_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	u_short mode;

	vp = ap->a_vp;
	ip = VTOI(vp);
	mode = ip->i_mode;
	uio = ap->a_uio;
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->I_FS;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0)
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount, &bp);
		else if (seqcount > 1) {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		bqrelse(bp);
	}
	if (bp != NULL)
		bqrelse(bp);
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
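The read loop above clamps each transfer three ways: to the bytes remaining in the current block, to the bytes the caller still wants, and to the bytes left in the file. A small standalone sketch of that clamping, with hypothetical names and values:

#include <stdio.h>

static long
clamp_xfersize(long bsize, long blkoffset, long resid, long bytesinfile)
{
	long xfersize = bsize - blkoffset;	/* rest of this block */

	if (resid < xfersize)			/* caller wants less */
		xfersize = resid;
	if (bytesinfile < xfersize)		/* file ends sooner */
		xfersize = bytesinfile;
	return (xfersize);
}

int
main(void)
{
	/*
	 * 4K blocks, read starting 1000 bytes into a block, 10000 bytes
	 * requested, 2500 bytes left in the file: prints 2500.
	 */
	printf("%ld\n", clamp_xfersize(4096, 1000, 10000, 2500));
	return (0);
}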
Example #14
0
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */
static
int
ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp,
	      struct indir *ap, int *nump, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2_mount *ump;
	struct mount *mp;
	struct ext2_sb_info *fs;
	struct indir a[NIADDR+1], *xap;
	ext2_daddr_t daddr;
	long metalbn;
	int error, maxrun, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);
	fs = ip->i_e2fs;
#ifdef DIAGNOSTIC
	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
		panic("ext2_bmaparray: invalid arguments");
#endif

	if (runp) {
		*runp = 0;
	}

	if (runb) {
		*runb = 0;
	}

	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	error = ext2_getlbns(vp, bn, xap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0)
			*bnp = -1;
		else if (runp) {
			daddr_t bnb = bn;
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn+1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}


	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[xap->in_off];

	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = xap->in_lbn;
		if ((daddr == 0 &&
		     !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
		    metalbn == bn) {
			break;
		}
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		xap->in_exists = 1;
		bp = getblk(vp, lblktodoff(fs, metalbn),
			    mp->mnt_stat.f_iosize, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef DIAGNOSTIC
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			/*
			 * This runs through ext2_strategy using bio2 to
			 * cache the disk offset, then comes back through
			 * bio1.  So we want to wait on bio1
			 */
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
			bp->b_flags &= ~(B_INVAL|B_ERROR);
			bp->b_cmd = BUF_CMD_READ;
			vfs_busy_pages(bp->b_vp, bp);
			vn_strategy(bp->b_vp, &bp->b_bio1);
			error = biowait(&bp->b_bio1, "biord");
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = xap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((ext2_daddr_t *)bp->b_data)[bn - 1],
			    ((ext2_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = xap->in_off;
			if (runb && bn) {
				for (--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump,
					    ((ext2_daddr_t *)bp->b_data)[bn],
					    ((ext2_daddr_t *)bp->b_data)[bn + 1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	daddr = blkptrtodb(ump, daddr);
	*bnp = daddr == 0 ? -1 : daddr;
	return (0);
}
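The header comment above describes the negative-lbn numbering scheme, but the function only consumes it (via ext2_getlbns). Below is a toy standalone model of how that numbering comes out for one data block; the geometry constants (12 direct blocks, 3 indirect levels, 1024 pointers per block) are assumptions for illustration, where the real code derives them from the superblock.

#include <stdio.h>

#define DEMO_NDADDR	12	/* direct blocks in the inode (assumed) */
#define DEMO_NIADDR	3	/* levels of indirection (assumed) */
#define DEMO_NINDIR	1024	/* pointers per indirect block (assumed) */

int
main(void)
{
	long bn = 50000;	/* data block to look up */
	long realbn = bn;
	long blockcnt, metalbn;
	int i;

	/* Find the indirection level, in the style of ufs_getlbns(). */
	for (blockcnt = 1, i = DEMO_NIADDR, bn -= DEMO_NDADDR;;
	    i--, bn -= blockcnt) {
		if (i == 0) {
			printf("block too large for the file\n");
			return (1);
		}
		blockcnt *= DEMO_NINDIR;
		if (bn < blockcnt)
			break;
	}

	/*
	 * The first metablock on the path gets the negative address of
	 * the first data block it covers, minus one per extra level of
	 * indirection, matching the comment above Example #14.  For
	 * bn = 50000 this prints: 2 level(s) deep, metablock lbn -1037.
	 */
	metalbn = -(realbn - bn + DEMO_NIADDR - i);
	printf("data block %ld sits %d level(s) deep; first metablock lbn %ld\n",
	    realbn, DEMO_NIADDR - i + 1, metalbn);
	return (0);
}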
Example #15
0
/*
 * Vnode op for reading.
 */
static int
ext2_read(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct m_ext2fs *fs;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error, orig_resid, seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", "ext2_read");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("%s: short symlink", "ext2_read");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", "ext2_read", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
	fs = ip->i_e2fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->e2fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		xfersize = fs->e2fs_fsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, ip->i_size, lbn, size,
			    NOCRED, blkoffset + uio->uio_resid, seqcount,
			    0, &bp);
		} else if (seqcount > 1) {
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		} else
			error = bread(vp, lbn, size, NOCRED, &bp);
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			/*
			 * If it's VMIO or direct I/O, then we don't
			 * need the buf, mark it available for
			 * freeing. If it's non-direct VMIO, the VM has
			 * the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error, because the loop
	 * above resets bp to NULL on each iteration and does not assign a
	 * new buffer on normal completion, so any leftover bp must have
	 * come from a 'break' statement.
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
		ip->i_flag |= IN_ACCESS;
	return (error);
}
Example #16
0
/*
 * ffs_balloc(struct vnode *a_vp, ufs_daddr_t a_lbn, int a_size,
 *	      struct ucred *a_cred, int a_flags, struct buf *a_bpp)
 *
 * Balloc defines the structure of filesystem storage by allocating
 * the physical blocks on a device given the inode and the logical
 * block number in a file.
 *
 * NOTE: B_CLRBUF - this flag tells balloc to clear invalid portions
 *	 of the buffer.  However, any dirty bits will override missing
 *	 valid bits.  This case occurs when writable mmaps are truncated
 *	 and then extended.
 */
int
ffs_balloc(struct vop_balloc_args *ap)
{
	struct inode *ip;
	ufs_daddr_t lbn;
	int size;
	struct ucred *cred;
	int flags;
	struct fs *fs;
	ufs_daddr_t nb;
	struct buf *bp, *nbp, *dbp;
	struct vnode *vp;
	struct indir indirs[NIADDR + 2];
	ufs_daddr_t newb, *bap, pref;
	int deallocated, osize, nsize, num, i, error;
	ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
	ufs_daddr_t *lbns_remfree, lbns[NIADDR + 1];
	int unwindidx;
	int seqcount;

	vp = ap->a_vp;
	ip = VTOI(vp);
	fs = ip->i_fs;
	lbn = lblkno(fs, ap->a_startoffset);
	size = blkoff(fs, ap->a_startoffset) + ap->a_size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	*ap->a_bpp = NULL;
	if (lbn < 0)
		return (EFBIG);
	cred = ap->a_cred;
	flags = ap->a_flags;

	/*
	 * The vnode must be locked for us to be able to safely mess
	 * around with the inode.
	 */
	if (vn_islocked(vp) != LK_EXCLUSIVE) {
		panic("ffs_balloc: vnode %p not exclusively locked!", vp);
	}

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment
	 * this fragment has to be extended to be a full block.
	 */
	nb = lblkno(fs, ip->i_size);
	if (nb < NDADDR && nb < lbn) {
		/*
		 * The file size prior to this write fits in the direct
		 * blocks (so the last block may be a fragment), and the
		 * write now extends the file beyond the block that held
		 * the previous end of file.
		 */
		osize = blksize(fs, ip, nb);
		/*
		 * osize gives the allocated on-disk size of the last block:
		 * either some number of fragments or a full filesystem
		 * block.
		 */
		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * Only a few fragments are allocated so far; since
			 * the write extends beyond this block, grow the
			 * fragments in the last block into a complete block.
			 */
			error = ffs_realloccg(ip, nb,
				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
				osize, (int)fs->fs_bsize, cred, &bp);
			if (error)
				return (error);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, nb,
				    dofftofsb(fs, bp->b_bio2.bio_offset), 
				    ip->i_db[nb], fs->fs_bsize, osize, bp);
			/* adjust the inode size, we just grew */
			ip->i_size = smalllblktosize(fs, nb + 1);
			ip->i_db[nb] = dofftofsb(fs, bp->b_bio2.bio_offset);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (flags & B_SYNC)
				bwrite(bp);
			else
				bawrite(bp);
			/* bp is already released here */
		}
	}
	/*
	 * The first NDADDR blocks are direct blocks
	 */
	if (lbn < NDADDR) {
		nb = ip->i_db[lbn];
		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, lblktodoff(fs, lbn), fs->fs_bsize, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			*ap->a_bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider need to reallocate a fragment.
			 */
			osize = fragroundup(fs, blkoff(fs, ip->i_size));
			nsize = fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, lblktodoff(fs, lbn), 
					      osize, &bp);
				if (error) {
					brelse(bp);
					return (error);
				}
				bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
			} else {
				/*
				 * NOTE: ffs_realloccg() issues a bread().
				 */
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref(ip, lbn, (int)lbn,
					&ip->i_db[0]), osize, nsize, cred, &bp);
				if (error)
					return (error);
				if (DOINGSOFTDEP(vp))
					softdep_setup_allocdirect(ip, lbn,
					    dofftofsb(fs, bp->b_bio2.bio_offset),
					    nb, nsize, osize, bp);
			}
		} else {
			if (ip->i_size < smalllblktosize(fs, lbn + 1))
				nsize = fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
			    nsize, cred, &newb);
			if (error)
				return (error);
			bp = getblk(vp, lblktodoff(fs, lbn), nsize, 0, 0);
			bp->b_bio2.bio_offset = fsbtodoff(fs, newb);
			if (flags & B_CLRBUF)
				vfs_bio_clrbuf(bp);
			if (DOINGSOFTDEP(vp))
				softdep_setup_allocdirect(ip, lbn, newb, 0,
				    nsize, 0, bp);
		}
		ip->i_db[lbn] = dofftofsb(fs, bp->b_bio2.bio_offset);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*ap->a_bpp = bp;
		return (0);
	}
	/*
	 * Determine the number of levels of indirection.
	 */
	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return(error);
#ifdef DIAGNOSTIC
	if (num < 1)
		panic ("ffs_balloc: ufs_bmaparray returned indirect block");
#endif
	/*
	 * Get a handle on the data block buffer before working through 
	 * indirect blocks to avoid a deadlock between the VM system holding
	 * a locked VM page and issuing a BMAP (which tries to lock the
	 * indirect blocks), and the filesystem holding a locked indirect
	 * block and then trying to read a data block (which tries to lock
	 * the underlying VM pages).
	 */
	dbp = getblk(vp, lblktodoff(fs, lbn), fs->fs_bsize, 0, 0);

	/*
	 * Setup undo history
	 */
	allocib = NULL;
	allocblk = allociblk;
	lbns_remfree = lbns;

	unwindidx = -1;

	/*
	 * Fetch the first indirect block directly from the inode, allocating
	 * one if necessary. 
	 */
	--num;
	nb = ip->i_ib[indirs[0].in_off];
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, 0, NULL);
		/*
		 * If the filesystem has run out of space we can skip the
		 * full fsync/undo of the main [fail] case since no undo
		 * history has been built yet.  Hence the goto fail2.
		 */
	if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
	    cred, &newb)) != 0)
			goto fail2;
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[1].in_lbn;
		bp = getblk(vp, lblktodoff(fs, indirs[1].in_lbn),
			    fs->fs_bsize, 0, 0);
		bp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(bp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
			    newb, 0, fs->fs_bsize, 0, bp);
			bdwrite(bp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if (DOINGASYNC(vp))
				bdwrite(bp);
			else if ((error = bwrite(bp)) != 0)
				goto fail;
		}
		allocib = &ip->i_ib[indirs[0].in_off];
		*allocib = nb;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */
	for (i = 1;;) {
		error = bread(vp, lblktodoff(fs, indirs[i].in_lbn),
			      (int)fs->fs_bsize, &bp);
		if (error) {
			brelse(bp);
			goto fail;
		}
		bap = (ufs_daddr_t *)bp->b_data;
		nb = bap[indirs[i].in_off];
		if (i == num)
			break;
		i += 1;
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		if (pref == 0)
			pref = ffs_blkpref(ip, lbn, 0, NULL);
		if ((error =
		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, lblktodoff(fs, indirs[i].in_lbn),
			     fs->fs_bsize, 0, 0);
		nbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else {
			/*
			 * Write synchronously so that indirect blocks
			 * never point at garbage.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		bap[indirs[i - 1].in_off] = nb;
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}

	/*
	 * Get the data block, allocating if necessary.  We have already
	 * called getblk() on the data block buffer, dbp.  If we have to
	 * allocate it and B_CLRBUF has been set the inference is an intention
	 * to zero out the related disk blocks, so we do not have to issue
	 * a read.  Instead we simply call vfs_bio_clrbuf().  If B_CLRBUF is
	 * not set the caller intends to overwrite the entire contents of the
	 * buffer and we don't waste time trying to clean up the contents.
	 *
	 * bp references the current indirect block.  When allocating, 
	 * the block must be updated.
	 */
	if (nb == 0) {
		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
		error = ffs_alloc(ip,
		    lbn, pref, (int)fs->fs_bsize, cred, &newb);
		if (error) {
			brelse(bp);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		dbp->b_bio2.bio_offset = fsbtodoff(fs, nb);
		if (flags & B_CLRBUF)
			vfs_bio_clrbuf(dbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, dbp);
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		*ap->a_bpp = dbp;
		return (0);
	}
	brelse(bp);

	/*
	 * At this point all related indirect blocks have been allocated
	 * if necessary and released.  bp is no longer valid.  dbp holds
	 * our getblk()'d data block.
	 *
	 * XXX we previously performed a cluster_read operation here.
	 */
	if (flags & B_CLRBUF) {
		/*
		 * If B_CLRBUF is set we must validate the invalid portions
		 * of the buffer.  This typically requires a read-before-
		 * write.  The strategy call will fill in bio_offset in that
		 * case.
		 *
		 * If we hit this case we do a cluster read if possible
		 * since nearby data blocks are likely to be accessed soon
		 * too.
		 */
		if ((dbp->b_flags & B_CACHE) == 0) {
			bqrelse(dbp);
			seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT;
			if (seqcount &&
			    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
				error = cluster_read(vp, (off_t)ip->i_size,
					    lblktodoff(fs, lbn),
					    (int)fs->fs_bsize, 
					    fs->fs_bsize,
					    seqcount * BKVASIZE,
					    &dbp);
			} else {
				error = bread(vp, lblktodoff(fs, lbn),
					      (int)fs->fs_bsize, &dbp);
			}
			if (error)
				goto fail;
		} else {
Example #17
0
static int
tmpfs_read (struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	size_t resid;
	int error;

	/*
	 * Check the basics
	 */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
	resid = uio->uio_resid;
	error = vop_helper_read_shortcut(ap);
	if (error)
		return (error);
	if (uio->uio_resid == 0) {
		if (resid)
			goto finished;
		return (error);
	}

	/*
	 * Fall-through to our normal read code.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, 0);
		if (bp == NULL) {
			error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp);
			if (error) {
				brelse(bp);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}

			/*
			 * tmpfs pretty much fiddles directly with the VM
			 * system, don't let it exhaust it or we won't play
			 * nice with other processes.
			 *
			 * Only do this if the VOP is coming from a normal
			 * read/write.  The VM system handles the case for
			 * UIO_NOCOPY.
			 */
			if (uio->uio_segflg != UIO_NOCOPY)
				vm_wait_nominal();
		}
		bp->b_flags |= B_CLUSTEROK;

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

finished:
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return (error);
}
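The loop above locates each buffer by splitting the uio offset into a block-aligned base plus an in-block offset using TMPFS_BLKMASK64. A standalone sketch of that decomposition follows; the 16K block size is an assumption for illustration only.

#include <stdint.h>
#include <stdio.h>

#define DEMO_BLKSIZE	16384ULL		/* assumed block size */
#define DEMO_BLKMASK	(DEMO_BLKSIZE - 1)

int
main(void)
{
	uint64_t uio_offset = 100000;
	uint64_t offset = uio_offset & DEMO_BLKMASK;	/* within block */
	uint64_t base_offset = uio_offset - offset;	/* block-aligned */

	/* Prints "base 98304 + offset 1696". */
	printf("base %ju + offset %ju\n", (uintmax_t)base_offset,
	    (uintmax_t)offset);
	return (0);
}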
Example #18
0
static __inline int
_vnode_validate(hammer_dedup_cache_t dcp, void *data, int *errorp)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	struct vnode *vp;
	struct buf *bp;
	off_t dooffset;
	int result, error;

	result = error = 0;
	*errorp = 0;

	hammer_simple_transaction(&trans, dcp->hmp);

	ip = hammer_get_inode(&trans, NULL, dcp->obj_id, HAMMER_MAX_TID,
	    dcp->localization, 0, &error);
	if (ip == NULL) {
		hkprintf("dedup: unable to find objid %016jx:%08x\n",
		    (intmax_t)dcp->obj_id, dcp->localization);
		*errorp = 1;
		goto failed2;
	}

	error = hammer_get_vnode(ip, &vp);
	if (error) {
		hkprintf("dedup: unable to acquire vnode for %016jx:%08x\n",
		    (intmax_t)dcp->obj_id, dcp->localization);
		*errorp = 2;
		goto failed;
	}

	if ((bp = findblk(ip->vp, dcp->file_offset, FINDBLK_NBLOCK)) != NULL) {
		bremfree(bp);

		/* XXX if (mapped to userspace) goto done, *errorp = 4 */

		if ((bp->b_flags & B_CACHE) == 0 || bp->b_flags & B_DIRTY) {
			*errorp = 5;
			goto done;
		}

		if (bp->b_bio2.bio_offset != dcp->data_offset) {
			error = VOP_BMAP(ip->vp, dcp->file_offset, &dooffset,
			    NULL, NULL, BUF_CMD_READ);
			if (error) {
				*errorp = 6;
				goto done;
			}

			if (dooffset != dcp->data_offset) {
				*errorp = 7;
				goto done;
			}
			hammer_live_dedup_bmap_saves++;
		}

		if (bcmp(data, bp->b_data, dcp->bytes) == 0)
			result = 1;

done:
		bqrelse(bp);
	} else {
		*errorp = 3;
	}
	vput(vp);

failed:
	hammer_rel_inode(ip, 0);
failed2:
	hammer_done_transaction(&trans);
	return (result);
}