예제 #1
0
/*
 * Common code for reading a buffer with various options
 *
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread_common(void *arg, dev_t dev, daddr_t blkno, long bsize)
{
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	struct buf *bp;
	klwp_t *lwp = ttolwp(curthread);

	CPU_STATS_ADD_K(sys, lread, 1);
	bp = getblk_common(ufsvfsp, dev, blkno, bsize, /* errflg */ 1);
	if (bp->b_flags & B_DONE)
		return (bp);
	bp->b_flags |= B_READ;
	ASSERT(bp->b_bcount == bsize);
	if (ufsvfsp == NULL) {					/* !ufs */
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
							/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
							/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
		ub.ub_breads.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (lwp != NULL)
		lwp->lwp_ru.inblock++;
	CPU_STATS_ADD_K(sys, bread, 1);
	(void) biowait(bp);
	return (bp);
}
예제 #2
0
파일: fbio.c 프로젝트: JackieXie168/mac-zfs
/*
 * Perform a synchronous indirect write of the given block number
 * on the given device, using the given fbuf.  Upon return the fbp
 * is invalid.
 */
int
fbiwrite(struct fbuf *fbp, vnode_t *devvp, daddr_t bn, int bsize)
{
	struct buf *bp;
	int error, fberror;

	/*
	 * Allocate a temp bp using pageio_setup, but then use it
	 * for physio to the area mapped by fbuf which is currently
	 * all locked down in place.
	 *
	 * XXX - need to have a generalized bp header facility
	 * which we build up pageio_setup on top of.  Other places
	 * (like here and in device drivers for the raw I/O case)
	 * could then use these new facilities in a more straight
	 * forward fashion instead of playing all these games.
	 */
	bp = pageio_setup((struct page *)NULL, fbp->fb_count, devvp, B_WRITE);
	bp->b_flags &= ~B_PAGEIO;		/* XXX */
	bp->b_un.b_addr = fbp->fb_addr;

	bp->b_blkno = bn * btod(bsize);
	bp->b_dev = cmpdev(devvp->v_rdev);	/* store in old dev format */
	bp->b_edev = devvp->v_rdev;
	bp->b_proc = NULL;			/* i.e. the kernel */

	(void) bdev_strategy(bp);
	error = biowait(bp);
	pageio_done(bp);

	/*CSTYLED*/
	FBCOMMON(fbp, S_OTHER, 0, fberror = )

	return (error ? error : fberror);
}
예제 #3
0
/*
 * Common code for writing a buffer with various options.
 *
 * force_wait  - wait for write completion regardless of B_ASYNC flag
 * do_relse    - release the buffer when we are done
 * clear_flags - flags to clear from the buffer
 */
void
bwrite_common(void *arg, struct buf *bp, int force_wait,
				int do_relse, int clear_flags)
{
	register int do_wait;
	struct ufsvfs *ufsvfsp = (struct ufsvfs *)arg;
	int flag;
	klwp_t *lwp = ttolwp(curthread);
	struct cpu *cpup;

	ASSERT(SEMA_HELD(&bp->b_sem));
	flag = bp->b_flags;
	bp->b_flags &= ~clear_flags;
	if (lwp != NULL)
		lwp->lwp_ru.oublock++;
	CPU_STATS_ENTER_K();
	cpup = CPU;		/* get pointer AFTER preemption is disabled */
	CPU_STATS_ADDQ(cpup, sys, lwrite, 1);
	CPU_STATS_ADDQ(cpup, sys, bwrite, 1);
	do_wait = ((flag & B_ASYNC) == 0 || force_wait);
	if (do_wait == 0)
		CPU_STATS_ADDQ(cpup, sys, bawrite, 1);
	CPU_STATS_EXIT_K();
	if (ufsvfsp == NULL) {
		(void) bdev_strategy(bp);
	} else if (ufsvfsp->vfs_log && bio_lufs_strategy != NULL) {
							/* ufs && logging */
		(*bio_lufs_strategy)(ufsvfsp->vfs_log, bp);
	} else if (ufsvfsp->vfs_snapshot && bio_snapshot_strategy != NULL) {
							/* ufs && snapshots */
		(*bio_snapshot_strategy)(&ufsvfsp->vfs_snapshot, bp);
	} else {
		ub.ub_bwrites.value.ul++;		/* ufs && !logging */
		(void) bdev_strategy(bp);
	}
	if (do_wait) {
		(void) biowait(bp);
		if (do_relse) {
			brelse(bp);
		}
	}
}
예제 #4
0
/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev_t dev, daddr_t blkno, daddr_t rablkno, long bsize)
{
	struct buf *bp, *rabp;
	klwp_t *lwp = ttolwp(curthread);

	bp = NULL;
	if (!bio_incore(dev, blkno)) {
		CPU_STATS_ADD_K(sys, lread, 1);
		bp = GETBLK(dev, blkno, bsize);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = bsize;
			(void) bdev_strategy(bp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (rablkno && bfreelist.b_bcount > 1 &&
	    !bio_incore(dev, rablkno)) {
		rabp = GETBLK(dev, rablkno, bsize);
		if (rabp->b_flags & B_DONE)
			brelse(rabp);
		else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = bsize;
			(void) bdev_strategy(rabp);
			if (lwp != NULL)
				lwp->lwp_ru.inblock++;
			CPU_STATS_ADD_K(sys, bread, 1);
		}
	}
	if (bp == NULL)
		return (BREAD(dev, blkno, bsize));
	(void) biowait(bp);
	return (bp);
}
예제 #5
0
파일: lufs_top.c 프로젝트: andreiw/polaris
/*
 * Called from roll thread;
 *	buffer set for reading master
 * Returns
 *	0 - success, can continue with next buffer
 *	1 - failure due to logmap deltas being in use
 */
int
top_read_roll(rollbuf_t *rbp, ml_unit_t *ul)
{
	buf_t		*bp	= &rbp->rb_bh;
	offset_t	mof	= ldbtob(bp->b_blkno);

	/*
	 * get a list of deltas
	 */
	if (logmap_list_get_roll(ul->un_logmap, mof, rbp)) {
		/* logmap deltas are in use */
		return (1);
	}

	/*
	 * no deltas were found, nothing to roll
	 */
	if (rbp->rb_age == NULL) {
		bp->b_flags |= B_INVAL;
		return (0);
	}

	/*
	 * If there is one cached roll buffer that cover all the deltas then
	 * we can use that instead of copying to a separate roll buffer.
	 */
	if (rbp->rb_crb) {
		rbp->rb_bh.b_blkno = lbtodb(rbp->rb_crb->c_mof);
		return (0);
	}

	/*
	 * Set up the read.
	 * If no read is needed logmap_setup_read() returns 0.
	 */
	if (logmap_setup_read(rbp->rb_age, rbp)) {
		/*
		 * async read the data from master
		 */
		logstats.ls_rreads.value.ui64++;
		bp->b_bcount = MAPBLOCKSIZE;
		(void) bdev_strategy(bp);
		lwp_stat_update(LWP_STAT_INBLK, 1);
	} else {
		sema_v(&bp->b_io); /* mark read as complete */
	}
	return (0);
}
예제 #6
0
/*ARGSUSED*/
int
ufs_rdwr_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	struct buf	*bp;
	krw_t		rwtype = RW_READER;
	u_offset_t	offset1 = offset;	/* Initial offset */
	size_t		iolen;
	int		curlen = 0;
	int		pplen;
	daddr_t		bn;
	int		contig = 0;
	int		error = 0;
	int		nbytes;			/* Number bytes this IO */
	int		offsetn;		/* Start point this IO */
	int		iswrite = flags & B_WRITE;
	int		io_started = 0;		/* No IO started */
	struct ulockfs	*ulp;
	uint_t		protp = PROT_ALL;

	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, !iswrite,
	    &protp);
	if (error) {
		if (flags & B_ASYNC) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	fs = ufsvfsp->vfs_fs;
	iolen = len;

	DEBUGF((CE_CONT, "?ufs_rdwr: %s vp: %p pages:%p  off %llx len %lx"
	    " isize: %llx fdb: %p\n",
	    flags & B_READ ? "READ" : "WRITE", (void *)vnodep,
	    (void *)vnodep->v_pages, offset1, iolen, ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, rwtype);

	ASSERT(offset1 < ip->i_size);

	if ((offset1 + iolen) > ip->i_size) {
		iolen = ip->i_size - offset1;
	}
	while (!error && curlen < iolen) {

		contig = 0;

		if ((error = bmap_read(ip, offset1, &bn, &contig)) != 0) {
			break;
		}
		ASSERT(!(bn == UFS_HOLE && iswrite));
		if (bn == UFS_HOLE) {
			/*
			 * If the above assertion is true,
			 * then the following if statement can never be true.
			 */
			if (iswrite && (rwtype == RW_READER)) {
				rwtype = RW_WRITER;
				if (!rw_tryupgrade(&ip->i_contents)) {
					rw_exit(&ip->i_contents);
					rw_enter(&ip->i_contents, rwtype);
					continue;
				}
			}
			offsetn = blkoff(fs, offset1);
			pplen = P2ROUNDUP(len, PAGESIZE);
			nbytes = MIN((pplen - curlen),
			    (fs->fs_bsize - offsetn));
			ASSERT(nbytes > 0);

			/*
			 * We may be reading or writing.
			 */
			DEBUGF((CE_CONT, "?ufs_rdwr_data: hole %llx - %lx\n",
			    offset1, (iolen - curlen)));

			if (iswrite) {
				printf("**WARNING: ignoring hole in write\n");
				error = ENOSPC;
			} else {
				fdb_add_hole(fdbp, offset1 - offset, nbytes);
			}
			offset1 += nbytes;
			curlen += nbytes;
			continue;

		}
		ASSERT(contig > 0);
		pplen = P2ROUNDUP(len, PAGESIZE);

		contig = MIN(contig, len - curlen);
		contig = P2ROUNDUP(contig, DEV_BSIZE);

		bp = fdb_iosetup(fdbp, offset1 - offset, contig, vnodep, flags);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_file = ip->i_vnode;
		bp->b_offset = (offset_t)offset1;

		if (ufsvfsp->vfs_snapshot) {
			fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
		} else {
			(void) bdev_strategy(bp);
		}
		io_started = 1;

		offset1 += contig;
		curlen += contig;
		if (iswrite)
			lwp_stat_update(LWP_STAT_OUBLK, 1);
		else
			lwp_stat_update(LWP_STAT_INBLK, 1);

		if ((flags & B_ASYNC) == 0) {
			error = biowait(bp);
			fdb_iodone(bp);
		}

		DEBUGF((CE_CONT, "?loop ufs_rdwr_data.. off %llx len %lx\n",
		    offset1, (iolen - curlen)));
	}

	DEBUGF((CE_CONT, "?ufs_rdwr_data: off %llx len %lx pages: %p ------\n",
	    offset1, (iolen - curlen), (void *)vnodep->v_pages));

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if (flags & B_ASYNC) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		ufs_lockfs_end(ulp);
	}
	if (io_started && flags & B_ASYNC) {
		return (0);
	} else {
		return (error);
	}
}
예제 #7
0
/*
 * ufs_alloc_data - supports allocating space and reads or writes
 * that involve changes to file length or space allocation.
 *
 * This function is more expensive, because of the UFS log transaction,
 * so ufs_rdwr_data() should be used when space or file length changes
 * will not occur.
 *
 * Inputs:
 * fdb - A null pointer instructs this function to only allocate
 *	space for the specified offset and length.
 *	An actual fdbuffer instructs this function to perform
 *	the read or write operation.
 * flags - defaults (zero value) to synchronous write
 *	B_READ - indicates read operation
 *	B_ASYNC - indicates perform operation asynchronously
 */
int
ufs_alloc_data(
	vnode_t		*vnodep,
	u_offset_t	offset,
	size_t		*len,
	fdbuffer_t	*fdbp,
	int		flags,
	cred_t		*credp)
{
	struct inode	*ip = VTOI(vnodep);
	size_t		done_len, io_len;
	int		contig;
	u_offset_t	uoff, io_off;
	int		error = 0;		/* No error occured */
	int		offsetn;		/* Start point this IO */
	int		nbytes;			/* Number bytes in this IO */
	daddr_t		bn;
	struct fs	*fs;
	struct ufsvfs	*ufsvfsp = ip->i_ufsvfs;
	int		i_size_changed = 0;
	u_offset_t	old_i_size;
	struct ulockfs	*ulp;
	int		trans_size;
	int		issync;			/* UFS Log transaction */
						/* synchronous when non-zero */

	int		io_started = 0;		/* No IO started */
	uint_t		protp = PROT_ALL;

	ASSERT((flags & B_WRITE) == 0);

	/*
	 * Obey the lockfs protocol
	 */
	error = ufs_lockfs_begin_getpage(ufsvfsp, &ulp, segkmap, 0, &protp);
	if (error) {
		if ((fdbp != NULL) && (flags & B_ASYNC)) {
			fdb_ioerrdone(fdbp, error);
		}
		return (error);
	}
	if (ulp) {
		/*
		 * Try to begin a UFS log transaction
		 */
		trans_size = TOP_GETPAGE_SIZE(ip);
		TRANS_TRY_BEGIN_CSYNC(ufsvfsp, issync, TOP_GETPAGE,
		    trans_size, error);
		if (error == EWOULDBLOCK) {
			ufs_lockfs_end(ulp);
			if ((fdbp != NULL) && (flags & B_ASYNC)) {
				fdb_ioerrdone(fdbp, EDEADLK);
			}
			return (EDEADLK);
		}
	}

	uoff = offset;
	io_off = offset;
	io_len = *len;
	done_len = 0;

	DEBUGF((CE_CONT, "?ufs_alloc: off %llx len %lx size %llx fdb: %p\n",
	    uoff, (io_len - done_len), ip->i_size, (void *)fdbp));

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&ip->i_contents, RW_WRITER);

	ASSERT((ip->i_mode & IFMT) == IFREG);

	fs = ip->i_fs;

	while (error == 0 && done_len < io_len) {
		uoff = (u_offset_t)(io_off + done_len);
		offsetn = (int)blkoff(fs, uoff);
		nbytes = (int)MIN(fs->fs_bsize - offsetn, io_len - done_len);

		DEBUGF((CE_CONT, "?ufs_alloc_data: offset: %llx len %x\n",
		    uoff, nbytes));

		if (uoff + nbytes > ip->i_size) {
			/*
			 * We are extending the length of the file.
			 * bmap is used so that we are sure that
			 * if we need to allocate new blocks, that it
			 * is done here before we up the file size.
			 */
			DEBUGF((CE_CONT, "?ufs_alloc_data: grow %llx -> %llx\n",
			    ip->i_size, uoff + nbytes));

			error = bmap_write(ip, uoff, (offsetn + nbytes),
			    BI_ALLOC_ONLY, NULL, credp);
			if (ip->i_flag & (ICHG|IUPD))
				ip->i_seq++;
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: grow "
				    "failed err: %d\n", error));
				break;
			}
			if (fdbp != NULL) {
				if (uoff >= ip->i_size) {
					/*
					 * Desired offset is past end of bytes
					 * in file, so we have a hole.
					 */
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				} else {
					int contig;
					buf_t *bp;

					error = bmap_read(ip, uoff, &bn,
					    &contig);
					if (error) {
						break;
					}

					contig = ip->i_size - uoff;
					contig = P2ROUNDUP(contig, DEV_BSIZE);

					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
					if (contig > (ip->i_size - uoff)) {
						contig -= ip->i_size - uoff;

						fdb_add_hole(fdbp,
						    ip->i_size - offset,
						    contig);
					}
				}
			}

			i_size_changed = 1;
			old_i_size = ip->i_size;
			UFS_SET_ISIZE(uoff + nbytes, ip);
			TRANS_INODE(ip->i_ufsvfs, ip);
			/*
			 * file has grown larger than 2GB. Set flag
			 * in superblock to indicate this, if it
			 * is not already set.
			 */
			if ((ip->i_size > MAXOFF32_T) &&
			    !(fs->fs_flags & FSLARGEFILES)) {
				ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
				mutex_enter(&ufsvfsp->vfs_lock);
				fs->fs_flags |= FSLARGEFILES;
				ufs_sbwrite(ufsvfsp);
				mutex_exit(&ufsvfsp->vfs_lock);
			}
		} else {
			/*
			 * The file length is not being extended.
			 */
			error = bmap_read(ip, uoff, &bn, &contig);
			if (error) {
				DEBUGF((CE_CONT, "?ufs_alloc_data: "
				    "bmap_read err: %d\n", error));
				break;
			}

			if (bn != UFS_HOLE) {
				/*
				 * Did not map a hole in the file
				 */
				int	contig = P2ROUNDUP(nbytes, DEV_BSIZE);
				buf_t	*bp;

				if (fdbp != NULL) {
					bp = fdb_iosetup(fdbp, uoff - offset,
					    contig, vnodep, flags);

					bp->b_edev = ip->i_dev;
					bp->b_dev = cmpdev(ip->i_dev);
					bp->b_blkno = bn;
					bp->b_file = ip->i_vnode;
					bp->b_offset = (offset_t)uoff;

					if (ufsvfsp->vfs_snapshot) {
						fssnap_strategy(
						    &ufsvfsp->vfs_snapshot, bp);
					} else {
						(void) bdev_strategy(bp);
					}
					io_started = 1;

					lwp_stat_update(LWP_STAT_OUBLK, 1);

					if ((flags & B_ASYNC) == 0) {
						error = biowait(bp);
						fdb_iodone(bp);
						if (error) {
							break;
						}
					}
				}
			} else {
				/*
				 * We read a hole in the file.
				 * We have to allocate blocks for the hole.
				 */
				error = bmap_write(ip, uoff, (offsetn + nbytes),
				    BI_ALLOC_ONLY, NULL, credp);
				if (ip->i_flag & (ICHG|IUPD))
					ip->i_seq++;
				if (error) {
					DEBUGF((CE_CONT, "?ufs_alloc_data: fill"
					    " hole failed error: %d\n", error));
					break;
				}
				if (fdbp != NULL) {
					fdb_add_hole(fdbp, uoff - offset,
					    nbytes);
				}
			}
		}
		done_len += nbytes;
	}

	if (error) {
		if (i_size_changed) {
			/*
			 * Allocation of the blocks for the file failed.
			 * So truncate the file size back to its original size.
			 */
			(void) ufs_itrunc(ip, old_i_size, 0, credp);
		}
	}

	DEBUGF((CE_CONT, "?ufs_alloc: uoff %llx len %lx\n",
	    uoff, (io_len - done_len)));

	if ((offset + *len) < (NDADDR * fs->fs_bsize)) {
		*len = (size_t)(roundup(offset + *len, fs->fs_fsize) - offset);
	} else {
		*len = (size_t)(roundup(offset + *len, fs->fs_bsize) - offset);
	}

	/*
	 * Flush cached pages.
	 *
	 * XXX - There should be no pages involved, since the I/O was performed
	 * through the device strategy routine and the page cache was bypassed.
	 * However, testing has demonstrated that this VOP_PUTPAGE is
	 * necessary. Without this, data might not always be read back as it
	 * was written.
	 *
	 */
	(void) VOP_PUTPAGE(vnodep, 0, 0, B_INVAL, credp);

	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if ((fdbp != NULL) && (flags & B_ASYNC)) {
		/*
		 * Show that no more asynchronous IO will be added
		 */
		fdb_ioerrdone(fdbp, error);
	}
	if (ulp) {
		/*
		 * End the UFS Log transaction
		 */
		TRANS_END_CSYNC(ufsvfsp, error, issync, TOP_GETPAGE,
		    trans_size);
		ufs_lockfs_end(ulp);
	}
	if (io_started && (flags & B_ASYNC)) {
		return (0);
	} else {
		return (error);
	}
}
예제 #8
0
/*
 * Read a cluster from the snapshotted block device to the cache.
 */
static int
fss_read_cluster(struct fss_softc *sc, u_int32_t cl)
{
	int error, todo, offset, len;
	daddr_t dblk;
	struct buf *bp, *mbp;
	struct fss_cache *scp, *scl;

	/*
	 * Get a free cache slot.
	 */
	scl = sc->sc_cache+sc->sc_cache_size;

	mutex_enter(&sc->sc_slock);

restart:
	if (isset(sc->sc_copied, cl) || !FSS_ISVALID(sc)) {
		mutex_exit(&sc->sc_slock);
		return 0;
	}

	for (scp = sc->sc_cache; scp < scl; scp++)
		if (scp->fc_cluster == cl) {
			if (scp->fc_type == FSS_CACHE_VALID) {
				mutex_exit(&sc->sc_slock);
				return 0;
			} else if (scp->fc_type == FSS_CACHE_BUSY) {
				cv_wait(&scp->fc_state_cv, &sc->sc_slock);
				goto restart;
			}
		}

	for (scp = sc->sc_cache; scp < scl; scp++)
		if (scp->fc_type == FSS_CACHE_FREE) {
			scp->fc_type = FSS_CACHE_BUSY;
			scp->fc_cluster = cl;
			break;
		}
	if (scp >= scl) {
		cv_wait(&sc->sc_cache_cv, &sc->sc_slock);
		goto restart;
	}

	mutex_exit(&sc->sc_slock);

	/*
	 * Start the read.
	 */
	dblk = btodb(FSS_CLTOB(sc, cl));
	if (cl == sc->sc_clcount-1) {
		todo = sc->sc_clresid;
		memset((char *)scp->fc_data + todo, 0, FSS_CLSIZE(sc) - todo);
	} else
		todo = FSS_CLSIZE(sc);
	offset = 0;
	mbp = getiobuf(NULL, true);
	mbp->b_bufsize = todo;
	mbp->b_data = scp->fc_data;
	mbp->b_resid = mbp->b_bcount = todo;
	mbp->b_flags = B_READ;
	mbp->b_cflags = BC_BUSY;
	mbp->b_dev = sc->sc_bdev;
	while (todo > 0) {
		len = todo;
		if (len > MAXPHYS)
			len = MAXPHYS;
		if (btodb(FSS_CLTOB(sc, cl)) == dblk && len == todo)
			bp = mbp;
		else {
			bp = getiobuf(NULL, true);
			nestiobuf_setup(mbp, bp, offset, len);
		}
		bp->b_lblkno = 0;
		bp->b_blkno = dblk;
		bdev_strategy(bp);
		dblk += btodb(len);
		offset += len;
		todo -= len;
	}
	error = biowait(mbp);
	putiobuf(mbp);

	mutex_enter(&sc->sc_slock);
	scp->fc_type = (error ? FSS_CACHE_FREE : FSS_CACHE_VALID);
	cv_broadcast(&scp->fc_state_cv);
	if (error == 0) {
		setbit(sc->sc_copied, scp->fc_cluster);
		cv_signal(&sc->sc_work_cv);
	}
	mutex_exit(&sc->sc_slock);

	return error;
}
예제 #9
0
/*
 * The kernel thread (one for every active snapshot).
 *
 * After wakeup it cleans the cache and runs the I/O requests.
 */
static void
fss_bs_thread(void *arg)
{
	bool thread_idle, is_valid;
	int error, i, todo, len, crotor, is_read;
	long off;
	char *addr;
	u_int32_t c, cl, ch, *indirp;
	struct buf *bp, *nbp;
	struct fss_softc *sc;
	struct fss_cache *scp, *scl;

	sc = arg;
	scl = sc->sc_cache+sc->sc_cache_size;
	crotor = 0;
	thread_idle = false;

	mutex_enter(&sc->sc_slock);

	for (;;) {
		if (thread_idle)
			cv_wait(&sc->sc_work_cv, &sc->sc_slock);
		thread_idle = true;
		if ((sc->sc_flags & FSS_BS_THREAD) == 0) {
			mutex_exit(&sc->sc_slock);
			kthread_exit(0);
		}

		/*
		 * Process I/O requests (persistent)
		 */

		if (sc->sc_flags & FSS_PERSISTENT) {
			if ((bp = bufq_get(sc->sc_bufq)) == NULL)
				continue;
			is_valid = FSS_ISVALID(sc);
			is_read = (bp->b_flags & B_READ);
			thread_idle = false;
			mutex_exit(&sc->sc_slock);

			if (is_valid) {
				disk_busy(sc->sc_dkdev);
				error = fss_bs_io(sc, FSS_READ, 0,
				    dbtob(bp->b_blkno), bp->b_bcount,
				    bp->b_data);
				disk_unbusy(sc->sc_dkdev,
				    (error ? 0 : bp->b_bcount), is_read);
			} else
				error = ENXIO;

			bp->b_error = error;
			bp->b_resid = (error ? bp->b_bcount : 0);
			biodone(bp);

			mutex_enter(&sc->sc_slock);
			continue;
		}

		/*
		 * Clean the cache
		 */
		for (i = 0; i < sc->sc_cache_size; i++) {
			crotor = (crotor + 1) % sc->sc_cache_size;
			scp = sc->sc_cache + crotor;
			if (scp->fc_type != FSS_CACHE_VALID)
				continue;
			mutex_exit(&sc->sc_slock);

			thread_idle = false;
			indirp = fss_bs_indir(sc, scp->fc_cluster);
			if (indirp != NULL) {
				error = fss_bs_io(sc, FSS_WRITE, sc->sc_clnext,
				    0, FSS_CLSIZE(sc), scp->fc_data);
			} else
				error = EIO;

			mutex_enter(&sc->sc_slock);
			if (error == 0) {
				*indirp = sc->sc_clnext++;
				sc->sc_indir_dirty = 1;
			} else
				fss_error(sc, "write error on backing store");

			scp->fc_type = FSS_CACHE_FREE;
			cv_broadcast(&sc->sc_cache_cv);
			break;
		}

		/*
		 * Process I/O requests
		 */
		if ((bp = bufq_get(sc->sc_bufq)) == NULL)
			continue;
		is_valid = FSS_ISVALID(sc);
		is_read = (bp->b_flags & B_READ);
		thread_idle = false;

		if (!is_valid) {
			mutex_exit(&sc->sc_slock);

			bp->b_error = ENXIO;
			bp->b_resid = bp->b_bcount;
			biodone(bp);

			mutex_enter(&sc->sc_slock);
			continue;
		}

		disk_busy(sc->sc_dkdev);

		/*
		 * First read from the snapshotted block device unless
		 * this request is completely covered by backing store.
		 */

		cl = FSS_BTOCL(sc, dbtob(bp->b_blkno));
		off = FSS_CLOFF(sc, dbtob(bp->b_blkno));
		ch = FSS_BTOCL(sc, dbtob(bp->b_blkno)+bp->b_bcount-1);
		error = 0;
		bp->b_resid = 0;
		bp->b_error = 0;
		for (c = cl; c <= ch; c++) {
			if (isset(sc->sc_copied, c))
				continue;
			mutex_exit(&sc->sc_slock);

			/* Not on backing store, read from device. */
			nbp = getiobuf(NULL, true);
			nbp->b_flags = B_READ;
			nbp->b_resid = nbp->b_bcount = bp->b_bcount;
			nbp->b_bufsize = bp->b_bcount;
			nbp->b_data = bp->b_data;
			nbp->b_blkno = bp->b_blkno;
			nbp->b_lblkno = 0;
			nbp->b_dev = sc->sc_bdev;
			SET(nbp->b_cflags, BC_BUSY);	/* mark buffer busy */

			bdev_strategy(nbp);

			error = biowait(nbp);
			if (error != 0) {
				bp->b_resid = bp->b_bcount;
				bp->b_error = nbp->b_error;
				disk_unbusy(sc->sc_dkdev, 0, is_read);
				biodone(bp);
			}
			putiobuf(nbp);

			mutex_enter(&sc->sc_slock);
			break;
		}
		if (error)
			continue;

		/*
		 * Replace those parts that have been saved to backing store.
		 */

		addr = bp->b_data;
		todo = bp->b_bcount;
		for (c = cl; c <= ch; c++, off = 0, todo -= len, addr += len) {
			len = FSS_CLSIZE(sc)-off;
			if (len > todo)
				len = todo;
			if (isclr(sc->sc_copied, c))
				continue;
			mutex_exit(&sc->sc_slock);

			indirp = fss_bs_indir(sc, c);
			if (indirp == NULL || *indirp == 0) {
				/*
				 * Not on backing store. Either in cache
				 * or hole in the snapshotted block device.
				 */

				mutex_enter(&sc->sc_slock);
				for (scp = sc->sc_cache; scp < scl; scp++)
					if (scp->fc_type == FSS_CACHE_VALID &&
					    scp->fc_cluster == c)
						break;
				if (scp < scl)
					memcpy(addr, (char *)scp->fc_data+off,
					    len);
				else
					memset(addr, 0, len);
				continue;
			}

			/*
			 * Read from backing store.
			 */
			error =
			    fss_bs_io(sc, FSS_READ, *indirp, off, len, addr);

			mutex_enter(&sc->sc_slock);
			if (error) {
				bp->b_resid = bp->b_bcount;
				bp->b_error = error;
				break;
			}
		}
		mutex_exit(&sc->sc_slock);

		disk_unbusy(sc->sc_dkdev, (error ? 0 : bp->b_bcount), is_read);
		biodone(bp);

		mutex_enter(&sc->sc_slock);
	}
}
예제 #10
0
void
lqfs_write_strategy(ml_unit_t *ul, buf_t *bp)
{
	offset_t	mof	= ldbtob(bp->b_blkno);
	off_t		nb	= bp->b_bcount;
	char		*va;
	mapentry_t	*me;
	uchar_t		ord;
	qfsvfs_t	*qfsvfsp = ul->un_qfsvfs;
#ifdef LUFS
#else
	caddr_t		buf;

	va = bp_mapin_common(bp, VM_SLEEP);
	buf = bp->b_un.b_addr;
#endif /* LUFS */

	ASSERT((nb & DEV_BMASK) == 0);
	ul->un_logmap->mtm_ref = 1;

	/*
	 * if there are deltas, move into log
	 */
	ord = lqfs_find_ord(qfsvfsp, bp);
#ifdef LUFS
	me = deltamap_remove(ul->un_deltamap, mof, ord, nb);
	if (me) {
		va = bp_mapin_common(bp, VM_SLEEP);

		ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
		    (ul->un_matamap == NULL)||
		    matamap_within(ul->un_matamap, mof, ord, nb));

		/*
		 * move to logmap
		 */
		if (qfs_crb_enable) {
			logmap_add_buf(ul, va, mof, ord, me,
			    bp->b_un.b_addr, nb);
		} else {
			logmap_add(ul, va, mof, ord, me);
		}

		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
		}
		biodone(bp);
		return;
	}
#else
	if (buf && qfs_crb_enable) {
		uint32_t	bufsz;
		offset_t	vamof;
		offset_t	hmof;
		uchar_t		vaord;
		uchar_t		hord;
		uint32_t	hnb, nb1;

		bufsz = bp->b_bcount;
		ASSERT((bufsz & DEV_BMASK) == 0);

		vamof = mof;
		vaord = ord;

		/*
		 * Move any deltas to the logmap. Split requests that
		 * straddle MAPBLOCKSIZE hash boundaries (i.e. summary info).
		 */
		for (hmof = vamof - (va - buf), nb1 = nb; bufsz;
		    bufsz -= hnb, hmof += hnb, buf += hnb, nb1 -= hnb) {
			hnb = MAPBLOCKSIZE - (hmof & MAPBLOCKOFF);
			if (hnb > bufsz) {
				hnb = bufsz;
			}
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): Removing "
			    "deltamap deltas within mof 0x%llx ord %d nb %d\n",
			    MAX(hmof, vamof), vaord, MIN(hnb, nb1));
			me = deltamap_remove(ul->un_deltamap,
			    MAX(hmof, vamof), vaord, MIN(hnb, nb1));
			hord = vaord;
			if (me) {
				logmap_add_buf(ul, va, hmof, hord,
				    me, buf, hnb);

				if (ul->un_flags & LDL_ERROR) {
					bp->b_flags |= B_ERROR;
					bp->b_error = EIO;
				}
				biodone(bp);
				return;
			}
		}
	} else {
		/*
		 * if there are deltas
		 */
		LQFS_MSG(CE_WARN, "lqfs_write_strategy(): Removing "
		    "deltamap deltas within mof 0x%x ord %d nb %d\n",
		    mof, ord, nb);
		me = deltamap_remove(ul->un_deltamap, mof, ord, nb);
		if (me) {
			ASSERT(((ul->un_debug & MT_WRITE_CHECK) == 0) ||
			    (ul->un_matamap == NULL)||
			    matamap_within(ul->un_matamap, mof, ord, nb));

			/*
			 * move to logmap
			 */
			logmap_add(ul, va, mof, ord, me);

			if (ul->un_flags & LDL_ERROR) {
				bp->b_flags |= B_ERROR;
				bp->b_error = EIO;
			}
			biodone(bp);
			return;
		}
	}
#endif /* LUFS */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bp);
		return;
	}

	/*
	 * Check that we are not updating metadata, or if so then via B_PHYS.
	 */
	ASSERT((ul->un_matamap == NULL) ||
	    !(matamap_overlap(ul->un_matamap, mof, ord, nb) &&
	    ((bp->b_flags & B_PHYS) == 0)));

	LQFS_SET_IOTSTAMP(ul->un_qfsvfs, ddi_get_lbolt());
	logstats.ls_lwrites.value.ui64++;

#ifdef LQFS_TODO_SNAPSHOT
	/* If snapshots are enabled, write through the snapshot driver */
	if (ul->un_qfsvfs->vfs_snapshot) {
		fssnap_strategy(&ul->un_qfsvfs->vfs_snapshot, bp);
	} else {
#endif /* LQFS_TODO_SNAPSHOT */
		if ((bp->b_flags & B_READ) == 0) {
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): "
			    "bdev_strategy writing mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		} else {
			LQFS_MSG(CE_WARN, "lqfs_write_strategy(): "
			    "bdev_strategy reading mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		}
		(void) bdev_strategy(bp);
#ifdef LQFS_TODO_SNAPSHOT
	}
#endif /* LQFS_TODO_SNAPSHOT */

#ifdef LQFS_TODO_STATS
	lwp_stat_update(LWP_STAT_OUBLK, 1);
#endif /* LQFS_TODO_STATS */
}
예제 #11
0
void
lqfs_read_strategy(ml_unit_t *ul, buf_t *bp)
{
	mt_map_t	*logmap	= ul->un_logmap;
	offset_t	mof	= ldbtob(bp->b_blkno);
	off_t		nb	= bp->b_bcount;
	mapentry_t	*age;
	char		*va;
	int		(*saviodone)();
	int		entire_range;
	uchar_t		ord;
	qfsvfs_t	*qfsvfsp = ul->un_qfsvfs;

	/*
	 * get a linked list of overlapping deltas
	 * returns with &mtm->mtm_rwlock held
	 */
	ord = lqfs_find_ord(qfsvfsp, bp);
	entire_range = logmap_list_get(logmap, mof, ord, nb, &age);

	/*
	 * no overlapping deltas were found; read master
	 */
	if (age == NULL) {
		rw_exit(&logmap->mtm_rwlock);
		if (ul->un_flags & LDL_ERROR) {
			bp->b_flags |= B_ERROR;
			bp->b_error = EIO;
			biodone(bp);
		} else {
			LQFS_SET_IOTSTAMP(ul->un_qfsvfs, ddi_get_lbolt());
			logstats.ls_lreads.value.ui64++;
			if ((bp->b_flags & B_READ) == 0) {
				LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
				    "bdev_strategy writing mof 0x%x "
				    "edev %ld nb %d\n", bp->b_blkno * 512,
				    bp->b_edev, bp->b_bcount);
			} else {
				LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
				    "bdev_strategy reading mof 0x%x "
				    "edev %ld nb %d\n", bp->b_blkno * 512,
				    bp->b_edev, bp->b_bcount);
			}
			(void) bdev_strategy(bp);
#ifdef LQFS_TODO_STATS
			lwp_stat_update(LWP_STAT_INBLK, 1);
#endif /* LQFS_TODO_STATS */
		}
		return;
	}

	va = bp_mapin_common(bp, VM_SLEEP);
	/*
	 * if necessary, sync read the data from master
	 *	errors are returned in bp
	 */
	if (!entire_range) {
		saviodone = bp->b_iodone;
		bp->b_iodone = trans_not_done;
		logstats.ls_mreads.value.ui64++;
		if ((bp->b_flags & B_READ) == 0) {
			LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
			    "bdev_strategy writing mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		} else {
			LQFS_MSG(CE_WARN, "lqfs_read_strategy(): "
			    "bdev_strategy reading mof 0x%x edev %ld "
			    "nb %d\n", bp->b_blkno * 512, bp->b_edev,
			    bp->b_bcount);
		}
		(void) bdev_strategy(bp);
#ifdef LQFS_TODO_STATS
		lwp_stat_update(LWP_STAT_INBLK, 1);
#endif /* LQFS_TODO_STATS */
		if (trans_not_wait(bp)) {
			ldl_seterror(ul, "Error reading master");
		}
		bp->b_iodone = saviodone;
	}

	/*
	 * sync read the data from the log
	 *	errors are returned inline
	 */
	if (ldl_read(ul, va, mof, ord, nb, age)) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}

	/*
	 * unlist the deltas
	 */
	logmap_list_put(logmap, age);

	/*
	 * all done
	 */
	if (ul->un_flags & LDL_ERROR) {
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	biodone(bp);
}