Example #1
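/*
 * Per-buffer callback used when truncating a vnode's buffer cache.
 * Each candidate buffer is locked without blocking; if it still belongs
 * to the vnode, matches the clean/dirty pass being run, and is selected
 * by the _cmp callback, it is pulled off its queue and discarded.
 * Returning 1 tells the caller's scan loop that work was done.
 */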
static
int
nvtruncbuf_bp_trunc(struct buf *bp, void *data)
{
	struct truncbuf_info *info = data;

	/*
	 * Do not try to use a buffer we cannot immediately lock,
	 * but sleep anyway to prevent a livelock.  The code will
	 * loop until all buffers can be acted upon.
	 */
	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
		atomic_add_int(&bp->b_refs, 1);
		if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
			BUF_UNLOCK(bp);
		atomic_subtract_int(&bp->b_refs, 1);
	} else if ((info->clean && (bp->b_flags & B_DELWRI)) ||
		   (info->clean == 0 && (bp->b_flags & B_DELWRI) == 0) ||
		   bp->b_vp != info->vp ||
		   nvtruncbuf_bp_trunc_cmp(bp, data)) {
		BUF_UNLOCK(bp);
	} else {
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
		brelse(bp);
	}
	lwkt_yield();
	return(1);
}
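The comment above relies on the caller rescanning until nothing is left to do.  A minimal sketch of that caller-side loop follows; scan_vnode_bufs() is a hypothetical stand-in for the real tree-scan primitive, and only the two nvtruncbuf_bp_trunc* callbacks are taken from the code above.

/*
 * Sketch only: rescan until a pass finds no buffer to act on.  Because
 * the callback never blocks while the scan's locks are held, a busy
 * buffer is simply skipped (after a short sleep) and picked up again on
 * the next pass.
 */
static void
truncate_vnode_bufs(struct vnode *vp, struct truncbuf_info *info)
{
	int count;

	do {
		/* Each visited buffer contributes 1 to the pass count. */
		count = scan_vnode_bufs(vp, nvtruncbuf_bp_trunc_cmp,
					nvtruncbuf_bp_trunc, info);
	} while (count > 0);
}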
Example #2
/*
 * Assign a buffer for the given block.
 *
 * The buffer is selected from the buffer list using an LRU
 * algorithm.  If the requested block is already cached, return
 * it; otherwise the least recently used buffer is recycled.
 */
struct buf *
getblk(dev_t dev, int blkno)
{
    struct buf *bp;

    DPRINTF(VFSDB_BIO, ("getblk: dev=%llx blkno=%d\n", (long long)dev, blkno));
start:
    BIO_LOCK();
    bp = incore(dev, blkno);
    if (bp != NULL) {
        /* Block found in cache. */
        if (ISSET(bp->b_flags, B_BUSY)) {
            /*
             * Wait for the buffer to become ready.
             */
            BIO_UNLOCK();
            BUF_LOCK(bp);
            BUF_UNLOCK(bp);
            /* Scan again if it's busy */
            goto start;
        }
        bio_remove(bp);
        SET(bp->b_flags, B_BUSY);
    } else {
        bp = bio_remove_head();
        if (ISSET(bp->b_flags, B_DELWRI)) {
            BIO_UNLOCK();
            bwrite(bp);
            goto start;
        }
        bp->b_flags = B_BUSY;
        bp->b_dev = dev;
        bp->b_blkno = blkno;
    }
    BUF_LOCK(bp);
    BIO_UNLOCK();
    DPRINTF(VFSDB_BIO, ("getblk: done bp=%p\n", bp));
    return bp;
}
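A bread()-style helper built on top of getblk() shows how the returned buffer is typically used.  This is a sketch only: device_read(), BSIZE and the use of B_DONE to mean "contents are valid" are assumptions for illustration, not the surrounding driver's actual API.

/*
 * Sketch: fetch a block through the cache.  getblk() returns the buffer
 * busy and locked; if its contents are not already valid, read it from
 * the device before handing it back to the caller.
 */
static int
bread_sketch(dev_t dev, int blkno, struct buf **bpp)
{
    struct buf *bp;
    int error = 0;

    bp = getblk(dev, blkno);
    if (!ISSET(bp->b_flags, B_DONE)) {
        /* Cache miss or recycled buffer: fill it from the device. */
        error = device_read(dev, bp->b_data, BSIZE, blkno);
        if (error == 0)
            SET(bp->b_flags, B_DONE);
    }
    *bpp = bp;
    return error;
}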
Example #3
/*
 * Flush all dirty buffers to disk.
 * This is called when a file system is unmounted.
 */
void
bio_sync(void)
{
    struct buf *bp;
    int i;

start:
    BIO_LOCK();
    for (i = 0; i < NBUFS; i++) {
        bp = &buf_table[i];
        if (ISSET(bp->b_flags, B_BUSY)) {
            BIO_UNLOCK();
            BUF_LOCK(bp);
            BUF_UNLOCK(bp);
            goto start;
        }
        if (ISSET(bp->b_flags, B_DELWRI))
            bwrite(bp);
    }
    BIO_UNLOCK();
}
Example #4
/*
 * Read data into a buf, including read-ahead if we find it to be beneficial.
 * cluster_read replaces bread.
 */
int
cluster_read(struct vnode *vp, u_quad_t filesize, daddr_t lblkno, long size,
    struct ucred *cred, long totread, int seqcount, int gbflags,
    struct buf **bpp)
{
	struct buf *bp, *rbp, *reqbp;
	struct bufobj *bo;
	daddr_t blkno, origblkno;
	int maxra, racluster;
	int error, ncontig;
	int i;

	error = 0;
	bo = &vp->v_bufobj;
	if (!unmapped_buf_allowed)
		gbflags &= ~GB_UNMAPPED;

	/*
	 * Try to limit the amount of read-ahead by a few
	 * ad-hoc parameters.  This needs work!!!
	 */
	racluster = vp->v_mount->mnt_iosize_max / size;
	maxra = seqcount;
	maxra = min(read_max, maxra);
	maxra = min(nbuf/8, maxra);
	if (((u_quad_t)(lblkno + maxra + 1) * size) > filesize)
		maxra = (filesize / size) - lblkno;

	/*
	 * get the requested block
	 */
	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0, gbflags);
	if (bp == NULL)
		return (EBUSY);
	origblkno = lblkno;

	/*
	 * if it is in the cache, then check to see if the reads have been
	 * sequential.  If they have, then try some read-ahead, otherwise
	 * back-off on prospective read-aheads.
	 */
	if (bp->b_flags & B_CACHE) {
		if (!seqcount) {
			return 0;
		} else if ((bp->b_flags & B_RAM) == 0) {
			return 0;
		} else {
			bp->b_flags &= ~B_RAM;
			BO_RLOCK(bo);
			for (i = 1; i < maxra; i++) {
				/*
				 * Stop if the buffer does not exist or it
				 * is invalid (about to go away?)
				 */
				rbp = gbincore(&vp->v_bufobj, lblkno+i);
				if (rbp == NULL || (rbp->b_flags & B_INVAL))
					break;

				/*
				 * Set another read-ahead mark so we know 
				 * to check again. (If we can lock the
				 * buffer without waiting)
				 */
				if ((((i % racluster) == (racluster - 1)) ||
				    (i == (maxra - 1))) 
				    && (0 == BUF_LOCK(rbp, 
					LK_EXCLUSIVE | LK_NOWAIT, NULL))) {
					rbp->b_flags |= B_RAM;
					BUF_UNLOCK(rbp);
				}			
			}
			BO_RUNLOCK(bo);
			if (i >= maxra) {
				return 0;
			}
			lblkno += i;
		}
		reqbp = bp = NULL;
	/*
	 * If it isn't in the cache, then get a chunk from
	 * disk if sequential, otherwise just get the block.
	 */
	} else {
		off_t firstread = bp->b_offset;
		int nblks;
		long minread;

		KASSERT(bp->b_offset != NOOFFSET,
		    ("cluster_read: no buffer offset"));

		ncontig = 0;

		/*
		 * Adjust totread if needed
		 */
		minread = read_min * size;
		if (minread > totread)
			totread = minread;

		/*
		 * Compute the total number of blocks that we should read
		 * synchronously.
		 */
		if (firstread + totread > filesize)
			totread = filesize - firstread;
		nblks = howmany(totread, size);
		if (nblks > racluster)
			nblks = racluster;

		/*
		 * Now compute the number of contiguous blocks.
		 */
		if (nblks > 1) {
			error = VOP_BMAP(vp, lblkno, NULL,
			    &blkno, &ncontig, NULL);
			/*
			 * If this failed to map just do the original block.
			 */
			if (error || blkno == -1)
				ncontig = 0;
		}

		/*
		 * If we have contiguous data available do a cluster
		 * otherwise just read the requested block.
		 */
		if (ncontig) {
			/* Account for our first block. */
			ncontig = min(ncontig + 1, nblks);
			if (ncontig < nblks)
				nblks = ncontig;
			bp = cluster_rbuild(vp, filesize, lblkno,
			    blkno, size, nblks, gbflags, bp);
			lblkno += (bp->b_bufsize / size);
		} else {
			bp->b_flags |= B_RAM;
			bp->b_iocmd = BIO_READ;
			lblkno += 1;
		}
	}

	/*
	 * handle the synchronous read so that it is available ASAP.
	 */
	if (bp) {
		if ((bp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(bp, 0);
		}
		bp->b_flags &= ~B_INVAL;
		bp->b_ioflags &= ~BIO_ERROR;
		if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
			BUF_KERNPROC(bp);
		bp->b_iooffset = dbtob(bp->b_blkno);
		bstrategy(bp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, bp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	/*
	 * If we have been doing sequential I/O, then do some read-ahead.
	 */
	while (lblkno < (origblkno + maxra)) {
		error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontig, NULL);
		if (error)
			break;

		if (blkno == -1)
			break;

		/*
		 * We could throttle ncontig here by maxra but we might as
		 * well read the data if it is contiguous.  We're throttled
		 * by racluster anyway.
		 */
		if (ncontig) {
			ncontig = min(ncontig + 1, racluster);
			rbp = cluster_rbuild(vp, filesize, lblkno, blkno,
			    size, ncontig, gbflags, NULL);
			lblkno += (rbp->b_bufsize / size);
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
		} else {
			rbp = getblk(vp, lblkno, size, 0, 0, gbflags);
			lblkno += 1;
			if (rbp->b_flags & B_DELWRI) {
				bqrelse(rbp);
				continue;
			}
			rbp->b_flags |= B_ASYNC | B_RAM;
			rbp->b_iocmd = BIO_READ;
			rbp->b_blkno = blkno;
		}
		if (rbp->b_flags & B_CACHE) {
			rbp->b_flags &= ~B_ASYNC;
			bqrelse(rbp);
			continue;
		}
		if ((rbp->b_flags & B_CLUSTER) == 0) {
			vfs_busy_pages(rbp, 0);
		}
		rbp->b_flags &= ~B_INVAL;
		rbp->b_ioflags &= ~BIO_ERROR;
		if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
			BUF_KERNPROC(rbp);
		rbp->b_iooffset = dbtob(rbp->b_blkno);
		bstrategy(rbp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, rbp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
	}

	if (reqbp) {
		/*
		 * Like bread, always brelse() the buffer when
		 * returning an error.
		 */
		error = bufwait(reqbp);
		if (error != 0) {
			brelse(reqbp);
			*bpp = NULL;
		}
	}
	return (error);
}
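For context, the way a file system read path typically decides between plain bread() and cluster_read() can be sketched as follows.  The block-size and sequential-access parameters here are illustrative assumptions; only the cluster_read() signature is taken from the function above.

/*
 * Sketch: issue one logical block's worth of I/O, letting cluster_read()
 * handle clustering and read-ahead whenever the access pattern looks
 * sequential.
 */
static int
read_fs_block(struct vnode *vp, u_quad_t filesize, daddr_t lbn, long bsize,
    long resid, int seqcount, struct buf **bpp)
{
	int error;

	if (seqcount > 1) {
		/* Sequential access: cluster and read ahead. */
		error = cluster_read(vp, filesize, lbn, bsize, NOCRED,
		    resid, seqcount, 0, bpp);
	} else {
		/* Random access: just fetch the one block. */
		error = bread(vp, lbn, bsize, NOCRED, bpp);
	}
	return (error);
}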
Example #5
/* Perform I/O on a subdisk */
void
sdio(struct buf *bp)
{
    int s;						    /* spl */
    struct sd *sd;
    struct sdbuf *sbp;
    daddr_t endoffset;
    struct drive *drive;

#if VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_sdio, (union rqinfou) bp, bp);
#endif
    sd = &SD[Sdno(bp->b_dev)];				    /* point to the subdisk */
    drive = &DRIVE[sd->driveno];

    if (drive->state != drive_up) {
	if (sd->state >= sd_crashed) {
	    if (bp->b_flags & B_READ)			    /* reading, */
		set_sd_state(sd->sdno, sd_crashed, setstate_force);
	    else
		set_sd_state(sd->sdno, sd_stale, setstate_force);
	}
	bp->b_flags |= B_ERROR;
	bp->b_error = EIO;
	biodone(bp);
	return;
    }
    /*
     * We allow access to any kind of subdisk as long as we can expect
     * to get the I/O performed.
     */
    if (sd->state < sd_empty) {				    /* nothing to talk to, */
	bp->b_flags |= B_ERROR;
	bp->b_error = EIO;
	biodone(bp);
	return;
    }
    /* Get a buffer */
    sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
    if (sbp == NULL) {
	bp->b_flags |= B_ERROR;
	bp->b_error = ENOMEM;
	biodone(bp);
	return;
    }
    bzero(sbp, sizeof(struct sdbuf));			    /* start with nothing */
    sbp->b.b_flags = bp->b_flags | B_CALL;		    /* inform us when it's done */
    sbp->b.b_bufsize = bp->b_bufsize;			    /* buffer size */
    sbp->b.b_bcount = bp->b_bcount;			    /* number of bytes to transfer */
    sbp->b.b_resid = bp->b_resid;			    /* and amount waiting */
    sbp->b.b_dev = DRIVE[sd->driveno].dev;		    /* device */
    sbp->b.b_data = bp->b_data;				    /* data buffer */
    sbp->b.b_blkno = bp->b_blkno + sd->driveoffset;
    sbp->b.b_iodone = sdio_done;			    /* come here on completion */
    BUF_LOCKINIT(&sbp->b);				    /* get a lock for the buffer */
    BUF_LOCK(&sbp->b, LK_EXCLUSIVE);			    /* and lock it */
    sbp->bp = bp;					    /* note the address of the original header */
    sbp->sdno = sd->sdno;				    /* note for statistics */
    sbp->driveno = sd->driveno;
    endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE;  /* final sector offset */
    if (endoffset > sd->sectors) {			    /* beyond the end */
	sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
	if (sbp->b.b_bcount <= 0) {			    /* nothing to transfer */
	    bp->b_resid = bp->b_bcount;			    /* nothing transferred */
	    biodone(bp);
	    Free(sbp);
	    return;
	}
    }
#if VINUMDEBUG
    if (debug & DEBUG_ADDRESSES)
	log(LOG_DEBUG,
	    "  %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
	    sbp->b.b_flags & B_READ ? "Read" : "Write",
	    major(sbp->b.b_dev),
	    minor(sbp->b.b_dev),
	    sbp->sdno,
	    (u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
	    (int) sbp->b.b_blkno,
	    sbp->b.b_bcount);
#endif
    s = splbio();
#if VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_sdiol, (union rqinfou) &sbp->b, &sbp->b);
#endif
    BUF_STRATEGY(&sbp->b, 0);
    splx(s);
}
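The end-of-subdisk trimming above is easiest to see in isolation.  The helper below is a sketch of the same arithmetic, not part of the driver: given the subdisk-relative start sector, the transfer length in bytes and the subdisk size in sectors, it returns how many bytes actually fit.

/*
 * Sketch of the trim computation.  Example: blkno = 1000, bcount = 64 KiB
 * (128 sectors of DEV_BSIZE = 512), sectors = 1100.  endoffset = 1128,
 * which overshoots by 28 sectors, so 100 * 512 = 51200 bytes remain.
 */
static long
trim_to_subdisk(daddr_t blkno, long bcount, daddr_t sectors)
{
    daddr_t endoffset = blkno + bcount / DEV_BSIZE;	    /* first sector past the transfer */

    if (endoffset > sectors)				    /* beyond the end of the subdisk */
	bcount -= (endoffset - sectors) * DEV_BSIZE;	    /* trim the overshoot */
    return (bcount > 0 ? bcount : 0);			    /* 0 means nothing left to transfer */
}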
Example #6
/* Fill in the struct buf part of a request element. */
enum requeststatus
build_rq_buffer(struct rqelement *rqe, struct plex *plex)
{
    struct sd *sd;					    /* point to subdisk */
    struct volume *vol;
    struct buf *bp;
    struct buf *ubp;					    /* user (high level) buffer header */

    vol = &VOL[rqe->rqg->rq->volplex.volno];
    sd = &SD[rqe->sdno];				    /* point to subdisk */
    bp = &rqe->b;
    ubp = rqe->rqg->rq->bp;				    /* pointer to user buffer header */

    /* Initialize the buf struct */
    /* copy these flags from user bp */
    bp->b_flags = ubp->b_flags & (B_ORDERED | B_NOCACHE | B_READ | B_ASYNC);
    bp->b_flags |= B_CALL;				    /* inform us when it's done */
    BUF_LOCKINIT(bp);					    /* get a lock for the buffer */
    BUF_LOCK(bp, LK_EXCLUSIVE);				    /* and lock it */

    bp->b_iodone = complete_rqe;			    /* by calling us here */
    /*
     * You'd think that we wouldn't need to even
     * build the request buffer for a dead subdisk,
     * but in some cases we need information like
     * the user buffer address.  Err on the side of
     * generosity and supply what we can.  That
     * obviously doesn't include drive information
     * when the drive is dead.
     */
    if ((rqe->flags & XFR_BAD_SUBDISK) == 0) {		    /* subdisk is accessible, */
	bp->b_dev = DRIVE[rqe->driveno].dev;		    /* drive device */
    }
    bp->b_blkno = rqe->sdoffset + sd->driveoffset;	    /* start address */
    bp->b_bcount = rqe->buflen << DEV_BSHIFT;		    /* number of bytes to transfer */
    bp->b_resid = bp->b_bcount;				    /* and it's still all waiting */
    bp->b_bufsize = bp->b_bcount;			    /* and buffer size */
    bp->b_rcred = FSCRED;				    /* we have the file system credentials */
    bp->b_wcred = FSCRED;				    /* we have the file system credentials */

    if (rqe->flags & XFR_MALLOCED) {			    /* this operation requires a malloced buffer */
	bp->b_data = Malloc(bp->b_bcount);		    /* get a buffer to put it in */
	if (bp->b_data == NULL) {			    /* failed */
	    abortrequest(rqe->rqg->rq, ENOMEM);
	    return REQUEST_ENOMEM;			    /* no memory */
	}
    } else
	/*
	 * Point directly to user buffer data.  This means
	 * that we don't need to do anything when we have
	 * finished the transfer
	 */
	bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE;
    /*
     * On a recovery read, we perform an XOR of
     * all blocks into the user buffer.  To make
     * this work, we first clean out the buffer.
     */
    if ((rqe->flags & (XFR_RECOVERY_READ | XFR_BAD_SUBDISK))
	== (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) {	    /* bad subdisk of a recovery read */
	int length = rqe->grouplen << DEV_BSHIFT;	    /* and count involved */
	char *data = (char *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* destination */

	bzero(data, length);				    /* clean it out */
    }
    return REQUEST_OK;
}
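The bzero() above exists because a recovery read reconstructs the missing block by XORing the data from every surviving subdisk into the user buffer, so the accumulator must start out as all zero bytes.  The helper below is only an illustration of that accumulation step, not vinum's actual completion path.

/*
 * Sketch: fold one surviving block into the reconstruction buffer.
 * After every surviving subdisk's data has been XORed in, 'dst' holds
 * the contents of the failed subdisk's block.
 */
static void
xor_into(char *dst, const char *src, int length)
{
    int i;

    for (i = 0; i < length; i++)
	dst[i] ^= src[i];
}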
Example #7
/*
 * Convert a vnode strategy call into a device strategy call.  Vnode
 * strategy calls are not limited by device DMA limits, so we have to
 * handle that case by chunking large I/Os.
 *
 * spec_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
static int
devfs_spec_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	struct buf *nbp;
	struct vnode *vp;
	struct mount *mp;
	int chunksize;
	int maxiosize;

	if (bp->b_cmd != BUF_CMD_READ && LIST_FIRST(&bp->b_dep) != NULL)
		buf_start(bp);

	/*
	 * Collect statistics on synchronous and asynchronous read
	 * and write counts for disks that have associated filesystems.
	 */
	vp = ap->a_vp;
	KKASSERT(vp->v_rdev != NULL);	/* XXX */
	if (vn_isdisk(vp, NULL) && (mp = vp->v_rdev->si_mountpoint) != NULL) {
		if (bp->b_cmd == BUF_CMD_READ) {
			if (bp->b_flags & BIO_SYNC)
				mp->mnt_stat.f_syncreads++;
			else
				mp->mnt_stat.f_asyncreads++;
		} else {
			if (bp->b_flags & BIO_SYNC)
				mp->mnt_stat.f_syncwrites++;
			else
				mp->mnt_stat.f_asyncwrites++;
		}
	}

	/*
	 * Device iosize limitations only apply to read and write.  Shortcut
	 * the I/O if it fits.
	 */
	if ((maxiosize = vp->v_rdev->si_iosize_max) == 0) {
		devfs_debug(DEVFS_DEBUG_DEBUG,
			    "%s: si_iosize_max not set!\n",
			    dev_dname(vp->v_rdev));
		maxiosize = MAXPHYS;
	}
#if SPEC_CHAIN_DEBUG & 2
	maxiosize = 4096;
#endif
	if (bp->b_bcount <= maxiosize ||
	    (bp->b_cmd != BUF_CMD_READ && bp->b_cmd != BUF_CMD_WRITE)) {
		dev_dstrategy_chain(vp->v_rdev, bio);
		return (0);
	}

	/*
	 * Clone the buffer and set up an I/O chain to chunk up the I/O.
	 */
	nbp = kmalloc(sizeof(*bp), M_DEVBUF, M_INTWAIT|M_ZERO);
	initbufbio(nbp);
	buf_dep_init(nbp);
	BUF_LOCK(nbp, LK_EXCLUSIVE);
	BUF_KERNPROC(nbp);
	nbp->b_vp = vp;
	nbp->b_flags = B_PAGING | (bp->b_flags & B_BNOCLIP);
	nbp->b_data = bp->b_data;
	nbp->b_bio1.bio_done = devfs_spec_strategy_done;
	nbp->b_bio1.bio_offset = bio->bio_offset;
	nbp->b_bio1.bio_caller_info1.ptr = bio;

	/*
	 * Start the first transfer
	 */
	if (vn_isdisk(vp, NULL))
		chunksize = vp->v_rdev->si_bsize_phys;
	else
		chunksize = DEV_BSIZE;
	chunksize = maxiosize / chunksize * chunksize;
#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG,
		    "spec_strategy chained I/O chunksize=%d\n",
		    chunksize);
#endif
	nbp->b_cmd = bp->b_cmd;
	nbp->b_bcount = chunksize;
	nbp->b_bufsize = chunksize;	/* used to detect a short I/O */
	nbp->b_bio1.bio_caller_info2.index = chunksize;

#if SPEC_CHAIN_DEBUG & 1
	devfs_debug(DEVFS_DEBUG_DEBUG,
		    "spec_strategy: chain %p offset %d/%d bcount %d\n",
		    bp, 0, bp->b_bcount, nbp->b_bcount);
#endif

	dev_dstrategy(vp->v_rdev, &nbp->b_bio1);

	if (DEVFS_NODE(vp)) {
		nanotime(&DEVFS_NODE(vp)->atime);
		nanotime(&DEVFS_NODE(vp)->mtime);
	}

	return (0);
}
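The chunk-size arithmetic above rounds the device's I/O limit down to a whole number of physical blocks so that every chunk issued by the chain stays block-aligned.  A sketch of the same calculation, with a worked example, follows; it is illustrative only.

/*
 * Sketch: round maxiosize down to a multiple of the physical block size.
 * E.g. maxiosize = 131072 with 2048-byte blocks stays 131072, while
 * maxiosize = 100000 is trimmed to 48 * 2048 = 98304.
 */
static __inline int
chain_chunksize(int maxiosize, int physblksize)
{
	return (maxiosize / physblksize * physblksize);
}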