Example No. 1
/*
 * XXX - vop_strategy must be hand coded because it has no vnode in
 * its arguments, and it is not coherent with anything.
 *
 * This goes away with a merged VM/buffer cache.
 *
 * union_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
static int
union_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct buf *bp = bio->bio_buf;
	struct vnode *othervp = OTHERVP(ap->a_vp);

#ifdef DIAGNOSTIC
	if (othervp == NULLVP)
		panic("union_strategy: nil vp");
	if (bp->b_cmd != BUF_CMD_READ && (othervp == LOWERVP(ap->a_vp)))
		panic("union_strategy: writing to lowervp");
#endif
	return (vn_strategy(othervp, bio));
}
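The function above is the canonical pass-through vop_strategy: pick the active underlying vnode and forward the untouched BIO with vn_strategy(). A minimal sketch of the same shape for a hypothetical layering filesystem follows; only vn_strategy() and the vop_strategy_args layout come from the example, while examplefs_node and ex_lowervp are invented names and v_data is assumed to hold the per-vnode private data.

/*
 * Sketch: pass-through strategy routine for a made-up layering
 * filesystem.  examplefs_node and ex_lowervp are illustrative only.
 */
struct examplefs_node {
	struct vnode *ex_lowervp;	/* underlying vnode we layer over */
};

static int
examplefs_strategy(struct vop_strategy_args *ap)
{
	/* Assumes the private node hangs off v_data, as is conventional. */
	struct examplefs_node *xp = ap->a_vp->v_data;

	/* Forward the BIO unchanged; the lower layer completes it. */
	return (vn_strategy(xp->ex_lowervp, ap->a_bio));
}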
Example No. 2
/*
 * Do IO operation, called from dmstrategy routine.
 */
static int
dm_target_linear_strategy(dm_table_entry_t * table_en, struct buf * bp)
{
	dm_target_linear_config_t *tlc;

	tlc = table_en->target_config;

/*	printf("Linear target read function called %" PRIu64 "!!\n",
	tlc->offset);*/

#if 0
	bp->b_blkno += tlc->offset;
#endif
	bp->b_bio1.bio_offset += tlc->offset * DEV_BSIZE;

	vn_strategy(tlc->pdev->pdev_vnode, &bp->b_bio1);

	return 0;

}
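Stripped of the disabled debug code, the target does one thing: translate the logical byte offset by the target's block offset and forward the BIO to the backing device vnode. A condensed sketch of just that remapping, reusing the dm_target_linear_config_t fields shown above:

/*
 * Sketch: remap and forward a linear-target BIO.  tlc->offset is in
 * DEV_BSIZE units and pdev->pdev_vnode is the backing vnode, exactly
 * as in dm_target_linear_strategy() above.
 */
static void
linear_remap_and_forward(dm_target_linear_config_t *tlc, struct buf *bp)
{
	/* Shift the logical byte offset onto the backing device. */
	bp->b_bio1.bio_offset += tlc->offset * DEV_BSIZE;
	vn_strategy(tlc->pdev->pdev_vnode, &bp->b_bio1);
}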
Example No. 3
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
 * In order to be able to swap to a file, the VOP_BMAP operation may not
 * deadlock on memory.  See hpfs_bmap() for details. XXXXXXX (not impl)
 *
 * hpfs_strategy(struct vnode *a_vp, struct bio *a_bio)
 */
int
hpfs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct hpfsnode *hp;
	int error;

	dprintf(("hpfs_strategy(): \n"));

	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("hpfs_strategy: spec");

	nbio = push_bio(bio);
	if (nbio->bio_offset == NOOFFSET) {
		error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset,
				 NULL, NULL, bp->b_cmd);
		if (error) {
			kprintf("hpfs_strategy: VOP_BMAP FAILED %d\n", error);
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			/* I/O was never started on nbio, must biodone(bio) */
			biodone(bio);
			return (error);
		}
		if (nbio->bio_offset == NOOFFSET)
			vfs_bio_clrbuf(bp);
	}
	if (nbio->bio_offset == NOOFFSET) {
		/* I/O was never started on nbio, must biodone(bio) */
		biodone(bio);
		return (0);
	}
	hp = VTOHP(ap->a_vp);
	vn_strategy(hp->h_devvp, nbio);
	return (0);
}
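hpfs_strategy() shows the general shape of a filesystem strategy routine: push a new BIO, fill in its device offset with VOP_BMAP only if it is still NOOFFSET, biodone() the original BIO yourself whenever the I/O never reaches the device, and otherwise hand the translated BIO to the device vnode. A trimmed sketch of that skeleton; fsnode_devvp() is a hypothetical accessor standing in for VTOHP(vp)->h_devvp:

/*
 * Sketch: generic shape of a filesystem strategy routine.  Map the
 * logical offset to a device offset, then hand the translated BIO to
 * the device vnode.  fsnode_devvp() is a placeholder for however a
 * given filesystem reaches its device vnode.
 */
static int
example_fs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio = push_bio(bio);	/* next BIO in the chain */
	struct buf *bp = bio->bio_buf;
	int error;

	if (nbio->bio_offset == NOOFFSET) {
		error = VOP_BMAP(ap->a_vp, bio->bio_offset, &nbio->bio_offset,
				 NULL, NULL, bp->b_cmd);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			biodone(bio);		/* I/O never started on nbio */
			return (error);
		}
	}
	if (nbio->bio_offset == NOOFFSET) {
		vfs_bio_clrbuf(bp);		/* hole: return zeroes */
		biodone(bio);
		return (0);
	}
	vn_strategy(fsnode_devvp(ap->a_vp), nbio);	/* placeholder accessor */
	return (0);
}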
Example No. 4
/*
 * Release blocks associated with the inode ip and stored in the indirect
 * block bn.  Blocks are free'd in LIFO order up to (but not including)
 * lastbn.  If level is greater than SINGLE, the block is an indirect block
 * and recursive calls to indirtrunc must be used to cleanse other indirect
 * blocks.
 *
 * NB: triple indirect blocks are untested.
 */
static int
ffs_indirtrunc(struct inode *ip, ufs_daddr_t lbn, ufs_daddr_t dbn,
	       ufs_daddr_t lastbn, int level, long *countp)
{
	int i;
	struct buf *bp;
	struct fs *fs = ip->i_fs;
	ufs_daddr_t *bap;
	struct vnode *vp;
	ufs_daddr_t *copy = NULL, nb, nlbn, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error = 0, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those entries corresponding
	 * to blocks to be free'd, and update the on-disk copy first.
	 * Since the double (triple) indirect block is truncated before
	 * the single (double) indirect blocks below it, calls to bmap on
	 * these blocks will fail.  However, we already have the on-disk
	 * address, so we have to set the bio_offset field explicitly
	 * instead of letting bread do everything for us.
	 */
	vp = ITOV(ip);
	bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->fs_bsize, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		bp->b_flags &= ~(B_ERROR|B_INVAL);
		bp->b_cmd = BUF_CMD_READ;
		if (bp->b_bcount > bp->b_bufsize)
			panic("ffs_indirtrunc: bad buffer size");
		/*
		 * BIO is bio2 which chains back to bio1.  We wait
		 * on bio1.
		 */
		bp->b_bio2.bio_offset = dbtodoff(fs, dbn);
		bp->b_bio1.bio_done = biodone_sync;
		bp->b_bio1.bio_flags |= BIO_SYNC;
		vfs_busy_pages(vp, bp);
		/*
		 * Access the block device layer using the device vnode
		 * and the translated block number (bio2) instead of the
		 * file vnode (vp) and logical block number (bio1).
		 *
		 * Even though we are bypassing the vnode layer, we still
		 * want the vnode state to indicate that an I/O on its behalf
		 * is in progress.
		 */
		bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
		vn_strategy(ip->i_devvp, &bp->b_bio2);
		error = biowait(&bp->b_bio1, "biord");
	}
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}

	bap = (ufs_daddr_t *)bp->b_data;
	if (lastbn != -1) {
		copy = kmalloc(fs->fs_bsize, M_TEMP, M_WAITOK);
		bcopy((caddr_t)bap, (caddr_t)copy, (uint)fs->fs_bsize);
		bzero((caddr_t)&bap[last + 1],
		    (uint)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t));
		if (DOINGASYNC(vp)) {
			bawrite(bp);
		} else {
			error = bwrite(bp);
			if (error)
				allerror = error;
		}
		bap = copy;
	}

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
	    i--, nlbn += factor) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    (ufs_daddr_t)-1, level - 1, &blkcount)) != 0)
				allerror = error;
			blocksreleased += blkcount;
		}
		ffs_blkfree(ip, nb, fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
			    last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	if (copy != NULL) {
		kfree(copy, M_TEMP);
	} else {
		bp->b_flags |= B_INVAL | B_NOCACHE;
		brelse(bp);
	}
		
	*countp = blocksreleased;
	return (allerror);
}
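The block guarded by (bp->b_flags & B_CACHE) == 0 is a hand-rolled synchronous, device-level read: bio2 carries the already-known disk offset, bio1 is made synchronous with biodone_sync/BIO_SYNC, the I/O is issued against the device vnode, and the caller waits on bio1. A condensed sketch of that idiom, assuming the caller supplies the device vnode and the translated byte offset:

/*
 * Sketch: synchronous device-level read of one fs block into bp,
 * bypassing bmap.  Mirrors the idiom in ffs_indirtrunc() and
 * ext2_indirtrunc(); devvp and doffset are assumed to be supplied by
 * the caller.
 */
static int
read_indir_block(struct vnode *vp, struct vnode *devvp, struct buf *bp,
		 off_t doffset)
{
	bp->b_flags &= ~(B_ERROR | B_INVAL);
	bp->b_cmd = BUF_CMD_READ;

	/* bio2 carries the device offset; completion chains back to bio1. */
	bp->b_bio2.bio_offset = doffset;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;

	vfs_busy_pages(vp, bp);

	/* Track the I/O on the file vnode even though we go to the device. */
	bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
	vn_strategy(devvp, &bp->b_bio2);
	return (biowait(&bp->b_bio1, "biord"));
}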
Example No. 5
static int
ffs_rawread_readahead(struct vnode *vp, caddr_t udata, off_t loffset,
		      size_t len, struct buf *bp)
{
	int error;
	int iolen;
	int blockoff;
	int bsize;
	struct vnode *dp;
	int bforwards;
	
	bsize = vp->v_mount->mnt_stat.f_iosize;

	/*
	 * Make sure it fits into the pbuf
	 */
	iolen = (int)(intptr_t)udata & PAGE_MASK;
	if (len + iolen > bp->b_kvasize) {
		len = bp->b_kvasize;
		if (iolen != 0)
			len -= PAGE_SIZE;
	}

	/*
	 * Raw disk address is in bio2, but we wait for it to
	 * chain to bio1.
	 */
	bp->b_flags &= ~B_ERROR;
	bp->b_loffset = loffset;
	bp->b_bio2.bio_offset = NOOFFSET;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;

	blockoff = (loffset % bsize) / DEV_BSIZE;

	error = VOP_BMAP(vp, bp->b_loffset, &bp->b_bio2.bio_offset,
			 &bforwards, NULL, BUF_CMD_READ);
	if (error != 0)
		return error;
	dp = VTOI(vp)->i_devvp;
	if (bp->b_bio2.bio_offset == NOOFFSET) {
		/* 
		 * Fill holes with NULs to preserve semantics 
		 */
		if (len + blockoff * DEV_BSIZE > bsize)
			len = bsize - blockoff * DEV_BSIZE;
		
		if (vmapbuf(bp, udata, len) < 0)
			return EFAULT;
		
		lwkt_user_yield();
		bzero(bp->b_data, bp->b_bcount);

		/* Mark operation completed (similar to bufdone()) */

		bp->b_resid = 0;
		return 0;
	}
	
	if (len + blockoff * DEV_BSIZE > bforwards)
		len = bforwards - blockoff * DEV_BSIZE;
	bp->b_bio2.bio_offset += blockoff * DEV_BSIZE;
	
	if (vmapbuf(bp, udata, len) < 0)
		return EFAULT;
	
	/*
	 * Access the block device layer using the device vnode (dp) and
	 * the translated block number (bio2) instead of the logical block
	 * number (bio1).
	 *
	 * Even though we are bypassing the vnode layer, we still
	 * want the vnode state to indicate that an I/O on its behalf
	 * is in progress.
	 */
	bp->b_cmd = BUF_CMD_READ;
	bio_start_transaction(&bp->b_bio1, &vp->v_track_read);
	vn_strategy(dp, &bp->b_bio2);
	return 0;
}
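The length clamping in ffs_rawread_readahead() is easy to miss: in the hole case the transfer is limited to the remainder of the current filesystem block, and in the mapped case it is limited to the contiguous byte run reported by VOP_BMAP, both measured from blockoff sectors into the block. A small helper restating just that arithmetic, with the same meaning for bsize, blockoff and bforwards as above (an illustrative distillation, not code from the driver):

/*
 * Sketch: clamp a raw-read length so it neither crosses the current
 * filesystem block (hole case) nor exceeds the contiguous byte run
 * reported by VOP_BMAP (mapped case).
 */
static size_t
rawread_clamp_len(size_t len, int blockoff, int bsize, int bforwards, int hole)
{
	if (hole) {
		/* Hole: limit to the remainder of this filesystem block. */
		if (len + blockoff * DEV_BSIZE > bsize)
			len = bsize - blockoff * DEV_BSIZE;
	} else {
		/* Mapped: limit to the contiguous run past blockoff. */
		if (len + blockoff * DEV_BSIZE > bforwards)
			len = bforwards - blockoff * DEV_BSIZE;
	}
	return (len);
}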
Example No. 6
static int
ext2_indirtrunc(struct inode *ip, daddr_t lbn, off_t doffset, daddr_t lastbn,
                int level, long *countp)
{
    int i;
    struct buf *bp;
    struct ext2_sb_info *fs = ip->i_e2fs;
    daddr_t *bap;
    struct vnode *vp;
    daddr_t *copy, nb, nlbn, last;
    long blkcount, factor;
    int nblocks, blocksreleased = 0;
    int error = 0, allerror = 0;

    /*
     * Calculate index in current block of last
     * block to be kept.  -1 indicates the entire
     * block so we need not calculate the index.
     */
    factor = 1;
    for (i = SINGLE; i < level; i++)
        factor *= NINDIR(fs);
    last = lastbn;
    if (lastbn > 0)
        last /= factor;
    nblocks = btodb(fs->s_blocksize);
    /*
     * Get buffer of block pointers, zero those entries corresponding
     * to blocks to be free'd, and update the on-disk copy first.
     * Since the double (triple) indirect block is truncated before
     * the single (double) indirect blocks below it, calls to bmap on
     * these blocks will fail.  However, we already have the on-disk
     * address, so we have to set the bio_offset field explicitly
     * instead of letting bread do everything for us.
     */
    vp = ITOV(ip);
    bp = getblk(vp, lblktodoff(fs, lbn), (int)fs->s_blocksize, 0, 0);
    if ((bp->b_flags & B_CACHE) == 0) {
        bp->b_flags &= ~(B_ERROR | B_INVAL);
        bp->b_cmd = BUF_CMD_READ;
        if (bp->b_bcount > bp->b_bufsize)
            panic("ext2_indirtrunc: bad buffer size");
        bp->b_bio2.bio_offset = doffset;
        bp->b_bio1.bio_done = biodone_sync;
        bp->b_bio1.bio_flags |= BIO_SYNC;
        vfs_busy_pages(bp->b_vp, bp);
        vn_strategy(vp, &bp->b_bio1);
        error = biowait(&bp->b_bio1, "biord");
    }
    if (error) {
        brelse(bp);
        *countp = 0;
        return (error);
    }

    bap = (daddr_t *)bp->b_data;
    MALLOC(copy, daddr_t *, fs->s_blocksize, M_TEMP, M_WAITOK);
    bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->s_blocksize);
    bzero((caddr_t)&bap[last + 1],
          (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
    if (last == -1)
        bp->b_flags |= B_INVAL;
    error = bwrite(bp);
    if (error)
        allerror = error;
    bap = copy;

    /*
     * Recursively free totally unused blocks.
     */
    for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
            i--, nlbn += factor) {
        nb = bap[i];
        if (nb == 0)
            continue;
        if (level > SINGLE) {
            if ((error = ext2_indirtrunc(ip, nlbn,
                                         fsbtodoff(fs, nb), (daddr_t)-1, level - 1, &blkcount)) != 0)
                allerror = error;
            blocksreleased += blkcount;
        }
        ext2_blkfree(ip, nb, fs->s_blocksize);
        blocksreleased += nblocks;
    }

    /*
     * Recursively free last partial block.
     */
    if (level > SINGLE && lastbn >= 0) {
        last = lastbn % factor;
        nb = bap[i];
        if (nb != 0) {
            error = ext2_indirtrunc(ip, nlbn, fsbtodoff(fs, nb),
                                    last, level - 1, &blkcount);
            if (error)
                allerror = error;
            blocksreleased += blkcount;
        }
    }
    FREE(copy, M_TEMP);
    *countp = blocksreleased;
    return (allerror);
}
Example No. 7
/*
 * Strategy routine called from dm_strategy.
 */
static int
dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
{
	dm_target_stripe_config_t *tsc;
	struct bio *bio = &bp->b_bio1;
	struct buf *nestbuf;
	uint64_t blkno, blkoff;
	uint64_t stripe, blknr;
	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
	int devnr;

	tsc = table_en->target_config;
	if (tsc == NULL)
		return 0;

	/* calculate extent of request */
	KKASSERT(bp->b_resid % DEV_BSIZE == 0);

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
	case BUF_CMD_WRITE:
	case BUF_CMD_FREEBLKS:
		/*
		 * Loop through to individual operations
		 */
		blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
		blkoff = 0;
		num_blks = bp->b_resid / DEV_BSIZE;
		nestiobuf_init(bio);

		while (num_blks > 0) {
			/* block number to stripe piece number */
			stripe = blkno / tsc->stripe_chunksize;
			stripe_off = blkno % tsc->stripe_chunksize;

			/* where we are inside the stripe */
			devnr = stripe % tsc->stripe_num;
			blknr = stripe / tsc->stripe_num;

			/* how much is left before we hit a boundary */
			stripe_rest = tsc->stripe_chunksize - stripe_off;

			/* issue this piece on stripe `stripe' */
			issue_blks = MIN(stripe_rest, num_blks);
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, blkoff,
					issue_blks * DEV_BSIZE, NULL);

			/* compute the block offset, then convert to bytes */
			nestbuf->b_bio1.bio_offset =
				blknr * tsc->stripe_chunksize + stripe_off;
			nestbuf->b_bio1.bio_offset +=
				tsc->stripe_devs[devnr].offset;
			nestbuf->b_bio1.bio_offset *= DEV_BSIZE;

			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);

			blkno += issue_blks;
			blkoff += issue_blks * DEV_BSIZE;
			num_blks -= issue_blks;
		}
		nestiobuf_start(bio);
		break;
	case BUF_CMD_FLUSH:
		nestiobuf_init(bio);
		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, 0, 0, NULL);
			nestbuf->b_bio1.bio_offset = 0;
			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);
		}
		nestiobuf_start(bio);
		break;
	default:
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bio);
		break;
	}
	return 0;
}
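dm_target_stripe_strategy() demonstrates the standard nested-BIO fan-out: nestiobuf_init() on the parent BIO, one getpbuf()/nestiobuf_add() per piece, vn_strategy() for each child, and nestiobuf_start() once every piece has been queued so the parent can complete. A stripped-down sketch of that skeleton; compute_piece() is a made-up placeholder for the per-piece geometry (target vnode, device byte offset, length):

/*
 * Sketch: skeleton of the nested-BIO fan-out used by
 * dm_target_stripe_strategy().  compute_piece() is a hypothetical
 * helper that would pick the target vnode, the child device offset
 * and the length of the next piece.
 */
static void
fan_out(struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	struct buf *nestbuf;
	struct vnode *vp;
	off_t doffset;
	size_t resid = bp->b_resid;
	size_t off = 0;
	size_t len;

	nestiobuf_init(bio);			/* parent collects completions */
	while (resid > 0) {
		len = compute_piece(bio, off, &vp, &doffset);	/* hypothetical */
		nestbuf = getpbuf(NULL);
		/* Child covers bytes [off, off+len) of the parent buffer. */
		nestiobuf_add(bio, nestbuf, off, len, NULL);
		nestbuf->b_bio1.bio_offset = doffset;
		vn_strategy(vp, &nestbuf->b_bio1);
		off += len;
		resid -= len;
	}
	nestiobuf_start(bio);			/* allow parent to complete */
}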
Example No. 8
/* Start the second phase of a RAID-4 or RAID-5 group write operation. */
void
complete_raid5_write(struct rqelement *rqe)
{
    int *sdata;						    /* source */
    int *pdata;						    /* and parity block data */
    int length;						    /* and count involved */
    int count;						    /* loop counter */
    int rqno;						    /* request index */
    int rqoffset;					    /* offset of request data from parity data */
    struct bio *ubio;					    /* user buffer header */
    struct request *rq;					    /* pointer to our request */
    struct rqgroup *rqg;				    /* and to the request group */
    struct rqelement *prqe;				    /* point to the parity block */
    struct drive *drive;				    /* drive to access */
    rqg = rqe->rqg;					    /* and to our request group */
    rq = rqg->rq;					    /* point to our request */
    ubio = rq->bio;					    /* user's buffer header */
    prqe = &rqg->rqe[0];				    /* point to the parity block */

    /*
     * If we get to this function, we have normal or
     * degraded writes, or a combination of both.  We do
     * the same thing in each case: we perform an
     * exclusive or to the parity block.  The only
     * difference is the origin of the data and the
     * address range.
     */
    if (rqe->flags & XFR_DEGRADED_WRITE) {		    /* do the degraded write stuff */
	pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */
	bzero(pdata, prqe->grouplen << DEV_BSHIFT);	    /* start with nothing in the parity block */

	/* Now get what data we need from each block */
	for (rqno = 1; rqno < rqg->count; rqno++) {	    /* for all the data blocks */
	    rqe = &rqg->rqe[rqno];			    /* this request */
	    sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */
	    length = rqe->grouplen << (DEV_BSHIFT - 2);	    /* and count involved */

	    /*
	     * Add the data block to the parity block.  Before
	     * we started the request, we zeroed the parity
	     * block, so the result of adding all the other
	     * blocks and the block we want to write will be
	     * the correct parity block.
	     */
	    for (count = 0; count < length; count++)
		pdata[count] ^= sdata[count];
	    if ((rqe->flags & XFR_MALLOCED)		    /* the buffer was malloced, */
	    &&((rqg->flags & XFR_NORMAL_WRITE) == 0)) {	    /* and we have no normal write, */
		Free(rqe->b.b_data);			    /* free it now */
		rqe->flags &= ~XFR_MALLOCED;
	    }
	}
    }
    if (rqg->flags & XFR_NORMAL_WRITE) {		    /* do normal write stuff */
	/* Get what data we need from each block */
	for (rqno = 1; rqno < rqg->count; rqno++) {	    /* for all the data blocks */
	    rqe = &rqg->rqe[rqno];			    /* this request */
	    if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE))
		== (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) {   /* good data block to write */
		sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */
		rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */
		pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */
		length = rqe->datalen * (DEV_BSIZE / sizeof(int)); /* and number of ints */

		/*
		 * "remove" the old data block
		 * from the parity block
		 */
		if ((pdata < ((int *) prqe->b.b_data))
		    || (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount)))
		    || (sdata < ((int *) rqe->b.b_data))
		    || (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount))))
		    panic("complete_raid5_write: bounds overflow");
		for (count = 0; count < length; count++)
		    pdata[count] ^= sdata[count];

		/* "add" the new data block */
		sdata = (int *) (&ubio->bio_buf->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */
		if ((sdata < ((int *) ubio->bio_buf->b_data))
		    || (&sdata[length] > ((int *) (ubio->bio_buf->b_data + ubio->bio_buf->b_bcount))))
		    panic("complete_raid5_write: bounds overflow");
		for (count = 0; count < length; count++)
		    pdata[count] ^= sdata[count];

		/* Free the malloced buffer */
		if (rqe->flags & XFR_MALLOCED) {	    /* the buffer was malloced, */
		    Free(rqe->b.b_data);		    /* free it */
		    rqe->flags &= ~XFR_MALLOCED;
		} else
		    panic("complete_raid5_write: malloc conflict");

		if ((rqe->b.b_cmd == BUF_CMD_READ)	    /* this was a read */
		&&((rqe->flags & XFR_BAD_SUBDISK) == 0)) {  /* and we can write this block */
		    rqe->b.b_cmd = BUF_CMD_WRITE;   /* we're writing now */
		    rqe->b.b_bio1.bio_done = complete_rqe;	    /* by calling us here */
		    rqe->flags &= ~XFR_PARITYOP;	    /* reset flags that brought us here */
		    rqe->b.b_data = &ubio->bio_buf->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */
		    rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */
		    rqe->b.b_resid = rqe->b.b_bcount;	    /* nothing transferred */
		    rqe->b.b_bio1.bio_offset += (off_t)rqe->dataoffset << DEV_BSHIFT;	    /* point to the correct block */
		    drive = &DRIVE[rqe->driveno];	    /* drive to access */
		    rqe->b.b_bio1.bio_driver_info = drive->dev;
		    rqg->active++;			    /* another active request */

							    /* We can't sleep here, so we just increment the counters. */
		    drive->active++;
		    if (drive->active >= drive->maxactive)
			drive->maxactive = drive->active;
		    vinum_conf.active++;
		    if (vinum_conf.active >= vinum_conf.maxactive)
			vinum_conf.maxactive = vinum_conf.active;
#if VINUMDEBUG
		    if (debug & DEBUG_ADDRESSES)
			log(LOG_DEBUG,
			    "  %s dev %s, sd %d, offset 0x%jx, devoffset 0x%jx, length %d\n",
			    (rqe->b.b_cmd == BUF_CMD_READ) ? "Read" : "Write",
			    drive->devicename,
			    rqe->sdno,
			    (uintmax_t)(rqe->b.b_bio1.bio_offset - ((off_t)SD[rqe->sdno].driveoffset << DEV_BSHIFT)),
			    (uintmax_t)rqe->b.b_bio1.bio_offset,
			    rqe->b.b_bcount);
		    if (debug & DEBUG_LASTREQS)
			logrq(loginfo_raid5_data, (union rqinfou) rqe, ubio);
#endif
		    vn_strategy(drive->vp, &rqe->b.b_bio1);
		}
	    }
	}
    }
    /* Finally, write the parity block */
    rqe = &rqg->rqe[0];
    rqe->b.b_cmd = BUF_CMD_WRITE;		    /* we're writing now */
    rqe->b.b_bio1.bio_done = complete_rqe;			    /* by calling us here */
    rqg->flags &= ~XFR_PARITYOP;			    /* reset flags that brought us here */
    rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT;	    /* length to write */
    rqe->b.b_resid = rqe->b.b_bcount;			    /* nothing transferred */
    drive = &DRIVE[rqe->driveno];			    /* drive to access */
    rqe->b.b_bio1.bio_driver_info = drive->dev;
    rqg->active++;					    /* another active request */

    /* We can't sleep here, so we just increment the counters. */
    drive->active++;
    if (drive->active >= drive->maxactive)
	drive->maxactive = drive->active;
    vinum_conf.active++;
    if (vinum_conf.active >= vinum_conf.maxactive)
	vinum_conf.maxactive = vinum_conf.active;

#if VINUMDEBUG
    if (debug & DEBUG_ADDRESSES)
	log(LOG_DEBUG,
	    "  %s dev %s, sd %d, offset 0x%jx, devoffset 0x%jx, length %d\n",
	    (rqe->b.b_cmd == BUF_CMD_READ) ? "Read" : "Write",
	    drive->devicename,
	    rqe->sdno,
	    (uintmax_t)(rqe->b.b_bio1.bio_offset - ((off_t)SD[rqe->sdno].driveoffset << DEV_BSHIFT)),
	    (uintmax_t)rqe->b.b_bio1.bio_offset,
	    rqe->b.b_bcount);
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_raid5_parity, (union rqinfou) rqe, ubio);
#endif
    vn_strategy(drive->vp, &rqe->b.b_bio1);
}
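The double XOR loops in complete_raid5_write() rest on XOR being its own inverse: removing the old contents of a data block from the parity and adding the new contents produces the parity of the updated stripe without touching the other members. A tiny standalone restatement of that identity, independent of the vinum request structures (lengths are in ints, as in the function above):

/*
 * Sketch: read-modify-write parity update,
 * new_parity = old_parity ^ old_data ^ new_data.
 * This is the double XOR loop performed per data block in
 * complete_raid5_write().
 */
static void
raid5_update_parity(int *parity, const int *old_data, const int *new_data,
		    int length)
{
	int i;

	for (i = 0; i < length; i++) {
		parity[i] ^= old_data[i];	/* "remove" the old block */
		parity[i] ^= new_data[i];	/* "add" the new block */
	}
}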
Example No. 9
/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ext2_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
 */
static
int
ext2_bmaparray(struct vnode *vp, ext2_daddr_t bn, ext2_daddr_t *bnp,
	      struct indir *ap, int *nump, int *runp, int *runb)
{
	struct inode *ip;
	struct buf *bp;
	struct ext2_mount *ump;
	struct mount *mp;
	struct ext2_sb_info *fs;
	struct indir a[NIADDR+1], *xap;
	ext2_daddr_t daddr;
	long metalbn;
	int error, maxrun, num;

	ip = VTOI(vp);
	mp = vp->v_mount;
	ump = VFSTOEXT2(mp);
	fs = ip->i_e2fs;
#ifdef DIAGNOSTIC
	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
		panic("ext2_bmaparray: invalid arguments");
#endif

	if (runp) {
		*runp = 0;
	}

	if (runb) {
		*runb = 0;
	}

	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;

	xap = ap == NULL ? a : ap;
	if (!nump)
		nump = &num;
	error = ext2_getlbns(vp, bn, xap, nump);
	if (error)
		return (error);

	num = *nump;
	if (num == 0) {
		*bnp = blkptrtodb(ump, ip->i_db[bn]);
		if (*bnp == 0)
			*bnp = -1;
		else if (runp) {
			daddr_t bnb = bn;
			for (++bn; bn < NDADDR && *runp < maxrun &&
			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
			    ++bn, ++*runp);
			bn = bnb;
			if (runb && (bn > 0)) {
				for (--bn; (bn >= 0) && (*runb < maxrun) &&
					is_sequential(ump, ip->i_db[bn],
						ip->i_db[bn+1]);
						--bn, ++*runb);
			}
		}
		return (0);
	}


	/* Get disk address out of indirect block array */
	daddr = ip->i_ib[xap->in_off];

	for (bp = NULL, ++xap; --num; ++xap) {
		/*
		 * Exit the loop if there is no disk address assigned yet and
		 * the indirect block isn't in the cache, or if we were
		 * looking for an indirect block and we've found it.
		 */

		metalbn = xap->in_lbn;
		if ((daddr == 0 &&
		     !findblk(vp, dbtodoff(fs, metalbn), FINDBLK_TEST)) ||
		    metalbn == bn) {
			break;
		}
		/*
		 * If we get here, we've either got the block in the cache
		 * or we have a disk address for it, go fetch it.
		 */
		if (bp)
			bqrelse(bp);

		xap->in_exists = 1;
		bp = getblk(vp, lblktodoff(fs, metalbn),
			    mp->mnt_stat.f_iosize, 0, 0);
		if ((bp->b_flags & B_CACHE) == 0) {
#ifdef DIAGNOSTIC
			if (!daddr)
				panic("ext2_bmaparray: indirect block not in cache");
#endif
			/*
			 * This runs through ext2_strategy using bio2 to
			 * cache the disk offset, then comes back through
			 * bio1.  So we want to wait on bio1
			 */
			bp->b_bio1.bio_done = biodone_sync;
			bp->b_bio1.bio_flags |= BIO_SYNC;
			bp->b_bio2.bio_offset = fsbtodoff(fs, daddr);
			bp->b_flags &= ~(B_INVAL|B_ERROR);
			bp->b_cmd = BUF_CMD_READ;
			vfs_busy_pages(bp->b_vp, bp);
			vn_strategy(bp->b_vp, &bp->b_bio1);
			error = biowait(&bp->b_bio1, "biord");
			if (error) {
				brelse(bp);
				return (error);
			}
		}

		daddr = ((ext2_daddr_t *)bp->b_data)[xap->in_off];
		if (num == 1 && daddr && runp) {
			for (bn = xap->in_off + 1;
			    bn < MNINDIR(ump) && *runp < maxrun &&
			    is_sequential(ump,
			    ((ext2_daddr_t *)bp->b_data)[bn - 1],
			    ((ext2_daddr_t *)bp->b_data)[bn]);
			    ++bn, ++*runp);
			bn = xap->in_off;
			if (runb && bn) {
				for(--bn; bn >= 0 && *runb < maxrun &&
					is_sequential(ump, ((daddr_t *)bp->b_data)[bn],
					    ((daddr_t *)bp->b_data)[bn+1]);
					--bn, ++*runb);
			}
		}
	}
	if (bp)
		bqrelse(bp);

	daddr = blkptrtodb(ump, daddr);
	*bnp = daddr == 0 ? -1 : daddr;
	return (0);
}
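The runp/runb bookkeeping in ext2_bmaparray() simply counts how many consecutive block pointers are physically contiguous on disk, capped at maxrun, so callers can cluster I/O. The forward scan written as a standalone helper, assuming the same is_sequential() predicate and pointer-array layout used above (an illustrative sketch, not code from the filesystem):

/*
 * Sketch: count how many block pointers after index `start` are
 * physically contiguous, up to maxrun.  blks[] and is_sequential()
 * play the same roles as ip->i_db[]/bp->b_data and the predicate used
 * in ext2_bmaparray().
 */
static int
count_forward_run(struct ext2_mount *ump, ext2_daddr_t *blks, int start,
		  int nblks, int maxrun)
{
	int run = 0;
	int bn;

	for (bn = start + 1;
	     bn < nblks && run < maxrun &&
	     is_sequential(ump, blks[bn - 1], blks[bn]);
	     ++bn)
		++run;
	return (run);
}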
Example No. 10
/*
 * spec_getpages() - get pages associated with device vnode.
 *
 * Note that spec_read and spec_write do not use the buffer cache, so we
 * must fully implement getpages here.
 */
static int
devfs_spec_getpages(struct vop_getpages_args *ap)
{
	vm_offset_t kva;
	int error;
	int i, pcount, size;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer and do sanity check.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/*
	 * Round up physical size for real devices.  We cannot round using
	 * v_mount's block size data because v_mount has nothing to do with
	 * the device.  i.e. it's usually '/dev'.  We need the physical block
	 * size for the device itself.
	 *
	 * We can't use v_rdev->si_mountpoint because it only exists when the
	 * block device is mounted.  However, we can use v_rdev.
	 */
	if (vn_isdisk(vp, NULL))
		blksiz = vp->v_rdev->si_bsize_phys;
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf_kva(NULL);
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_cmd = BUF_CMD_READ;
	bp->b_bcount = size;
	bp->b_resid = 0;
	bsetrunningbufspace(bp, size);

	bp->b_bio1.bio_offset = offset;
	bp->b_bio1.bio_done = devfs_spec_getpages_iodone;

	mycpu->gd_cnt.v_vnodein++;
	mycpu->gd_cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	vn_strategy(ap->a_vp, &bp->b_bio1);

	crit_enter();

	/* We definitely need to be at splbio here. */
	while (bp->b_cmd != BUF_CMD_DONE)
		tsleep(bp, 0, "spread", 0);

	crit_exit();

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/*
	 * If EOF is encountered we must zero-extend the result in order
	 * to ensure that the page does not contain garbage.  When no
	 * error occurs, an early EOF is indicated if b_bcount got truncated.
	 * b_resid is relative to b_bcount and should be 0, but some devices
	 * might indicate an EOF with b_resid instead of truncating b_bcount.
	 */
	nread = bp->b_bcount - bp->b_resid;
	if (nread < ap->a_count)
		bzero((caddr_t)kva + nread, ap->a_count - nread);
	pmap_qremove(kva, pcount);

	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		/*
		 * NOTE: vm_page_undirty/clear_dirty etc do not clear the
		 *	 pmap modified bit.  pmap modified bit should have
		 *	 already been cleared.
		 */
		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			vm_page_undirty(m);
		} else if (toff < nread) {
			/*
			 * Since this is a VM request, we have to supply the
			 * unaligned offset to allow vm_page_set_valid()
			 * to zero sub-DEV_BSIZE'd portions of the page.
			 */
			vm_page_set_valid(m, 0, nread - toff);
			vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
		} else {
			m->valid = 0;
			vm_page_undirty(m);
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_REFERENCED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
			/*
			 * Since this is a VM request, we need to make the
			 * entire page presentable by zeroing invalid sections.
			 */
			if (m->valid != VM_PAGE_BITS_ALL)
			    vm_page_zero_invalid(m, FALSE);
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "spec_getpages:(%s) I/O read failure: (error=%d) bp %p vp %p\n",
			devtoname(vp->v_rdev), error, bp, bp->b_vp);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               size: %d, resid: %d, a_count: %d, valid: 0x%x\n",
		    size, bp->b_resid, ap->a_count, m->valid);
		devfs_debug(DEVFS_DEBUG_WARNING,
	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp, NULL);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp, NULL);
	if (DEVFS_NODE(ap->a_vp))
		nanotime(&DEVFS_NODE(ap->a_vp)->mtime);
	return VM_PAGER_OK;
}
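After the transfer, devfs_spec_getpages() classifies each page by comparing its byte range within the request against nread, the number of bytes actually read: fully covered pages become entirely valid, a partially covered page is validated only up to nread, and pages past the short read are marked invalid. A compact restatement of just that per-page decision, using the VM calls from the function above (a sketch for illustration only):

/*
 * Sketch: mark one page's validity according to how far the read got.
 * toff/nextoff bound the page within the transfer, nread is the byte
 * count actually read; mirrors the loop body in devfs_spec_getpages().
 */
static void
getpages_mark_page(vm_page_t m, int toff, int nextoff, int nread)
{
	if (nextoff <= nread) {
		/* Page entirely covered by the read. */
		m->valid = VM_PAGE_BITS_ALL;
		vm_page_undirty(m);
	} else if (toff < nread) {
		/* Partially covered: validate only the bytes we got. */
		vm_page_set_valid(m, 0, nread - toff);
		vm_page_clear_dirty_end_nonincl(m, 0, nread - toff);
	} else {
		/* Beyond the short read: nothing valid. */
		m->valid = 0;
		vm_page_undirty(m);
	}
}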