Example #1
static int
nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
{
	int err;

	bp->bio_driver1 = NULL;
	atomic_add_int(&ndisk->cur_depth, 1);
	err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
	if (err) {
		atomic_add_int(&ndisk->cur_depth, -1);
		if (__predict_false(bp->bio_flags & BIO_ORDERED))
			atomic_add_int(&ndisk->ordered_in_flight, -1);
		bp->bio_error = err;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;
		biodone(bp);
		return (-1);
	}

	return (0);
}
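
The error path above (record an errno in bio_error, set BIO_ERROR, account the
whole request as residual, then biodone()) is the recurring fail-the-bio idiom
in the FreeBSD-style examples below. A minimal sketch of that idiom factored
into a helper (hypothetical; not part of any driver shown here):

/* Complete a bio with an error and a full residual (sketch). */
static void
bio_fail(struct bio *bp, int error)
{
	bp->bio_error = error;			/* errno for the consumer */
	bp->bio_flags |= BIO_ERROR;		/* mark the request failed */
	bp->bio_resid = bp->bio_bcount;		/* nothing was transferred */
	biodone(bp);				/* notify waiters of completion */
}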
Example #2
/* Main flash handling task. */
static void
opalflash_task(void *arg)
{
	struct opalflash_softc *sc;
	struct bio *bp;
	device_t dev;

	sc = arg;

	for (;;) {
		dev = sc->sc_dev;
		OPALFLASH_LOCK(sc);
		do {
			bp = bioq_first(&sc->sc_bio_queue);
			if (bp == NULL)
				msleep(sc, &sc->sc_mtx, PRIBIO, "opalflash", 0);
		} while (bp == NULL);
		bioq_remove(&sc->sc_bio_queue, bp);
		OPALFLASH_UNLOCK(sc);

		switch (bp->bio_cmd) {
		case BIO_DELETE:
			bp->bio_error = opalflash_erase(sc, bp->bio_offset,
			    bp->bio_bcount);
			break;
		case BIO_READ:
			bp->bio_error = opalflash_read(sc, bp->bio_offset,
			    bp->bio_data, bp->bio_bcount);
			break;
		case BIO_WRITE:
			bp->bio_error = opalflash_write(sc, bp->bio_offset,
			    bp->bio_data, bp->bio_bcount);
			break;
		default:
			bp->bio_error = EINVAL;
		}
		biodone(bp);
	}
}
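
The msleep() loop above is the consumer half of a queue-and-wake pattern; the
producer half is the driver's strategy routine, which enqueues the bio and
wakes the task on the same channel (sc). A sketch of what that producer
plausibly looks like, assuming the softc hangs off d_drv1 (the real opalflash
strategy routine may differ):

static void
opalflash_strategy(struct bio *bp)
{
	struct opalflash_softc *sc = bp->bio_disk->d_drv1;	/* assumption */

	OPALFLASH_LOCK(sc);
	bioq_insert_tail(&sc->sc_bio_queue, bp);	/* queue the request */
	wakeup(sc);					/* rouse the worker task */
	OPALFLASH_UNLOCK(sc);
}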
Example #3
void
puffs_parkdone_asyncbiowrite(struct puffs_mount *pmp,
	struct puffs_req *preq, void *arg)
{
	struct puffs_vnmsg_write *write_msg = (void *)preq;
	struct buf *bp = arg;

	DPRINTF(("%s\n", __func__));

	bp->b_error = checkerr(pmp, preq->preq_rv, __func__);
	if (bp->b_error == 0) {
		if (write_msg->pvnr_resid > bp->b_bcount) {
			puffs_senderr(pmp, PUFFS_ERR_WRITE, E2BIG,
			    "resid grew", preq->preq_cookie);
			bp->b_error = E2BIG;
		} else {
			bp->b_resid = write_msg->pvnr_resid;
		}
	}

	biodone(bp);
}
Example #4
/*
 * Memory file system I/O.
 *
 * Trivial on the HP since buffer has already been mapped into KVA space.
 */
void
mfs_doio(struct mfsnode *mfsp, struct buf *bp)
{
	caddr_t base;
	long offset = bp->b_blkno << DEV_BSHIFT;
	int s;

	if (bp->b_bcount > mfsp->mfs_size - offset)
		bp->b_bcount = mfsp->mfs_size - offset;

	base = mfsp->mfs_baseoff + offset;
	if (bp->b_flags & B_READ)
		bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
	else
		bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
	if (bp->b_error)
		bp->b_flags |= B_ERROR;
	else
		bp->b_resid = 0;
	s = splbio();
	biodone(bp);
	splx(s);
}
Example #5
static void
dk_done1(struct dk_softc *dksc, struct buf *bp, bool lock)
{
	struct disk *dk = &dksc->sc_dkdev;

	if (bp->b_error != 0) {
		struct cfdriver *cd = device_cfdriver(dksc->sc_dev);

		diskerr(bp, cd->cd_name, "error", LOG_PRINTF, 0,
			dk->dk_label);
		printf("\n");
	}

	if (lock)
		mutex_enter(&dksc->sc_iolock);
	disk_unbusy(dk, bp->b_bcount - bp->b_resid, (bp->b_flags & B_READ));
	if (lock)
		mutex_exit(&dksc->sc_iolock);

	rnd_add_uint32(&dksc->sc_rnd_source, bp->b_rawblkno);

	biodone(bp);
}
Example #6
static void
destroy_geom_disk(struct nvd_disk *ndisk)
{
	struct bio *bp;

	taskqueue_free(ndisk->tq);
	disk_destroy(ndisk->disk);

	mtx_lock(&ndisk->bioqlock);
	for (;;) {
		bp = bioq_takefirst(&ndisk->bioq);
		if (bp == NULL)
			break;
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;

		biodone(bp);
	}
	mtx_unlock(&ndisk->bioqlock);

	mtx_destroy(&ndisk->bioqlock);
}
Example #7
/* I/O on subdisk completed */
void
sdio_done(struct bio *bio)
{
    struct sdbuf *sbp;

    get_mplock();

    sbp = (struct sdbuf *) bio->bio_buf;
    if (sbp->b.b_flags & B_ERROR) {			    /* had an error */
	sbp->bio->bio_buf->b_flags |= B_ERROR;			    /* propagate upwards */
	sbp->bio->bio_buf->b_error = sbp->b.b_error;
    }
#ifdef VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_sdiodone, (union rqinfou)bio, bio);
#endif
    sbp->bio->bio_buf->b_resid = sbp->b.b_resid;			    /* copy the resid field */
    /* Now update the statistics */
    if (sbp->b.b_cmd == BUF_CMD_READ) {			    /* read operation */
	DRIVE[sbp->driveno].reads++;
	DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount;
	SD[sbp->sdno].reads++;
	SD[sbp->sdno].bytes_read += sbp->b.b_bcount;
    } else {						    /* write operation */
	DRIVE[sbp->driveno].writes++;
	DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount;
	SD[sbp->sdno].writes++;
	SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
    }
    biodone_sync(bio);
    biodone(sbp->bio);					    /* complete the caller's I/O */
    BUF_UNLOCK(&sbp->b);
    uninitbufbio(&sbp->b);
    Free(sbp);
    rel_mplock();
}
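
Example #7 propagates completion state upward by hand: the child buf's error
flag and code are copied to the parent only on failure, while the residual is
always copied. The same logic as a standalone helper (hypothetical,
DragonFly-style buf/bio):

static void
propagate_child_status(struct buf *child, struct bio *parent)
{
	if (child->b_flags & B_ERROR) {			/* child failed */
		parent->bio_buf->b_flags |= B_ERROR;	/* propagate upwards */
		parent->bio_buf->b_error = child->b_error;
	}
	parent->bio_buf->b_resid = child->b_resid;	/* copy the resid field */
}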
Example #8
static void
mfi_disk_strategy(struct bio *bio)
{
	struct mfi_disk *sc;
	struct mfi_softc *controller;

	sc = bio->bio_disk->d_drv1;

	if (sc == NULL) {
		bio->bio_error = EINVAL;
		bio->bio_flags |= BIO_ERROR;
		bio->bio_resid = bio->bio_bcount;
		biodone(bio);
		return;
	}

	controller = sc->ld_controller;
	bio->bio_driver1 = (void *)(uintptr_t)sc->ld_id;
	mtx_lock(&controller->mfi_io_lock);
	mfi_enqueue_bio(controller, bio);
	mfi_startio(controller);
	mtx_unlock(&controller->mfi_io_lock);
	return;
}
Example #9
void
rdstrategy(struct buf *bp)
{
	struct rdsoftc *rd;
	struct hdcsoftc *sc;
	struct disklabel *lp;
	int s;

	if ((rd = device_lookup_private(&rd_cd, DISKUNIT(bp->b_dev))) == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}
	sc = rd->sc_hdc;

	lp = rd->sc_disk.dk_label;
	if ((bounds_check_with_label(&rd->sc_disk, bp, 1)) <= 0)
		goto done;

	if (bp->b_bcount == 0)
		goto done;

	bp->b_rawblkno =
	    bp->b_blkno + lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
	bp->b_cylinder = bp->b_rawblkno / lp->d_secpercyl;

	s = splbio();
	BUFQ_PUT(sc->sc_q, bp);
	if (inq == 0) {
		inq = 1;
		vsbus_dma_start(&sc->sc_vd);
	}
	splx(s);
	return;

done:	biodone(bp);
}
Example #10
/*
 * Read/write routine for a buffer.  Finds the proper unit, range checks
 * arguments, and schedules the transfer.  Does not wait for the transfer
 * to complete.  Multi-page transfers are supported.  All I/O requests must
 * be a multiple of a sector in length.
 */
static void
idad_strategy(struct bio *bp)
{
	struct idad_softc *drv;
	int s;

	drv = bp->bio_disk->d_drv1;
	if (drv == NULL) {
		bp->bio_error = EINVAL;
		goto bad;
	}

	/*
	 * software write protect check
	 */
	if (drv->flags & DRV_WRITEPROT && (bp->bio_cmd == BIO_WRITE)) {
		bp->bio_error = EROFS;
		goto bad;
	}

	bp->bio_driver1 = drv;
	s = splbio();
	ida_submit_buf(drv->controller, bp);
	splx(s);
	return;

bad:
	bp->bio_flags |= BIO_ERROR;

	/*
	 * Correctly set the buf to indicate a completed transfer
	 */
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}
Example #11
void
destroy_geom_disk(struct nand_chip *chip)
{
	struct bio *bp;

	taskqueue_free(chip->tq);
	disk_destroy(chip->ndisk);
	disk_destroy(chip->rdisk);

	mtx_lock(&chip->qlock);
	for (;;) {
		bp = bioq_takefirst(&chip->bioq);
		if (bp == NULL)
			break;
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;

		biodone(bp);
	}
	mtx_unlock(&chip->qlock);

	mtx_destroy(&chip->qlock);
}
Example #12
void
fss_strategy(struct buf *bp)
{
	const bool write = ((bp->b_flags & B_READ) != B_READ);
	struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev));

	mutex_enter(&sc->sc_slock);

	if (write || !FSS_ISVALID(sc)) {

		mutex_exit(&sc->sc_slock);

		bp->b_error = (write ? EROFS : ENXIO);
		bp->b_resid = bp->b_bcount;
		biodone(bp);
		return;
	}

	bp->b_rawblkno = bp->b_blkno;
	bufq_put(sc->sc_bufq, bp);
	cv_signal(&sc->sc_work_cv);

	mutex_exit(&sc->sc_slock);
}
Example #13
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
ntfs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf *bp = ap->a_bp;
	struct vnode *vp = ap->a_vp;
	struct fnode *fp = VTOF(vp);
	struct ntnode *ip = FTONT(fp);
	struct ntfsmount *ntmp = ip->i_mp;
	int error;

	dprintf(("ntfs_strategy: blkno: %d, lblkno: %d\n",
		(u_int32_t)bp->b_blkno,
		(u_int32_t)bp->b_lblkno));

	dprintf(("strategy: bcount: %u flags: 0x%x\n",
		(u_int32_t)bp->b_bcount,bp->b_flags));

	if (bp->b_flags & B_READ) {
		u_int32_t toread;

		if (ntfs_cntob(bp->b_blkno) >= fp->f_size) {
			clrbuf(bp);
			error = 0;
		} else {
			toread = MIN(bp->b_bcount,
				 fp->f_size - ntfs_cntob(bp->b_blkno));
			dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
				toread,(u_int32_t)fp->f_size));

			error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
				fp->f_attrname, ntfs_cntob(bp->b_blkno),
				toread, bp->b_data, NULL);

			if (error) {
				printf("ntfs_strategy: ntfs_readattr failed\n");
				bp->b_error = error;
			}

			memset((char *)bp->b_data + toread, 0,
			    bp->b_bcount - toread);
		}
	} else {
		size_t tmp;
		u_int32_t towrite;

		if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) {
			printf("ntfs_strategy: CAN'T EXTEND FILE\n");
			bp->b_error = error = EFBIG;
		} else {
			towrite = MIN(bp->b_bcount,
				fp->f_size - ntfs_cntob(bp->b_blkno));
			dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n",
				towrite,(u_int32_t)fp->f_size));

			error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,
				fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite,
				bp->b_data, &tmp, NULL);

			if (error) {
				printf("ntfs_strategy: ntfs_writeattr fail\n");
				bp->b_error = error;
			}
		}
	}
	biodone(bp);
	return (error);
}
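
Both branches above clamp the transfer to the attribute size and, on the read
side, zero the unread tail of the buffer so the caller never sees stale
memory. That convention in isolation (hypothetical helper):

/* Zero everything past the bytes actually read into the buffer. */
static void
zero_fill_tail(struct buf *bp, u_int32_t valid)
{
	if (valid < bp->b_bcount)
		memset((char *)bp->b_data + valid, 0, bp->b_bcount - valid);
}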
Example #14
/*
 * Strategy routine called from dm_strategy.
 */
static int
dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp)
{
	dm_target_stripe_config_t *tsc;
	struct bio *bio = &bp->b_bio1;
	struct buf *nestbuf;
	uint64_t blkno, blkoff;
	uint64_t stripe, blknr;
	uint32_t stripe_off, stripe_rest, num_blks, issue_blks;
	int devnr;

	tsc = table_en->target_config;
	if (tsc == NULL)
		return 0;

	/* calculate extent of request */
	KKASSERT(bp->b_resid % DEV_BSIZE == 0);

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
	case BUF_CMD_WRITE:
	case BUF_CMD_FREEBLKS:
		/*
		 * Loop through to individual operations
		 */
		blkno = bp->b_bio1.bio_offset / DEV_BSIZE;
		blkoff = 0;
		num_blks = bp->b_resid / DEV_BSIZE;
		nestiobuf_init(bio);

		while (num_blks > 0) {
			/* blockno to strip piece nr */
			stripe = blkno / tsc->stripe_chunksize;
			stripe_off = blkno % tsc->stripe_chunksize;

			/* where we are inside the strip */
			devnr = stripe % tsc->stripe_num;
			blknr = stripe / tsc->stripe_num;

			/* how much is left before we hit a boundary */
			stripe_rest = tsc->stripe_chunksize - stripe_off;

			/* issue this piece on stripe `stripe' */
			issue_blks = MIN(stripe_rest, num_blks);
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, blkoff,
					issue_blks * DEV_BSIZE, NULL);

			/* I need number of bytes. */
			nestbuf->b_bio1.bio_offset =
				blknr * tsc->stripe_chunksize + stripe_off;
			nestbuf->b_bio1.bio_offset +=
				tsc->stripe_devs[devnr].offset;
			nestbuf->b_bio1.bio_offset *= DEV_BSIZE;

			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);

			blkno += issue_blks;
			blkoff += issue_blks * DEV_BSIZE;
			num_blks -= issue_blks;
		}
		nestiobuf_start(bio);
		break;
	case BUF_CMD_FLUSH:
		nestiobuf_init(bio);
		for (devnr = 0; devnr < tsc->stripe_num; ++devnr) {
			nestbuf = getpbuf(NULL);
			nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS;

			nestiobuf_add(bio, nestbuf, 0, 0, NULL);
			nestbuf->b_bio1.bio_offset = 0;
			vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode,
				    &nestbuf->b_bio1);
		}
		nestiobuf_start(bio);
		break;
	default:
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
		biodone(bio);
		break;
	}
	return 0;
}
Example #15
/* Pseudo strategy function
 * Called by scsipi_do_ioctl() via physio/physstrat if there is to
 * be data transferred, and directly if there is no data transfer.
 *
 * Should I reorganize this so it returns to physio instead
 * of sleeping in scsiio_scsipi_cmd?  Is there any advantage, other
 * than avoiding the probable duplicate wakeup in iodone? [PD]
 *
 * No, seems ok to me... [JRE]
 * (I don't see any duplicate wakeups)
 *
 * Can't be used with block devices or raw_read/raw_write directly
 * from the cdevsw/bdevsw tables because they couldn't have added
 * the screq structure. [JRE]
 */
static void
scsistrategy(struct buf *bp)
{
	struct scsi_ioctl *si;
	scsireq_t *screq;
	struct scsipi_periph *periph;
	int error;
	int flags = 0;

	si = si_find(bp);
	if (si == NULL) {
		printf("scsistrategy: "
		    "No matching ioctl request found in queue\n");
		error = EINVAL;
		goto done;
	}
	screq = &si->si_screq;
	periph = si->si_periph;
	SC_DEBUG(periph, SCSIPI_DB2, ("user_strategy\n"));

	/*
	 * We're in trouble if physio tried to break up the transfer.
	 */
	if (bp->b_bcount != screq->datalen) {
		scsipi_printaddr(periph);
		printf("physio split the request.. cannot proceed\n");
		error = EIO;
		goto done;
	}

	if (screq->timeout == 0) {
		error = EINVAL;
		goto done;
	}

	if (screq->cmdlen > sizeof(struct scsipi_generic)) {
		scsipi_printaddr(periph);
		printf("cmdlen too big\n");
		error = EFAULT;
		goto done;
	}

	if ((screq->flags & SCCMD_READ) && screq->datalen > 0)
		flags |= XS_CTL_DATA_IN;
	if ((screq->flags & SCCMD_WRITE) && screq->datalen > 0)
		flags |= XS_CTL_DATA_OUT;
	if (screq->flags & SCCMD_TARGET)
		flags |= XS_CTL_TARGET;
	if (screq->flags & SCCMD_ESCAPE)
		flags |= XS_CTL_ESCAPE;

	error = scsipi_command(periph, (void *)screq->cmd, screq->cmdlen,
	    (void *)bp->b_data, screq->datalen,
	    0, /* user must do the retries *//* ignored */
	    screq->timeout, bp, flags | XS_CTL_USERCMD);

done:
	if (error)
		bp->b_resid = bp->b_bcount;
	bp->b_error = error;
	biodone(bp);
	return;
}
Example #16
static void
isf_task(void *arg)
{
	struct isf_softc	*sc = arg;
	struct bio		*bp;
	int			ss = sc->isf_disk->d_sectorsize;
	int			error, i;

	for (;;) {
		ISF_LOCK(sc);
		do {
			bp = bioq_first(&sc->isf_bioq);
			if (bp == NULL) {
				if (sc->isf_doomed)
					kproc_exit(0);
				else
					ISF_SLEEP(sc, sc, 0);
			}
		} while (bp == NULL);
		bioq_remove(&sc->isf_bioq, bp);

		error = 0;
		switch (bp->bio_cmd) {
		case BIO_READ:
			isf_read(sc, bp->bio_pblkno * ss, bp->bio_data,
			    bp->bio_bcount);
			break;

		case BIO_WRITE:
			/*
			 * In principle one could suspend the in-progress
			 * erase, process any pending writes to other
			 * blocks and then proceed, but that seems
			 * overly complex for the likely usage modes.
			 */
			if (sc->isf_erasing) {
				error = EBUSY;
				break;
			}

			/*
			 * Read in the block we want to write and check that
			 * we're only setting bits to 0.  If an erase would
			 * be required return an I/O error.
			 */
			isf_read(sc, bp->bio_pblkno * ss, sc->isf_rbuf,
			    bp->bio_bcount);
			for (i = 0; i < bp->bio_bcount / 2; i++)
				if ((sc->isf_rbuf[i] &
				    ((uint16_t *)bp->bio_data)[i]) !=
				    ((uint16_t *)bp->bio_data)[i]) {
					device_printf(sc->isf_dev, "write"
					    " requires erase at 0x%08jx\n",
					    bp->bio_pblkno * ss);
					error = EIO;
					break;
				}
			if (error != 0)
				break;

			error = isf_write(sc, bp->bio_pblkno * ss,
			    bp->bio_data, bp->bio_bcount);
			break;

		default:
			panic("%s: unsupported I/O operation %d", __func__,
			    bp->bio_cmd);
		}
		if (error == 0)
			biodone(bp);
		else
			biofinish(bp, NULL, error);
		ISF_UNLOCK(sc);
	}
}
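
The word-by-word check in the BIO_WRITE case encodes a NOR-flash rule: a
program operation can only clear bits (1 -> 0), so any write that would turn
a 0 back into a 1 needs an erase first. The same test as a standalone
predicate (a sketch, not part of the driver):

static bool
write_needs_erase(const uint16_t *cur, const uint16_t *want, size_t nwords)
{
	size_t i;

	for (i = 0; i < nwords; i++)
		if ((cur[i] & want[i]) != want[i])	/* needs a 0 -> 1 flip */
			return (true);
	return (false);
}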
Example #17
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
ufs_strategy(void *v)
{
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap = v;
	struct buf	*bp;
	struct vnode	*vp;
	struct inode	*ip;
	struct mount	*mp;
	int		error;

	bp = ap->a_bp;
	vp = ap->a_vp;
	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("ufs_strategy: spec");
	KASSERT(bp->b_bcount != 0);
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
				 NULL);
		if (error) {
			bp->b_error = error;
			biodone(bp);
			return (error);
		}
		if (bp->b_blkno == -1) /* no valid data */
			clrbuf(bp);
	}
	if (bp->b_blkno < 0) { /* block is not on disk */
		biodone(bp);
		return (0);
	}
	vp = ip->i_devvp;

	error = VOP_STRATEGY(vp, bp);
	if (error)
		return error;

	if (!BUF_ISREAD(bp))
		return 0;

	mp = wapbl_vptomp(vp);
	if (mp == NULL || mp->mnt_wapbl_replay == NULL ||
	    !WAPBL_REPLAY_ISOPEN(mp) ||
	    !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount))
		return 0;

	error = biowait(bp);
	if (error)
		return error;

	error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount);
	if (error) {
		mutex_enter(&bufcache_lock);
		SET(bp->b_cflags, BC_INVAL);
		mutex_exit(&bufcache_lock);
	}
	return error;
}
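
The WAPBL tail above works because VOP_STRATEGY() only queues the I/O;
biowait() is what sleeps until biodone() fires. The synchronous pattern on
its own, as a hypothetical caller-side helper:

static int
read_block_sync(struct vnode *devvp, struct buf *bp)
{
	int error;

	error = VOP_STRATEGY(devvp, bp);	/* queue the transfer */
	if (error)
		return error;
	return biowait(bp);	/* sleep until biodone() completes it */
}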
Example #18
static void
ptdone(struct cam_periph *periph, union ccb *done_ccb)
{
	struct pt_softc *softc;
	struct ccb_scsiio *csio;

	softc = (struct pt_softc *)periph->softc;
	csio = &done_ccb->csio;
	switch (csio->ccb_h.ccb_state) {
	case PT_CCB_BUFFER_IO:
	case PT_CCB_BUFFER_IO_UA:
	{
		struct buf *bp;
		struct bio *bio;

		bio = (struct bio *)done_ccb->ccb_h.ccb_bio;
		bp = bio->bio_buf;

		if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
			int error;
			int sf;
			
			if ((csio->ccb_h.ccb_state & PT_CCB_RETRY_UA) != 0)
				sf = SF_RETRY_UA;
			else
				sf = 0;

			error = pterror(done_ccb, CAM_RETRY_SELTO, sf);
			if (error == ERESTART) {
				/*
				 * A retry was scheduled, so
				 * just return.
				 */
				return;
			}
			if (error != 0) {
				struct buf *q_bp;
				struct bio *q_bio;

				if (error == ENXIO) {
					/*
					 * Catastrophic error.  Mark our device
					 * as invalid.
					 */
					xpt_print(periph->path,
					    "Invalidating device\n");
					softc->flags |= PT_FLAG_DEVICE_INVALID;
				}

				/*
				 * return all queued I/O with EIO, so that
				 * the client can retry these I/Os in the
				 * proper order should it attempt to recover.
				 */
				while ((q_bio = bioq_takefirst(&softc->bio_queue)) != NULL) {
					q_bp = q_bio->bio_buf;
					q_bp->b_resid = q_bp->b_bcount;
					q_bp->b_error = EIO;
					q_bp->b_flags |= B_ERROR;
					biodone(q_bio);
				}
				bp->b_error = error;
				bp->b_resid = bp->b_bcount;
				bp->b_flags |= B_ERROR;
			} else {
				bp->b_resid = csio->resid;
				bp->b_error = 0;
				if (bp->b_resid != 0) {
					/* Short transfer ??? */
					bp->b_flags |= B_ERROR;
				}
			}
			if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0)
				cam_release_devq(done_ccb->ccb_h.path,
						 /*relsim_flags*/0,
						 /*reduction*/0,
						 /*timeout*/0,
						 /*getcount_only*/0);
		} else {
			bp->b_resid = csio->resid;
			if (bp->b_resid != 0)
				bp->b_flags |= B_ERROR;
		}

		/*
		 * Block out any asynchronous callbacks
		 * while we touch the pending ccb list.
		 */
		LIST_REMOVE(&done_ccb->ccb_h, periph_links.le);

		devstat_end_transaction_buf(&softc->device_stats, bp);
		biodone(bio);
		break;
	}
	case PT_CCB_WAITING:
		/* Caller will release the CCB */
		wakeup(&done_ccb->ccb_h.cbfcnp);
		return;
	}
	xpt_release_ccb(done_ccb);
}
Example #19
File: bmd.c Project: ryo/netbsd-src
void
bmdstrategy(struct buf *bp)
{
	int unit = BMD_UNIT(bp->b_dev);
	struct bmd_softc *sc;
	int offset, disksize, resid;
	int page, pg_offset, pg_resid;
	void *data;

	if (unit >= bmd_cd.cd_ndevs) {
		bp->b_error = ENXIO;
		goto done;
	}

	sc = device_lookup_private(&bmd_cd, BMD_UNIT(bp->b_dev));
	if (sc == NULL) {
		bp->b_error = ENXIO;
		goto done;
	}

	DPRINTF(("bmdstrategy: %s blkno %d bcount %ld:",
		(bp->b_flags & B_READ) ? "read " : "write",
		bp->b_blkno, bp->b_bcount));

	bp->b_resid = bp->b_bcount;
	offset = (bp->b_blkno << DEV_BSHIFT);
	disksize = sc->sc_maxpage * BMD_PAGESIZE;
	if (offset >= disksize) {
		/* EOF if read, EIO if write */
		if (bp->b_flags & B_READ)
			goto done;
		bp->b_error = EIO;
		goto done;
	}

	resid = bp->b_resid;
	if (resid > disksize - offset)
		resid = disksize - offset;

	data = bp->b_data;
	do {
		page = offset / BMD_PAGESIZE;
		pg_offset = offset % BMD_PAGESIZE;

		/* length */
		pg_resid = MIN(resid, BMD_PAGESIZE - pg_offset);

		/* switch bank page */
		bus_space_write_1(sc->sc_iot, sc->sc_ioh, BMD_PAGE, page);

		/* XXX we should use DMA transfer? */
		if ((bp->b_flags & B_READ)) {
			bus_space_read_region_1(sc->sc_iot, sc->sc_bank,
				pg_offset, data, pg_resid);
		} else {
			bus_space_write_region_1(sc->sc_iot, sc->sc_bank,
				pg_offset, data, pg_resid);
		}

		data = (char *)data + pg_resid;
		offset += pg_resid;
		resid -= pg_resid;
		bp->b_resid -= pg_resid;
	} while (resid > 0);

	DPRINTF(("\n"));

 done:
	biodone(bp);
}
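
The do/while loop splits the transfer at bank-page boundaries: compute the
page and the offset within it, then move at most the rest of that page per
pass. The arithmetic in isolation (hypothetical helper; e.g. with a page size
of 0x8000, offset 0x9000 and resid 0x10000 yield pieces of 0x7000, 0x8000 and
0x1000):

static int
page_chunk(int offset, int resid, int pagesize, int *page, int *pg_offset)
{
	*page = offset / pagesize;		/* which bank page */
	*pg_offset = offset % pagesize;		/* offset inside that page */
	return MIN(resid, pagesize - *pg_offset); /* bytes up to the boundary */
}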
Example #20
File: mcd.c Project: MarginC/kame
static void
mcdstrategy(struct bio *bp)
{
	struct mcd_softc *sc;
	int s;

	sc = (struct mcd_softc *)bp->bio_dev->si_drv1;

	/* test validity */
/*MCD_TRACE("strategy: buf=0x%lx, unit=%ld, block#=%ld bcount=%ld\n",
	bp,unit,bp->bio_blkno,bp->bio_bcount);*/

	if (bp->bio_blkno < 0) {
		device_printf(sc->dev, "strategy failure: blkno = %ld, bcount = %ld\n",
			(long)bp->bio_blkno, bp->bio_bcount);
		bp->bio_error = EINVAL;
		bp->bio_flags |= BIO_ERROR;
		goto bad;
	}

	/* if device invalidated (e.g. media change, door open), error */
	if (!(sc->data.flags & MCDVALID)) {
		device_printf(sc->dev, "media changed\n");
		bp->bio_error = EIO;
		goto bad;
	}

	/* read only */
	if (!(bp->bio_cmd == BIO_READ)) {
		bp->bio_error = EROFS;
		goto bad;
	}

	/* no data to read */
	if (bp->bio_bcount == 0)
		goto done;

	if (!(sc->data.flags & MCDTOC)) {
		bp->bio_error = EIO;
		goto bad;
	}

	bp->bio_pblkno = bp->bio_blkno;
	bp->bio_resid = 0;

	/* queue it */
	s = splbio();
	bioqdisksort(&sc->data.head, bp);
	splx(s);

	/* now check whether we can perform processing */
	mcd_start(sc);
	return;

bad:
	bp->bio_flags |= BIO_ERROR;
done:
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);
	return;
}
Example #21
int
zvol_strategy(buf_t *bp)
{
	zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev));
	uint64_t off, volsize;
	size_t size, resid;
	char *addr;
	objset_t *os;
	int error = 0;
	int sync;
	int reading;
	int txg_sync_needed = B_FALSE;

	if (zv == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (getminor(bp->b_edev) == 0) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}

	if (zv->zv_readonly && !(bp->b_flags & B_READ)) {
		bioerror(bp, EROFS);
		biodone(bp);
		return (0);
	}

	off = ldbtob(bp->b_blkno);
	volsize = zv->zv_volsize;

	os = zv->zv_objset;
	ASSERT(os != NULL);
	sync = !(bp->b_flags & B_ASYNC) && !(zil_disable);

	bp_mapin(bp);
	addr = bp->b_un.b_addr;
	resid = bp->b_bcount;

	/*
	 * There must be no buffer changes when doing a dmu_sync() because
	 * we can't change the data whilst calculating the checksum.
	 * A better approach than a per zvol rwlock would be to lock ranges.
	 */
	reading = bp->b_flags & B_READ;
	if (reading || resid <= zvol_immediate_write_sz)
		rw_enter(&zv->zv_dslock, RW_READER);
	else
		rw_enter(&zv->zv_dslock, RW_WRITER);

	while (resid != 0 && off < volsize) {

		size = MIN(resid, 1UL << 20);	/* cap at 1MB per tx */

		if (size > volsize - off)	/* don't write past the end */
			size = volsize - off;

		if (reading) {
			error = dmu_read(os, ZVOL_OBJ, off, size, addr);
		} else {
			dmu_tx_t *tx = dmu_tx_create(os);
			dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
			error = dmu_tx_assign(tx, TXG_WAIT);
			if (error) {
				dmu_tx_abort(tx);
			} else {
				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
				if (sync) {
					/* use the ZIL to commit this write */
					if (zvol_log_write(zv, tx, off, size,
					    addr) != 0) {
						txg_sync_needed = B_TRUE;
					}
				}
				dmu_tx_commit(tx);
			}
		}
		if (error)
			break;
		off += size;
		addr += size;
		resid -= size;
	}
	rw_exit(&zv->zv_dslock);

	if ((bp->b_resid = resid) == bp->b_bcount)
		bioerror(bp, off > volsize ? EINVAL : error);

	biodone(bp);

	if (sync) {
		if (txg_sync_needed)
			txg_wait_synced(dmu_objset_pool(os), 0);
		else
			zil_commit(zv->zv_zilog, UINT64_MAX, 0);
	}

	return (0);
}
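
Each write chunk above runs one DMU transaction: declare the range, assign to
an open txg, then either abort (if never assigned) or write and commit. That
lifecycle as a standalone sketch, using only the DMU calls already present in
the example:

static int
zvol_write_chunk(objset_t *os, uint64_t off, uint64_t size, void *addr)
{
	dmu_tx_t *tx = dmu_tx_create(os);
	int error;

	dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);	/* declare the range */
	error = dmu_tx_assign(tx, TXG_WAIT);		/* join an open txg */
	if (error) {
		dmu_tx_abort(tx);	/* a tx that was never assigned must be aborted */
		return (error);
	}
	dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
	dmu_tx_commit(tx);		/* once assigned, always commit */
	return (0);
}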
Example #22
void
marustrategy(struct buf *bp)
{
    struct maru_softc *sc;
    struct disklabel *lp;
    struct partition *pp;
    int len;
    int err = ENXIO;
    m_u64 offset;
    DB("marustrategy(%p)\n", bp);
    maru_printbuf(bp);
	DB("ms:1\n");
    sc = &maru_softc[maruunit(bp->b_dev)];
    if (num_maru<1 ||
	maruunit(bp->b_dev) >= num_maru ||
	!(sc->sc_flags&MUF_INITED) ||
	!sc->sc_kapi)
	{
	err:
	DB("ms:2\n");
	    maru_berror(bp, err);
	DB("ms:3\n");
	    return;
	}
	DB("ms:4\n");
    len = bp->b_bcount;
    bp->b_resid = len;
    if (len<1)
	{
	DB("ms:5\n");
	    biodone(bp);
	DB("ms:6\n");
	    return;
	}
    DB("ms:6.1\n");
    offset = dbtob(bp->b_blkno);
    lp = sc->sc_dkdev.dk_label;
    /* the transfer must be a whole number of blocks */
    if (len % lp->d_secsize != 0)
	{
	    maru_berror(bp, EINVAL);
	    return;
	}
    
    /*
     * Do bounds checking and adjust transfer.  If there's an error,
     * the bounds check will flag that for us.
     */
    DB("ms:6.2\n");
    if (DISKPART(bp->b_dev) != RAW_PART &&
	bounds_check_with_label(bp, lp, sc->sc_flags&MUF_WLABEL) <= 0)
	{
	    biodone(bp);
	    return;
	}
    /*
     * Translate the partition-relative block number to an absolute.
     */
    DB("ms:6.3\n");
    if (DISKPART(bp->b_dev) != RAW_PART)
	{
	    pp = &sc->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)];
	    offset += pp->p_offset * lp->d_secsize;
	}
    if (bp->b_flags & B_READ)
	{
	    struct maru_message *msg;
	DB("ms:7\n");
	    msg = malloc(sizeof *msg, M_DEVBUF, M_NOWAIT);
	    if (!msg)
		goto err;
	    msg->mm_flags = MARU_READ_REQ;
	DB("ms:8\n");
	    msg->mm_id = maru_acquire_token(sc, bp);
	    msg->mm_len = len;
	    msg->mm_offset = offset;
	DB("ms:9\n");
	    if ((err = sc->sc_kapi->ka_inject(sc->sc_kapi, msg, sizeof *msg)))
		{
	DB("ms:10\n");
		    free(msg, M_DEVBUF);
		    goto err;
		}
	DB("ms:11\n");
	    sc->sc_reading++;
	    return;
	}
    else /* B_WRITE */
	{
	    struct maru_message *msg;
	DB("ms:13\n");
	    msg = malloc(sizeof *msg, M_DEVBUF, M_NOWAIT);
	    if (!msg)
		goto err;
	    msg->mm_flags = MARU_WRITE;
	    msg->mm_id = maru_acquire_token(sc, bp);
	    msg->mm_len = len;
	    msg->mm_offset = offset;
	DB("ms:14\n");
	    if ((err = sc->sc_kapi->ka_inject(sc->sc_kapi, msg, sizeof(*msg) + msg->mm_len)))
		{
	DB("ms:15\n");
		    free(msg, M_DEVBUF);
		    goto err;
		}
	DB("ms:16\n");
	    sc->sc_writing++;
	    return;
	}
	DB("ms:17\n");
}
Example #23
/*
 * Actually translate the requested transfer into one the physical driver can
 * understand.  The transfer is described by a buf and will include only one
 * physical transfer.
 */
void
cdstrategy(struct buf *bp)
{
	struct cd_softc *cd;
	int s;

	if ((cd = cdlookup(DISKUNIT(bp->b_dev))) == NULL) {
		bp->b_error = ENXIO;
		goto bad;
	}

	SC_DEBUG(cd->sc_link, SDEV_DB2, ("cdstrategy: %ld bytes @ blk %d\n",
	    bp->b_bcount, bp->b_blkno));
	/*
	 * If the device has been made invalid, error out:
	 * maybe the media changed, or no media is loaded.
	 */
	if ((cd->sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
		bp->b_error = EIO;
		goto bad;
	}
	/*
	 * The transfer must be a whole number of blocks.
	 */
	if ((bp->b_bcount % cd->sc_dk.dk_label->d_secsize) != 0) {
		bp->b_error = EINVAL;
		goto bad;
	}
	/*
	 * If it's a null transfer, return immediately
	 */
	if (bp->b_bcount == 0)
		goto done;

	/*
	 * Do bounds checking, adjust transfer. if error, process.
	 * If end of partition, just return.
	 */
	if (bounds_check_with_label(bp, cd->sc_dk.dk_label,
	    (cd->flags & (CDF_WLABEL|CDF_LABELLING)) != 0) <= 0)
		goto done;

	s = splbio();

	/*
	 * Place it in the queue of disk activities for this disk
	 */
	disksort(&cd->buf_queue, bp);

	/*
	 * Tell the device to get going on the transfer if it's
	 * not doing anything, otherwise just wait for completion
	 */
	cdstart(cd);

	device_unref(&cd->sc_dev);
	splx(s);
	return;

bad:
	bp->b_flags |= B_ERROR;
done:
	/*
	 * Correctly set the buf to indicate a completed xfer
	 */
	bp->b_resid = bp->b_bcount;
	s = splbio();
	biodone(bp);
	splx(s);
	if (cd != NULL)
		device_unref(&cd->sc_dev);
}
Example #24
int
puffs_doio(struct vnode *vp, struct bio *bio, struct thread *td)
{
	struct buf *bp = bio->bio_buf;
	struct ucred *cred;
	struct uio *uiop;
	struct uio uio;
	struct iovec io;
	size_t n;
	int error = 0;

	if (td != NULL && td->td_proc != NULL)
		cred = td->td_proc->p_ucred;
	else
		cred = proc0.p_ucred;

	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_td = td;

	/*
	 * clear B_ERROR and B_INVAL state prior to initiating the I/O.  We
	 * do this here so we do not have to do it in all the code that
	 * calls us.
	 */
	bp->b_flags &= ~(B_ERROR | B_INVAL);

	KASSERT(bp->b_cmd != BUF_CMD_DONE,
	    ("puffs_doio: bp %p already marked done!", bp));

	if (bp->b_cmd == BUF_CMD_READ) {
		io.iov_len = uiop->uio_resid = (size_t)bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;

		uiop->uio_offset = bio->bio_offset;
		error = puffs_directread(vp, uiop, 0, cred);
		if (error == 0 && uiop->uio_resid) {
			n = (size_t)bp->b_bcount - uiop->uio_resid;
			bzero(bp->b_data + n, bp->b_bcount - n);
			uiop->uio_resid = 0;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
		bp->b_resid = uiop->uio_resid;
	} else {
		KKASSERT(bp->b_cmd == BUF_CMD_WRITE);
		if (bio->bio_offset + bp->b_dirtyend > puffs_meta_getsize(vp))
			bp->b_dirtyend = puffs_meta_getsize(vp) -
			    bio->bio_offset;

		if (bp->b_dirtyend > bp->b_dirtyoff) {
			io.iov_len = uiop->uio_resid = bp->b_dirtyend
			    - bp->b_dirtyoff;
			uiop->uio_offset = bio->bio_offset + bp->b_dirtyoff;
			io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
			uiop->uio_rw = UIO_WRITE;

			error = puffs_directwrite(vp, uiop, 0, cred);

			if (error == EINTR
			    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
				crit_enter();
				bp->b_flags &= ~(B_INVAL|B_NOCACHE);
				if ((bp->b_flags & B_PAGING) == 0)
					bdirty(bp);
				if (error)
					bp->b_flags |= B_EINTR;
				crit_exit();
			} else {
				if (error) {
					bp->b_flags |= B_ERROR;
					bp->b_error = error;
				}
				bp->b_dirtyoff = bp->b_dirtyend = 0;
			}
			bp->b_resid = uiop->uio_resid;
		} else {
			bp->b_resid = 0;
		}
	}

	biodone(bio);
	KKASSERT(bp->b_cmd == BUF_CMD_DONE);
	if (bp->b_flags & B_EINTR)
		return (EINTR);
	if (bp->b_flags & B_ERROR)
		return (bp->b_error ? bp->b_error : EIO);
	return (0);
}
Example #25
/*
 * cdstart looks to see if there is a buf waiting for the device
 * and that the device is not already busy. If both are true,
 * it dequeues the buf and creates a scsi command to perform the
 * transfer in the buf. The transfer request will call scsi_done
 * on completion, which will in turn call this routine again
 * so that the next queued transfer is performed.
 * The bufs are queued by the strategy routine (cdstrategy)
 *
 * This routine is also called after other non-queued requests
 * have been made of the scsi driver, to ensure that the queue
 * continues to be drained.
 *
 * must be called at the correct (highish) spl level
 * cdstart() is called at splbio from cdstrategy, cdrestart and scsi_done
 */
void
cdstart(void *v)
{
	struct cd_softc *cd = v;
	struct scsi_link *sc_link = cd->sc_link;
	struct buf *bp = 0;
	struct buf *dp;
	struct scsi_rw_big cmd_big;
	struct scsi_rw cmd_small;
	struct scsi_generic *cmdp;
	int blkno, nblks, cmdlen, error;
	struct partition *p;

	splassert(IPL_BIO);

	SC_DEBUG(sc_link, SDEV_DB2, ("cdstart\n"));
	/*
	 * Check if the device has room for another command
	 */
	while (sc_link->openings > 0) {
		/*
		 * There is excess capacity, but a special command waits.
		 * It'll need the adapter as soon as we clear out of the
		 * way and let it run (user level wait).
		 */
		if (sc_link->flags & SDEV_WAITING) {
			sc_link->flags &= ~SDEV_WAITING;
			wakeup((caddr_t)sc_link);
			return;
		}

		/*
		 * See if there is a buf with work for us to do..
		 */
		dp = &cd->buf_queue;
		if ((bp = dp->b_actf) == NULL)	/* yes, an assign */
			return;
		dp->b_actf = bp->b_actf;

		/*
		 * If the device has become invalid, abort all the
		 * reads and writes until all files have been closed and
		 * re-opened
		 */
		if ((sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
			bp->b_error = EIO;
			bp->b_flags |= B_ERROR;
			bp->b_resid = bp->b_bcount;
			biodone(bp);
			continue;
		}

		/*
		 * We have a buf, now we should make a command
		 *
		 * First, translate the block to absolute and put it in terms
		 * of the logical blocksize of the device.
		 */
		blkno =
		    bp->b_blkno / (cd->sc_dk.dk_label->d_secsize / DEV_BSIZE);
		p = &cd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
		blkno += DL_GETPOFFSET(p);
		nblks = howmany(bp->b_bcount, cd->sc_dk.dk_label->d_secsize);

		/*
		 *  Fill out the scsi command.  If the transfer will
		 *  fit in a "small" cdb, use it.
		 */
		if (!(sc_link->flags & SDEV_ATAPI) &&
		    !(sc_link->quirks & SDEV_ONLYBIG) && 
		    ((blkno & 0x1fffff) == blkno) &&
		    ((nblks & 0xff) == nblks)) {
			/*
			 * We can fit in a small cdb.
			 */
			bzero(&cmd_small, sizeof(cmd_small));
			cmd_small.opcode = (bp->b_flags & B_READ) ?
			    READ_COMMAND : WRITE_COMMAND;
			_lto3b(blkno, cmd_small.addr);
			cmd_small.length = nblks & 0xff;
			cmdlen = sizeof(cmd_small);
			cmdp = (struct scsi_generic *)&cmd_small;
		} else {
			/*
			 * Need a large cdb.
			 */
			bzero(&cmd_big, sizeof(cmd_big));
			cmd_big.opcode = (bp->b_flags & B_READ) ?
			    READ_BIG : WRITE_BIG;
			_lto4b(blkno, cmd_big.addr);
			_lto2b(nblks, cmd_big.length);
			cmdlen = sizeof(cmd_big);
			cmdp = (struct scsi_generic *)&cmd_big;
		}

		/* Instrumentation. */
		disk_busy(&cd->sc_dk);

		/*
		 * Call the routine that chats with the adapter.
		 * Note: we cannot sleep as we may be an interrupt
		 */
		error = scsi_scsi_cmd(sc_link, cmdp, cmdlen,
		    (u_char *) bp->b_data, bp->b_bcount, SCSI_RETRIES, 30000,
		    bp, SCSI_NOSLEEP | ((bp->b_flags & B_READ) ? SCSI_DATA_IN :
		    SCSI_DATA_OUT));
		switch (error) {
		case 0:
			timeout_del(&cd->sc_timeout);
			break;
		case EAGAIN:
			/*
			 * The device can't start another i/o. Try again later.
			 */
			dp->b_actf = bp;
			disk_unbusy(&cd->sc_dk, 0, 0);
			timeout_add(&cd->sc_timeout, 1);
			return;
		default:
			disk_unbusy(&cd->sc_dk, 0, 0);
			printf("%s: not queued, error %d\n",
			    cd->sc_dev.dv_xname, error);
			break;
		}
	}
}
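
The cdb-size decision above relies on the 6-byte SCSI READ/WRITE format,
which carries a 21-bit LBA and an 8-bit transfer count; anything larger
needs the 10-byte form. The test in isolation (hypothetical helper):

static int
fits_small_cdb(int blkno, int nblks)
{
	return ((blkno & 0x1fffff) == blkno &&	/* LBA fits in 21 bits */
	    (nblks & 0xff) == nblks);		/* count fits in 8 bits */
}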
Example #26
/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
iodone(struct buf *bp)
{
	ASSERT(SEMA_HELD(&bp->b_sem));
	(void) biodone(bp);
}
Example #27
static void
htif_blk_task(void *arg)
{
	struct htif_blk_request req __aligned(HTIF_ALIGN);
	struct htif_blk_softc *sc;
	uint64_t req_paddr;
	struct bio *bp;
	uint64_t paddr;
	uint64_t resp;
	uint64_t cmd;
	int i;

	sc = (struct htif_blk_softc *)arg;

	while (1) {
		HTIF_BLK_LOCK(sc);
		do {
			bp = bioq_takefirst(&sc->bio_queue);
			if (bp == NULL)
				msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0);
		} while (bp == NULL);
		HTIF_BLK_UNLOCK(sc);

		if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
			HTIF_BLK_LOCK(sc);

			rmb();
			req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize);
			req.size = bp->bio_bcount;
			paddr = vtophys(bp->bio_data);
			KASSERT(paddr != 0, ("paddr is 0"));
			req.addr = paddr;
			sc->curtag++;
			req.tag = sc->curtag;

			cmd = sc->index;
			cmd <<= HTIF_DEV_ID_SHIFT;
			if (bp->bio_cmd == BIO_READ)
				cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT);
			else
				cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT);
			req_paddr = vtophys(&req);
			KASSERT(req_paddr != 0, ("req_paddr is 0"));
			cmd |= req_paddr;

			sc->cmd_done = 0;
			resp = htif_command(cmd);
			htif_blk_intr(sc, resp);

			/* Wait for interrupt */
			i = 0;
			while (sc->cmd_done == 0) {
				msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2);

				if (i++ > 2) {
					/* TODO: try to re-issue operation on timeout ? */
					bp->bio_error = EIO;
					bp->bio_flags |= BIO_ERROR;
					disk_err(bp, "hard error", -1, 1);
					break;
				}
			}
			HTIF_BLK_UNLOCK(sc);

			biodone(bp);
		} else {
			printf("unknown op %d\n", bp->bio_cmd);
		}
	}
}
Example #28
void
sd_buf_done(struct scsi_xfer *xs)
{
	struct sd_softc *sc = xs->sc_link->device_softc;
	struct buf *bp = xs->cookie;
	int error, s;

	switch (xs->error) {
	case XS_NOERROR:
		bp->b_error = 0;
		bp->b_resid = xs->resid;
		break;

	case XS_NO_CCB:
		/* The adapter is busy, requeue the buf and try it later. */
		disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid,
		    bp->b_flags & B_READ);
		bufq_requeue(&sc->sc_bufq, bp);
		scsi_xs_put(xs);
		SET(sc->flags, SDF_WAITING);
		timeout_add(&sc->sc_timeout, 1);
		return;

	case XS_SENSE:
	case XS_SHORTSENSE:
#ifdef SCSIDEBUG
		scsi_sense_print_debug(xs);
#endif
		error = sd_interpret_sense(xs);
		if (error == 0) {
			bp->b_error = 0;
			bp->b_resid = xs->resid;
			break;
		}
		if (error != ERESTART) {
			bp->b_error = error;
			xs->retries = 0;
		}
		goto retry;

	case XS_BUSY:
		if (xs->retries) {
			if (scsi_delay(xs, 1) != ERESTART)
				xs->retries = 0;
		}
		goto retry;

	case XS_TIMEOUT:
retry:
		if (xs->retries--) {
			scsi_xs_exec(xs);
			return;
		}
		/* FALLTHROUGH */

	default:
		if (bp->b_error == 0)
			bp->b_error = EIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		break;
	}

	disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid,
	    bp->b_flags & B_READ);

	s = splbio();
	biodone(bp);
	splx(s);
	scsi_xs_put(xs);
}
Example #29
File: mcd.c Project: MarginC/kame
static void
mcd_doread(struct mcd_softc *sc, int state, struct mcd_mbx *mbxin)
{
	struct mcd_mbx *mbx;
	struct bio *bp;
	int rm, i, k;
	struct mcd_read2 rbuf;
	int blknum;
	caddr_t	addr;

	mbx = (state!=MCD_S_BEGIN) ? sc->ch_mbxsave : mbxin;
	bp = mbx->bp;

loop:
	switch (state) {
	case MCD_S_BEGIN:
		mbx = sc->ch_mbxsave = mbxin;

	case MCD_S_BEGIN1:
retry_status:
		/* get status */
		MCD_WRITE(sc, MCD_REG_COMMAND, MCD_CMDGETSTAT);
		mbx->count = RDELAY_WAITSTAT;
		sc->ch_state = MCD_S_WAITSTAT;
		sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
		return;
	case MCD_S_WAITSTAT:
		sc->ch_state = MCD_S_WAITSTAT;
		untimeout(mcd_timeout,(caddr_t)sc, sc->ch);
		if (mbx->count-- >= 0) {
			if (MCD_READ(sc, MCD_FLAGS) & MFL_STATUS_NOT_AVAIL) {
				sc->ch_state = MCD_S_WAITSTAT;
				timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
				return;
			}
			sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
			if (sc->data.status & MCD_ST_CMDCHECK)
				goto retry_status;
			if (mcd_setflags(sc) < 0)
				goto changed;
			MCD_TRACE("got WAITSTAT delay=%d\n",
				RDELAY_WAITSTAT-mbx->count);
			/* reject, if audio active */
			if (sc->data.status & MCDAUDIOBSY) {
				device_printf(sc->dev, "audio is active\n");
				goto readerr;
			}

retry_mode:
			/* to check for raw/cooked mode */
			if (sc->data.flags & MCDREADRAW) {
				rm = MCD_MD_RAW;
				mbx->sz = MCDRBLK;
			} else {
				rm = MCD_MD_COOKED;
				mbx->sz = sc->data.blksize;
			}

			if (rm == sc->data.curr_mode)
				goto modedone;

			mbx->count = RDELAY_WAITMODE;

			sc->data.curr_mode = MCD_MD_UNKNOWN;
			mbx->mode = rm;
			MCD_WRITE(sc, MCD_REG_COMMAND, MCD_CMDSETMODE);
			MCD_WRITE(sc, MCD_REG_COMMAND, rm);

			sc->ch_state = MCD_S_WAITMODE;
			sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
			return;
		} else {
			device_printf(sc->dev, "timeout getstatus\n");
			goto readerr;
		}

	case MCD_S_WAITMODE:
		sc->ch_state = MCD_S_WAITMODE;
		untimeout(mcd_timeout, (caddr_t)sc, sc->ch);
		if (mbx->count-- < 0) {
			device_printf(sc->dev, "timeout set mode\n");
			goto readerr;
		}
		if (MCD_READ(sc, MCD_FLAGS) & MFL_STATUS_NOT_AVAIL) {
			sc->ch_state = MCD_S_WAITMODE;
			sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100);
			return;
		}
		sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
		if (sc->data.status & MCD_ST_CMDCHECK) {
			sc->data.curr_mode = MCD_MD_UNKNOWN;
			goto retry_mode;
		}
		if (mcd_setflags(sc) < 0)
			goto changed;
		sc->data.curr_mode = mbx->mode;
		MCD_TRACE("got WAITMODE delay=%d\n",
			RDELAY_WAITMODE-mbx->count);
modedone:
		/* for first block */
		mbx->nblk = (bp->bio_bcount + (mbx->sz-1)) / mbx->sz;
		mbx->skip = 0;

nextblock:
		blknum 	= (bp->bio_blkno / (mbx->sz/DEV_BSIZE))
			+ mbx->skip/mbx->sz;

		MCD_TRACE("mcd_doread: read blknum=%d for bp=%p\n",
			blknum, bp);

		/* build parameter block */
		hsg2msf(blknum,rbuf.start_msf);
retry_read:
		/* send the read command */
		critical_enter();
		MCD_WRITE(sc, MCD_REG_COMMAND, sc->data.read_command);
		MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[0]);
		MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[1]);
		MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[2]);
		MCD_WRITE(sc, MCD_REG_COMMAND, 0);
		MCD_WRITE(sc, MCD_REG_COMMAND, 0);
		MCD_WRITE(sc, MCD_REG_COMMAND, 1);
		critical_exit();

		/* Spin briefly (<= 2ms) to avoid missing next block */
		for (i = 0; i < 20; i++) {
			k = MCD_READ(sc, MCD_FLAGS);
			if (!(k & MFL_DATA_NOT_AVAIL))
				goto got_it;
			DELAY(100);
		}

		mbx->count = RDELAY_WAITREAD;
		sc->ch_state = MCD_S_WAITREAD;
		sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
		return;
	case MCD_S_WAITREAD:
		sc->ch_state = MCD_S_WAITREAD;
		untimeout(mcd_timeout, (caddr_t)sc, sc->ch);
		if (mbx->count-- > 0) {
			k = MCD_READ(sc, MCD_FLAGS);
			if (!(k & MFL_DATA_NOT_AVAIL)) { /* XXX */
				MCD_TRACE("got data delay=%d\n",
					RDELAY_WAITREAD-mbx->count);
			got_it:
				/* data is ready */
				addr	= bp->bio_data + mbx->skip;

				MCD_WRITE(sc, MCD_REG_CTL2,0x04);	/* XXX */
				for (i=0; i<mbx->sz; i++)
					*addr++ = MCD_READ(sc, MCD_REG_RDATA);
				MCD_WRITE(sc, MCD_REG_CTL2,0x0c);	/* XXX */

				k = MCD_READ(sc, MCD_FLAGS);
				/* If we still have some junk, read it too */
				if (!(k & MFL_DATA_NOT_AVAIL)) {
					MCD_WRITE(sc, MCD_REG_CTL2, 0x04);       /* XXX */
					(void)MCD_READ(sc, MCD_REG_RDATA);
					(void)MCD_READ(sc, MCD_REG_RDATA);
					MCD_WRITE(sc, MCD_REG_CTL2, 0x0c);       /* XXX */
				}

				if (--mbx->nblk > 0) {
					mbx->skip += mbx->sz;
					goto nextblock;
				}

				/* return buffer */
				bp->bio_resid = 0;
				biodone(bp);

				sc->data.flags &= ~(MCDMBXBSY|MCDREADRAW);
				mcd_start(sc);
				return;
			}
			if (!(k & MFL_STATUS_NOT_AVAIL)) {
				sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
				if (sc->data.status & MCD_ST_CMDCHECK)
					goto retry_read;
				if (mcd_setflags(sc) < 0)
					goto changed;
			}
			sc->ch_state = MCD_S_WAITREAD;
			sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
			return;
		} else {
			device_printf(sc->dev, "timeout read data\n");
			goto readerr;
		}
	}

readerr:
	if (mbx->retry-- > 0) {
		device_printf(sc->dev, "retrying\n");
		state = MCD_S_BEGIN1;
		goto loop;
	}
harderr:
	/* invalidate the buffer */
	bp->bio_flags |= BIO_ERROR;
	bp->bio_resid = bp->bio_bcount;
	biodone(bp);

	sc->data.flags &= ~(MCDMBXBSY|MCDREADRAW);
	mcd_start(sc);
	return;

changed:
	device_printf(sc->dev, "media changed\n");
	goto harderr;

#ifdef NOTDEF
	device_printf(sc->dev, "unit timeout, resetting\n");
	MCD_WRITE(sc, MCD_REG_RESET, MCD_CMDRESET);
	DELAY(300000);
	(void)mcd_getstat(sc, 1);
	(void)mcd_getstat(sc, 1);
	/*sc->data.status &= ~MCDDSKCHNG; */
	sc->data.debug = 1; /* preventive set debug mode */

#endif

}
Example #30
/*
 * define the low-level requests needed to perform
 * a high-level I/O operation for a specific plex
 * 'plexno'.
 *
 * Return 0 if all subdisks involved in the
 * request are up, 1 if some subdisks are not up,
 * and -1 if the request is at least partially
 * outside the bounds of the subdisks.
 *
 * Modify the pointer *diskstart to point to the
 * end address.  On read, return on the first bad
 * subdisk, so that the caller
 * (build_read_request) can try alternatives.
 *
 * On entry to this routine, the prq structures
 * are not assigned.  The assignment is performed
 * by expandrq().  Strictly speaking, the elements
 * rqe->sdno of all entries should be set to -1,
 * since 0 (from bzero) is a valid subdisk number.
 * We avoid this problem by initializing the ones
 * we use, and not looking at the others (index >=
 * prq->requests).
 */
enum requeststatus
bre5(struct request *rq,
    int plexno,
    daddr_t * diskaddr,
    daddr_t diskend)
{
    struct metrics m;					    /* most of the information */
    struct sd *sd;
    struct plex *plex;
    struct buf *bp;					    /* user's bp */
    struct rqgroup *rqg;				    /* the request group that we will create */
    struct rqelement *rqe;				    /* point to this request information */
    int rsectors;					    /* sectors remaining in this stripe */
    int mysdno;						    /* another sd index in loops */
    int rqno;						    /* request number */

    rqg = NULL;						    /* shut up, damn compiler */
    m.diskstart = *diskaddr;				    /* start of transfer */
    bp = rq->bp;					    /* buffer pointer */
    plex = &PLEX[plexno];				    /* point to the plex */


    while (*diskaddr < diskend) {			    /* until we get it all sorted out */
	if (*diskaddr >= plex->length)			    /* beyond the end of the plex */
	    return REQUEST_EOF;				    /* can't continue */

	m.badsdno = -1;					    /* no bad subdisk yet */

	/* Part A: Define the request */
	/*
	 * First, calculate some sizes:
	 * The offset of the start address from
	 * the start of the stripe.
	 */
	m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));

	/*
	 * The plex-relative address of the
	 * start of the stripe.
	 */
	m.stripebase = *diskaddr - m.stripeoffset;

	/* subdisk containing the parity stripe */
	if (plex->organization == plex_raid5)
	    m.psdno = plex->subdisks - 1
		- (*diskaddr / (plex->stripesize * (plex->subdisks - 1)))
		% plex->subdisks;
	else						    /* RAID-4 */
	    m.psdno = plex->subdisks - 1;

	/*
	 * The number of the subdisk in which
	 * the start is located.
	 */
	m.firstsdno = m.stripeoffset / plex->stripesize;
	if (m.firstsdno >= m.psdno)			    /* at or past parity sd */
	    m.firstsdno++;				    /* increment it */

	/*
	 * The offset from the beginning of
	 * the stripe on this subdisk.
	 */
	m.initoffset = m.stripeoffset % plex->stripesize;

	/* The offset of the stripe start relative to this subdisk */
	m.sdbase = m.stripebase / (plex->subdisks - 1);

	m.useroffset = *diskaddr - m.diskstart;		    /* The offset of the start in the user buffer */

	/*
	 * The number of sectors to transfer in the
	 * current (first) subdisk.
	 */
	m.initlen = min(diskend - *diskaddr,		    /* the amount remaining to transfer */
	    plex->stripesize - m.initoffset);		    /* and the amount left in this block */

	/*
	 * The number of sectors to transfer in this stripe
	 * is the minimum of the amount remaining to transfer
	 * and the amount left in this stripe.
	 */
	m.stripesectors = min(diskend - *diskaddr,
	    plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);

	/* The number of data subdisks involved in this request */
	m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;

	/* Part B: decide what kind of transfer this will be.

	 * start and end addresses of the transfer in
	 * the current block.
	 *
	 * There are a number of different kinds of
	 * transfer, each of which relates to a
	 * specific subdisk:
	 *
	 * 1. Normal read.  All participating subdisks
	 *    are up, and the transfer can be made
	 *    directly to the user buffer.  The bounds
	 *    of the transfer are described by
	 *    m.dataoffset and m.datalen.  We have
	 *    already calculated m.initoffset and
	 *    m.initlen, which define the parameters
	 *    for the first data block.
	 *
	 * 2. Recovery read.  One participating
	 *    subdisk is down.  To recover data, all
	 *    the other subdisks, including the parity
	 *    subdisk, must be read.  The data is
	 *    recovered by exclusive-oring all the
	 *    other blocks.  The bounds of the
	 *    transfer are described by m.groupoffset
	 *    and m.grouplen.
	 *
	 * 3. A read request may request reading both
	 *    available data (normal read) and
	 *    non-available data (recovery read).
	 *    This can be a problem if the address
	 *    ranges of the two reads do not coincide:
	 *    in this case, the normal read needs to
	 *    be extended to cover the address range
	 *    of the recovery read, and must thus be
	 *    performed out of malloced memory.
	 *
	 * 4. Normal write.  All the participating
	 *    subdisks are up.  The bounds of the
	 *    transfer are described by m.dataoffset
	 *    and m.datalen.  Since these values
	 *    differ for each block, we calculate the
	 *    bounds for the parity block
	 *    independently as the maximum of the
	 *    individual blocks and store these values
	 *    in m.writeoffset and m.writelen.  This
	 *    write proceeds in four phases:
	 *
	 *    i.  Read the old contents of each block
	 *        and the parity block.
	 *    ii.  ``Remove'' the old contents from
	 *         the parity block with exclusive or.
	 *    iii. ``Insert'' the new contents of the
	 *          block in the parity block, again
	 *          with exclusive or.
	 *
	 *    iv.  Write the new contents of the data
	 *         blocks and the parity block.  The data
	 *         block transfers can be made directly from
	 *         the user buffer.
	 *
	 * 5. Degraded write where the data block is
	 *    not available.  The bounds of the
	 *    transfer are described by m.groupoffset
	 *    and m.grouplen. This requires the
	 *    following steps:
	 *
	 *    i.  Read in all the other data blocks,
	 *        excluding the parity block.
	 *
	 *    ii.  Recreate the parity block from the
	 *         other data blocks and the data to be
	 *         written.
	 *
	 *    iii. Write the parity block.
	 *
	 * 6. Parityless write, a write where the
	 *    parity block is not available.  This is
	 *    in fact the simplest: just write the
	 *    data blocks.  This can proceed directly
	 *    from the user buffer.  The bounds of the
	 *    transfer are described by m.dataoffset
	 *    and m.datalen.
	 *
	 * 7. Combination of degraded data block write
	 *    and normal write.  In this case the
	 *    address ranges of the reads may also
	 *    need to be extended to cover all
	 *    participating blocks.
	 *
	 * All requests in a group transfer transfer
	 * the same address range relative to their
	 * subdisk.  The individual transfers may
	 * vary, but since our group of requests is
	 * all in a single slice, we can define a
	 * range in which they all fall.
	 *
	 * In the following code section, we determine
	 * which kind of transfer we will perform.  If
	 * there is a group transfer, we also decide
	 * its bounds relative to the subdisks.  At
	 * the end, we have the following values:
	 *
	 *  m.flags indicates the kinds of transfers
	 *    we will perform.
	 *  m.initoffset indicates the offset of the
	 *    beginning of any data operation relative
	 *    to the beginning of the stripe base.
	 *  m.initlen specifies the length of any data
	 *    operation.
	 *  m.dataoffset contains the same value as
	 *    m.initoffset.
	 *  m.datalen contains the same value as
	 *    m.initlen.  Initially dataoffset and
	 *    datalen describe the parameters for the
	 *    first data block; while building the data
	 *    block requests, they are updated for each
	 *    block.
	 *  m.groupoffset indicates the offset of any
	 *    group operation relative to the beginning
	 *    of the stripe base.
	 *  m.grouplen specifies the length of any
	 *    group operation.
	 *  m.writeoffset indicates the offset of a
	 *    normal write relative to the beginning of
	 *    the stripe base.  This value differs from
	 *    m.dataoffset in that it applies to the
	 *    entire operation, and not just the first
	 *    block.
	 *  m.writelen specifies the total span of a
	 *    normal write operation.  writeoffset and
	 *    writelen are used to define the parity
	 *    block.
	 */
	m.groupoffset = 0;				    /* assume no group... */
	m.grouplen = 0;					    /* until we know we have one */
	m.writeoffset = m.initoffset;			    /* start offset of transfer */
	m.writelen = 0;					    /* nothing to write yet */
	m.flags = 0;					    /* no flags yet */
	rsectors = m.stripesectors;			    /* remaining sectors to examine */
	m.dataoffset = m.initoffset;			    /* start at the beginning of the transfer */
	m.datalen = m.initlen;

	if (m.sdcount > 1) {
	    plex->multiblock++;				    /* more than one block for the request */
	    /*
	     * If we have two transfers that don't overlap,
	     * (one at the end of the first block, the other
	     * at the beginning of the second block),
	     * it's cheaper to split them.
	     */
	    if (rsectors < plex->stripesize) {
		m.sdcount = 1;				    /* just one subdisk */
		m.stripesectors = m.initlen;		    /* and just this many sectors */
		rsectors = m.initlen;			    /* and in the loop counter */
	    }
	}
	if (SD[plex->sdnos[m.psdno]].state < sd_reborn)	    /* is our parity subdisk down? */
	    m.badsdno = m.psdno;			    /* note that it's down */
	if (bp->b_flags & B_READ) {			    /* read operation */
	    for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
		if (mysdno == m.psdno)			    /* ignore parity on read */
		    mysdno++;
		if (mysdno == plex->subdisks)		    /* wraparound */
		    mysdno = 0;
		if (mysdno == m.psdno)			    /* parity, */
		    mysdno++;				    /* we've given already */

		if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
		    if (m.badsdno >= 0)			    /* we had one already, */
			return REQUEST_DOWN;		    /* we can't take a second */
		    m.badsdno = mysdno;			    /* got the first */
		    m.groupoffset = m.dataoffset;	    /* define the bounds */
		    m.grouplen = m.datalen;
		    m.flags |= XFR_RECOVERY_READ;	    /* we need recovery */
		    plex->recovered_reads++;		    /* count another one */
		} else
		    m.flags |= XFR_NORMAL_READ;		    /* normal read */

		/* Update the pointers for the next block */
		m.dataoffset = 0;			    /* back to the start of the stripe */
		rsectors -= m.datalen;			    /* remaining sectors to examine */
		m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
	    }
	} else {					    /* write operation */
	    for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
		if (mysdno == m.psdno)			    /* parity stripe, we've dealt with that */
		    mysdno++;
		if (mysdno == plex->subdisks)		    /* wraparound */
		    mysdno = 0;
		if (mysdno == m.psdno)			    /* parity, */
		    mysdno++;				    /* we've given already */

		sd = &SD[plex->sdnos[mysdno]];
		if (sd->state != sd_up) {
		    enum requeststatus s;

		    s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
		    if (s && (m.badsdno >= 0)) {	    /* second bad disk, */
			int sdno;
			/*
			 * If the parity disk is down, there's
			 * no recovery.  We make all involved
			 * subdisks stale.  Otherwise, we
			 * should be able to recover, but it's
			 * like pulling teeth.  Fix it later.
			 */
			for (sdno = 0; sdno < m.sdcount; sdno++) {
			    struct sd *sd = &SD[plex->sdnos[sdno]];
			    if (sd->state >= sd_reborn)	    /* sort of up, */
				set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
			}
			return s;			    /* and crap out */
		    }
		    m.badsdno = mysdno;			    /* note which one is bad */
		    m.flags |= XFR_DEGRADED_WRITE;	    /* we need recovery */
		    plex->degraded_writes++;		    /* count another one */
		    m.groupoffset = m.dataoffset;	    /* define the bounds */
		    m.grouplen = m.datalen;
		} else {
		    m.flags |= XFR_NORMAL_WRITE;	    /* normal write operation */
		    if (m.writeoffset > m.dataoffset) {	    /* move write operation lower */
			m.writelen = max(m.writeoffset + m.writelen,
			    m.dataoffset + m.datalen)
			    - m.dataoffset;
			m.writeoffset = m.dataoffset;
		    } else
			m.writelen = max(m.writeoffset + m.writelen,
			    m.dataoffset + m.datalen)
			    - m.writeoffset;
		}

		/* Update the pointers for the next block */
		m.dataoffset = 0;			    /* back to the start of the stripe */
		rsectors -= m.datalen;			    /* remaining sectors to examine */
		m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
	    }
	    if (m.badsdno == m.psdno) {			    /* got a bad parity block, */
		struct sd *psd = &SD[plex->sdnos[m.psdno]];

		if (psd->state == sd_down)
		    set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
		else if (psd->state == sd_crashed)
		    set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
		m.flags &= ~XFR_NORMAL_WRITE;		    /* this write isn't normal, */
		m.flags |= XFR_PARITYLESS_WRITE;	    /* it's parityless */
		plex->parityless_writes++;		    /* count another one */
	    }
	}

	/* reset the initial transfer values */
	m.dataoffset = m.initoffset;			    /* start at the beginning of the transfer */
	m.datalen = m.initlen;

	/* decide how many requests we need */
	if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))
	    /* doing a recovery read or degraded write, */
	    m.rqcount = plex->subdisks;			    /* all subdisks */
	else if (m.flags & XFR_NORMAL_WRITE)		    /* normal write, */
	    m.rqcount = m.sdcount + 1;			    /* all data blocks and the parity block */
	else						    /* parityless write or normal read */
	    m.rqcount = m.sdcount;			    /* just the data blocks */

	/* Part C: build the requests */
	rqg = allocrqg(rq, m.rqcount);			    /* get a request group */
	if (rqg == NULL) {				    /* malloc failed */
	    bp->b_flags |= B_ERROR;
	    bp->b_error = ENOMEM;
	    biodone(bp);
	    return REQUEST_ENOMEM;
	}
	rqg->plexno = plexno;
	rqg->flags = m.flags;
	rqno = 0;					    /* index in the request group */

	/* 1: PARITY BLOCK */
	/*
	 * Are we performing an operation which requires parity?  In that case,
	 * work out the parameters and define the parity block.
	 * XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
	 */
	if (m.flags & XFR_PARITYOP) {			    /* need parity */
	    rqe = &rqg->rqe[rqno];			    /* point to element */
	    sd = &SD[plex->sdnos[m.psdno]];		    /* the subdisk in question */
	    rqe->rqg = rqg;				    /* point back to group */
	    rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
	    &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE);	    /* transfer flags without data op stuff */
	    setrqebounds(rqe, &m);			    /* set up the bounds of the transfer */
	    rqe->sdno = sd->sdno;			    /* subdisk number */
	    rqe->driveno = sd->driveno;
	    if (build_rq_buffer(rqe, plex))		    /* build the buffer */
		return REQUEST_ENOMEM;			    /* can't do it */
	    rqe->b.b_flags |= B_READ;			    /* we must read first */
	    m.sdcount++;				    /* adjust the subdisk count */
	    rqno++;					    /* and point to the next request */
	}
	/*
	 * 2: DATA BLOCKS
	 * Now build up requests for the blocks required
	 * for individual transfers
	 */
	for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
	    if (mysdno == m.psdno)			    /* parity, */
		mysdno++;				    /* we've given already */
	    if (mysdno == plex->subdisks)		    /* got to the end, */
		mysdno = 0;				    /* wrap around */
	    if (mysdno == m.psdno)			    /* parity, */
		mysdno++;				    /* we've given already */

	    rqe = &rqg->rqe[rqno];			    /* point to element */
	    sd = &SD[plex->sdnos[mysdno]];		    /* the subdisk in question */
	    rqe->rqg = rqg;				    /* point to group */
	    if (m.flags & XFR_NEEDS_MALLOC)		    /* we need a malloced buffer first */
		rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
	    else
		rqe->flags = m.flags | XFR_DATA_BLOCK;	    /* transfer flags */
	    if (mysdno == m.badsdno) {			    /* this is the bad subdisk */
		rqg->badsdno = rqno;			    /* note which one */
		rqe->flags |= XFR_BAD_SUBDISK;		    /* note that it's dead */
		/*
		 * we can't read or write from/to it,
		 * but we don't need to malloc
		 */
		rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
	    }
	    setrqebounds(rqe, &m);			    /* set up the bounds of the transfer */
	    rqe->useroffset = m.useroffset;		    /* offset in user buffer */
	    rqe->sdno = sd->sdno;			    /* subdisk number */
	    rqe->driveno = sd->driveno;
	    if (build_rq_buffer(rqe, plex))		    /* build the buffer */
		return REQUEST_ENOMEM;			    /* can't do it */
	    if ((m.flags & XFR_PARITYOP)		    /* parity operation, */
	    &&((m.flags & XFR_BAD_SUBDISK) == 0))	    /* and not the bad subdisk, */
		rqe->b.b_flags |= B_READ;		    /* we must read first */

	    /* Now update pointers for the next block */
	    *diskaddr += m.datalen;			    /* skip past what we've done */
	    m.stripesectors -= m.datalen;		    /* deduct from what's left */
	    m.useroffset += m.datalen;			    /* and move on in the user buffer */
	    m.datalen = min(m.stripesectors, plex->stripesize);	/* and recalculate */
	    m.dataoffset = 0;				    /* start at the beginning of next block */
	}

	/*
	 * 3: REMAINING BLOCKS FOR RECOVERY
	 * Finally, if we have a recovery operation, build
	 * up transfers for the other subdisks.  Follow the
	 * subdisks around until we get to where we started.
	 * These requests use only the group parameters.
	 */
	if ((rqno < m.rqcount)				    /* haven't done them all already */
	&&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
	    for (; rqno < m.rqcount; rqno++, mysdno++) {
		if (mysdno == m.psdno)			    /* parity, */
		    mysdno++;				    /* we've given already */
		if (mysdno == plex->subdisks)		    /* got to the end, */
		    mysdno = 0;				    /* wrap around */
		if (mysdno == m.psdno)			    /* parity, */
		    mysdno++;				    /* we've given already */

		rqe = &rqg->rqe[rqno];			    /* point to element */
		sd = &SD[plex->sdnos[mysdno]];		    /* the subdisk in question */
		rqe->rqg = rqg;				    /* point to group */

		rqe->sdoffset = m.sdbase + m.groupoffset;   /* start of transfer */
		rqe->dataoffset = 0;			    /* for tidiness' sake */
		rqe->groupoffset = 0;			    /* group starts at the beginning */
		rqe->datalen = 0;
		rqe->grouplen = m.grouplen;
		rqe->buflen = m.grouplen;
		rqe->flags = (m.flags | XFR_MALLOCED)	    /* transfer flags without data op stuff */
		&~XFR_DATAOP;
		rqe->sdno = sd->sdno;			    /* subdisk number */
		rqe->driveno = sd->driveno;
		if (build_rq_buffer(rqe, plex))		    /* build the buffer */
		    return REQUEST_ENOMEM;		    /* can't do it */
		rqe->b.b_flags |= B_READ;		    /* we must read first */
	    }
	}
	/*
	 * We need to lock the address range before
	 * doing anything.  We don't have to be
	 * performing a recovery operation: somebody
	 * else could be doing so, and the results could
	 * influence us.  Note the fact here, we'll perform
	 * the lock in launch_requests.
	 */
	rqg->lockbase = m.stripebase;
	if (*diskaddr < diskend)			    /* didn't finish the request on this stripe */
	    plex->multistripe++;			    /* count another one */
    }
    return REQUEST_OK;
}
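
For reference, the parity-rotation arithmetic used at the top of the loop
(m.psdno) can be read as: divide the plex address by the data capacity of one
stripe to get the stripe number, then rotate the parity column as stripes
advance. A standalone sketch of that formula (hypothetical; with 4 subdisks
the parity subdisk cycles 3, 2, 1, 0, 3, ...):

static int
raid5_parity_sd(daddr_t diskaddr, int subdisks, int stripesize)
{
	daddr_t stripe = diskaddr / (stripesize * (subdisks - 1));

	return (subdisks - 1 - stripe % subdisks);
}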