static int
nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
{
    int err;

    bp->bio_driver1 = NULL;
    atomic_add_int(&ndisk->cur_depth, 1);
    err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
    if (err) {
        atomic_add_int(&ndisk->cur_depth, -1);
        if (__predict_false(bp->bio_flags & BIO_ORDERED))
            atomic_add_int(&ndisk->ordered_in_flight, -1);
        bp->bio_error = err;
        bp->bio_flags |= BIO_ERROR;
        bp->bio_resid = bp->bio_bcount;
        biodone(bp);
        return (-1);
    }

    return (0);
}
/* Main flash handling task. */
static void
opalflash_task(void *arg)
{
    struct opalflash_softc *sc;
    struct bio *bp;
    device_t dev;

    sc = arg;

    for (;;) {
        dev = sc->sc_dev;
        OPALFLASH_LOCK(sc);
        do {
            bp = bioq_first(&sc->sc_bio_queue);
            if (bp == NULL)
                msleep(sc, &sc->sc_mtx, PRIBIO, "opalflash", 0);
        } while (bp == NULL);
        bioq_remove(&sc->sc_bio_queue, bp);
        OPALFLASH_UNLOCK(sc);

        switch (bp->bio_cmd) {
        case BIO_DELETE:
            bp->bio_error = opalflash_erase(sc, bp->bio_offset,
                bp->bio_bcount);
            break;
        case BIO_READ:
            bp->bio_error = opalflash_read(sc, bp->bio_offset,
                bp->bio_data, bp->bio_bcount);
            break;
        case BIO_WRITE:
            bp->bio_error = opalflash_write(sc, bp->bio_offset,
                bp->bio_data, bp->bio_bcount);
            break;
        default:
            bp->bio_error = EINVAL;
        }
        biodone(bp);
    }
}
void
puffs_parkdone_asyncbiowrite(struct puffs_mount *pmp, struct puffs_req *preq,
    void *arg)
{
    struct puffs_vnmsg_write *write_msg = (void *)preq;
    struct buf *bp = arg;

    DPRINTF(("%s\n", __func__));

    bp->b_error = checkerr(pmp, preq->preq_rv, __func__);
    if (bp->b_error == 0) {
        if (write_msg->pvnr_resid > bp->b_bcount) {
            puffs_senderr(pmp, PUFFS_ERR_WRITE, E2BIG,
                "resid grew", preq->preq_cookie);
            bp->b_error = E2BIG;
        } else {
            bp->b_resid = write_msg->pvnr_resid;
        }
    }

    biodone(bp);
}
/*
 * Memory file system I/O.
 *
 * Trivial on the HP since buffer has already been mapped into KVA space.
 */
void
mfs_doio(struct mfsnode *mfsp, struct buf *bp)
{
    caddr_t base;
    long offset = bp->b_blkno << DEV_BSHIFT;
    int s;

    if (bp->b_bcount > mfsp->mfs_size - offset)
        bp->b_bcount = mfsp->mfs_size - offset;

    base = mfsp->mfs_baseoff + offset;
    if (bp->b_flags & B_READ)
        bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
    else
        bp->b_error = copyout(bp->b_data, base, bp->b_bcount);

    if (bp->b_error)
        bp->b_flags |= B_ERROR;
    else
        bp->b_resid = 0;

    s = splbio();
    biodone(bp);
    splx(s);
}
static void
dk_done1(struct dk_softc *dksc, struct buf *bp, bool lock)
{
    struct disk *dk = &dksc->sc_dkdev;

    if (bp->b_error != 0) {
        struct cfdriver *cd = device_cfdriver(dksc->sc_dev);

        diskerr(bp, cd->cd_name, "error", LOG_PRINTF, 0, dk->dk_label);
        printf("\n");
    }

    if (lock)
        mutex_enter(&dksc->sc_iolock);
    disk_unbusy(dk, bp->b_bcount - bp->b_resid, (bp->b_flags & B_READ));
    if (lock)
        mutex_exit(&dksc->sc_iolock);

    rnd_add_uint32(&dksc->sc_rnd_source, bp->b_rawblkno);

    biodone(bp);
}
static void
destroy_geom_disk(struct nvd_disk *ndisk)
{
    struct bio *bp;

    taskqueue_free(ndisk->tq);
    disk_destroy(ndisk->disk);

    mtx_lock(&ndisk->bioqlock);
    for (;;) {
        bp = bioq_takefirst(&ndisk->bioq);
        if (bp == NULL)
            break;
        bp->bio_error = EIO;
        bp->bio_flags |= BIO_ERROR;
        bp->bio_resid = bp->bio_bcount;

        biodone(bp);
    }
    mtx_unlock(&ndisk->bioqlock);

    mtx_destroy(&ndisk->bioqlock);
}
/* I/O on subdisk completed */
void
sdio_done(struct bio *bio)
{
    struct sdbuf *sbp;

    get_mplock();

    sbp = (struct sdbuf *) bio->bio_buf;
    if (sbp->b.b_flags & B_ERROR) {                 /* had an error */
        sbp->bio->bio_buf->b_flags |= B_ERROR;      /* propagate upwards */
        sbp->bio->bio_buf->b_error = sbp->b.b_error;
    }
#ifdef VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
        logrq(loginfo_sdiodone, (union rqinfou)bio, bio);
#endif
    sbp->bio->bio_buf->b_resid = sbp->b.b_resid;    /* copy the resid field */
    /* Now update the statistics */
    if (sbp->b.b_cmd == BUF_CMD_READ) {             /* read operation */
        DRIVE[sbp->driveno].reads++;
        DRIVE[sbp->driveno].bytes_read += sbp->b.b_bcount;
        SD[sbp->sdno].reads++;
        SD[sbp->sdno].bytes_read += sbp->b.b_bcount;
    } else {                                        /* write operation */
        DRIVE[sbp->driveno].writes++;
        DRIVE[sbp->driveno].bytes_written += sbp->b.b_bcount;
        SD[sbp->sdno].writes++;
        SD[sbp->sdno].bytes_written += sbp->b.b_bcount;
    }
    biodone_sync(bio);
    biodone(sbp->bio);                              /* complete the caller's I/O */
    BUF_UNLOCK(&sbp->b);
    uninitbufbio(&sbp->b);
    Free(sbp);
    rel_mplock();
}
static void
mfi_disk_strategy(struct bio *bio)
{
    struct mfi_disk *sc;
    struct mfi_softc *controller;

    sc = bio->bio_disk->d_drv1;
    if (sc == NULL) {
        bio->bio_error = EINVAL;
        bio->bio_flags |= BIO_ERROR;
        bio->bio_resid = bio->bio_bcount;
        biodone(bio);
        return;
    }

    controller = sc->ld_controller;
    bio->bio_driver1 = (void *)(uintptr_t)sc->ld_id;
    mtx_lock(&controller->mfi_io_lock);
    mfi_enqueue_bio(controller, bio);
    mfi_startio(controller);
    mtx_unlock(&controller->mfi_io_lock);
    return;
}
void
rdstrategy(struct buf *bp)
{
    struct rdsoftc *rd;
    struct hdcsoftc *sc;
    struct disklabel *lp;
    int s;

    if ((rd = device_lookup_private(&rd_cd, DISKUNIT(bp->b_dev))) == NULL) {
        bp->b_error = ENXIO;
        goto done;
    }
    sc = rd->sc_hdc;

    lp = rd->sc_disk.dk_label;
    if ((bounds_check_with_label(&rd->sc_disk, bp, 1)) <= 0)
        goto done;

    if (bp->b_bcount == 0)
        goto done;

    bp->b_rawblkno =
        bp->b_blkno + lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
    bp->b_cylinder = bp->b_rawblkno / lp->d_secpercyl;

    s = splbio();
    BUFQ_PUT(sc->sc_q, bp);
    if (inq == 0) {
        inq = 1;
        vsbus_dma_start(&sc->sc_vd);
    }
    splx(s);
    return;

done:
    biodone(bp);
}
/*
 * Read/write routine for a buffer.  Finds the proper unit, range checks
 * arguments, and schedules the transfer.  Does not wait for the transfer
 * to complete.  Multi-page transfers are supported.  All I/O requests must
 * be a multiple of a sector in length.
 */
static void
idad_strategy(struct bio *bp)
{
    struct idad_softc *drv;
    int s;

    drv = bp->bio_disk->d_drv1;
    if (drv == NULL) {
        bp->bio_error = EINVAL;
        goto bad;
    }

    /*
     * software write protect check
     */
    if (drv->flags & DRV_WRITEPROT && (bp->bio_cmd == BIO_WRITE)) {
        bp->bio_error = EROFS;
        goto bad;
    }

    bp->bio_driver1 = drv;
    s = splbio();
    ida_submit_buf(drv->controller, bp);
    splx(s);
    return;

bad:
    bp->bio_flags |= BIO_ERROR;

    /*
     * Correctly set the buf to indicate a completed transfer
     */
    bp->bio_resid = bp->bio_bcount;
    biodone(bp);
    return;
}
void
destroy_geom_disk(struct nand_chip *chip)
{
    struct bio *bp;

    taskqueue_free(chip->tq);
    disk_destroy(chip->ndisk);
    disk_destroy(chip->rdisk);

    mtx_lock(&chip->qlock);
    for (;;) {
        bp = bioq_takefirst(&chip->bioq);
        if (bp == NULL)
            break;
        bp->bio_error = EIO;
        bp->bio_flags |= BIO_ERROR;
        bp->bio_resid = bp->bio_bcount;

        biodone(bp);
    }
    mtx_unlock(&chip->qlock);

    mtx_destroy(&chip->qlock);
}
void
fss_strategy(struct buf *bp)
{
    const bool write = ((bp->b_flags & B_READ) != B_READ);
    struct fss_softc *sc = device_lookup_private(&fss_cd, minor(bp->b_dev));

    mutex_enter(&sc->sc_slock);

    if (write || !FSS_ISVALID(sc)) {
        mutex_exit(&sc->sc_slock);

        bp->b_error = (write ? EROFS : ENXIO);
        bp->b_resid = bp->b_bcount;
        biodone(bp);
        return;
    }

    bp->b_rawblkno = bp->b_blkno;
    bufq_put(sc->sc_bufq, bp);
    cv_signal(&sc->sc_work_cv);
    mutex_exit(&sc->sc_slock);
}
/* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int ntfs_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap = v; struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; struct fnode *fp = VTOF(vp); struct ntnode *ip = FTONT(fp); struct ntfsmount *ntmp = ip->i_mp; int error; dprintf(("ntfs_strategy: blkno: %d, lblkno: %d\n", (u_int32_t)bp->b_blkno, (u_int32_t)bp->b_lblkno)); dprintf(("strategy: bcount: %u flags: 0x%x\n", (u_int32_t)bp->b_bcount,bp->b_flags)); if (bp->b_flags & B_READ) { u_int32_t toread; if (ntfs_cntob(bp->b_blkno) >= fp->f_size) { clrbuf(bp); error = 0; } else { toread = MIN(bp->b_bcount, fp->f_size - ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: toread: %d, fsize: %d\n", toread,(u_int32_t)fp->f_size)); error = ntfs_readattr(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno), toread, bp->b_data, NULL); if (error) { printf("ntfs_strategy: ntfs_readattr failed\n"); bp->b_error = error; } memset((char *)bp->b_data + toread, 0, bp->b_bcount - toread); } } else { size_t tmp; u_int32_t towrite; if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) { printf("ntfs_strategy: CAN'T EXTEND FILE\n"); bp->b_error = error = EFBIG; } else { towrite = MIN(bp->b_bcount, fp->f_size - ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n", towrite,(u_int32_t)fp->f_size)); error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite, bp->b_data, &tmp, NULL); if (error) { printf("ntfs_strategy: ntfs_writeattr fail\n"); bp->b_error = error; } } } biodone(bp); return (error); }
/* * Strategy routine called from dm_strategy. */ static int dm_target_stripe_strategy(dm_table_entry_t *table_en, struct buf *bp) { dm_target_stripe_config_t *tsc; struct bio *bio = &bp->b_bio1; struct buf *nestbuf; uint64_t blkno, blkoff; uint64_t stripe, blknr; uint32_t stripe_off, stripe_rest, num_blks, issue_blks; int devnr; tsc = table_en->target_config; if (tsc == NULL) return 0; /* calculate extent of request */ KKASSERT(bp->b_resid % DEV_BSIZE == 0); switch(bp->b_cmd) { case BUF_CMD_READ: case BUF_CMD_WRITE: case BUF_CMD_FREEBLKS: /* * Loop through to individual operations */ blkno = bp->b_bio1.bio_offset / DEV_BSIZE; blkoff = 0; num_blks = bp->b_resid / DEV_BSIZE; nestiobuf_init(bio); while (num_blks > 0) { /* blockno to strip piece nr */ stripe = blkno / tsc->stripe_chunksize; stripe_off = blkno % tsc->stripe_chunksize; /* where we are inside the strip */ devnr = stripe % tsc->stripe_num; blknr = stripe / tsc->stripe_num; /* how much is left before we hit a boundary */ stripe_rest = tsc->stripe_chunksize - stripe_off; /* issue this piece on stripe `stripe' */ issue_blks = MIN(stripe_rest, num_blks); nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, blkoff, issue_blks * DEV_BSIZE, NULL); /* I need number of bytes. */ nestbuf->b_bio1.bio_offset = blknr * tsc->stripe_chunksize + stripe_off; nestbuf->b_bio1.bio_offset += tsc->stripe_devs[devnr].offset; nestbuf->b_bio1.bio_offset *= DEV_BSIZE; vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode, &nestbuf->b_bio1); blkno += issue_blks; blkoff += issue_blks * DEV_BSIZE; num_blks -= issue_blks; } nestiobuf_start(bio); break; case BUF_CMD_FLUSH: nestiobuf_init(bio); for (devnr = 0; devnr < tsc->stripe_num; ++devnr) { nestbuf = getpbuf(NULL); nestbuf->b_flags |= bio->bio_buf->b_flags & B_HASBOGUS; nestiobuf_add(bio, nestbuf, 0, 0, NULL); nestbuf->b_bio1.bio_offset = 0; vn_strategy(tsc->stripe_devs[devnr].pdev->pdev_vnode, &nestbuf->b_bio1); } nestiobuf_start(bio); break; default: bp->b_flags |= B_ERROR; bp->b_error = EIO; biodone(bio); break; } return 0; }
/* Pseudo strategy function * Called by scsipi_do_ioctl() via physio/physstrat if there is to * be data transfered, and directly if there is no data transfer. * * Should I reorganize this so it returns to physio instead * of sleeping in scsiio_scsipi_cmd? Is there any advantage, other * than avoiding the probable duplicate wakeup in iodone? [PD] * * No, seems ok to me... [JRE] * (I don't see any duplicate wakeups) * * Can't be used with block devices or raw_read/raw_write directly * from the cdevsw/bdevsw tables because they couldn't have added * the screq structure. [JRE] */ static void scsistrategy(struct buf *bp) { struct scsi_ioctl *si; scsireq_t *screq; struct scsipi_periph *periph; int error; int flags = 0; si = si_find(bp); if (si == NULL) { printf("scsistrategy: " "No matching ioctl request found in queue\n"); error = EINVAL; goto done; } screq = &si->si_screq; periph = si->si_periph; SC_DEBUG(periph, SCSIPI_DB2, ("user_strategy\n")); /* * We're in trouble if physio tried to break up the transfer. */ if (bp->b_bcount != screq->datalen) { scsipi_printaddr(periph); printf("physio split the request.. cannot proceed\n"); error = EIO; goto done; } if (screq->timeout == 0) { error = EINVAL; goto done; } if (screq->cmdlen > sizeof(struct scsipi_generic)) { scsipi_printaddr(periph); printf("cmdlen too big\n"); error = EFAULT; goto done; } if ((screq->flags & SCCMD_READ) && screq->datalen > 0) flags |= XS_CTL_DATA_IN; if ((screq->flags & SCCMD_WRITE) && screq->datalen > 0) flags |= XS_CTL_DATA_OUT; if (screq->flags & SCCMD_TARGET) flags |= XS_CTL_TARGET; if (screq->flags & SCCMD_ESCAPE) flags |= XS_CTL_ESCAPE; error = scsipi_command(periph, (void *)screq->cmd, screq->cmdlen, (void *)bp->b_data, screq->datalen, 0, /* user must do the retries *//* ignored */ screq->timeout, bp, flags | XS_CTL_USERCMD); done: if (error) bp->b_resid = bp->b_bcount; bp->b_error = error; biodone(bp); return; }
static void isf_task(void *arg) { struct isf_softc *sc = arg; struct bio *bp; int ss = sc->isf_disk->d_sectorsize; int error, i; for (;;) { ISF_LOCK(sc); do { bp = bioq_first(&sc->isf_bioq); if (bp == NULL) { if (sc->isf_doomed) kproc_exit(0); else ISF_SLEEP(sc, sc, 0); } } while (bp == NULL); bioq_remove(&sc->isf_bioq, bp); error = 0; switch (bp->bio_cmd) { case BIO_READ: isf_read(sc, bp->bio_pblkno * ss, bp->bio_data, bp->bio_bcount); break; case BIO_WRITE: /* * In principle one could suspend the in-progress * erase, process any pending writes to other * blocks and then proceed, but that seems * overly complex for the likely usage modes. */ if (sc->isf_erasing) { error = EBUSY; break; } /* * Read in the block we want to write and check that * we're only setting bits to 0. If an erase would * be required return an I/O error. */ isf_read(sc, bp->bio_pblkno * ss, sc->isf_rbuf, bp->bio_bcount); for (i = 0; i < bp->bio_bcount / 2; i++) if ((sc->isf_rbuf[i] & ((uint16_t *)bp->bio_data)[i]) != ((uint16_t *)bp->bio_data)[i]) { device_printf(sc->isf_dev, "write" " requires erase at 0x%08jx\n", bp->bio_pblkno * ss); error = EIO; break; } if (error != 0) break; error = isf_write(sc, bp->bio_pblkno * ss, bp->bio_data, bp->bio_bcount); break; default: panic("%s: unsupported I/O operation %d", __func__, bp->bio_cmd); } if (error == 0) biodone(bp); else biofinish(bp, NULL, error); ISF_UNLOCK(sc); } }
/* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int ufs_strategy(void *v) { struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap = v; struct buf *bp; struct vnode *vp; struct inode *ip; struct mount *mp; int error; bp = ap->a_bp; vp = ap->a_vp; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ufs_strategy: spec"); KASSERT(bp->b_bcount != 0); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL); if (error) { bp->b_error = error; biodone(bp); return (error); } if (bp->b_blkno == -1) /* no valid data */ clrbuf(bp); } if (bp->b_blkno < 0) { /* block is not on disk */ biodone(bp); return (0); } vp = ip->i_devvp; error = VOP_STRATEGY(vp, bp); if (error) return error; if (!BUF_ISREAD(bp)) return 0; mp = wapbl_vptomp(vp); if (mp == NULL || mp->mnt_wapbl_replay == NULL || !WAPBL_REPLAY_ISOPEN(mp) || !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount)) return 0; error = biowait(bp); if (error) return error; error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount); if (error) { mutex_enter(&bufcache_lock); SET(bp->b_cflags, BC_INVAL); mutex_exit(&bufcache_lock); } return error; }
static void ptdone(struct cam_periph *periph, union ccb *done_ccb) { struct pt_softc *softc; struct ccb_scsiio *csio; softc = (struct pt_softc *)periph->softc; csio = &done_ccb->csio; switch (csio->ccb_h.ccb_state) { case PT_CCB_BUFFER_IO: case PT_CCB_BUFFER_IO_UA: { struct buf *bp; struct bio *bio; bio = (struct bio *)done_ccb->ccb_h.ccb_bio; bp = bio->bio_buf; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { int error; int sf; if ((csio->ccb_h.ccb_state & PT_CCB_RETRY_UA) != 0) sf = SF_RETRY_UA; else sf = 0; error = pterror(done_ccb, CAM_RETRY_SELTO, sf); if (error == ERESTART) { /* * A retry was scheuled, so * just return. */ return; } if (error != 0) { struct buf *q_bp; struct bio *q_bio; if (error == ENXIO) { /* * Catastrophic error. Mark our device * as invalid. */ xpt_print(periph->path, "Invalidating device\n"); softc->flags |= PT_FLAG_DEVICE_INVALID; } /* * return all queued I/O with EIO, so that * the client can retry these I/Os in the * proper order should it attempt to recover. */ while ((q_bio = bioq_takefirst(&softc->bio_queue)) != NULL) { q_bp = q_bio->bio_buf; q_bp->b_resid = q_bp->b_bcount; q_bp->b_error = EIO; q_bp->b_flags |= B_ERROR; biodone(q_bio); } bp->b_error = error; bp->b_resid = bp->b_bcount; bp->b_flags |= B_ERROR; } else { bp->b_resid = csio->resid; bp->b_error = 0; if (bp->b_resid != 0) { /* Short transfer ??? */ bp->b_flags |= B_ERROR; } } if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) cam_release_devq(done_ccb->ccb_h.path, /*relsim_flags*/0, /*reduction*/0, /*timeout*/0, /*getcount_only*/0); } else { bp->b_resid = csio->resid; if (bp->b_resid != 0) bp->b_flags |= B_ERROR; } /* * Block out any asyncronous callbacks * while we touch the pending ccb list. */ LIST_REMOVE(&done_ccb->ccb_h, periph_links.le); devstat_end_transaction_buf(&softc->device_stats, bp); biodone(bio); break; } case PT_CCB_WAITING: /* Caller will release the CCB */ wakeup(&done_ccb->ccb_h.cbfcnp); return; } xpt_release_ccb(done_ccb); }
void bmdstrategy(struct buf *bp) { int unit = BMD_UNIT(bp->b_dev); struct bmd_softc *sc; int offset, disksize, resid; int page, pg_offset, pg_resid; void *data; if (unit >= bmd_cd.cd_ndevs) { bp->b_error = ENXIO; goto done; } sc = device_lookup_private(&bmd_cd, BMD_UNIT(bp->b_dev)); if (sc == NULL) { bp->b_error = ENXIO; goto done; } DPRINTF(("bmdstrategy: %s blkno %d bcount %ld:", (bp->b_flags & B_READ) ? "read " : "write", bp->b_blkno, bp->b_bcount)); bp->b_resid = bp->b_bcount; offset = (bp->b_blkno << DEV_BSHIFT); disksize = sc->sc_maxpage * BMD_PAGESIZE; if (offset >= disksize) { /* EOF if read, EIO if write */ if (bp->b_flags & B_READ) goto done; bp->b_error = EIO; goto done; } resid = bp->b_resid; if (resid > disksize - offset) resid = disksize - offset; data = bp->b_data; do { page = offset / BMD_PAGESIZE; pg_offset = offset % BMD_PAGESIZE; /* length */ pg_resid = MIN(resid, BMD_PAGESIZE - pg_offset); /* switch bank page */ bus_space_write_1(sc->sc_iot, sc->sc_ioh, BMD_PAGE, page); /* XXX we should use DMA transfer? */ if ((bp->b_flags & B_READ)) { bus_space_read_region_1(sc->sc_iot, sc->sc_bank, pg_offset, data, pg_resid); } else { bus_space_write_region_1(sc->sc_iot, sc->sc_bank, pg_offset, data, pg_resid); } data = (char *)data + pg_resid; offset += pg_resid; resid -= pg_resid; bp->b_resid -= pg_resid; } while (resid > 0); DPRINTF(("\n")); done: biodone(bp); }
static void mcdstrategy(struct bio *bp) { struct mcd_softc *sc; int s; sc = (struct mcd_softc *)bp->bio_dev->si_drv1; /* test validity */ /*MCD_TRACE("strategy: buf=0x%lx, unit=%ld, block#=%ld bcount=%ld\n", bp,unit,bp->bio_blkno,bp->bio_bcount);*/ if (bp->bio_blkno < 0) { device_printf(sc->dev, "strategy failure: blkno = %ld, bcount = %ld\n", (long)bp->bio_blkno, bp->bio_bcount); bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; goto bad; } /* if device invalidated (e.g. media change, door open), error */ if (!(sc->data.flags & MCDVALID)) { device_printf(sc->dev, "media changed\n"); bp->bio_error = EIO; goto bad; } /* read only */ if (!(bp->bio_cmd == BIO_READ)) { bp->bio_error = EROFS; goto bad; } /* no data to read */ if (bp->bio_bcount == 0) goto done; if (!(sc->data.flags & MCDTOC)) { bp->bio_error = EIO; goto bad; } bp->bio_pblkno = bp->bio_blkno; bp->bio_resid = 0; /* queue it */ s = splbio(); bioqdisksort(&sc->data.head, bp); splx(s); /* now check whether we can perform processing */ mcd_start(sc); return; bad: bp->bio_flags |= BIO_ERROR; done: bp->bio_resid = bp->bio_bcount; biodone(bp); return; }
int zvol_strategy(buf_t *bp) { zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); uint64_t off, volsize; size_t size, resid; char *addr; objset_t *os; int error = 0; int sync; int reading; int txg_sync_needed = B_FALSE; if (zv == NULL) { bioerror(bp, ENXIO); biodone(bp); return (0); } if (getminor(bp->b_edev) == 0) { bioerror(bp, EINVAL); biodone(bp); return (0); } if (zv->zv_readonly && !(bp->b_flags & B_READ)) { bioerror(bp, EROFS); biodone(bp); return (0); } off = ldbtob(bp->b_blkno); volsize = zv->zv_volsize; os = zv->zv_objset; ASSERT(os != NULL); sync = !(bp->b_flags & B_ASYNC) && !(zil_disable); bp_mapin(bp); addr = bp->b_un.b_addr; resid = bp->b_bcount; /* * There must be no buffer changes when doing a dmu_sync() because * we can't change the data whilst calculating the checksum. * A better approach than a per zvol rwlock would be to lock ranges. */ reading = bp->b_flags & B_READ; if (reading || resid <= zvol_immediate_write_sz) rw_enter(&zv->zv_dslock, RW_READER); else rw_enter(&zv->zv_dslock, RW_WRITER); while (resid != 0 && off < volsize) { size = MIN(resid, 1UL << 20); /* cap at 1MB per tx */ if (size > volsize - off) /* don't write past the end */ size = volsize - off; if (reading) { error = dmu_read(os, ZVOL_OBJ, off, size, addr); } else { dmu_tx_t *tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, size, addr, tx); if (sync) { /* use the ZIL to commit this write */ if (zvol_log_write(zv, tx, off, size, addr) != 0) { txg_sync_needed = B_TRUE; } } dmu_tx_commit(tx); } } if (error) break; off += size; addr += size; resid -= size; } rw_exit(&zv->zv_dslock); if ((bp->b_resid = resid) == bp->b_bcount) bioerror(bp, off > volsize ? EINVAL : error); biodone(bp); if (sync) { if (txg_sync_needed) txg_wait_synced(dmu_objset_pool(os), 0); else zil_commit(zv->zv_zilog, UINT64_MAX, 0); } return (0); }
void marustrategy(struct buf *bp) { struct maru_softc *sc; struct disklabel *lp; struct partition *pp; int len; int err = ENXIO; m_u64 offset; DB("marustrategy(%p)\n", bp); maru_printbuf(bp); DB("ms:1\n"); sc = &maru_softc[maruunit(bp->b_dev)]; if (num_maru<1 || maruunit(bp->b_dev) >= num_maru || !(sc->sc_flags&MUF_INITED) || !sc->sc_kapi) { err: DB("ms:2\n"); maru_berror(bp, err); DB("ms:3\n"); return; } DB("ms:4\n"); len = bp->b_bcount; bp->b_resid = len; if (len<1) { DB("ms:5\n"); biodone(bp); DB("ms:6\n"); return; } DB("ms:6.1\n"); offset = dbtob(bp->b_blkno); lp = sc->sc_dkdev.dk_label; /* the transfer must be a whole number of blocks */ if (len % lp->d_secsize != 0) { maru_berror(bp, EINVAL); return; } /* * Do bounds checking and adjust transfer. If there's an error, * the bounds check will flag that for us. */ DB("ms:6.2\n"); if (DISKPART(bp->b_dev) != RAW_PART && bounds_check_with_label(bp, lp, sc->sc_flags&MUF_WLABEL) <= 0) { biodone(bp); return; } /* * Translate the partition-relative block number to an absolute. */ DB("ms:6.3\n"); if (DISKPART(bp->b_dev) != RAW_PART) { pp = &sc->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; offset += pp->p_offset * lp->d_secsize; } if (bp->b_flags & B_READ) { struct maru_message *msg; DB("ms:7\n"); msg = malloc(sizeof *msg, M_DEVBUF, M_NOWAIT); if (!msg) goto err; msg->mm_flags = MARU_READ_REQ; DB("ms:8\n"); msg->mm_id = maru_acquire_token(sc, bp); msg->mm_len = len; msg->mm_offset = offset; DB("ms:9\n"); if ((err = sc->sc_kapi->ka_inject(sc->sc_kapi, msg, sizeof *msg))) { DB("ms:10\n"); free(msg, M_DEVBUF); goto err; } DB("ms:11\n"); sc->sc_reading++; return; } else /* B_WRITE */ { struct maru_message *msg; DB("ms:13\n"); msg = malloc(sizeof *msg, M_DEVBUF, M_NOWAIT); if (!msg) goto err; msg->mm_flags = MARU_WRITE; msg->mm_id = maru_acquire_token(sc, bp); msg->mm_len = len; msg->mm_offset = offset; DB("ms:14\n"); if ((err = sc->sc_kapi->ka_inject(sc->sc_kapi, msg, sizeof(msg)+msg->mm_len))) { DB("ms:15\n"); free(msg, M_DEVBUF); goto err; } DB("ms:16\n"); sc->sc_writing++; return; } DB("ms:17\n"); }
/* * Actually translate the requested transfer into one the physical driver can * understand. The transfer is described by a buf and will include only one * physical transfer. */ void cdstrategy(struct buf *bp) { struct cd_softc *cd; int s; if ((cd = cdlookup(DISKUNIT(bp->b_dev))) == NULL) { bp->b_error = ENXIO; goto bad; } SC_DEBUG(cd->sc_link, SDEV_DB2, ("cdstrategy: %ld bytes @ blk %d\n", bp->b_bcount, bp->b_blkno)); /* * If the device has been made invalid, error out * maybe the media changed, or no media loaded */ if ((cd->sc_link->flags & SDEV_MEDIA_LOADED) == 0) { bp->b_error = EIO; goto bad; } /* * The transfer must be a whole number of blocks. */ if ((bp->b_bcount % cd->sc_dk.dk_label->d_secsize) != 0) { bp->b_error = EINVAL; goto bad; } /* * If it's a null transfer, return immediately */ if (bp->b_bcount == 0) goto done; /* * Do bounds checking, adjust transfer. if error, process. * If end of partition, just return. */ if (bounds_check_with_label(bp, cd->sc_dk.dk_label, (cd->flags & (CDF_WLABEL|CDF_LABELLING)) != 0) <= 0) goto done; s = splbio(); /* * Place it in the queue of disk activities for this disk */ disksort(&cd->buf_queue, bp); /* * Tell the device to get going on the transfer if it's * not doing anything, otherwise just wait for completion */ cdstart(cd); device_unref(&cd->sc_dev); splx(s); return; bad: bp->b_flags |= B_ERROR; done: /* * Correctly set the buf to indicate a completed xfer */ bp->b_resid = bp->b_bcount; s = splbio(); biodone(bp); splx(s); if (cd != NULL) device_unref(&cd->sc_dev); }
int puffs_doio(struct vnode *vp, struct bio *bio, struct thread *td) { struct buf *bp = bio->bio_buf; struct ucred *cred; struct uio *uiop; struct uio uio; struct iovec io; size_t n; int error = 0; if (td != NULL && td->td_proc != NULL) cred = td->td_proc->p_ucred; else cred = proc0.p_ucred; uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = td; /* * clear B_ERROR and B_INVAL state prior to initiating the I/O. We * do this here so we do not have to do it in all the code that * calls us. */ bp->b_flags &= ~(B_ERROR | B_INVAL); KASSERT(bp->b_cmd != BUF_CMD_DONE, ("puffs_doio: bp %p already marked done!", bp)); if (bp->b_cmd == BUF_CMD_READ) { io.iov_len = uiop->uio_resid = (size_t)bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; uiop->uio_offset = bio->bio_offset; error = puffs_directread(vp, uiop, 0, cred); if (error == 0 && uiop->uio_resid) { n = (size_t)bp->b_bcount - uiop->uio_resid; bzero(bp->b_data + n, bp->b_bcount - n); uiop->uio_resid = 0; } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } bp->b_resid = uiop->uio_resid; } else { KKASSERT(bp->b_cmd == BUF_CMD_WRITE); if (bio->bio_offset + bp->b_dirtyend > puffs_meta_getsize(vp)) bp->b_dirtyend = puffs_meta_getsize(vp) - bio->bio_offset; if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = bio->bio_offset + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = puffs_directwrite(vp, uiop, 0, cred); if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { crit_enter(); bp->b_flags &= ~(B_INVAL|B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) bdirty(bp); if (error) bp->b_flags |= B_EINTR; crit_exit(); } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } bp->b_resid = uiop->uio_resid; } else { bp->b_resid = 0; } } biodone(bio); KKASSERT(bp->b_cmd == BUF_CMD_DONE); if (bp->b_flags & B_EINTR) return (EINTR); if (bp->b_flags & B_ERROR) return (bp->b_error ? bp->b_error : EIO); return (0); }
/*
 * cdstart looks to see if there is a buf waiting for the device
 * and that the device is not already busy.  If both are true,
 * it dequeues the buf and creates a scsi command to perform the
 * transfer in the buf.  The transfer request will call scsi_done
 * on completion, which will in turn call this routine again
 * so that the next queued transfer is performed.
 * The bufs are queued by the strategy routine (cdstrategy)
 *
 * This routine is also called after other non-queued requests
 * have been made of the scsi driver, to ensure that the queue
 * continues to be drained.
 *
 * must be called at the correct (highish) spl level
 * cdstart() is called at splbio from cdstrategy, cdrestart and scsi_done
 */
void
cdstart(void *v)
{
    struct cd_softc *cd = v;
    struct scsi_link *sc_link = cd->sc_link;
    struct buf *bp = 0;
    struct buf *dp;
    struct scsi_rw_big cmd_big;
    struct scsi_rw cmd_small;
    struct scsi_generic *cmdp;
    int blkno, nblks, cmdlen, error;
    struct partition *p;

    splassert(IPL_BIO);

    SC_DEBUG(sc_link, SDEV_DB2, ("cdstart\n"));
    /*
     * Check if the device has room for another command
     */
    while (sc_link->openings > 0) {
        /*
         * there is excess capacity, but a special waits
         * It'll need the adapter as soon as we clear out of the
         * way and let it run (user level wait).
         */
        if (sc_link->flags & SDEV_WAITING) {
            sc_link->flags &= ~SDEV_WAITING;
            wakeup((caddr_t)sc_link);
            return;
        }

        /*
         * See if there is a buf with work for us to do..
         */
        dp = &cd->buf_queue;
        if ((bp = dp->b_actf) == NULL)  /* yes, an assign */
            return;
        dp->b_actf = bp->b_actf;

        /*
         * If the device has become invalid, abort all the
         * reads and writes until all files have been closed and
         * re-opened
         */
        if ((sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
            bp->b_error = EIO;
            bp->b_flags |= B_ERROR;
            bp->b_resid = bp->b_bcount;
            biodone(bp);
            continue;
        }

        /*
         * We have a buf, now we should make a command
         *
         * First, translate the block to absolute and put it in terms
         * of the logical blocksize of the device.
         */
        blkno =
            bp->b_blkno / (cd->sc_dk.dk_label->d_secsize / DEV_BSIZE);
        p = &cd->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
        blkno += DL_GETPOFFSET(p);
        nblks = howmany(bp->b_bcount, cd->sc_dk.dk_label->d_secsize);

        /*
         * Fill out the scsi command.  If the transfer will
         * fit in a "small" cdb, use it.
         */
        if (!(sc_link->flags & SDEV_ATAPI) &&
            !(sc_link->quirks & SDEV_ONLYBIG) &&
            ((blkno & 0x1fffff) == blkno) &&
            ((nblks & 0xff) == nblks)) {
            /*
             * We can fit in a small cdb.
             */
            bzero(&cmd_small, sizeof(cmd_small));
            cmd_small.opcode = (bp->b_flags & B_READ) ?
                READ_COMMAND : WRITE_COMMAND;
            _lto3b(blkno, cmd_small.addr);
            cmd_small.length = nblks & 0xff;
            cmdlen = sizeof(cmd_small);
            cmdp = (struct scsi_generic *)&cmd_small;
        } else {
            /*
             * Need a large cdb.
             */
            bzero(&cmd_big, sizeof(cmd_big));
            cmd_big.opcode = (bp->b_flags & B_READ) ?
                READ_BIG : WRITE_BIG;
            _lto4b(blkno, cmd_big.addr);
            _lto2b(nblks, cmd_big.length);
            cmdlen = sizeof(cmd_big);
            cmdp = (struct scsi_generic *)&cmd_big;
        }

        /* Instrumentation. */
        disk_busy(&cd->sc_dk);

        /*
         * Call the routine that chats with the adapter.
         * Note: we cannot sleep as we may be an interrupt
         */
        error = scsi_scsi_cmd(sc_link, cmdp, cmdlen,
            (u_char *) bp->b_data, bp->b_bcount, SCSI_RETRIES, 30000, bp,
            SCSI_NOSLEEP | ((bp->b_flags & B_READ) ?
            SCSI_DATA_IN : SCSI_DATA_OUT));
        switch (error) {
        case 0:
            timeout_del(&cd->sc_timeout);
            break;
        case EAGAIN:
            /*
             * The device can't start another i/o. Try again later.
             */
            dp->b_actf = bp;
            disk_unbusy(&cd->sc_dk, 0, 0);
            timeout_add(&cd->sc_timeout, 1);
            return;
        default:
            disk_unbusy(&cd->sc_dk, 0, 0);
            printf("%s: not queued, error %d\n",
                cd->sc_dev.dv_xname, error);
            break;
        }
    }
}
/*
 * Mark I/O complete on a buffer, release it if I/O is asynchronous,
 * and wake up anyone waiting for it.
 */
void
iodone(struct buf *bp)
{
    ASSERT(SEMA_HELD(&bp->b_sem));
    (void) biodone(bp);
}
static void htif_blk_task(void *arg) { struct htif_blk_request req __aligned(HTIF_ALIGN); struct htif_blk_softc *sc; uint64_t req_paddr; struct bio *bp; uint64_t paddr; uint64_t resp; uint64_t cmd; int i; sc = (struct htif_blk_softc *)arg; while (1) { HTIF_BLK_LOCK(sc); do { bp = bioq_takefirst(&sc->bio_queue); if (bp == NULL) msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); } while (bp == NULL); HTIF_BLK_UNLOCK(sc); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { HTIF_BLK_LOCK(sc); rmb(); req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize); req.size = bp->bio_bcount; paddr = vtophys(bp->bio_data); KASSERT(paddr != 0, ("paddr is 0")); req.addr = paddr; sc->curtag++; req.tag = sc->curtag; cmd = sc->index; cmd <<= HTIF_DEV_ID_SHIFT; if (bp->bio_cmd == BIO_READ) cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT); else cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); req_paddr = vtophys(&req); KASSERT(req_paddr != 0, ("req_paddr is 0")); cmd |= req_paddr; sc->cmd_done = 0; resp = htif_command(cmd); htif_blk_intr(sc, resp); /* Wait for interrupt */ i = 0; while (sc->cmd_done == 0) { msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2); if (i++ > 2) { /* TODO: try to re-issue operation on timeout ? */ bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; disk_err(bp, "hard error", -1, 1); break; } } HTIF_BLK_UNLOCK(sc); biodone(bp); } else { printf("unknown op %d\n", bp->bio_cmd); } } }
void sd_buf_done(struct scsi_xfer *xs) { struct sd_softc *sc = xs->sc_link->device_softc; struct buf *bp = xs->cookie; int error, s; switch (xs->error) { case XS_NOERROR: bp->b_error = 0; bp->b_resid = xs->resid; break; case XS_NO_CCB: /* The adapter is busy, requeue the buf and try it later. */ disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid, bp->b_flags & B_READ); bufq_requeue(&sc->sc_bufq, bp); scsi_xs_put(xs); SET(sc->flags, SDF_WAITING); timeout_add(&sc->sc_timeout, 1); return; case XS_SENSE: case XS_SHORTSENSE: #ifdef SCSIDEBUG scsi_sense_print_debug(xs); #endif error = sd_interpret_sense(xs); if (error == 0) { bp->b_error = 0; bp->b_resid = xs->resid; break; } if (error != ERESTART) { bp->b_error = error; xs->retries = 0; } goto retry; case XS_BUSY: if (xs->retries) { if (scsi_delay(xs, 1) != ERESTART) xs->retries = 0; } goto retry; case XS_TIMEOUT: retry: if (xs->retries--) { scsi_xs_exec(xs); return; } /* FALLTHROUGH */ default: if (bp->b_error == 0) bp->b_error = EIO; bp->b_flags |= B_ERROR; bp->b_resid = bp->b_bcount; break; } disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid, bp->b_flags & B_READ); s = splbio(); biodone(bp); splx(s); scsi_xs_put(xs); }
static void
mcd_doread(struct mcd_softc *sc, int state, struct mcd_mbx *mbxin)
{
    struct mcd_mbx *mbx;
    struct bio *bp;
    int rm, i, k;
    struct mcd_read2 rbuf;
    int blknum;
    caddr_t addr;

    mbx = (state!=MCD_S_BEGIN) ? sc->ch_mbxsave : mbxin;
    bp = mbx->bp;

loop:
    switch (state) {
    case MCD_S_BEGIN:
        mbx = sc->ch_mbxsave = mbxin;

    case MCD_S_BEGIN1:
retry_status:
        /* get status */
        MCD_WRITE(sc, MCD_REG_COMMAND, MCD_CMDGETSTAT);
        mbx->count = RDELAY_WAITSTAT;
        sc->ch_state = MCD_S_WAITSTAT;
        sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
        return;
    case MCD_S_WAITSTAT:
        sc->ch_state = MCD_S_WAITSTAT;
        untimeout(mcd_timeout,(caddr_t)sc, sc->ch);
        if (mbx->count-- >= 0) {
            if (MCD_READ(sc, MCD_FLAGS) & MFL_STATUS_NOT_AVAIL) {
                sc->ch_state = MCD_S_WAITSTAT;
                timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
                return;
            }
            sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
            if (sc->data.status & MCD_ST_CMDCHECK)
                goto retry_status;
            if (mcd_setflags(sc) < 0)
                goto changed;
            MCD_TRACE("got WAITSTAT delay=%d\n",
                RDELAY_WAITSTAT-mbx->count);
            /* reject, if audio active */
            if (sc->data.status & MCDAUDIOBSY) {
                device_printf(sc->dev, "audio is active\n");
                goto readerr;
            }

retry_mode:
            /* to check for raw/cooked mode */
            if (sc->data.flags & MCDREADRAW) {
                rm = MCD_MD_RAW;
                mbx->sz = MCDRBLK;
            } else {
                rm = MCD_MD_COOKED;
                mbx->sz = sc->data.blksize;
            }

            if (rm == sc->data.curr_mode)
                goto modedone;

            mbx->count = RDELAY_WAITMODE;

            sc->data.curr_mode = MCD_MD_UNKNOWN;
            mbx->mode = rm;
            MCD_WRITE(sc, MCD_REG_COMMAND, MCD_CMDSETMODE);
            MCD_WRITE(sc, MCD_REG_COMMAND, rm);

            sc->ch_state = MCD_S_WAITMODE;
            sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
            return;
        } else {
            device_printf(sc->dev, "timeout getstatus\n");
            goto readerr;
        }

    case MCD_S_WAITMODE:
        sc->ch_state = MCD_S_WAITMODE;
        untimeout(mcd_timeout, (caddr_t)sc, sc->ch);
        if (mbx->count-- < 0) {
            device_printf(sc->dev, "timeout set mode\n");
            goto readerr;
        }
        if (MCD_READ(sc, MCD_FLAGS) & MFL_STATUS_NOT_AVAIL) {
            sc->ch_state = MCD_S_WAITMODE;
            sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100);
            return;
        }
        sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
        if (sc->data.status & MCD_ST_CMDCHECK) {
            sc->data.curr_mode = MCD_MD_UNKNOWN;
            goto retry_mode;
        }
        if (mcd_setflags(sc) < 0)
            goto changed;
        sc->data.curr_mode = mbx->mode;
        MCD_TRACE("got WAITMODE delay=%d\n",
            RDELAY_WAITMODE-mbx->count);
modedone:
        /* for first block */
        mbx->nblk = (bp->bio_bcount + (mbx->sz-1)) / mbx->sz;
        mbx->skip = 0;

nextblock:
        blknum = (bp->bio_blkno / (mbx->sz/DEV_BSIZE)) + mbx->skip/mbx->sz;

        MCD_TRACE("mcd_doread: read blknum=%d for bp=%p\n", blknum, bp);

        /* build parameter block */
        hsg2msf(blknum,rbuf.start_msf);
retry_read:
        /* send the read command */
        critical_enter();
        MCD_WRITE(sc, MCD_REG_COMMAND, sc->data.read_command);
        MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[0]);
        MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[1]);
        MCD_WRITE(sc, MCD_REG_COMMAND, rbuf.start_msf[2]);
        MCD_WRITE(sc, MCD_REG_COMMAND, 0);
        MCD_WRITE(sc, MCD_REG_COMMAND, 0);
        MCD_WRITE(sc, MCD_REG_COMMAND, 1);
        critical_exit();

        /* Spin briefly (<= 2ms) to avoid missing next block */
        for (i = 0; i < 20; i++) {
            k = MCD_READ(sc, MCD_FLAGS);
            if (!(k & MFL_DATA_NOT_AVAIL))
                goto got_it;
            DELAY(100);
        }

        mbx->count = RDELAY_WAITREAD;
        sc->ch_state = MCD_S_WAITREAD;
        sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
        return;
    case MCD_S_WAITREAD:
        sc->ch_state = MCD_S_WAITREAD;
        untimeout(mcd_timeout, (caddr_t)sc, sc->ch);
        if (mbx->count-- > 0) {
            k = MCD_READ(sc, MCD_FLAGS);
            if (!(k & MFL_DATA_NOT_AVAIL)) { /* XXX */
                MCD_TRACE("got data delay=%d\n",
                    RDELAY_WAITREAD-mbx->count);
got_it:
                /* data is ready */
                addr = bp->bio_data + mbx->skip;

                MCD_WRITE(sc, MCD_REG_CTL2,0x04);   /* XXX */
                for (i=0; i<mbx->sz; i++)
                    *addr++ = MCD_READ(sc, MCD_REG_RDATA);
                MCD_WRITE(sc, MCD_REG_CTL2,0x0c);   /* XXX */

                k = MCD_READ(sc, MCD_FLAGS);
                /* If we still have some junk, read it too */
                if (!(k & MFL_DATA_NOT_AVAIL)) {
                    MCD_WRITE(sc, MCD_REG_CTL2, 0x04);  /* XXX */
                    (void)MCD_READ(sc, MCD_REG_RDATA);
                    (void)MCD_READ(sc, MCD_REG_RDATA);
                    MCD_WRITE(sc, MCD_REG_CTL2, 0x0c);  /* XXX */
                }

                if (--mbx->nblk > 0) {
                    mbx->skip += mbx->sz;
                    goto nextblock;
                }

                /* return buffer */
                bp->bio_resid = 0;
                biodone(bp);

                sc->data.flags &= ~(MCDMBXBSY|MCDREADRAW);
                mcd_start(sc);
                return;
            }
            if (!(k & MFL_STATUS_NOT_AVAIL)) {
                sc->data.status = MCD_READ(sc, MCD_REG_STATUS) & 0xFF;
                if (sc->data.status & MCD_ST_CMDCHECK)
                    goto retry_read;
                if (mcd_setflags(sc) < 0)
                    goto changed;
            }
            sc->ch_state = MCD_S_WAITREAD;
            sc->ch = timeout(mcd_timeout, (caddr_t)sc, hz/100); /* XXX */
            return;
        } else {
            device_printf(sc->dev, "timeout read data\n");
            goto readerr;
        }
    }

readerr:
    if (mbx->retry-- > 0) {
        device_printf(sc->dev, "retrying\n");
        state = MCD_S_BEGIN1;
        goto loop;
    }
harderr:
    /* invalidate the buffer */
    bp->bio_flags |= BIO_ERROR;
    bp->bio_resid = bp->bio_bcount;
    biodone(bp);

    sc->data.flags &= ~(MCDMBXBSY|MCDREADRAW);
    mcd_start(sc);
    return;

changed:
    device_printf(sc->dev, "media changed\n");
    goto harderr;

#ifdef NOTDEF
    device_printf(sc->dev, "unit timeout, resetting\n");
    MCD_WRITE(sc, MCD_REG_RESET, MCD_CMDRESET);
    DELAY(300000);
    (void)mcd_getstat(sc, 1);
    (void)mcd_getstat(sc, 1);
    /*sc->data.status &= ~MCDDSKCHNG; */
    sc->data.debug = 1; /* preventive set debug mode */
#endif
}
/*
 * define the low-level requests needed to perform a high-level I/O
 * operation for a specific plex 'plexno'.
 *
 * Return 0 if all subdisks involved in the request are up, 1 if some
 * subdisks are not up, and -1 if the request is at least partially
 * outside the bounds of the subdisks.
 *
 * Modify the pointer *diskstart to point to the end address.  On read,
 * return on the first bad subdisk, so that the caller
 * (build_read_request) can try alternatives.
 *
 * On entry to this routine, the prq structures are not assigned.  The
 * assignment is performed by expandrq().  Strictly speaking, the
 * elements rqe->sdno of all entries should be set to -1, since 0 (from
 * bzero) is a valid subdisk number.  We avoid this problem by
 * initializing the ones we use, and not looking at the others (index >=
 * prq->requests).
 */
enum requeststatus
bre5(struct request *rq, int plexno, daddr_t * diskaddr, daddr_t diskend)
{
    struct metrics m;                       /* most of the information */
    struct sd *sd;
    struct plex *plex;
    struct buf *bp;                         /* user's bp */
    struct rqgroup *rqg;                    /* the request group that we will create */
    struct rqelement *rqe;                  /* point to this request information */
    int rsectors;                           /* sectors remaining in this stripe */
    int mysdno;                             /* another sd index in loops */
    int rqno;                               /* request number */

    rqg = NULL;                             /* shut up, damn compiler */
    m.diskstart = *diskaddr;                /* start of transfer */
    bp = rq->bp;                            /* buffer pointer */
    plex = &PLEX[plexno];                   /* point to the plex */

    while (*diskaddr < diskend) {           /* until we get it all sorted out */
        if (*diskaddr >= plex->length)      /* beyond the end of the plex */
            return REQUEST_EOF;             /* can't continue */

        m.badsdno = -1;                     /* no bad subdisk yet */

        /* Part A: Define the request */
        /*
         * First, calculate some sizes:
         * The offset of the start address from the start of the stripe.
         */
        m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1));
        /*
         * The plex-relative address of the start of the stripe.
         */
        m.stripebase = *diskaddr - m.stripeoffset;

        /* subdisk containing the parity stripe */
        if (plex->organization == plex_raid5)
            m.psdno = plex->subdisks - 1
                - (*diskaddr / (plex->stripesize * (plex->subdisks - 1)))
                % plex->subdisks;
        else                                /* RAID-4 */
            m.psdno = plex->subdisks - 1;

        /*
         * The number of the subdisk in which the start is located.
         */
        m.firstsdno = m.stripeoffset / plex->stripesize;
        if (m.firstsdno >= m.psdno)         /* at or past parity sd */
            m.firstsdno++;                  /* increment it */

        /*
         * The offset from the beginning of the stripe on this subdisk.
         */
        m.initoffset = m.stripeoffset % plex->stripesize;

        /* The offset of the stripe start relative to this subdisk */
        m.sdbase = m.stripebase / (plex->subdisks - 1);

        m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */

        /*
         * The number of sectors to transfer in the current (first) subdisk.
         */
        m.initlen = min(diskend - *diskaddr,    /* the amount remaining to transfer */
            plex->stripesize - m.initoffset);   /* and the amount left in this block */

        /*
         * The number of sectors to transfer in this stripe is the minimum
         * of the amount remaining to transfer and the amount left in this
         * stripe.
         */
        m.stripesectors = min(diskend - *diskaddr,
            plex->stripesize * (plex->subdisks - 1) - m.stripeoffset);

        /* The number of data subdisks involved in this request */
        m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize;

        /*
         * Part B: decide what kind of transfer this will be.
         *
         * start and end addresses of the transfer in the current block.
         *
         * There are a number of different kinds of transfer, each of which
         * relates to a specific subdisk:
         *
         * 1. Normal read.  All participating subdisks are up, and the
         *    transfer can be made directly to the user buffer.  The bounds
         *    of the transfer are described by m.dataoffset and m.datalen.
         *    We have already calculated m.initoffset and m.initlen, which
         *    define the parameters for the first data block.
         *
         * 2. Recovery read.  One participating subdisk is down.  To
         *    recover data, all the other subdisks, including the parity
         *    subdisk, must be read.  The data is recovered by
         *    exclusive-oring all the other blocks.  The bounds of the
         *    transfer are described by m.groupoffset and m.grouplen.
         *
         * 3. A read request may request reading both available data
         *    (normal read) and non-available data (recovery read).  This
         *    can be a problem if the address ranges of the two reads do
         *    not coincide: in this case, the normal read needs to be
         *    extended to cover the address range of the recovery read, and
         *    must thus be performed out of malloced memory.
         *
         * 4. Normal write.  All the participating subdisks are up.  The
         *    bounds of the transfer are described by m.dataoffset and
         *    m.datalen.  Since these values differ for each block, we
         *    calculate the bounds for the parity block independently as
         *    the maximum of the individual blocks and store these values
         *    in m.writeoffset and m.writelen.  This write proceeds in four
         *    phases:
         *
         *    i.   Read the old contents of each block and the parity block.
         *    ii.  ``Remove'' the old contents from the parity block with
         *         exclusive or.
         *    iii. ``Insert'' the new contents of the block in the parity
         *         block, again with exclusive or.
         *    iv.  Write the new contents of the data blocks and the parity
         *         block.  The data block transfers can be made directly
         *         from the user buffer.
         *
         * 5. Degraded write where the data block is not available.  The
         *    bounds of the transfer are described by m.groupoffset and
         *    m.grouplen.  This requires the following steps:
         *
         *    i.   Read in all the other data blocks, excluding the parity
         *         block.
         *    ii.  Recreate the parity block from the other data blocks and
         *         the data to be written.
         *    iii. Write the parity block.
         *
         * 6. Parityless write, a write where the parity block is not
         *    available.  This is in fact the simplest: just write the data
         *    blocks.  This can proceed directly from the user buffer.  The
         *    bounds of the transfer are described by m.dataoffset and
         *    m.datalen.
         *
         * 7. Combination of degraded data block write and normal write.
         *    In this case the address ranges of the reads may also need to
         *    be extended to cover all participating blocks.
         *
         * All requests in a group transfer transfer the same address range
         * relative to their subdisk.  The individual transfers may vary,
         * but since our group of requests is all in a single slice, we can
         * define a range in which they all fall.
         *
         * In the following code section, we determine which kind of
         * transfer we will perform.  If there is a group transfer, we also
         * decide its bounds relative to the subdisks.  At the end, we have
         * the following values:
         *
         *  m.flags indicates the kinds of transfers we will perform.
         *  m.initoffset indicates the offset of the beginning of any data
         *    operation relative to the beginning of the stripe base.
         *  m.initlen specifies the length of any data operation.
         *  m.dataoffset contains the same value as m.initoffset.
         *  m.datalen contains the same value as m.initlen.  Initially
         *    dataoffset and datalen describe the parameters for the first
         *    data block; while building the data block requests, they are
         *    updated for each block.
         *  m.groupoffset indicates the offset of any group operation
         *    relative to the beginning of the stripe base.
         *  m.grouplen specifies the length of any group operation.
         *  m.writeoffset indicates the offset of a normal write relative
         *    to the beginning of the stripe base.  This value differs from
         *    m.dataoffset in that it applies to the entire operation, and
         *    not just the first block.
         *  m.writelen specifies the total span of a normal write
         *    operation.  writeoffset and writelen are used to define the
         *    parity block.
         */
        m.groupoffset = 0;                  /* assume no group... */
        m.grouplen = 0;                     /* until we know we have one */
        m.writeoffset = m.initoffset;       /* start offset of transfer */
        m.writelen = 0;                     /* nothing to write yet */
        m.flags = 0;                        /* no flags yet */
        rsectors = m.stripesectors;         /* remaining sectors to examine */
        m.dataoffset = m.initoffset;        /* start at the beginning of the transfer */
        m.datalen = m.initlen;

        if (m.sdcount > 1) {
            plex->multiblock++;             /* more than one block for the request */
            /*
             * If we have two transfers that don't overlap,
             * (one at the end of the first block, the other
             * at the beginning of the second block),
             * it's cheaper to split them.
             */
            if (rsectors < plex->stripesize) {
                m.sdcount = 1;              /* just one subdisk */
                m.stripesectors = m.initlen; /* and just this many sectors */
                rsectors = m.initlen;       /* and in the loop counter */
            }
        }
        if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */
            m.badsdno = m.psdno;            /* note that it's down */
        if (bp->b_flags & B_READ) {         /* read operation */
            for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
                if (mysdno == m.psdno)      /* ignore parity on read */
                    mysdno++;
                if (mysdno == plex->subdisks) /* wraparound */
                    mysdno = 0;
                if (mysdno == m.psdno)      /* parity, */
                    mysdno++;               /* we've given already */
                if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */
                    if (m.badsdno >= 0)     /* we had one already, */
                        return REQUEST_DOWN; /* we can't take a second */
                    m.badsdno = mysdno;     /* got the first */
                    m.groupoffset = m.dataoffset; /* define the bounds */
                    m.grouplen = m.datalen;
                    m.flags |= XFR_RECOVERY_READ; /* we need recovery */
                    plex->recovered_reads++; /* count another one */
                } else
                    m.flags |= XFR_NORMAL_READ; /* normal read */

                /* Update the pointers for the next block */
                m.dataoffset = 0;           /* back to the start of the stripe */
                rsectors -= m.datalen;      /* remaining sectors to examine */
                m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
            }
        } else {                            /* write operation */
            for (mysdno = m.firstsdno; rsectors > 0; mysdno++) {
                if (mysdno == m.psdno)      /* parity stripe, we've dealt with that */
                    mysdno++;
                if (mysdno == plex->subdisks) /* wraparound */
                    mysdno = 0;
                if (mysdno == m.psdno)      /* parity, */
                    mysdno++;               /* we've given already */

                sd = &SD[plex->sdnos[mysdno]];
                if (sd->state != sd_up) {
                    enum requeststatus s;

                    s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */
                    if (s && (m.badsdno >= 0)) { /* second bad disk, */
                        int sdno;
                        /*
                         * If the parity disk is down, there's
                         * no recovery.  We make all involved
                         * subdisks stale.  Otherwise, we
                         * should be able to recover, but it's
                         * like pulling teeth.  Fix it later.
                         */
                        for (sdno = 0; sdno < m.sdcount; sdno++) {
                            struct sd *sd = &SD[plex->sdnos[sdno]];
                            if (sd->state >= sd_reborn) /* sort of up, */
                                set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */
                        }
                        return s;           /* and crap out */
                    }
                    m.badsdno = mysdno;     /* note which one is bad */
                    m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */
                    plex->degraded_writes++; /* count another one */
                    m.groupoffset = m.dataoffset; /* define the bounds */
                    m.grouplen = m.datalen;
                } else {
                    m.flags |= XFR_NORMAL_WRITE; /* normal write operation */
                    if (m.writeoffset > m.dataoffset) { /* move write operation lower */
                        m.writelen = max(m.writeoffset + m.writelen,
                            m.dataoffset + m.datalen)
                            - m.dataoffset;
                        m.writeoffset = m.dataoffset;
                    } else
                        m.writelen = max(m.writeoffset + m.writelen,
                            m.dataoffset + m.datalen)
                            - m.writeoffset;
                }

                /* Update the pointers for the next block */
                m.dataoffset = 0;           /* back to the start of the stripe */
                rsectors -= m.datalen;      /* remaining sectors to examine */
                m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */
            }

            if (m.badsdno == m.psdno) {     /* got a bad parity block, */
                struct sd *psd = &SD[plex->sdnos[m.psdno]];

                if (psd->state == sd_down)
                    set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */
                else if (psd->state == sd_crashed)
                    set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */
                m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */
                m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */
                plex->parityless_writes++;  /* count another one */
            }
        }

        /* reset the initial transfer values */
        m.dataoffset = m.initoffset;        /* start at the beginning of the transfer */
        m.datalen = m.initlen;

        /* decide how many requests we need */
        if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))
            /* doing a recovery read or degraded write, */
            m.rqcount = plex->subdisks;     /* all subdisks */
        else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */
            m.rqcount = m.sdcount + 1;      /* all data blocks and the parity block */
        else                                /* parityless write or normal read */
            m.rqcount = m.sdcount;          /* just the data blocks */

        /* Part C: build the requests */
        rqg = allocrqg(rq, m.rqcount);      /* get a request group */
        if (rqg == NULL) {                  /* malloc failed */
            bp->b_flags |= B_ERROR;
            bp->b_error = ENOMEM;
            biodone(bp);
            return REQUEST_ENOMEM;
        }
        rqg->plexno = plexno;
        rqg->flags = m.flags;
        rqno = 0;                           /* index in the request group */

        /* 1: PARITY BLOCK */
        /*
         * Are we performing an operation which requires parity?  In that
         * case, work out the parameters and define the parity block.
         * XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE
         */
        if (m.flags & XFR_PARITYOP) {       /* need parity */
            rqe = &rqg->rqe[rqno];          /* point to element */
            sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */
            rqe->rqg = rqg;                 /* point back to group */
            rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */
                &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuff */
            setrqebounds(rqe, &m);          /* set up the bounds of the transfer */
            rqe->sdno = sd->sdno;           /* subdisk number */
            rqe->driveno = sd->driveno;
            if (build_rq_buffer(rqe, plex)) /* build the buffer */
                return REQUEST_ENOMEM;      /* can't do it */
            rqe->b.b_flags |= B_READ;       /* we must read first */
            m.sdcount++;                    /* adjust the subdisk count */
            rqno++;                         /* and point to the next request */
        }
        /*
         * 2: DATA BLOCKS
         * Now build up requests for the blocks required
         * for individual transfers
         */
        for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) {
            if (mysdno == m.psdno)          /* parity, */
                mysdno++;                   /* we've given already */
            if (mysdno == plex->subdisks)   /* got to the end, */
                mysdno = 0;                 /* wrap around */
            if (mysdno == m.psdno)          /* parity, */
                mysdno++;                   /* we've given already */

            rqe = &rqg->rqe[rqno];          /* point to element */
            sd = &SD[plex->sdnos[mysdno]];  /* the subdisk in question */
            rqe->rqg = rqg;                 /* point to group */
            if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */
                rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */
            else
                rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */
            if (mysdno == m.badsdno) {      /* this is the bad subdisk */
                rqg->badsdno = rqno;        /* note which one */
                rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */
                /*
                 * we can't read or write from/to it,
                 * but we don't need to malloc
                 */
                rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE);
            }
            setrqebounds(rqe, &m);          /* set up the bounds of the transfer */
            rqe->useroffset = m.useroffset; /* offset in user buffer */
            rqe->sdno = sd->sdno;           /* subdisk number */
            rqe->driveno = sd->driveno;
            if (build_rq_buffer(rqe, plex)) /* build the buffer */
                return REQUEST_ENOMEM;      /* can't do it */
            if ((m.flags & XFR_PARITYOP)    /* parity operation, */
                &&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */
                rqe->b.b_flags |= B_READ;   /* we must read first */

            /* Now update pointers for the next block */
            *diskaddr += m.datalen;         /* skip past what we've done */
            m.stripesectors -= m.datalen;   /* deduct from what's left */
            m.useroffset += m.datalen;      /* and move on in the user buffer */
            m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */
            m.dataoffset = 0;               /* start at the beginning of next block */
        }

        /*
         * 3: REMAINING BLOCKS FOR RECOVERY
         * Finally, if we have a recovery operation, build
         * up transfers for the other subdisks.  Follow the
         * subdisks around until we get to where we started.
         * These requests use only the group parameters.
         */
        if ((rqno < m.rqcount)              /* haven't done them all already */
            &&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) {
            for (; rqno < m.rqcount; rqno++, mysdno++) {
                if (mysdno == m.psdno)      /* parity, */
                    mysdno++;               /* we've given already */
                if (mysdno == plex->subdisks) /* got to the end, */
                    mysdno = 0;             /* wrap around */
                if (mysdno == m.psdno)      /* parity, */
                    mysdno++;               /* we've given already */

                rqe = &rqg->rqe[rqno];      /* point to element */
                sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */
                rqe->rqg = rqg;             /* point to group */

                rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */
                rqe->dataoffset = 0;        /* for tidiness' sake */
                rqe->groupoffset = 0;       /* group starts at the beginning */
                rqe->datalen = 0;
                rqe->grouplen = m.grouplen;
                rqe->buflen = m.grouplen;
                rqe->flags = (m.flags | XFR_MALLOCED) /* transfer flags without data op stuff */
                    &~XFR_DATAOP;
                rqe->sdno = sd->sdno;       /* subdisk number */
                rqe->driveno = sd->driveno;
                if (build_rq_buffer(rqe, plex)) /* build the buffer */
                    return REQUEST_ENOMEM;  /* can't do it */
                rqe->b.b_flags |= B_READ;   /* we must read first */
            }
        }
        /*
         * We need to lock the address range before
         * doing anything.  We don't have to be
         * performing a recovery operation: somebody
         * else could be doing so, and the results could
         * influence us.  Note the fact here, we'll perform
         * the lock in launch_requests.
         */
        rqg->lockbase = m.stripebase;
        if (*diskaddr < diskend)            /* didn't finish the request on this stripe */
            plex->multistripe++;            /* count another one */
    }
    return REQUEST_OK;
}