Example #1
/*
 * Recover a failed I/O operation.
 *
 * The correct way to do this is to examine the request and determine
 * how to recover each individual failure.  In the case of a write,
 * this could be as simple as doing nothing: the defective drives may
 * already be down, and there may be nothing else to do.  In case of
 * a read, it will be necessary to retry if there are alternative
 * copies of the data.
 *
 * The easy way (here) is just to reissue the request.  This will take
 * a little longer, but nothing like as long as the failure will have
 * taken.
 *
 */
void
recover_io(struct request *rq)
{
    /*
     * This should read:
     *
     *     vinumstrategy(rq->bp);
     *
     * Negotiate with phk to get it fixed.
     */
    BUF_STRATEGY(rq->bp, 0);				    /* reissue the command */
}
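
The header comment describes the "correct" recovery (examine each failure and treat reads and writes differently), while the function above takes the shortcut of reissuing the whole request. Below is a minimal sketch of the per-failure approach under stated assumptions: request_is_read() and has_alternate_copy() are hypothetical helpers, not part of vinum.

/*
 * Sketch only: per-failure recovery as outlined in the comment above.
 * request_is_read() and has_alternate_copy() are hypothetical helpers.
 */
void
recover_io_sketch(struct request *rq)
{
    if (request_is_read(rq)) {				    /* read: retry only if another copy exists */
	if (has_alternate_copy(rq))
	    BUF_STRATEGY(rq->bp, 0);			    /* reissue against the surviving plex */
	else {
	    rq->bp->b_flags |= B_ERROR;			    /* no alternative copy: fail the request */
	    rq->bp->b_error = EIO;
	    biodone(rq->bp);
	}
    }
    /* write: the defective drives are already down, nothing more to do */
}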
Example #2
int
physio(dev_t dev, struct uio *uio, int ioflag)
{
	int i;
	int error;
	int spl;
	caddr_t sa;
	off_t blockno;
	u_int iolen;
	struct buf *bp;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = bp->b_error = 0;

	/* XXX: sanity check */
	if (dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			if (uio->uio_rw == UIO_READ)
				bp->b_flags = B_PHYS | B_CALL | B_READ;
			else 
				bp->b_flags = B_PHYS | B_CALL | B_WRITE;
			bp->b_dev = dev;
			bp->b_iodone = physwakeup;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/* 
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			blockno = bp->b_offset >> DEV_BSHIFT;
			if ((daddr_t)blockno != blockno) {
				error = EINVAL; /* blockno overflow */
				goto doerror;
			}
			bp->b_blkno = blockno;

			if (uio->uio_segflg == UIO_USERSPACE) {
				if (!useracc(bp->b_data, bp->b_bufsize,
				    bp->b_flags & B_READ ?
				    VM_PROT_WRITE : VM_PROT_READ)) {
					error = EFAULT;
					goto doerror;
				}
				vmapbuf(bp);
			}

			BUF_STRATEGY(bp, 0);
			spl = splbio();
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t)bp, PRIBIO, "physstr", 0);
			splx(spl);

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_flags & B_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base += iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if (bp->b_flags & B_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
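
physio() is normally reached from a driver's read and write entry points, which simply delegate the uio to it. A minimal sketch of that calling convention for the signature shown above; mydev_read() and mydev_write() are made-up entry points for an arbitrary character device.

/*
 * Sketch: typical character-device entry points that delegate to physio().
 * mydev_read() and mydev_write() are hypothetical driver routines.
 */
static int
mydev_read(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(dev, uio, ioflag));
}

static int
mydev_write(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(dev, uio, ioflag));
}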
Example #3
/*
 * Call the low-level strategy routines to
 * perform the requests in a struct request
 */
int
launch_requests(struct request *rq, int reviveok)
{
    int s;
    struct rqgroup *rqg;
    int rqno;						    /* loop index */
    struct rqelement *rqe;				    /* current element */
    struct drive *drive;
    int rcount;						    /* request count */

    /*
     * First find out whether we're reviving and whether the
     * request contains a conflict.  If so, we hang the
     * request off the waitlist of the subdisk being revived
     * and return.
     */
    if ((rq->flags & XFR_REVIVECONFLICT)		    /* possible revive conflict */
    &&(!reviveok)) {					    /* and we don't want to do it now, */
	struct sd *sd;
	struct request *waitlist;			    /* point to the waitlist */

	sd = &SD[rq->sdno];
	if (sd->waitlist != NULL) {			    /* something there already, */
	    waitlist = sd->waitlist;
	    while (waitlist->next != NULL)		    /* find the end */
		waitlist = waitlist->next;
	    waitlist->next = rq;			    /* hook our request there */
	} else
	    sd->waitlist = rq;				    /* hook our request at the front */

#if VINUMDEBUG
	if (debug & DEBUG_REVIVECONFLICT)
	    log(LOG_DEBUG,
		"Revive conflict sd %d: %p\n%s dev %d.%d, offset 0x%x, length %ld\n",
		rq->sdno,
		rq,
		rq->bp->b_flags & B_READ ? "Read" : "Write",
		major(rq->bp->b_dev),
		minor(rq->bp->b_dev),
		rq->bp->b_blkno,
		rq->bp->b_bcount);
#endif
	return 0;					    /* and get out of here */
    }
    rq->active = 0;					    /* nothing yet */
#if VINUMDEBUG
    if (debug & DEBUG_ADDRESSES)
	log(LOG_DEBUG,
	    "Request: %p\n%s dev %d.%d, offset 0x%x, length %ld\n",
	    rq,
	    rq->bp->b_flags & B_READ ? "Read" : "Write",
	    major(rq->bp->b_dev),
	    minor(rq->bp->b_dev),
	    rq->bp->b_blkno,
	    rq->bp->b_bcount);
    vinum_conf.lastrq = rq;
    vinum_conf.lastbuf = rq->bp;
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_user_bpl, (union rqinfou) rq->bp, rq->bp);
#endif

    /*
     * With the division of labour below (first count the requests, then
     * issue them), it's possible that we don't need this splbio()
     * protection.  But I'll try that some other time.
     */
    s = splbio();
    for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) {	    /* through the whole request chain */
	rqg->active = rqg->count;			    /* they're all active */
	for (rqno = 0; rqno < rqg->count; rqno++) {
	    rqe = &rqg->rqe[rqno];
	    if (rqe->flags & XFR_BAD_SUBDISK)		    /* this subdisk is bad, */
		rqg->active--;				    /* one less active request */
	}
	if (rqg->active)				    /* we have at least one active request, */
	    rq->active++;				    /* one more active request group */
    }

    /* Now fire off the requests */
    for (rqg = rq->rqg; rqg != NULL;) {			    /* through the whole request chain */
	if (rqg->lockbase >= 0)				    /* this rqg needs a lock first */
	    rqg->lock = lockrange(rqg->lockbase, rqg->rq->bp, &PLEX[rqg->plexno]);
	rcount = rqg->count;
	for (rqno = 0; rqno < rcount;) {
	    rqe = &rqg->rqe[rqno];

	    /*
	     * Point to next rqg before the bottom end
	     * changes the structures.
	     */
	    if (++rqno >= rcount)
		rqg = rqg->next;
	    if ((rqe->flags & XFR_BAD_SUBDISK) == 0) {	    /* this subdisk is good, */
		drive = &DRIVE[rqe->driveno];		    /* look at drive */
		drive->active++;
		if (drive->active >= drive->maxactive)
		    drive->maxactive = drive->active;
		vinum_conf.active++;
		if (vinum_conf.active >= vinum_conf.maxactive)
		    vinum_conf.maxactive = vinum_conf.active;

#if VINUMDEBUG
		if (debug & DEBUG_ADDRESSES)
		    log(LOG_DEBUG,
			"  %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
			rqe->b.b_flags & B_READ ? "Read" : "Write",
			major(rqe->b.b_dev),
			minor(rqe->b.b_dev),
			rqe->sdno,
			(u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset),
			rqe->b.b_blkno,
			rqe->b.b_bcount);
		if (debug & DEBUG_LASTREQS)
		    logrq(loginfo_rqe, (union rqinfou) rqe, rq->bp);
#endif


		/* fire off the request */
		BUF_STRATEGY(&rqe->b, 0);
	    }
	}
    }
    splx(s);
    return 0;
}
Example #4
/* Perform I/O on a subdisk */
void
sdio(struct buf *bp)
{
    int s;						    /* spl */
    struct sd *sd;
    struct sdbuf *sbp;
    daddr_t endoffset;
    struct drive *drive;

#if VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_sdio, (union rqinfou) bp, bp);
#endif
    sd = &SD[Sdno(bp->b_dev)];				    /* point to the subdisk */
    drive = &DRIVE[sd->driveno];

    if (drive->state != drive_up) {
	if (sd->state >= sd_crashed) {
	    if (bp->b_flags & B_READ)			    /* reading, */
		set_sd_state(sd->sdno, sd_crashed, setstate_force);
	    else
		set_sd_state(sd->sdno, sd_stale, setstate_force);
	}
	bp->b_flags |= B_ERROR;
	bp->b_error = EIO;
	biodone(bp);
	return;
    }
    /*
     * We allow access to any kind of subdisk as long as we can expect
     * to get the I/O performed.
     */
    if (sd->state < sd_empty) {				    /* nothing to talk to, */
	bp->b_flags |= B_ERROR;
	bp->b_error = EIO;
	biodone(bp);
	return;
    }
    /* Get a buffer */
    sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf));
    if (sbp == NULL) {
	bp->b_flags |= B_ERROR;
	bp->b_error = ENOMEM;
	biodone(bp);
	return;
    }
    bzero(sbp, sizeof(struct sdbuf));			    /* start with nothing */
    sbp->b.b_flags = bp->b_flags | B_CALL;		    /* inform us when it's done */
    sbp->b.b_bufsize = bp->b_bufsize;			    /* buffer size */
    sbp->b.b_bcount = bp->b_bcount;			    /* number of bytes to transfer */
    sbp->b.b_resid = bp->b_resid;			    /* and amount waiting */
    sbp->b.b_dev = DRIVE[sd->driveno].dev;		    /* device */
    sbp->b.b_data = bp->b_data;				    /* data buffer */
    sbp->b.b_blkno = bp->b_blkno + sd->driveoffset;
    sbp->b.b_iodone = sdio_done;			    /* come here on completion */
    BUF_LOCKINIT(&sbp->b);				    /* get a lock for the buffer */
    BUF_LOCK(&sbp->b, LK_EXCLUSIVE);			    /* and lock it */
    sbp->bp = bp;					    /* note the address of the original header */
    sbp->sdno = sd->sdno;				    /* note for statistics */
    sbp->driveno = sd->driveno;
    endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE;  /* final sector offset */
    if (endoffset > sd->sectors) {			    /* beyond the end */
	sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */
	if (sbp->b.b_bcount <= 0) {			    /* nothing to transfer */
	    bp->b_resid = bp->b_bcount;			    /* nothing transferred */
	    biodone(bp);
	    Free(sbp);
	    return;
	}
    }
#if VINUMDEBUG
    if (debug & DEBUG_ADDRESSES)
	log(LOG_DEBUG,
	    "  %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n",
	    sbp->b.b_flags & B_READ ? "Read" : "Write",
	    major(sbp->b.b_dev),
	    minor(sbp->b.b_dev),
	    sbp->sdno,
	    (u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset),
	    (int) sbp->b.b_blkno,
	    sbp->b.b_bcount);
#endif
    s = splbio();
#if VINUMDEBUG
    if (debug & DEBUG_LASTREQS)
	logrq(loginfo_sdiol, (union rqinfou) &sbp->b, &sbp->b);
#endif
    BUF_STRATEGY(&sbp->b, 0);
    splx(s);
}
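
sdio() hands completion off through b_iodone to sdio_done(), which is not shown in this example. Below is a hedged sketch of what such a completion callback typically does with the shadow buffer set up above: propagate residual and error back to the original header, complete it with biodone(), and release the lock and memory taken in sdio(). This illustrates the pattern only; it is not the actual vinum sdio_done().

/*
 * Sketch of a completion callback in the style of sdio_done().
 * Illustration only; not the vinum implementation.
 */
void
sdio_done_sketch(struct buf *b)
{
    struct sdbuf *sbp = (struct sdbuf *) b;		    /* b is the first member of struct sdbuf */
    struct buf *bp = sbp->bp;				    /* original request header */

    bp->b_resid = sbp->b.b_resid;			    /* pass the residual count back */
    if (sbp->b.b_flags & B_ERROR) {			    /* propagate any error */
	bp->b_flags |= B_ERROR;
	bp->b_error = sbp->b.b_error;
    }
    biodone(bp);					    /* complete the original request */
    BUF_UNLOCK(&sbp->b);				    /* balance the BUF_LOCK taken in sdio() */
    Free(sbp);						    /* and release the shadow buffer */
}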
Example #5
/*
 * Write disk label back to device after modification.
 */
static int
l32_writedisklabel(cdev_t dev, struct diskslices *ssp, struct diskslice *sp,
		   disklabel_t lpx)
{
	struct disklabel32 *lp;
	struct disklabel32 *dlp;
	struct buf *bp;
	const char *msg;
	int error = 0;

	lp = lpx.lab32;

	if (lp->d_partitions[RAW_PART].p_offset != 0)
		return (EXDEV);			/* not quite right */

	bp = geteblk((int)lp->d_secsize);
	bp->b_bio1.bio_offset = (off_t)LABELSECTOR32 * lp->d_secsize;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	bp->b_bcount = lp->d_secsize;

#if 1
	/*
	 * We read the label first to see if it's there,
	 * in which case we will put ours at the same offset into the block..
	 * (I think this is stupid [Julian])
	 * Note that you can't write a label out over a corrupted label!
	 * (also stupid.. how do you write the first one? by raw writes?)
	 */
	bp->b_flags &= ~B_INVAL;
	bp->b_cmd = BUF_CMD_READ;
	KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
	dev_dstrategy(dev, &bp->b_bio1);
	error = biowait(&bp->b_bio1, "labrd");
	if (error)
		goto done;
	for (dlp = (struct disklabel32 *)bp->b_data;
	    dlp <= (struct disklabel32 *)
	      ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
	    dlp = (struct disklabel32 *)((char *)dlp + sizeof(long))) {
		if (dlp->d_magic == DISKMAGIC32 &&
		    dlp->d_magic2 == DISKMAGIC32 && dkcksum32(dlp) == 0) {
			*dlp = *lp;
			lpx.lab32 = dlp;
			msg = l32_fixlabel(NULL, sp, lpx, TRUE);
			if (msg) {
				error = EINVAL;
			} else {
				bp->b_cmd = BUF_CMD_WRITE;
				bp->b_bio1.bio_done = biodone_sync;
				bp->b_bio1.bio_flags |= BIO_SYNC;
				KKASSERT(dkpart(dev) == WHOLE_SLICE_PART);
				dev_dstrategy(dev, &bp->b_bio1);
				error = biowait(&bp->b_bio1, "labwr");
			}
			goto done;
		}
	}
	error = ESRCH;
done:
#else
	bzero(bp->b_data, lp->d_secsize);
	dlp = (struct disklabel32 *)bp->b_data;
	*dlp = *lp;
	bp->b_flags &= ~B_INVAL;
	bp->b_cmd = BUF_CMD_WRITE;
	bp->b_bio1.bio_done = biodone_sync;
	bp->b_bio1.bio_flags |= BIO_SYNC;
	BUF_STRATEGY(bp, 1);
	error = biowait(&bp->b_bio1, "labwr");
#endif
	bp->b_flags |= B_INVAL | B_AGE;
	brelse(bp);
	return (error);
}