static void physio_done(struct work *wk, void *dummy) { struct buf *bp = (void *)wk; size_t todo = bp->b_bufsize; size_t done = bp->b_bcount - bp->b_resid; struct physio_stat *ps = bp->b_private; bool is_iobuf; KASSERT(&bp->b_work == wk); KASSERT(bp->b_bcount <= todo); KASSERT(bp->b_resid <= bp->b_bcount); KASSERT((bp->b_flags & B_PHYS) != 0); KASSERT(dummy == NULL); vunmapbuf(bp, todo); uvm_vsunlock(bp->b_proc->p_vmspace, bp->b_data, todo); mutex_enter(&ps->ps_lock); is_iobuf = (bp != ps->ps_orig_bp); if (__predict_false(done != todo)) { off_t endoffset = dbtob(bp->b_blkno) + done; /* * we got an error or hit EOM. * * we only care about the first one. * ie. the one at the lowest offset. */ KASSERT(ps->ps_endoffset != endoffset); DPRINTF(("%s: error=%d at %" PRIu64 " - %" PRIu64 ", blkno=%" PRIu64 ", bcount=%d, flags=0x%x\n", __func__, bp->b_error, dbtob(bp->b_blkno), endoffset, bp->b_blkno, bp->b_bcount, bp->b_flags)); if (ps->ps_endoffset == -1 || endoffset < ps->ps_endoffset) { DPRINTF(("%s: ps=%p, error %d -> %d, endoff %" PRIu64 " -> %" PRIu64 "\n", __func__, ps, ps->ps_error, bp->b_error, ps->ps_endoffset, endoffset)); ps->ps_endoffset = endoffset; ps->ps_error = bp->b_error; } ps->ps_failed++; } else { KASSERT(bp->b_error == 0); } ps->ps_running--; cv_signal(&ps->ps_cv); mutex_exit(&ps->ps_lock); if (is_iobuf) putiobuf(bp); }
int physio(struct cdev *dev, struct uio *uio, int ioflag) { int i; int error; caddr_t sa; u_int iolen; struct buf *bp; /* Keep the process UPAGES from being swapped. XXX: why ? */ PHOLD(curproc); bp = getpbuf(NULL); sa = bp->b_data; error = 0; /* XXX: sanity check */ if(dev->si_iosize_max < PAGE_SIZE) { printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n", devtoname(dev), dev->si_iosize_max); dev->si_iosize_max = DFLTPHYS; } for (i = 0; i < uio->uio_iovcnt; i++) { while (uio->uio_iov[i].iov_len) { bp->b_flags = 0; if (uio->uio_rw == UIO_READ) { bp->b_iocmd = BIO_READ; curthread->td_ru.ru_inblock++; } else { bp->b_iocmd = BIO_WRITE; curthread->td_ru.ru_oublock++; } bp->b_iodone = bdone; bp->b_data = uio->uio_iov[i].iov_base; bp->b_bcount = uio->uio_iov[i].iov_len; bp->b_offset = uio->uio_offset; bp->b_iooffset = uio->uio_offset; bp->b_saveaddr = sa; /* Don't exceed drivers iosize limit */ if (bp->b_bcount > dev->si_iosize_max) bp->b_bcount = dev->si_iosize_max; /* * Make sure the pbuf can map the request * XXX: The pbuf has kvasize = MAXPHYS so a request * XXX: larger than MAXPHYS - PAGE_SIZE must be * XXX: page aligned or it will be fragmented. 
*/ iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK; if ((bp->b_bcount + iolen) > bp->b_kvasize) { bp->b_bcount = bp->b_kvasize; if (iolen != 0) bp->b_bcount -= PAGE_SIZE; } bp->b_bufsize = bp->b_bcount; bp->b_blkno = btodb(bp->b_offset); if (uio->uio_segflg == UIO_USERSPACE) if (vmapbuf(bp) < 0) { error = EFAULT; goto doerror; } dev_strategy(dev, bp); if (uio->uio_rw == UIO_READ) bwait(bp, PRIBIO, "physrd"); else bwait(bp, PRIBIO, "physwr"); if (uio->uio_segflg == UIO_USERSPACE) vunmapbuf(bp); iolen = bp->b_bcount - bp->b_resid; if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base = (char *)uio->uio_iov[i].iov_base + iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; if( bp->b_ioflags & BIO_ERROR) { error = bp->b_error; goto doerror; } } } doerror: relpbuf(bp, NULL); PRELE(curproc); return (error); }
static int ffs_rawread_main(struct vnode *vp, struct uio *uio) { int error, nerror; struct buf *bp, *nbp, *tbp; int iolen; caddr_t udata; int resid; off_t offset; udata = uio->uio_iov->iov_base; resid = uio->uio_resid; offset = uio->uio_offset; error = 0; nerror = 0; bp = NULL; nbp = NULL; while (resid > 0) { if (bp == NULL) { /* Setup first read */ /* XXX: Leave some bufs for swap */ bp = getpbuf_kva(&vp->v_mount->mnt_pbuf_count); error = ffs_rawread_readahead(vp, udata, offset, resid, bp); if (error != 0) break; if (resid > bp->b_bufsize) { /* Setup fist readahead */ /* XXX: Leave bufs for swap */ if (rawreadahead != 0) nbp = trypbuf_kva(&vp->v_mount->mnt_pbuf_count); else nbp = NULL; if (nbp != NULL) { nerror = ffs_rawread_readahead( vp, udata + bp->b_bufsize, offset + bp->b_bufsize, resid - bp->b_bufsize, nbp); if (nerror) { relpbuf(nbp, &vp->v_mount->mnt_pbuf_count); nbp = NULL; } } } } biowait(&bp->b_bio1, "rawrd"); vunmapbuf(bp); iolen = bp->b_bcount - bp->b_resid; if (iolen == 0 && (bp->b_flags & B_ERROR) == 0) { nerror = 0; /* Ignore possible beyond EOF error */ break; /* EOF */ } if ((bp->b_flags & B_ERROR) != 0) { error = bp->b_error; break; } clearbiocache(&bp->b_bio2); resid -= iolen; udata += iolen; offset += iolen; if (iolen < bp->b_bufsize) { /* Incomplete read. 
Try to read remaining part */ error = ffs_rawread_readahead( vp, udata, offset, bp->b_bufsize - iolen, bp); if (error != 0) break; } else if (nbp != NULL) { /* Complete read with readahead */ tbp = bp; bp = nbp; nbp = tbp; clearbiocache(&nbp->b_bio2); if (resid <= bp->b_bufsize) { /* No more readaheads */ relpbuf(nbp, &vp->v_mount->mnt_pbuf_count); nbp = NULL; } else { /* Setup next readahead */ nerror = ffs_rawread_readahead( vp, udata + bp->b_bufsize, offset + bp->b_bufsize, resid - bp->b_bufsize, nbp); if (nerror != 0) { relpbuf(nbp, &vp->v_mount->mnt_pbuf_count); nbp = NULL; } } } else if (nerror != 0) {/* Deferred Readahead error */ break; } else if (resid > 0) { /* More to read, no readahead */ error = ffs_rawread_readahead(vp, udata, offset, resid, bp); if (error != 0) break; } } if (bp != NULL) relpbuf(bp, &vp->v_mount->mnt_pbuf_count); if (nbp != NULL) { /* Run down readahead buffer */ biowait(&nbp->b_bio1, "rawrd"); vunmapbuf(nbp); relpbuf(nbp, &vp->v_mount->mnt_pbuf_count); } if (error == 0) error = nerror; uio->uio_iov->iov_base = udata; uio->uio_resid = resid; uio->uio_offset = offset; return error; }
int physio(struct cdev *dev, struct uio *uio, int ioflag) { struct buf *bp; struct cdevsw *csw; caddr_t sa; u_int iolen; int error, i, mapped; /* Keep the process UPAGES from being swapped. XXX: why ? */ PHOLD(curproc); bp = getpbuf(NULL); sa = bp->b_data; error = 0; /* XXX: sanity check */ if(dev->si_iosize_max < PAGE_SIZE) { printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n", devtoname(dev), dev->si_iosize_max); dev->si_iosize_max = DFLTPHYS; } /* * If the driver does not want I/O to be split, that means that we * need to reject any requests that will not fit into one buffer. */ if (dev->si_flags & SI_NOSPLIT && (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS || uio->uio_iovcnt > 1)) { /* * Tell the user why his I/O was rejected. */ if (uio->uio_resid > dev->si_iosize_max) uprintf("%s: request size=%zd > si_iosize_max=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, dev->si_iosize_max); if (uio->uio_resid > MAXPHYS) uprintf("%s: request size=%zd > MAXPHYS=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, MAXPHYS); if (uio->uio_iovcnt > 1) uprintf("%s: request vectors=%d > 1; " "cannot split request\n", devtoname(dev), uio->uio_iovcnt); error = EFBIG; goto doerror; } for (i = 0; i < uio->uio_iovcnt; i++) { while (uio->uio_iov[i].iov_len) { bp->b_flags = 0; if (uio->uio_rw == UIO_READ) { bp->b_iocmd = BIO_READ; curthread->td_ru.ru_inblock++; } else { bp->b_iocmd = BIO_WRITE; curthread->td_ru.ru_oublock++; } bp->b_iodone = bdone; bp->b_data = uio->uio_iov[i].iov_base; bp->b_bcount = uio->uio_iov[i].iov_len; bp->b_offset = uio->uio_offset; bp->b_iooffset = uio->uio_offset; bp->b_saveaddr = sa; /* Don't exceed drivers iosize limit */ if (bp->b_bcount > dev->si_iosize_max) bp->b_bcount = dev->si_iosize_max; /* * Make sure the pbuf can map the request * XXX: The pbuf has kvasize = MAXPHYS so a request * XXX: larger than MAXPHYS - PAGE_SIZE must be * XXX: page aligned or it will be fragmented. 
*/ iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK; if ((bp->b_bcount + iolen) > bp->b_kvasize) { /* * This device does not want I/O to be split. */ if (dev->si_flags & SI_NOSPLIT) { uprintf("%s: request ptr %p is not " "on a page boundary; cannot split " "request\n", devtoname(dev), bp->b_data); error = EFBIG; goto doerror; } bp->b_bcount = bp->b_kvasize; if (iolen != 0) bp->b_bcount -= PAGE_SIZE; } bp->b_bufsize = bp->b_bcount; bp->b_blkno = btodb(bp->b_offset); csw = dev->si_devsw; if (uio->uio_segflg == UIO_USERSPACE) { if (dev->si_flags & SI_UNMAPPED) mapped = 0; else mapped = 1; if (vmapbuf(bp, mapped) < 0) { error = EFAULT; goto doerror; } } dev_strategy_csw(dev, csw, bp); if (uio->uio_rw == UIO_READ) bwait(bp, PRIBIO, "physrd"); else bwait(bp, PRIBIO, "physwr"); if (uio->uio_segflg == UIO_USERSPACE) vunmapbuf(bp); iolen = bp->b_bcount - bp->b_resid; if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base = (char *)uio->uio_iov[i].iov_base + iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; if( bp->b_ioflags & BIO_ERROR) { error = bp->b_error; goto doerror; } } } doerror: relpbuf(bp, NULL); PRELE(curproc); return (error); }
/*
 * physio: carry out the transfer described by `uio' against `dev' using a
 * single pbuf obtained from getpbuf().
 *
 * Every iovec is consumed in chunks limited by dev->si_iosize_max and by
 * what the pbuf's b_kvasize can hold given the page offset of the data
 * pointer.  Each chunk is issued with BUF_STRATEGY() and the caller then
 * sleeps at splbio until B_DONE is set.  UIO_USERSPACE chunks are checked
 * with useracc() and bracketed by vmapbuf()/vunmapbuf().
 *
 * Returns 0 on success or when a chunk moves no data without an error
 * (treated as EOF), EINVAL when the block number does not fit in daddr_t,
 * EFAULT when useracc() rejects the user range, or bp->b_error when the
 * buffer comes back with B_ERROR set.  `ioflag' is not used here.
 */
int
physio(dev_t dev, struct uio *uio, int ioflag)
{
	struct buf *bp;
	caddr_t pbuf_kva;
	off_t blockno;
	u_int pgoff;		/* offset of the data pointer within its page */
	u_int moved;		/* bytes transferred by the current chunk */
	int error;
	int i;
	int spl;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	pbuf_kva = bp->b_data;
	bp->b_error = 0;
	error = 0;

	/* XXX: sanity check */
	if (dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len != 0) {
			/* Fresh per-chunk buffer state. */
			if (uio->uio_rw != UIO_READ)
				bp->b_flags = B_PHYS | B_CALL | B_WRITE;
			else
				bp->b_flags = B_PHYS | B_CALL | B_READ;
			bp->b_dev = dev;
			bp->b_iodone = physwakeup;
			bp->b_saveaddr = pbuf_kva;
			bp->b_offset = uio->uio_offset;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/*
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			pgoff = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + pgoff) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (pgoff != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			blockno = bp->b_offset >> DEV_BSHIFT;
			if ((daddr_t)blockno != blockno) {
				error = EINVAL;	/* blockno overflow */
				goto out;
			}
			bp->b_blkno = blockno;

			if (uio->uio_segflg == UIO_USERSPACE) {
				/* A device read writes into user memory. */
				if (!useracc(bp->b_data, bp->b_bufsize,
				    bp->b_flags & B_READ ?
				    VM_PROT_WRITE : VM_PROT_READ)) {
					error = EFAULT;
					goto out;
				}
				vmapbuf(bp);
			}

			BUF_STRATEGY(bp, 0);

			/* Sleep until the buffer is marked done. */
			spl = splbio();
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t)bp, PRIBIO, "physstr", 0);
			splx(spl);

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);

			moved = bp->b_bcount - bp->b_resid;
			if (moved == 0 && !(bp->b_flags & B_ERROR))
				goto out;	/* EOF */

			/* Account for what was moved, even on error. */
			uio->uio_iov[i].iov_len -= moved;
			uio->uio_iov[i].iov_base += moved;
			uio->uio_resid -= moved;
			uio->uio_offset += moved;

			if (bp->b_flags & B_ERROR) {
				error = bp->b_error;
				goto out;
			}
		}
	}

out:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
/*
 * Do "physical I/O" on behalf of a user.  "Physical I/O" is I/O directly
 * from the raw device to user buffers, and bypasses the buffer cache.
 *
 * Comments in brackets are from Leffler, et al.'s pseudo-code implementation.
 *
 * Parameters:
 *   strategy  routine called to start each transfer on the buffer
 *   bp        buffer to use, or NULL to borrow one via getphysbuf()
 *   dev       device stored in bp->b_dev
 *   flags     only B_READ / B_WRITE are honoured
 *   minphys   routine called to bound bp->b_bcount for each transfer
 *   uio       describes the user memory and device offset; it is advanced
 *             by the number of bytes actually transferred
 *
 * Returns 0 on success, the uvm_vslock() error if the user range cannot
 * be locked, or the buffer's error (EIO if B_ERROR is set with a zero
 * b_error).  A short transfer without an error ends the loop and
 * returns 0.
 */
int
physio(void (*strategy)(struct buf *), struct buf *bp, dev_t dev, int flags,
    void (*minphys)(struct buf *), struct uio *uio)
{
	struct iovec *iovp;
	struct proc *p = curproc;
	int error, done, i, nobuf, s, todo;

	error = 0;
	flags &= B_READ | B_WRITE;

	/* Make sure we have a buffer, creating one if necessary. */
	if ((nobuf = (bp == NULL)) != 0)
		bp = getphysbuf();

	/* [raise the processor priority level to splbio;] */
	s = splbio();

	/* [while the buffer is marked busy] */
	while (bp->b_flags & B_BUSY) {
		/* [mark the buffer wanted] */
		bp->b_flags |= B_WANTED;
		/* [wait until the buffer is available] */
		tsleep(bp, PRIBIO+1, "physbuf", 0);
	}

	/* Mark it busy, so nobody else will use it. */
	bp->b_flags |= B_BUSY;

	/* [lower the priority level] */
	splx(s);

	/* [set up the fixed part of the buffer for a transfer] */
	bp->b_dev = dev;
	bp->b_error = 0;
	bp->b_proc = p;
	LIST_INIT(&bp->b_dep);

	/*
	 * [while there are data to transfer and no I/O error]
	 * Note that I/O errors are handled with a 'goto' at the bottom
	 * of the 'while' loop.
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		iovp = &uio->uio_iov[i];
		while (iovp->iov_len > 0) {
			/*
			 * [mark the buffer busy for physical I/O]
			 * (i.e. set B_PHYS (because it's an I/O to user
			 * memory), and B_RAW, because B_RAW is to be
			 * "Set by physio for raw transfers.", in addition
			 * to the "busy" and read/write flag.)
			 */
			bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags;

			/* [set up the buffer for a maximum-sized transfer] */
			bp->b_blkno = btodb(uio->uio_offset);
			bp->b_data = iovp->iov_base;

			/*
			 * Because iov_len is unsigned but b_bcount is signed,
			 * an overflow is possible. Therefore bound to MAXPHYS
			 * before calling minphys.
			 */
			if (iovp->iov_len > MAXPHYS)
				bp->b_bcount = MAXPHYS;
			else
				bp->b_bcount = iovp->iov_len;

			/*
			 * [call minphys to bound the transfer size]
			 * and remember the amount of data to transfer,
			 * for later comparison.
			 */
			(*minphys)(bp);
			todo = bp->b_bcount;
#ifdef DIAGNOSTIC
			if (todo < 0)
				panic("todo < 0; minphys broken");
			if (todo > MAXPHYS)
				panic("todo > MAXPHYS; minphys broken");
#endif

			/*
			 * [lock the part of the user address space involved
			 *    in the transfer]
			 * Beware vmapbuf(); it clobbers b_data and
			 * saves it in b_saveaddr.  However, vunmapbuf()
			 * restores it.
			 */
			error = uvm_vslock(p, bp->b_data, todo,
			    (flags & B_READ) ?
			    VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ);
			if (error) {
				/*
				 * The strategy routine never ran for this
				 * chunk, so nothing in this function has set
				 * b_resid.  Mark the whole chunk as residual
				 * so the accounting below sees done == 0
				 * instead of deriving it from a stale b_resid
				 * and advancing the uio over data that was
				 * never transferred.
				 */
				bp->b_flags |= B_ERROR;
				bp->b_error = error;
				bp->b_resid = bp->b_bcount;
				goto after_unlock;
			}
			vmapbuf(bp, todo);

			/* [call strategy to start the transfer] */
			(*strategy)(bp);

			/*
			 * Note that the raise/wait/lower/get error
			 * steps below would be done by biowait(), but
			 * we want to unlock the address space before
			 * we lower the priority.
			 *
			 * [raise the priority level to splbio]
			 */
			s = splbio();

			/* [wait for the transfer to complete] */
			while ((bp->b_flags & B_DONE) == 0)
				tsleep(bp, PRIBIO + 1, "physio", 0);

			/* Mark it busy again, so nobody else will use it. */
			bp->b_flags |= B_BUSY;

			/* [lower the priority level] */
			splx(s);

			/*
			 * [unlock the part of the address space previously
			 *    locked]
			 */
			vunmapbuf(bp, todo);
			uvm_vsunlock(p, bp->b_data, todo);
after_unlock:

			/* remember error value (save a splbio/splx pair) */
			if (bp->b_flags & B_ERROR)
				error = (bp->b_error ? bp->b_error : EIO);

			/*
			 * [deduct the transfer size from the total number
			 *    of data to transfer]
			 */
			done = bp->b_bcount - bp->b_resid;
#ifdef DIAGNOSTIC
			if (done < 0)
				panic("done < 0; strategy broken");
			if (done > todo)
				panic("done > todo; strategy broken");
#endif
			iovp->iov_len -= done;
			iovp->iov_base = (caddr_t)iovp->iov_base + done;
			uio->uio_offset += done;
			uio->uio_resid -= done;

			/*
			 * Now, check for an error.
			 * Also, handle weird end-of-disk semantics.
			 */
			if (error || done < todo)
				goto done;
		}
	}

done:
	/*
	 * [clean up the state of the buffer]
	 * Remember if somebody wants it, so we can wake them up below.
	 * Also, if we had to steal it, give it back.
	 */
	s = splbio();
	bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW);
	if (nobuf)
		putphysbuf(bp);
	else {
		/*
		 * [if another process is waiting for the raw I/O buffer,
		 *    wake up processes waiting to do physical I/O]
		 */
		if (bp->b_flags & B_WANTED) {
			bp->b_flags &= ~B_WANTED;
			wakeup(bp);
		}
	}
	splx(s);

	return (error);
}