static void
physio_done(struct work *wk, void *dummy)
{
	struct buf *bp = (void *)wk;
	size_t todo = bp->b_bufsize;
	size_t done = bp->b_bcount - bp->b_resid;
	struct physio_stat *ps = bp->b_private;
	bool is_iobuf;

	KASSERT(&bp->b_work == wk);
	KASSERT(bp->b_bcount <= todo);
	KASSERT(bp->b_resid <= bp->b_bcount);
	KASSERT((bp->b_flags & B_PHYS) != 0);
	KASSERT(dummy == NULL);

	vunmapbuf(bp, todo);
	uvm_vsunlock(bp->b_proc->p_vmspace, bp->b_data, todo);

	mutex_enter(&ps->ps_lock);
	is_iobuf = (bp != ps->ps_orig_bp);
	if (__predict_false(done != todo)) {
		off_t endoffset = dbtob(bp->b_blkno) + done;

		/*
		 * we got an error or hit EOM.
		 *
		 * we only care about the first one.
		 * i.e. the one at the lowest offset.
		 */
		KASSERT(ps->ps_endoffset != endoffset);
		DPRINTF(("%s: error=%d at %" PRIu64 " - %" PRIu64
		    ", blkno=%" PRIu64 ", bcount=%d, flags=0x%x\n",
		    __func__, bp->b_error, dbtob(bp->b_blkno), endoffset,
		    bp->b_blkno, bp->b_bcount, bp->b_flags));

		if (ps->ps_endoffset == -1 || endoffset < ps->ps_endoffset) {
			DPRINTF(("%s: ps=%p, error %d -> %d, endoff %" PRIu64
			    " -> %" PRIu64 "\n",
			    __func__, ps,
			    ps->ps_error, bp->b_error,
			    ps->ps_endoffset, endoffset));

			ps->ps_endoffset = endoffset;
			ps->ps_error = bp->b_error;
		}
		ps->ps_failed++;
	} else {
		KASSERT(bp->b_error == 0);
	}

	ps->ps_running--;
	cv_signal(&ps->ps_cv);
	mutex_exit(&ps->ps_lock);

	if (is_iobuf)
		putiobuf(bp);
}
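/*
 * Issuer-side counterpart (illustrative sketch, not verbatim kernel
 * source): physio_done() above decrements ps_running and signals
 * ps_cv, so the thread that queued the I/O can sleep until the number
 * of in-flight buffers drops to a threshold.  Only the physio_stat
 * fields already referenced above (ps_lock, ps_cv, ps_running) are
 * assumed.
 */
static void
physio_wait(struct physio_stat *ps, int n)
{

	KASSERT(mutex_owned(&ps->ps_lock));

	/* Sleep until at most n buffers remain in flight. */
	while (ps->ps_running > n)
		cv_wait(&ps->ps_cv, &ps->ps_lock);
}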
/* ARGSUSED */
int
sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */
	struct proc *p = l->l_proc;
	struct vm_page *pg;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	struct vm_map_entry *entry;
	vaddr_t start, end, lim;
	struct vm_map *map;
	vsize_t len;
	int error = 0, npgs;

	map = &p->p_vmspace->vm_map;
	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return EINVAL;
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return EINVAL;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	npgs = len >> PAGE_SHIFT;
	error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
	if (error) {
		return error;
	}
	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == false) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					subyte(vec, 1);
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* upper layer */
		uobj = entry->object.uvm_obj;	/* lower layer */

		if (amap != NULL)
			amap_lock(amap);
		if (uobj != NULL)
			mutex_enter(uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the upper layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}
			if (uobj != NULL && pgi == 0) {
				/* Check the lower layer. */
				pg = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (pg != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}
			(void) subyte(vec, pgi);
		}
		if (uobj != NULL)
			mutex_exit(uobj->vmobjlock);
		if (amap != NULL)
			amap_unlock(amap);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
	return error;
}
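/*
 * Userland view of the syscall above (usage sketch): map four
 * anonymous pages, fault in only the first, and read back the
 * per-page residency vector.  Standard mmap(2)/mincore(2) interfaces;
 * error handling is abbreviated.
 */
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
	char *p, vec[4];

	p = mmap(NULL, 4 * pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		return EXIT_FAILURE;

	p[0] = 1;			/* fault in the first page only */

	if (mincore(p, 4 * pgsz, vec) == -1)
		return EXIT_FAILURE;

	for (size_t i = 0; i < 4; i++)
		printf("page %zu: %s\n", i,
		    (vec[i] & 1) ? "resident" : "not resident");

	munmap(p, 4 * pgsz);
	return EXIT_SUCCESS;
}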
/* * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly * from the raw device to user buffers, and bypasses the buffer cache. * * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. */ int physio(void (*strategy)(struct buf *), struct buf *bp, dev_t dev, int flags, void (*minphys)(struct buf *), struct uio *uio) { struct iovec *iovp; struct proc *p = curproc; int error, done, i, nobuf, s, todo; error = 0; flags &= B_READ | B_WRITE; /* Make sure we have a buffer, creating one if necessary. */ if ((nobuf = (bp == NULL)) != 0) bp = getphysbuf(); /* [raise the processor priority level to splbio;] */ s = splbio(); /* [while the buffer is marked busy] */ while (bp->b_flags & B_BUSY) { /* [mark the buffer wanted] */ bp->b_flags |= B_WANTED; /* [wait until the buffer is available] */ tsleep(bp, PRIBIO+1, "physbuf", 0); } /* Mark it busy, so nobody else will use it. */ bp->b_flags |= B_BUSY; /* [lower the priority level] */ splx(s); /* [set up the fixed part of the buffer for a transfer] */ bp->b_dev = dev; bp->b_error = 0; bp->b_proc = p; LIST_INIT(&bp->b_dep); /* * [while there are data to transfer and no I/O error] * Note that I/O errors are handled with a 'goto' at the bottom * of the 'while' loop. */ for (i = 0; i < uio->uio_iovcnt; i++) { iovp = &uio->uio_iov[i]; while (iovp->iov_len > 0) { /* * [mark the buffer busy for physical I/O] * (i.e. set B_PHYS (because it's an I/O to user * memory), and B_RAW, because B_RAW is to be * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ bp->b_flags = B_BUSY | B_PHYS | B_RAW | flags; /* [set up the buffer for a maximum-sized transfer] */ bp->b_blkno = btodb(uio->uio_offset); bp->b_data = iovp->iov_base; /* * Because iov_len is unsigned but b_bcount is signed, * an overflow is possible. Therefore bound to MAXPHYS * before calling minphys. */ if (iovp->iov_len > MAXPHYS) bp->b_bcount = MAXPHYS; else bp->b_bcount = iovp->iov_len; /* * [call minphys to bound the transfer size] * and remember the amount of data to transfer, * for later comparison. */ (*minphys)(bp); todo = bp->b_bcount; #ifdef DIAGNOSTIC if (todo < 0) panic("todo < 0; minphys broken"); if (todo > MAXPHYS) panic("todo > MAXPHYS; minphys broken"); #endif /* * [lock the part of the user address space involved * in the transfer] * Beware vmapbuf(); it clobbers b_data and * saves it in b_saveaddr. However, vunmapbuf() * restores it. */ error = uvm_vslock(p, bp->b_data, todo, (flags & B_READ) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ); if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; goto after_unlock; } vmapbuf(bp, todo); /* [call strategy to start the transfer] */ (*strategy)(bp); /* * Note that the raise/wait/lower/get error * steps below would be done by biowait(), but * we want to unlock the address space before * we lower the priority. * * [raise the priority level to splbio] */ s = splbio(); /* [wait for the transfer to complete] */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PRIBIO + 1, "physio", 0); /* Mark it busy again, so nobody else will use it. */ bp->b_flags |= B_BUSY; /* [lower the priority level] */ splx(s); /* * [unlock the part of the address space previously * locked] */ vunmapbuf(bp, todo); uvm_vsunlock(p, bp->b_data, todo); after_unlock: /* remember error value (save a splbio/splx pair) */ if (bp->b_flags & B_ERROR) error = (bp->b_error ? 
bp->b_error : EIO); /* * [deduct the transfer size from the total number * of data to transfer] */ done = bp->b_bcount - bp->b_resid; #ifdef DIAGNOSTIC if (done < 0) panic("done < 0; strategy broken"); if (done > todo) panic("done > todo; strategy broken"); #endif iovp->iov_len -= done; iovp->iov_base = (caddr_t)iovp->iov_base + done; uio->uio_offset += done; uio->uio_resid -= done; /* * Now, check for an error. * Also, handle weird end-of-disk semantics. */ if (error || done < todo) goto done; } } done: /* * [clean up the state of the buffer] * Remember if somebody wants it, so we can wake them up below. * Also, if we had to steal it, give it back. */ s = splbio(); bp->b_flags &= ~(B_BUSY | B_PHYS | B_RAW); if (nobuf) putphysbuf(bp); else { /* * [if another process is waiting for the raw I/O buffer, * wake up processes waiting to do physical I/O] */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup(bp); } } splx(s); return (error); }
/* ARGSUSED */
int
sys_mincore(struct proc *p, void *v, register_t *retval)
{
	struct sys_mincore_args /* {
		syscallarg(void *) addr;
		syscallarg(size_t) len;
		syscallarg(char *) vec;
	} */ *uap = v;
	vm_page_t m;
	char *vec, pgi;
	struct uvm_object *uobj;
	struct vm_amap *amap;
	struct vm_anon *anon;
	vm_map_entry_t entry;
	vaddr_t start, end, lim;
	vm_map_t map;
	vsize_t len, npgs;
	int error = 0;

	map = &p->p_vmspace->vm_map;

	start = (vaddr_t)SCARG(uap, addr);
	len = SCARG(uap, len);
	vec = SCARG(uap, vec);

	if (start & PAGE_MASK)
		return (EINVAL);
	len = round_page(len);
	end = start + len;
	if (end <= start)
		return (EINVAL);

	npgs = len >> PAGE_SHIFT;

	/*
	 * Lock down vec, so our returned status isn't outdated by
	 * storing the status byte for a page.
	 */
	if ((error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE)) != 0)
		return (error);

	vm_map_lock_read(map);

	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
		error = ENOMEM;
		goto out;
	}

	for (/* nothing */;
	     entry != &map->header && entry->start < end;
	     entry = entry->next) {
		KASSERT(!UVM_ET_ISSUBMAP(entry));
		KASSERT(start >= entry->start);

		/* Make sure there are no holes. */
		if (entry->end < end &&
		    (entry->next == &map->header ||
		     entry->next->start > entry->end)) {
			error = ENOMEM;
			goto out;
		}

		lim = end < entry->end ? end : entry->end;

		/*
		 * Special case for objects with no "real" pages.  Those
		 * are always considered resident (mapped devices).
		 */
		if (UVM_ET_ISOBJ(entry)) {
			KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
			if (entry->object.uvm_obj->pgops->pgo_releasepg
			    == NULL) {
				pgi = 1;
				for (/* nothing */; start < lim;
				     start += PAGE_SIZE, vec++)
					copyout(&pgi, vec, sizeof(char));
				continue;
			}
		}

		amap = entry->aref.ar_amap;	/* top layer */
		uobj = entry->object.uvm_obj;	/* bottom layer */

		if (uobj != NULL)
			simple_lock(&uobj->vmobjlock);

		for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
			pgi = 0;
			if (amap != NULL) {
				/* Check the top layer first. */
				anon = amap_lookup(&entry->aref,
				    start - entry->start);
				/* Don't need to lock anon here. */
				if (anon != NULL && anon->an_page != NULL) {
					/*
					 * Anon has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			if (uobj != NULL && pgi == 0) {
				/* Check the bottom layer. */
				m = uvm_pagelookup(uobj,
				    entry->offset + (start - entry->start));
				if (m != NULL) {
					/*
					 * Object has the page for this entry
					 * offset.
					 */
					pgi = 1;
				}
			}

			copyout(&pgi, vec, sizeof(char));
		}

		if (uobj != NULL)
			simple_unlock(&uobj->vmobjlock);
	}

 out:
	vm_map_unlock_read(map);
	uvm_vsunlock(p, SCARG(uap, vec), npgs);
	return (error);
}
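/*
 * For reference, the two-layer residency test in the inner loop above,
 * distilled into a stand-alone predicate.  Illustrative sketch only:
 * no such helper exists in the kernel, locking is omitted, and the
 * offset arithmetic simply mirrors the loop.
 */
static int
page_is_resident(vm_map_entry_t entry, vaddr_t va)
{
	struct vm_anon *anon;

	/* Top (anon) layer first: a copied-on-write page lives here. */
	if (entry->aref.ar_amap != NULL) {
		anon = amap_lookup(&entry->aref, va - entry->start);
		if (anon != NULL && anon->an_page != NULL)
			return (1);
	}

	/* Otherwise look in the backing object's page list. */
	if (entry->object.uvm_obj != NULL &&
	    uvm_pagelookup(entry->object.uvm_obj,
	    entry->offset + (va - entry->start)) != NULL)
		return (1);

	return (0);
}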