static int
phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	int i, s;

	s = splvm();
	/*
	 * Fill as many pages as vm_fault has allocated for us.
	 */
	for (i = 0; i < count; i++) {
		if ((m[i]->flags & PG_ZERO) == 0)
			pmap_zero_page(m[i]);
		vm_page_flag_set(m[i], PG_ZERO);
		/* Switch off pv_entries */
		vm_page_lock_queues();
		vm_page_unmanage(m[i]);
		vm_page_unlock_queues();
		m[i]->valid = VM_PAGE_BITS_ALL;
		m[i]->dirty = 0;
		/* The requested page must remain busy, the others not. */
		if (reqpage != i) {
			vm_page_flag_clear(m[i], PG_BUSY);
			m[i]->busy = 0;
		}
	}
	splx(s);
	return (VM_PAGER_OK);
}
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for (current = entry;
	     (current != &map->header) && (current->start < end);
	     current = current->next) {
		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;

		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset +
					 (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather
				 * information about it.  spl protection is
				 * required to maintain the object
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 * in i386 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
						   pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty || pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
					    pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->object.vm_object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make
			 * sure that the byte vector is zeroed for those
			 * skipped entries.
			 */
			while ((lastvecindex + 1) < vecindex) {
				++lastvecindex;
				error = subyte(vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte(vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte,
			 * the previous output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while ((lastvecindex + 1) < vecindex) {
		++lastvecindex;
		error = subyte(vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
	}

	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&map->token);
	return (error);
}
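/*
 * Illustrative userland sketch, not part of the kernel source above: it
 * shows how the per-page byte vector filled in by sys_mincore() is
 * typically consumed.  It assumes only the standard mincore(2) interface
 * and the MINCORE_* flags from <sys/mman.h>; local names such as "npages"
 * exist only for this example.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesz = (size_t)getpagesize();
	size_t len = 4 * pagesz;
	size_t npages = len / pagesz;
	char *base, *vec;
	size_t i;

	base = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (base == MAP_FAILED)
		return (1);
	base[0] = 1;			/* fault in the first page only */

	vec = malloc(npages);
	if (vec == NULL)
		return (1);
	if (mincore(base, len, vec) != 0)	/* kernel fills one byte per page */
		return (1);

	for (i = 0; i < npages; ++i) {
		printf("page %zu: %sresident%s\n", i,
		       (vec[i] & MINCORE_INCORE) ? "" : "not ",
		       (vec[i] & MINCORE_MODIFIED_OTHER) ? " (modified)" : "");
	}
	return (0);
}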
/*
 * A VFS can call this function to try to dispose of a read request
 * directly from the VM system, pretty much bypassing almost all VFS
 * overhead except for atime updates.
 *
 * If 0 is returned some or all of the uio was handled.  The caller must
 * check the uio and handle the remainder.
 *
 * The caller must fail on a non-zero error.
 */
int
vop_helper_read_shortcut(struct vop_read_args *ap)
{
	struct vnode *vp;
	struct uio *uio;
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_object_t obj;
	vm_page_t m;
	int offset;
	int n;
	int error;

	vp = ap->a_vp;
	uio = ap->a_uio;

	/*
	 * We can't short-cut if there is no VM object or this is a special
	 * UIO_NOCOPY read (typically from VOP_STRATEGY()).  We also can't
	 * do this if we cannot extract the filesize from the vnode.
	 */
	if (vm_read_shortcut_enable == 0)
		return(0);
	if (vp->v_object == NULL || uio->uio_segflg == UIO_NOCOPY)
		return(0);
	if (vp->v_filesize == NOOFFSET)
		return(0);
	if (uio->uio_resid == 0)
		return(0);

	/*
	 * Iterate the uio on a page-by-page basis
	 *
	 * XXX can we leave the object held shared during the uiomove()?
	 */
	++vm_read_shortcut_count;
	obj = vp->v_object;
	vm_object_hold_shared(obj);

	error = 0;
	while (uio->uio_resid && error == 0) {
		offset = (int)uio->uio_offset & PAGE_MASK;
		n = PAGE_SIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (vp->v_filesize < uio->uio_offset)
			break;
		if (uio->uio_offset + n > vp->v_filesize)
			n = vp->v_filesize - uio->uio_offset;
		if (n == 0)
			break;	/* hit EOF */

		m = vm_page_lookup_busy_try(obj, OFF_TO_IDX(uio->uio_offset),
					    FALSE, &error);
		if (error || m == NULL) {
			++vm_read_shortcut_failed;
			error = 0;
			break;
		}
		if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) {
			++vm_read_shortcut_failed;
			vm_page_wakeup(m);
			break;
		}
		lwb = lwbuf_alloc(m, &lwb_cache);

		/*
		 * Use a no-fault uiomove() to avoid deadlocking against
		 * our VM object (which could livelock on the same object
		 * due to shared-vs-exclusive), or deadlocking against
		 * our busied page.  Returns EFAULT on any fault which
		 * winds up diving a vnode.
		 */
		error = uiomove_nofault((char *)lwbuf_kva(lwb) + offset,
					n, uio);

		vm_page_flag_set(m, PG_REFERENCED);
		lwbuf_free(lwb);
		vm_page_wakeup(m);
	}
	vm_object_drop(obj);

	/*
	 * Ignore EFAULT since we used uiomove_nofault(), causes caller
	 * to fall-back to normal code for this case.
	 */
	if (error == EFAULT)
		error = 0;

	return (error);
}
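/*
 * Illustrative sketch of a caller, not part of the code above.  It shows
 * how a filesystem's read VOP might honor the contract documented for
 * vop_helper_read_shortcut(): a zero return means some or all of the uio
 * may already have been satisfied from resident VM pages, so the caller
 * re-checks uio_resid and services any remainder through its normal path.
 * The filesystem name "myfs" and the fallback myfs_read_normal() are
 * hypothetical placeholders.
 */
static int
myfs_read(struct vop_read_args *ap)
{
	int error;

	/* Try to satisfy the read directly from resident VM pages. */
	error = vop_helper_read_shortcut(ap);
	if (error)
		return (error);		/* hard error, do not fall back */
	if (ap->a_uio->uio_resid == 0)
		return (0);		/* fully satisfied by the shortcut */

	/* Shortcut declined or left a residual; use the normal read path. */
	return (myfs_read_normal(ap));
}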
/*
 * MPSAFE thread
 */
static void
vm_pagezero(void *arg)
{
	vm_page_t m = NULL;
	struct lwbuf *lwb = NULL;
	struct lwbuf lwb_cache;
	enum zeroidle_state state = STATE_IDLE;
	char *pg = NULL;
	int npages = 0;
	int sleep_time;
	int i = 0;
	int cpu = (int)(intptr_t)arg;
	int zero_state = 0;

	/*
	 * Adjust thread parameters before entering our loop.  The thread
	 * is started with the MP lock held and with normal kernel thread
	 * priority.
	 *
	 * Also put us on the last cpu for now.
	 *
	 * For now leave the MP lock held, the VM routines cannot be called
	 * with it released until tokenization is finished.
	 */
	lwkt_setpri_self(TDPRI_IDLE_WORK);
	lwkt_setcpu_self(globaldata_find(cpu));
	sleep_time = DEFAULT_SLEEP_TIME;

	/*
	 * Loop forever
	 */
	for (;;) {
		int zero_count;

		switch(state) {
		case STATE_IDLE:
			/*
			 * Wait for work.
			 */
			tsleep(&zero_state, 0, "pgzero", sleep_time);
			if (vm_page_zero_check(&zero_count, &zero_state))
				npages = idlezero_rate / 10;
			sleep_time = vm_page_zero_time(zero_count);
			if (npages)
				state = STATE_GET_PAGE;	/* Fallthrough */
			break;
		case STATE_GET_PAGE:
			/*
			 * Acquire page to zero
			 */
			if (--npages == 0) {
				state = STATE_IDLE;
			} else {
				m = vm_page_free_fromq_fast();
				if (m == NULL) {
					state = STATE_IDLE;
				} else {
					state = STATE_ZERO_PAGE;
					lwb = lwbuf_alloc(m, &lwb_cache);
					pg = (char *)lwbuf_kva(lwb);
					i = 0;
				}
			}
			break;
		case STATE_ZERO_PAGE:
			/*
			 * Zero-out the page
			 */
			while (i < PAGE_SIZE) {
				if (idlezero_nocache == 1)
					bzeront(&pg[i], IDLEZERO_RUN);
				else
					bzero(&pg[i], IDLEZERO_RUN);
				i += IDLEZERO_RUN;
				lwkt_yield();
			}
			state = STATE_RELEASE_PAGE;
			break;
		case STATE_RELEASE_PAGE:
			lwbuf_free(lwb);
			vm_page_flag_set(m, PG_ZERO);
			vm_page_free_toq(m);
			state = STATE_GET_PAGE;
			++idlezero_count;	/* non-locked, SMP race ok */
			break;
		}
		lwkt_yield();
	}
}