/* Try to invalidate pages, for "fs flush" or "fs flushv"; or
 * try to free pages, when deleting a file.
 *
 * Locking: the vcache entry's lock is held.  It may be dropped and
 * re-obtained.
 *
 * Since we drop and re-obtain the lock, we can't guarantee that there won't
 * be some pages around when we return, newly created by concurrent activity.
 */
void
osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync)
{
    struct vnode *vp;
    int tries, code;
    int islocked;

    vp = AFSTOV(avc);

    VI_LOCK(vp);
    if (vp->v_iflag & VI_DOOMED) {
	VI_UNLOCK(vp);
	return;
    }
    VI_UNLOCK(vp);

    islocked = islocked_vnode(vp);
    if (islocked == LK_EXCLOTHER)
	panic("Trying to Smush over someone else's lock");
    else if (islocked == LK_SHARED) {
	afs_warn("Trying to Smush with a shared lock");
	lock_vnode(vp, LK_UPGRADE);
    } else if (!islocked)
	lock_vnode(vp, LK_EXCLUSIVE);

    if (vp->v_bufobj.bo_object != NULL) {
	AFS_VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
	/*
	 * Do we really want OBJPC_SYNC?  OBJPC_INVAL would be
	 * faster, if invalidation is really what we are being
	 * asked to do.  (It would make more sense, too, since
	 * otherwise this function is practically identical to
	 * osi_VM_StoreAllSegments().)  -GAW
	 */
	/*
	 * Dunno.  We no longer resemble osi_VM_StoreAllSegments,
	 * though maybe that's wrong, now.  And OBJPC_SYNC is the
	 * common thing in 70 file systems, it seems.  Matt.
	 */
	vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
	AFS_VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
    }

    tries = 5;
    code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
    while (code && (tries > 0)) {
	afs_warn("TryToSmush retrying vinvalbuf");
	code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
	--tries;
    }

    if (islocked == LK_SHARED)
	lock_vnode(vp, LK_DOWNGRADE);
    else if (!islocked)
	unlock_vnode(vp);
}
/*
 * Flush and invalidate all dirty buffers.  If another process is already
 * doing the flush, just wait for completion.
 */
int
fuse_io_invalbuf(struct vnode *vp, struct thread *td)
{
        struct fuse_vnode_data *fvdat = VTOFUD(vp);
        int error = 0;

        if (vp->v_iflag & VI_DOOMED)
                return 0;

        ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf");

        while (fvdat->flag & FN_FLUSHINPROG) {
                struct proc *p = td->td_proc;

                if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
                        return EIO;
                fvdat->flag |= FN_FLUSHWANT;
                tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
                error = 0;
                if (p != NULL) {
                        PROC_LOCK(p);
                        if (SIGNOTEMPTY(p->p_siglist) ||
                            SIGNOTEMPTY(td->td_siglist))
                                error = EINTR;
                        PROC_UNLOCK(p);
                }
                if (error == EINTR)
                        return EINTR;
        }
        fvdat->flag |= FN_FLUSHINPROG;

        if (vp->v_bufobj.bo_object != NULL) {
                VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
                vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
                VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
        }
        error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
        while (error) {
                if (error == ERESTART || error == EINTR) {
                        fvdat->flag &= ~FN_FLUSHINPROG;
                        if (fvdat->flag & FN_FLUSHWANT) {
                                fvdat->flag &= ~FN_FLUSHWANT;
                                wakeup(&fvdat->flag);
                        }
                        return EINTR;
                }
                error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
        }

        fvdat->flag &= ~FN_FLUSHINPROG;

        if (fvdat->flag & FN_FLUSHWANT) {
                fvdat->flag &= ~FN_FLUSHWANT;
                wakeup(&fvdat->flag);
        }

        return (error);
}
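/*
 * Aside: a minimal userspace sketch of the single-flusher hand-off that
 * fuse_io_invalbuf() implements with FN_FLUSHINPROG/FN_FLUSHWANT and
 * tsleep()/wakeup().  A condition variable stands in for the sleep/wakeup
 * pair; the struct and function names are illustrative, not from the
 * kernel sources, and the signal/unmount checks are omitted.
 */
#include <pthread.h>
#include <stdbool.h>

struct flush_state {
        pthread_mutex_t lock;           /* init with PTHREAD_MUTEX_INITIALIZER */
        pthread_cond_t  done;           /* init with PTHREAD_COND_INITIALIZER */
        bool            flush_in_prog;  /* analog of FN_FLUSHINPROG */
};

static int
flush_once(struct flush_state *fs, int (*do_flush)(void *), void *arg)
{
        int error;

        pthread_mutex_lock(&fs->lock);
        /* If another thread is already flushing, wait until it finishes. */
        while (fs->flush_in_prog)
                pthread_cond_wait(&fs->done, &fs->lock);
        fs->flush_in_prog = true;       /* we are now the single flusher */
        pthread_mutex_unlock(&fs->lock);

        error = do_flush(arg);          /* analog of the vinvalbuf() call */

        pthread_mutex_lock(&fs->lock);
        fs->flush_in_prog = false;
        pthread_cond_broadcast(&fs->done);      /* analog of wakeup() */
        pthread_mutex_unlock(&fs->lock);
        return (error);
}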
int
ncl_inactive(struct vop_inactive_args *ap)
{
        struct nfsnode *np;
        struct sillyrename *sp;
        struct vnode *vp = ap->a_vp;
        boolean_t retv;

        np = VTONFS(vp);

        if (NFS_ISV4(vp) && vp->v_type == VREG) {
                /*
                 * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
                 * Close operations are delayed until now.  Any dirty
                 * buffers/pages must be flushed before the close, so that
                 * the stateid is available for the writes.
                 */
                if (vp->v_object != NULL) {
                        VM_OBJECT_WLOCK(vp->v_object);
                        retv = vm_object_page_clean(vp->v_object, 0, 0,
                            OBJPC_SYNC);
                        VM_OBJECT_WUNLOCK(vp->v_object);
                } else
                        retv = TRUE;
                if (retv == TRUE) {
                        (void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
                        (void)nfsrpc_close(vp, 1, ap->a_td);
                }
        }

        mtx_lock(&np->n_mtx);
        if (vp->v_type != VDIR) {
                sp = np->n_sillyrename;
                np->n_sillyrename = NULL;
        } else
                sp = NULL;
        if (sp) {
                mtx_unlock(&np->n_mtx);
                (void) ncl_vinvalbuf(vp, 0, ap->a_td, 1);
                /*
                 * Remove the silly file that was rename'd earlier
                 */
                ncl_removeit(sp, vp);
                crfree(sp->s_cred);
                TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp);
                taskqueue_enqueue(taskqueue_thread, &sp->s_task);
                mtx_lock(&np->n_mtx);
        }
        np->n_flag &= NMODIFIED;
        mtx_unlock(&np->n_mtx);
        return (0);
}
/* Try to invalidate pages, for "fs flush" or "fs flushv"; or
 * try to free pages, when deleting a file.
 *
 * Locking: the vcache entry's lock is held.  It may be dropped and
 * re-obtained.
 *
 * Since we drop and re-obtain the lock, we can't guarantee that there won't
 * be some pages around when we return, newly created by concurrent activity.
 */
void
osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync)
{
    struct vnode *vp;
    int tries, code;

    SPLVAR;

    vp = AFSTOV(avc);

    if (vp->v_iflag & VI_DOOMED) {
	USERPRI;
	return;
    }

    if (vp->v_bufobj.bo_object != NULL) {
	VM_OBJECT_LOCK(vp->v_bufobj.bo_object);
	/*
	 * Do we really want OBJPC_SYNC?  OBJPC_INVAL would be
	 * faster, if invalidation is really what we are being
	 * asked to do.  (It would make more sense, too, since
	 * otherwise this function is practically identical to
	 * osi_VM_StoreAllSegments().)  -GAW
	 */
	/*
	 * Dunno.  We no longer resemble osi_VM_StoreAllSegments,
	 * though maybe that's wrong, now.  And OBJPC_SYNC is the
	 * common thing in 70 file systems, it seems.  Matt.
	 */
	vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
	VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object);
    }

    tries = 5;
    code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
    while (code && (tries > 0)) {
	code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
	--tries;
    }
    USERPRI;
}
/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by doing a walkthrough.  If the page is not
 * marked dirty, it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 *      In the OBJT_VNODE case, the whole page range is cleaned up
 *      using the vm_object_page_clean() routine, by specifying a
 *      start and end of '0'.
 *
 *      Otherwise, if the object is of any other type, the generic
 *      pageout (daemon) flush routine is invoked.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_clean(int queue)
{
        vm_object_t object;
        vm_page_t m, m_tmp, next;

        ASSERT_LWKT_TOKEN_HELD(&vm_token);

        for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
                KASSERT(m->queue == queue,
                        ("vm_contig_clean: page %p's queue is not %d",
                         m, queue));
                next = TAILQ_NEXT(m, pageq);

                if (m->flags & PG_MARKER)
                        continue;

                if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
                        return (TRUE);

                vm_page_test_dirty(m);
                if (m->dirty) {
                        object = m->object;
                        if (object->type == OBJT_VNODE) {
                                vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
                                vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
                                vn_unlock(((struct vnode *)object->handle));
                                return (TRUE);
                        } else if (object->type == OBJT_SWAP ||
                                   object->type == OBJT_DEFAULT) {
                                m_tmp = m;
                                vm_pageout_flush(&m_tmp, 1, 0);
                                return (TRUE);
                        }
                }
                KKASSERT(m->busy == 0);
                if (m->dirty == 0 && m->hold_count == 0) {
                        vm_page_busy(m);
                        vm_page_cache(m);
                }
        }
        return (FALSE);
}
/* Try to store pages to cache, in order to store a file back to the server.
 *
 * Locking: the vcache entry's lock is held.  It will usually be dropped and
 * re-obtained.
 */
void
osi_VM_StoreAllSegments(struct vcache *avc)
{
    struct vnode *vp;
    struct vm_object *obj;
    int anyio, tries;

    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();
    tries = 5;
    vp = AFSTOV(avc);

    /*
     * I don't understand this.  Why not just call vm_object_page_clean()
     * and be done with it?  I particularly don't understand why we're
     * calling vget() here.  Is there some reason to believe that the vnode
     * might be being recycled at this point?  I don't think there's any
     * need for this loop, either -- if we keep the vnode locked all the
     * time, that and the object lock will prevent any new pages from
     * appearing.  The loop is what causes the race condition.  -GAW
     */
    do {
	anyio = 0;

	obj = vp->v_object;
	if (obj != NULL && obj->flags & OBJ_MIGHTBEDIRTY) {
	    if (!vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread)) {
		obj = vp->v_object;
		if (obj != NULL) {
		    AFS_VM_OBJECT_WLOCK(obj);
		    vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
		    AFS_VM_OBJECT_WUNLOCK(obj);
		    anyio = 1;
		}
		vput(vp);
	    }
	}
    } while (anyio && (--tries > 0));
    AFS_GLOCK();
    ObtainWriteLock(&avc->lock, 94);
}
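/*
 * Aside: the shape the -GAW comment above seems to argue for -- take the
 * vnode lock once, clean the object once, no retry loop.  An untested
 * sketch against the same calls used above; the function name is
 * hypothetical and this is not the code OpenAFS actually ships.
 */
static void
osi_VM_StoreAllSegments_once(struct vcache *avc)
{
    struct vnode *vp = AFSTOV(avc);
    struct vm_object *obj;

    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();
    if (vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread) == 0) {
	if ((obj = vp->v_object) != NULL) {
	    AFS_VM_OBJECT_WLOCK(obj);
	    /* Vnode and object locks held: no new dirty pages can appear. */
	    vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
	    AFS_VM_OBJECT_WUNLOCK(obj);
	}
	vput(vp);
    }
    AFS_GLOCK();
    ObtainWriteLock(&avc->lock, 94);
}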
static int
ffs_rawread_sync(struct vnode *vp)
{
        int error;

        /*
         * Check for dirty mmap, pending writes and dirty buffers
         */
        lwkt_gettoken(&vp->v_token);
        if (bio_track_active(&vp->v_track_write) ||
            !RB_EMPTY(&vp->v_rbdirty_tree) ||
            (vp->v_flag & VOBJDIRTY) != 0) {
                /* Attempt to msync mmap() regions to clean dirty mmap */
                if ((vp->v_flag & VOBJDIRTY) != 0) {
                        struct vm_object *obj;
                        if ((obj = vp->v_object) != NULL)
                                vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
                }

                /* Wait for pending writes to complete */
                error = bio_track_wait(&vp->v_track_write, 0, 0);
                if (error != 0) {
                        goto done;
                }
                /* Flush dirty buffers */
                if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
                        if ((error = VOP_FSYNC(vp, MNT_WAIT, 0)) != 0) {
                                goto done;
                        }
                        if (bio_track_active(&vp->v_track_write) ||
                            !RB_EMPTY(&vp->v_rbdirty_tree))
                                panic("ffs_rawread_sync: dirty bufs");
                }
        } else {
                error = 0;
        }
done:
        lwkt_reltoken(&vp->v_token);
        return error;
}
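/*
 * Aside: a userspace analog of the ordering ffs_rawread_sync() enforces --
 * write back dirty mmap()'d pages first, then flush the file's buffers.
 * Plain POSIX calls only; the path and length are whatever the caller
 * supplies, and the error handling is deliberately minimal.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

static int
sync_mapped_file(const char *path, size_t len)
{
        int fd, error = 0;
        void *p;

        if ((fd = open(path, O_RDWR)) < 0)
                return (-1);
        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                close(fd);
                return (-1);
        }
        memset(p, 0, len);                      /* dirty the mapping */
        if (msync(p, len, MS_SYNC) != 0)        /* analog of vm_object_page_clean() */
                error = -1;
        if (fsync(fd) != 0)                     /* analog of VOP_FSYNC(MNT_WAIT) */
                error = -1;
        munmap(p, len);
        close(fd);
        return (error);
}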
int
ncl_inactive(struct vop_inactive_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct nfsnode *np;
        boolean_t retv;

        if (NFS_ISV4(vp) && vp->v_type == VREG) {
                /*
                 * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
                 * Close operations are delayed until now.  Any dirty
                 * buffers/pages must be flushed before the close, so that
                 * the stateid is available for the writes.
                 */
                if (vp->v_object != NULL) {
                        VM_OBJECT_WLOCK(vp->v_object);
                        retv = vm_object_page_clean(vp->v_object, 0, 0,
                            OBJPC_SYNC);
                        VM_OBJECT_WUNLOCK(vp->v_object);
                } else
                        retv = TRUE;
                if (retv == TRUE) {
                        (void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
                        (void)nfsrpc_close(vp, 1, ap->a_td);
                }
        }
        np = VTONFS(vp);
        mtx_lock(&np->n_mtx);
        ncl_releasesillyrename(vp, ap->a_td);
        /*
         * NMODIFIED means that there might be dirty/stale buffers
         * associated with the NFS vnode.  None of the other flags are
         * meaningful after the vnode is unused.
         */
        np->n_flag &= NMODIFIED;
        mtx_unlock(&np->n_mtx);
        return (0);
}
static void
mac_proc_vm_revoke_recurse(struct thread *td, struct ucred *cred,
    struct vm_map *map)
{
        vm_map_entry_t vme;
        int vfslocked, result;
        vm_prot_t revokeperms;
        vm_object_t backing_object, object;
        vm_ooffset_t offset;
        struct vnode *vp;
        struct mount *mp;

        if (!mac_mmap_revocation)
                return;

        vm_map_lock(map);
        for (vme = map->header.next; vme != &map->header; vme = vme->next) {
                if (vme->eflags & MAP_ENTRY_IS_SUB_MAP) {
                        mac_proc_vm_revoke_recurse(td, cred,
                            vme->object.sub_map);
                        continue;
                }
                /*
                 * Skip over entries that obviously are not shared.
                 */
                if (vme->eflags & (MAP_ENTRY_COW | MAP_ENTRY_NOSYNC) ||
                    !vme->max_protection)
                        continue;
                /*
                 * Drill down to the deepest backing object.
                 */
                offset = vme->offset;
                object = vme->object.vm_object;
                if (object == NULL)
                        continue;
                VM_OBJECT_LOCK(object);
                while ((backing_object = object->backing_object) != NULL) {
                        VM_OBJECT_LOCK(backing_object);
                        offset += object->backing_object_offset;
                        VM_OBJECT_UNLOCK(object);
                        object = backing_object;
                }
                VM_OBJECT_UNLOCK(object);
                /*
                 * At the moment, vm_maps and objects aren't considered by
                 * the MAC system, so only things with backing by a normal
                 * object (read: vnodes) are checked.
                 */
                if (object->type != OBJT_VNODE)
                        continue;
                vp = (struct vnode *)object->handle;
                vfslocked = VFS_LOCK_GIANT(vp->v_mount);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                result = vme->max_protection;
                mac_vnode_check_mmap_downgrade(cred, vp, &result);
                VOP_UNLOCK(vp, 0);
                /*
                 * Determine which of the currently allowed maximum
                 * protections the policy no longer permits; those are
                 * the permissions we must revoke.
                 */
                revokeperms = vme->max_protection & ~result;
                if (!revokeperms) {
                        VFS_UNLOCK_GIANT(vfslocked);
                        continue;
                }
                printf("pid %ld: revoking %s perms from %#lx:%ld "
                    "(max %s/cur %s)\n", (long)td->td_proc->p_pid,
                    prot2str(revokeperms), (u_long)vme->start,
                    (long)(vme->end - vme->start),
                    prot2str(vme->max_protection), prot2str(vme->protection));
                /*
                 * This is the really simple case: if a map has more
                 * max_protection than is allowed, but it's not being
                 * actually used (that is, the current protection is
                 * still allowed), we can just wipe it out and do
                 * nothing more.
                 */
                if ((vme->protection & revokeperms) == 0) {
                        vme->max_protection -= revokeperms;
                } else {
                        if (revokeperms & VM_PROT_WRITE) {
                                /*
                                 * In the more complicated case, flush out all
                                 * pending changes to the object then turn it
                                 * copy-on-write.
                                 */
                                vm_object_reference(object);
                                (void) vn_start_write(vp, &mp, V_WAIT);
                                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                                VM_OBJECT_LOCK(object);
                                vm_object_page_clean(object, offset, offset +
                                    vme->end - vme->start, OBJPC_SYNC);
                                VM_OBJECT_UNLOCK(object);
                                VOP_UNLOCK(vp, 0);
                                vn_finished_write(mp);
                                vm_object_deallocate(object);
                                /*
                                 * Why bother if there's no read permissions
                                 * anymore?  For the rest, we need to leave
                                 * the write permissions on for COW, or
                                 * remove them entirely if configured to.
                                 */
                                if (!mac_mmap_revocation_via_cow) {
                                        vme->max_protection &= ~VM_PROT_WRITE;
                                        vme->protection &= ~VM_PROT_WRITE;
                                }
                                if ((revokeperms & VM_PROT_READ) == 0)
                                        vme->eflags |= MAP_ENTRY_COW |
                                            MAP_ENTRY_NEEDS_COPY;
                        }
                        if (revokeperms & VM_PROT_EXECUTE) {
                                vme->max_protection &= ~VM_PROT_EXECUTE;
                                vme->protection &= ~VM_PROT_EXECUTE;
                        }
                        if (revokeperms & VM_PROT_READ) {
                                vme->max_protection = 0;
                                vme->protection = 0;
                        }
                        pmap_protect(map->pmap, vme->start, vme->end,
                            vme->protection & ~revokeperms);
                        vm_map_simplify_entry(map, vme);
                }
                VFS_UNLOCK_GIANT(vfslocked);
        }
        vm_map_unlock(map);
}
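/*
 * The printf above leans on prot2str().  A sketch of such a helper
 * follows; this is an assumed shape, not necessarily the implementation
 * that accompanies mac_proc_vm_revoke_recurse() in the FreeBSD tree.
 * Returning string literals keeps it reentrant.
 */
static const char *
prot2str(vm_prot_t prot)
{

        switch (prot & VM_PROT_ALL) {
        case VM_PROT_READ:
                return ("r--");
        case VM_PROT_READ | VM_PROT_WRITE:
                return ("rw-");
        case VM_PROT_READ | VM_PROT_EXECUTE:
                return ("r-x");
        case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
                return ("rwx");
        case VM_PROT_WRITE:
                return ("-w-");
        case VM_PROT_EXECUTE:
                return ("--x");
        case VM_PROT_WRITE | VM_PROT_EXECUTE:
                return ("-wx");
        default:
                return ("---");
        }
}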
/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue', which can be either
 * PQ_ACTIVE or PQ_INACTIVE, by doing a walkthrough.  If the page is not
 * marked dirty, it is shoved into the page cache, provided no one has
 * currently acquired it; otherwise localized action per object type
 * is taken for cleanup:
 *
 *      In the OBJT_VNODE case, the whole page range is cleaned up
 *      using the vm_object_page_clean() routine, by specifying a
 *      start and end of '0'.
 *
 *      Otherwise, if the object is of any other type, the generic
 *      pageout (daemon) flush routine is invoked.
 */
static void
vm_contig_pg_clean(int queue, int count)
{
        vm_object_t object;
        vm_page_t m, m_tmp;
        struct vm_page marker;
        struct vpgqueues *pq = &vm_page_queues[queue];

        /*
         * Setup a local marker
         */
        bzero(&marker, sizeof(marker));
        marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
        marker.queue = queue;
        marker.wire_count = 1;

        vm_page_queues_spin_lock(queue);
        TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
        vm_page_queues_spin_unlock(queue);

        /*
         * Iterate the queue.  Note that the vm_page spinlock must be
         * acquired before the pageq spinlock so it's easiest to simply
         * not hold it in the loop iteration.
         */
        while (count-- > 0 && (m = TAILQ_NEXT(&marker, pageq)) != NULL) {
                vm_page_and_queue_spin_lock(m);
                if (m != TAILQ_NEXT(&marker, pageq)) {
                        vm_page_and_queue_spin_unlock(m);
                        ++count;
                        continue;
                }
                KKASSERT(m->queue == queue);

                TAILQ_REMOVE(&pq->pl, &marker, pageq);
                TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

                if (m->flags & PG_MARKER) {
                        vm_page_and_queue_spin_unlock(m);
                        continue;
                }
                if (vm_page_busy_try(m, TRUE)) {
                        vm_page_and_queue_spin_unlock(m);
                        continue;
                }
                vm_page_and_queue_spin_unlock(m);

                /*
                 * We've successfully busied the page
                 */
                if (m->queue - m->pc != queue) {
                        vm_page_wakeup(m);
                        continue;
                }
                if (m->wire_count || m->hold_count) {
                        vm_page_wakeup(m);
                        continue;
                }
                if ((object = m->object) == NULL) {
                        vm_page_wakeup(m);
                        continue;
                }
                vm_page_test_dirty(m);
                if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
                        vm_object_hold(object);
                        KKASSERT(m->object == object);

                        if (object->type == OBJT_VNODE) {
                                vm_page_wakeup(m);
                                vn_lock(object->handle,
                                    LK_EXCLUSIVE | LK_RETRY);
                                vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
                                vn_unlock(((struct vnode *)object->handle));
                        } else if (object->type == OBJT_SWAP ||
                                   object->type == OBJT_DEFAULT) {
                                m_tmp = m;
                                vm_pageout_flush(&m_tmp, 1, 0);
                        } else {
                                vm_page_wakeup(m);
                        }
                        vm_object_drop(object);
                } else if (m->hold_count == 0) {
                        vm_page_cache(m);
                } else {
                        vm_page_wakeup(m);
                }
        }

        /*
         * Scrap our local marker
         */
        vm_page_queues_spin_lock(queue);
        TAILQ_REMOVE(&pq->pl, &marker, pageq);
        vm_page_queues_spin_unlock(queue);
}
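/*
 * Aside: the marker technique above, distilled into a self-contained
 * userspace sketch.  A fictitious entry holds our place in the list so
 * the queue lock can be dropped while a real entry is visited; entries
 * may come and go while it is dropped.  Names and the item type are
 * illustrative, not from the kernel sources.
 */
#include <sys/queue.h>
#include <pthread.h>

struct item {
        TAILQ_ENTRY(item) link;
        int               is_marker;    /* analog of PG_MARKER */
};
TAILQ_HEAD(itemq, item);

static void
scan_with_marker(struct itemq *q, pthread_mutex_t *qlock,
    void (*visit)(struct item *))
{
        struct item marker = { .is_marker = 1 }, *it;

        pthread_mutex_lock(qlock);
        TAILQ_INSERT_HEAD(q, &marker, link);
        while ((it = TAILQ_NEXT(&marker, link)) != NULL) {
                /*
                 * Move the marker past the entry we are about to visit,
                 * so our position survives dropping the lock.
                 */
                TAILQ_REMOVE(q, &marker, link);
                TAILQ_INSERT_AFTER(q, it, &marker, link);
                if (it->is_marker)      /* another scanner's marker: skip */
                        continue;
                pthread_mutex_unlock(qlock);
                visit(it);              /* may block; the list may change */
                pthread_mutex_lock(qlock);
        }
        TAILQ_REMOVE(q, &marker, link);
        pthread_mutex_unlock(qlock);
}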