Example #1
File: osi_vm.c Project: hwr/openafs
/* Try to invalidate pages, for "fs flush" or "fs flushv"; or
 * try to free pages, when deleting a file.
 *
 * Locking:  the vcache entry's lock is held.  It may be dropped and 
 * re-obtained.
 *
 * Since we drop and re-obtain the lock, we can't guarantee that there won't
 * be some pages around when we return, newly created by concurrent activity.
 */
void
osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync)
{
    struct vnode *vp;
    int tries, code;
    int islocked;

    vp = AFSTOV(avc);

    VI_LOCK(vp);
    if (vp->v_iflag & VI_DOOMED) {
	VI_UNLOCK(vp);
	return;
    }
    VI_UNLOCK(vp);

    islocked = islocked_vnode(vp);
    if (islocked == LK_EXCLOTHER)
	panic("Trying to Smush over someone else's lock");
    else if (islocked == LK_SHARED) {
	afs_warn("Trying to Smush with a shared lock");
	lock_vnode(vp, LK_UPGRADE);
    } else if (!islocked)
	lock_vnode(vp, LK_EXCLUSIVE);

    if (vp->v_bufobj.bo_object != NULL) {
	AFS_VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
	/*
	 * Do we really want OBJPC_SYNC?  OBJPC_INVAL would be
	 * faster, if invalidation is really what we are being
	 * asked to do.  (It would make more sense, too, since
	 * otherwise this function is practically identical to
	 * osi_VM_StoreAllSegments().)  -GAW
	 */

	/*
	 * Dunno.  We no longer resemble osi_VM_StoreAllSegments,
	 * though maybe that's wrong, now.  And OBJPC_SYNC is the
	 * common thing in 70 file systems, it seems.  Matt.
	 */

	vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
	AFS_VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
    }

    tries = 5;
    code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
    while (code && (tries > 0)) {
	afs_warn("TryToSmush retrying vinvalbuf");
	code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
	--tries;
    }
    if (islocked == LK_SHARED)
	lock_vnode(vp, LK_DOWNGRADE);
    else if (!islocked)
	unlock_vnode(vp);
}
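
The two OpenAFS revisions of this function (this one and the older Example #4 below) reduce to one recurring FreeBSD idiom: write-lock the vnode's VM object, synchronously clean every resident page, then discard the buffers, retrying the invalidation a few times because concurrent activity can re-dirty the vnode. The condensed sketch below is not taken from any of these projects; it merely restates that pattern, assuming a FreeBSD kernel context with the usual vnode/VM headers in scope and the vnode lock already held by the caller. The helper name is hypothetical, and the generic vinvalbuf() stands in for OpenAFS's osi_vinvalbuf() wrapper.

/*
 * Condensed sketch of the flush-then-invalidate pattern above.
 * Assumption: the caller holds the vnode lock; helper name is made up.
 */
static int
flush_and_invalidate(struct vnode *vp)
{
	struct vm_object *obj = vp->v_bufobj.bo_object;
	int code, tries;

	if (obj != NULL) {
		VM_OBJECT_WLOCK(obj);
		/* A start and end of 0 mean "clean the whole object". */
		vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
		VM_OBJECT_WUNLOCK(obj);
	}

	/* Concurrent writers may re-dirty the vnode, so retry a few times. */
	tries = 5;
	code = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	while (code && --tries > 0)
		code = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	return (code);
}

Per the -GAW comment in the function above, OBJPC_INVAL rather than OBJPC_SYNC would be the flag to reach for if pure invalidation, rather than a synchronous writeback, were the goal.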
Example #2
/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
fuse_io_invalbuf(struct vnode *vp, struct thread *td)
{
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	int error = 0;

	if (vp->v_iflag & VI_DOOMED)
		return 0;

	ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf");

	while (fvdat->flag & FN_FLUSHINPROG) {
		struct proc *p = td->td_proc;

		if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF)
			return EIO;
		fvdat->flag |= FN_FLUSHWANT;
		tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz);
		error = 0;
		if (p != NULL) {
			PROC_LOCK(p);
			if (SIGNOTEMPTY(p->p_siglist) ||
			    SIGNOTEMPTY(td->td_siglist))
				error = EINTR;
			PROC_UNLOCK(p);
		}
		if (error == EINTR)
			return EINTR;
	}
	fvdat->flag |= FN_FLUSHINPROG;

	if (vp->v_bufobj.bo_object != NULL) {
		VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
		VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
	}
	error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	while (error) {
		if (error == ERESTART || error == EINTR) {
			fvdat->flag &= ~FN_FLUSHINPROG;
			if (fvdat->flag & FN_FLUSHWANT) {
				fvdat->flag &= ~FN_FLUSHWANT;
				wakeup(&fvdat->flag);
			}
			return EINTR;
		}
		error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	}
	fvdat->flag &= ~FN_FLUSHINPROG;
	if (fvdat->flag & FN_FLUSHWANT) {
		fvdat->flag &= ~FN_FLUSHWANT;
		wakeup(&fvdat->flag);
	}
	return (error);
}
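
The header comment describes a standard kernel serialization handshake: a flush-in-progress bit guards the flush, and other threads sleep on the flag word until the owner clears the bit and wakes them. Stripped of the FUSE specifics, the protocol looks roughly like the sketch below. The struct, field, and flag names are placeholders invented for illustration (FUSE uses fuse_vnode_data with FN_FLUSHINPROG and FN_FLUSHWANT), and the sketch assumes the caller holds whatever lock protects the flag word, just as fuse_io_invalbuf() relies on the exclusive vnode lock.

struct node_state {
	int	ns_flags;		/* hypothetical flag word */
};
#define	NS_FLUSHING	0x01		/* a flush is currently running */
#define	NS_FLUSHWANT	0x02		/* someone is waiting for it */

static void
flush_serialized(struct node_state *ns)
{
	/* Wait out any flush that is already in progress. */
	while (ns->ns_flags & NS_FLUSHING) {
		ns->ns_flags |= NS_FLUSHWANT;
		tsleep(&ns->ns_flags, PRIBIO + 2, "flshwt", 2 * hz);
	}
	ns->ns_flags |= NS_FLUSHING;

	/* ... the actual flush/invalidate work goes here ... */

	/* Drop the "lock" and wake any waiters. */
	ns->ns_flags &= ~NS_FLUSHING;
	if (ns->ns_flags & NS_FLUSHWANT) {
		ns->ns_flags &= ~NS_FLUSHWANT;
		wakeup(&ns->ns_flags);
	}
}

Note how the original additionally bails out with EIO when the mount is being forcibly unmounted (MNTK_UNMOUNTF) and with EINTR when a signal is pending, so a waiter cannot get stuck behind a flush that will never finish.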
Example #3
int
ncl_inactive(struct vop_inactive_args *ap)
{
	struct nfsnode *np;
	struct sillyrename *sp;
	struct vnode *vp = ap->a_vp;
	boolean_t retv;

	np = VTONFS(vp);

	if (NFS_ISV4(vp) && vp->v_type == VREG) {
		/*
		 * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
		 * Close operations are delayed until now. Any dirty
		 * buffers/pages must be flushed before the close, so that the
		 * stateid is available for the writes.
		 */
		if (vp->v_object != NULL) {
			VM_OBJECT_WLOCK(vp->v_object);
			retv = vm_object_page_clean(vp->v_object, 0, 0,
			    OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		} else
			retv = TRUE;
		if (retv == TRUE) {
			(void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
			(void)nfsrpc_close(vp, 1, ap->a_td);
		}
	}

	mtx_lock(&np->n_mtx);
	if (vp->v_type != VDIR) {
		sp = np->n_sillyrename;
		np->n_sillyrename = NULL;
	} else
		sp = NULL;
	if (sp) {
		mtx_unlock(&np->n_mtx);
		(void) ncl_vinvalbuf(vp, 0, ap->a_td, 1);
		/*
		 * Remove the silly file that was rename'd earlier
		 */
		ncl_removeit(sp, vp);
		crfree(sp->s_cred);
		TASK_INIT(&sp->s_task, 0, nfs_freesillyrename, sp);
		taskqueue_enqueue(taskqueue_thread, &sp->s_task);
		mtx_lock(&np->n_mtx);
	}
	np->n_flag &= NMODIFIED;
	mtx_unlock(&np->n_mtx);
	return (0);
}
Example #4
/* Try to invalidate pages, for "fs flush" or "fs flushv"; or
 * try to free pages, when deleting a file.
 *
 * Locking:  the vcache entry's lock is held.  It may be dropped and 
 * re-obtained.
 *
 * Since we drop and re-obtain the lock, we can't guarantee that there won't
 * be some pages around when we return, newly created by concurrent activity.
 */
void
osi_VM_TryToSmush(struct vcache *avc, afs_ucred_t *acred, int sync)
{
    struct vnode *vp;
    int tries, code;

    SPLVAR;

    vp = AFSTOV(avc);

    if (vp->v_iflag & VI_DOOMED) {
      USERPRI;
      return;
    }

    if (vp->v_bufobj.bo_object != NULL) {
      VM_OBJECT_LOCK(vp->v_bufobj.bo_object);
      /*
       * Do we really want OBJPC_SYNC?  OBJPC_INVAL would be
       * faster, if invalidation is really what we are being
       * asked to do.  (It would make more sense, too, since
       * otherwise this function is practically identical to
       * osi_VM_StoreAllSegments().)  -GAW
       */

      /*
       * Dunno.  We no longer resemble osi_VM_StoreAllSegments,
       * though maybe that's wrong, now.  And OBJPC_SYNC is the
       * common thing in 70 file systems, it seems.  Matt.
       */

      vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
      VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object);
    }

    tries = 5;
    code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
    while (code && (tries > 0)) {
      code = osi_vinvalbuf(vp, V_SAVE, PCATCH, 0);
      --tries;
    }
    USERPRI;
}
Example #5
/*
 * vm_contig_pg_clean:
 * 
 * Do a thorough cleanup of the specified 'queue' (either PQ_ACTIVE or
 * PQ_INACTIVE) by walking through it.  If a page is not marked dirty,
 * it is shoved into the page cache, provided no one has currently
 * acquired it; otherwise a localized action per object type is taken
 * for cleanup:
 *
 * 	In the OBJT_VNODE case, the whole page range is cleaned up
 * 	using the vm_object_page_clean() routine, by specifying a
 * 	start and end of '0'.
 *
 * 	Otherwise if the object is of any other type, the generic
 * 	pageout (daemon) flush routine is invoked.
 *
 * The caller must hold vm_token.
 */
static int
vm_contig_pg_clean(int queue)
{
	vm_object_t object;
	vm_page_t m, m_tmp, next;

	ASSERT_LWKT_TOKEN_HELD(&vm_token);

	for (m = TAILQ_FIRST(&vm_page_queues[queue].pl); m != NULL; m = next) {
		KASSERT(m->queue == queue,
			("vm_contig_clean: page %p's queue is not %d", 
			m, queue));
		next = TAILQ_NEXT(m, pageq);

		if (m->flags & PG_MARKER)
			continue;
		
		if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
			return (TRUE);
		
		vm_page_test_dirty(m);
		if (m->dirty) {
			object = m->object;
			if (object->type == OBJT_VNODE) {
				vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
				vn_unlock(((struct vnode *)object->handle));
				return (TRUE);
			} else if (object->type == OBJT_SWAP ||
					object->type == OBJT_DEFAULT) {
				m_tmp = m;
				vm_pageout_flush(&m_tmp, 1, 0);
				return (TRUE);
			}
		}
		KKASSERT(m->busy == 0);
		if (m->dirty == 0 && m->hold_count == 0) {
			vm_page_busy(m);
			vm_page_cache(m);
		}
	}
	return (FALSE);
}
Example #6
File: osi_vm.c Project: hwr/openafs
/* Try to store pages to cache, in order to store a file back to the server.
 *
 * Locking:  the vcache entry's lock is held.  It will usually be dropped and
 * re-obtained.
 */
void
osi_VM_StoreAllSegments(struct vcache *avc)
{
    struct vnode *vp;
    struct vm_object *obj;
    int anyio, tries;

    ReleaseWriteLock(&avc->lock);
    AFS_GUNLOCK();
    tries = 5;
    vp = AFSTOV(avc);

    /*
     * I don't understand this.  Why not just call vm_object_page_clean()
     * and be done with it?  I particularly don't understand why we're calling
     * vget() here.  Is there some reason to believe that the vnode might
     * be being recycled at this point?  I don't think there's any need for
     * this loop, either -- if we keep the vnode locked all the time,
     * that and the object lock will prevent any new pages from appearing.
     * The loop is what causes the race condition.  -GAW
     */
    do {
	anyio = 0;
	
	obj = vp->v_object;
	if (obj != NULL && obj->flags & OBJ_MIGHTBEDIRTY) {
	    if (!vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread)) {
		obj = vp->v_object;
		if (obj != NULL) {
		    AFS_VM_OBJECT_WLOCK(obj);
		    vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
		    AFS_VM_OBJECT_WUNLOCK(obj);
		    anyio = 1;
		}
		vput(vp);
	    }
	}
    } while (anyio && (--tries > 0));
    AFS_GLOCK();
    ObtainWriteLock(&avc->lock, 94);
}
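
The -GAW comment above argues that if the vnode simply stayed locked for the duration, the vget()/vput() dance and the retry loop would be unnecessary: the vnode lock plus the object lock would keep new pages from appearing. A minimal sketch of that simpler shape, using the calls already present in this example (plus FreeBSD's ASSERT_VOP_LOCKED() check) and a hypothetical helper name, would be:

static void
store_all_segments_simple(struct vnode *vp)
{
	struct vm_object *obj;

	/* Assumption: the caller keeps vp locked across the call. */
	ASSERT_VOP_LOCKED(vp, "store_all_segments_simple");

	obj = vp->v_object;
	if (obj != NULL) {
		AFS_VM_OBJECT_WLOCK(obj);
		vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
		AFS_VM_OBJECT_WUNLOCK(obj);
	}
}

This only illustrates the comment's argument, not what OpenAFS ships; the shipped code drops the vcache lock and apparently uses vget() to guard against the vnode being recycled in the meantime, which is exactly the point the comment questions.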
Example #7
static int
ffs_rawread_sync(struct vnode *vp)
{
	int error;

	/*
	 * Check for dirty mmap, pending writes and dirty buffers
	 */
	lwkt_gettoken(&vp->v_token);
	if (bio_track_active(&vp->v_track_write) ||
	    !RB_EMPTY(&vp->v_rbdirty_tree) ||
	    (vp->v_flag & VOBJDIRTY) != 0) {
		/* Attempt to msync mmap() regions to clean dirty mmap */ 
		if ((vp->v_flag & VOBJDIRTY) != 0) {
			struct vm_object *obj;
			if ((obj = vp->v_object) != NULL)
				vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
		}

		/* Wait for pending writes to complete */
		error = bio_track_wait(&vp->v_track_write, 0, 0);
		if (error != 0) {
			goto done;
		}
		/* Flush dirty buffers */
		if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
			if ((error = VOP_FSYNC(vp, MNT_WAIT, 0)) != 0) {
				goto done;
			}
			if (bio_track_active(&vp->v_track_write) ||
			    !RB_EMPTY(&vp->v_rbdirty_tree))
				panic("ffs_rawread_sync: dirty bufs");
		}
	} else {
		error = 0;
	}
done:
	lwkt_reltoken(&vp->v_token);
	return error;
}
Example #8
int
ncl_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np;
	boolean_t retv;

	if (NFS_ISV4(vp) && vp->v_type == VREG) {
		/*
		 * Since mmap()'d files do I/O after VOP_CLOSE(), the NFSv4
		 * Close operations are delayed until now. Any dirty
		 * buffers/pages must be flushed before the close, so that the
		 * stateid is available for the writes.
		 */
		if (vp->v_object != NULL) {
			VM_OBJECT_WLOCK(vp->v_object);
			retv = vm_object_page_clean(vp->v_object, 0, 0,
			    OBJPC_SYNC);
			VM_OBJECT_WUNLOCK(vp->v_object);
		} else
			retv = TRUE;
		if (retv == TRUE) {
			(void)ncl_flush(vp, MNT_WAIT, NULL, ap->a_td, 1, 0);
			(void)nfsrpc_close(vp, 1, ap->a_td);
		}
	}

	np = VTONFS(vp);
	mtx_lock(&np->n_mtx);
	ncl_releasesillyrename(vp, ap->a_td);

	/*
	 * NMODIFIED means that there might be dirty/stale buffers
	 * associated with the NFS vnode.  None of the other flags are
	 * meaningful after the vnode is unused.
	 */
	np->n_flag &= NMODIFIED;
	mtx_unlock(&np->n_mtx);
	return (0);
}
Example #9
static void
mac_proc_vm_revoke_recurse(struct thread *td, struct ucred *cred,
    struct vm_map *map)
{
	vm_map_entry_t vme;
	int vfslocked, result;
	vm_prot_t revokeperms;
	vm_object_t backing_object, object;
	vm_ooffset_t offset;
	struct vnode *vp;
	struct mount *mp;

	if (!mac_mmap_revocation)
		return;

	vm_map_lock(map);
	for (vme = map->header.next; vme != &map->header; vme = vme->next) {
		if (vme->eflags & MAP_ENTRY_IS_SUB_MAP) {
			mac_proc_vm_revoke_recurse(td, cred,
			    vme->object.sub_map);
			continue;
		}
		/*
		 * Skip over entries that obviously are not shared.
		 */
		if (vme->eflags & (MAP_ENTRY_COW | MAP_ENTRY_NOSYNC) ||
		    !vme->max_protection)
			continue;
		/*
		 * Drill down to the deepest backing object.
		 */
		offset = vme->offset;
		object = vme->object.vm_object;
		if (object == NULL)
			continue;
		VM_OBJECT_LOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_LOCK(backing_object);
			offset += object->backing_object_offset;
			VM_OBJECT_UNLOCK(object);
			object = backing_object;
		}
		VM_OBJECT_UNLOCK(object);
		/*
		 * At the moment, vm_maps and objects aren't considered by
		 * the MAC system, so only things with backing by a normal
		 * object (read: vnodes) are checked.
		 */
		if (object->type != OBJT_VNODE)
			continue;
		vp = (struct vnode *)object->handle;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		result = vme->max_protection;
		mac_vnode_check_mmap_downgrade(cred, vp, &result);
		VOP_UNLOCK(vp, 0);
		/*
		 * Work out which of the protections currently allowed by
		 * max_protection a policy now requires us to revoke.
		 */
		revokeperms = vme->max_protection & ~result;
		if (!revokeperms) {
			VFS_UNLOCK_GIANT(vfslocked);
			continue;
		}
		printf("pid %ld: revoking %s perms from %#lx:%ld "
		    "(max %s/cur %s)\n", (long)td->td_proc->p_pid,
		    prot2str(revokeperms), (u_long)vme->start,
		    (long)(vme->end - vme->start),
		    prot2str(vme->max_protection), prot2str(vme->protection));
		/*
		 * This is the really simple case: if a map has more
		 * max_protection than is allowed, but it's not being
		 * actually used (that is, the current protection is still
		 * allowed), we can just wipe it out and do nothing more.
		 */
		if ((vme->protection & revokeperms) == 0) {
			vme->max_protection -= revokeperms;
		} else {
			if (revokeperms & VM_PROT_WRITE) {
				/*
				 * In the more complicated case, flush out all
				 * pending changes to the object then turn it
				 * copy-on-write.
				 */
				vm_object_reference(object);
				(void) vn_start_write(vp, &mp, V_WAIT);
				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
				VM_OBJECT_LOCK(object);
				vm_object_page_clean(object, offset, offset +
				    vme->end - vme->start, OBJPC_SYNC);
				VM_OBJECT_UNLOCK(object);
				VOP_UNLOCK(vp, 0);
				vn_finished_write(mp);
				vm_object_deallocate(object);
				/*
				 * Why bother if there's no read permissions
				 * anymore?  For the rest, we need to leave
				 * the write permissions on for COW, or
				 * remove them entirely if configured to.
				 */
				if (!mac_mmap_revocation_via_cow) {
					vme->max_protection &= ~VM_PROT_WRITE;
					vme->protection &= ~VM_PROT_WRITE;
				}
				if ((revokeperms & VM_PROT_READ) == 0)
					vme->eflags |= MAP_ENTRY_COW |
					    MAP_ENTRY_NEEDS_COPY;
			}
			if (revokeperms & VM_PROT_EXECUTE) {
				vme->max_protection &= ~VM_PROT_EXECUTE;
				vme->protection &= ~VM_PROT_EXECUTE;
			}
			if (revokeperms & VM_PROT_READ) {
				vme->max_protection = 0;
				vme->protection = 0;
			}
			pmap_protect(map->pmap, vme->start, vme->end,
			    vme->protection & ~revokeperms);
			vm_map_simplify_entry(map, vme);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	vm_map_unlock(map);
}
Example #10
/*
 * vm_contig_pg_clean:
 *
 * Do a thorough cleanup of the specified 'queue' (either PQ_ACTIVE or
 * PQ_INACTIVE) by walking through it.  If a page is not marked dirty,
 * it is shoved into the page cache, provided no one has currently
 * acquired it; otherwise a localized action per object type is taken
 * for cleanup:
 *
 * 	In the OBJT_VNODE case, the whole page range is cleaned up
 * 	using the vm_object_page_clean() routine, by specifying a
 * 	start and end of '0'.
 *
 * 	Otherwise if the object is of any other type, the generic
 * 	pageout (daemon) flush routine is invoked.
 */
static void
vm_contig_pg_clean(int queue, int count)
{
    vm_object_t object;
    vm_page_t m, m_tmp;
    struct vm_page marker;
    struct vpgqueues *pq = &vm_page_queues[queue];

    /*
     * Setup a local marker
     */
    bzero(&marker, sizeof(marker));
    marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
    marker.queue = queue;
    marker.wire_count = 1;

    vm_page_queues_spin_lock(queue);
    TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);

    /*
     * Iterate the queue.  Note that the vm_page spinlock must be
     * acquired before the pageq spinlock so it's easiest to simply
     * not hold it in the loop iteration.
     */
    while (count-- > 0 && (m = TAILQ_NEXT(&marker, pageq)) != NULL) {
        vm_page_and_queue_spin_lock(m);
        if (m != TAILQ_NEXT(&marker, pageq)) {
            vm_page_and_queue_spin_unlock(m);
            ++count;
            continue;
        }
        KKASSERT(m->queue == queue);

        TAILQ_REMOVE(&pq->pl, &marker, pageq);
        TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

        if (m->flags & PG_MARKER) {
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        if (vm_page_busy_try(m, TRUE)) {
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        vm_page_and_queue_spin_unlock(m);

        /*
         * We've successfully busied the page
         */
        if (m->queue - m->pc != queue) {
            vm_page_wakeup(m);
            continue;
        }
        if (m->wire_count || m->hold_count) {
            vm_page_wakeup(m);
            continue;
        }
        if ((object = m->object) == NULL) {
            vm_page_wakeup(m);
            continue;
        }
        vm_page_test_dirty(m);
        if (m->dirty || (m->flags & PG_NEED_COMMIT)) {
            vm_object_hold(object);
            KKASSERT(m->object == object);

            if (object->type == OBJT_VNODE) {
                vm_page_wakeup(m);
                vn_lock(object->handle, LK_EXCLUSIVE|LK_RETRY);
                vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
                vn_unlock(((struct vnode *)object->handle));
            } else if (object->type == OBJT_SWAP ||
                       object->type == OBJT_DEFAULT) {
                m_tmp = m;
                vm_pageout_flush(&m_tmp, 1, 0);
            } else {
                vm_page_wakeup(m);
            }
            vm_object_drop(object);
        } else if (m->hold_count == 0) {
            vm_page_cache(m);
        } else {
            vm_page_wakeup(m);
        }
    }

    /*
     * Scrap our local marker
     */
    vm_page_queues_spin_lock(queue);
    TAILQ_REMOVE(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);
}
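
Both vm_contig_pg_clean() variants (Example #5 and this one) walk a page queue whose spinlocks must be dropped around the per-page work; the newer one does it with a marker page, as the comments above the loop explain. Isolated from the cleaning logic, and using only the DragonFly primitives that appear above, the skeleton of that marker-based walk looks roughly like this (the helper name is made up and the per-page work is elided):

static void
walk_queue_with_marker(int queue)
{
    struct vpgqueues *pq = &vm_page_queues[queue];
    struct vm_page marker;
    vm_page_t m;

    /* A fictitious, busy, wired marker page that only holds our place. */
    bzero(&marker, sizeof(marker));
    marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
    marker.queue = queue;
    marker.wire_count = 1;

    vm_page_queues_spin_lock(queue);
    TAILQ_INSERT_HEAD(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);

    while ((m = TAILQ_NEXT(&marker, pageq)) != NULL) {
        vm_page_and_queue_spin_lock(m);
        if (m != TAILQ_NEXT(&marker, pageq)) {
            /* The queue changed under us; retry from the marker. */
            vm_page_and_queue_spin_unlock(m);
            continue;
        }

        /* Move the marker past 'm' before the locks are dropped. */
        TAILQ_REMOVE(&pq->pl, &marker, pageq);
        TAILQ_INSERT_AFTER(&pq->pl, m, &marker, pageq);

        if (m->flags & PG_MARKER) {
            /* Another scan's marker; skip it. */
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        if (vm_page_busy_try(m, TRUE)) {
            /* Busy elsewhere; skip it. */
            vm_page_and_queue_spin_unlock(m);
            continue;
        }
        vm_page_and_queue_spin_unlock(m);

        /* ... per-page work on the busied page goes here ... */
        vm_page_wakeup(m);
    }

    vm_page_queues_spin_lock(queue);
    TAILQ_REMOVE(&pq->pl, &marker, pageq);
    vm_page_queues_spin_unlock(queue);
}

Because the marker is advanced past each page before the spinlocks are released, the scan never loses its position even though other threads may insert or remove pages around it.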