Example 1
0
/*ARGSUSED*/
void
pvn_plist_init(page_t *pp, page_t *pl[], size_t plsz,
               u_offset_t off, size_t io_len, enum seg_rw rw)
{
    ssize_t sz;
    page_t *ppcur, **ppp;

    /*
     * Set up to load plsz worth
     * starting at the needed page.
     */
    while (pp != NULL && pp->p_offset != off) {
        /*
         * Remove page from the i/o list,
         * release the i/o and the page lock.
         */
        ppcur = pp;
        page_sub(&pp, ppcur);
        page_io_unlock(ppcur);
        (void) page_release(ppcur, 1);
    }

    if (pp == NULL) {
        pl[0] = NULL;
        return;
    }

    sz = plsz;

    /*
     * Initialize the page list array.
     */
    ppp = pl;
    do {
        ppcur = pp;
        *ppp++ = ppcur;
        page_sub(&pp, ppcur);
        page_io_unlock(ppcur);
        if (rw != S_CREATE)
            page_downgrade(ppcur);
        sz -= PAGESIZE;
    } while (sz > 0 && pp != NULL);
    *ppp = NULL;		/* terminate list */

    /*
     * Now free the remaining pages that weren't
     * loaded in the page list.
     */
    while (pp != NULL) {
        ppcur = pp;
        page_sub(&pp, ppcur);
        page_io_unlock(ppcur);
        (void) page_release(ppcur, 1);
    }
}
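A minimal usage sketch, assuming `pp` is the i/o list a getpage routine built for a cluster read and that pl/plsz/off/rw come from the original request; the wrapper name is hypothetical and not part of the routine above.

/*
 * Hypothetical tail of a getpage path (sketch only): arrange the i/o
 * list into the caller's pl[] array, or skip it when no page list
 * was requested.
 */
static void
example_getpage_finish(page_t *pp, page_t *pl[], size_t plsz,
                       u_offset_t off, size_t io_len, enum seg_rw rw)
{
    if (pl != NULL)
        pvn_plist_init(pp, pl, plsz, off, io_len, rw);
}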
Example 2
0
/*
 * NB: Don't check recsize or reclen.
 */
int
ipc_port_send_data(const struct ipc_header *ipch, const void *p, size_t len)
{
	struct vm_page *page;
	struct task *task;
	vaddr_t vaddr;
	int error;

	task = current_task();

	ASSERT(task != NULL, "Must have a running task.");

	if (p == NULL) {
		ASSERT(len == 0, "Cannot send no data with a set data length.");
		if (len != 0)
			return (ERROR_INVALID);
		error = ipc_port_send_page(ipch, NULL);
		if (error != 0)
			return (error);
		return (0);
	}

	ASSERT(len != 0, "Cannot send data without data length.");
	ASSERT(len <= PAGE_SIZE, "Cannot send more than a page.");

	error = page_alloc(PAGE_FLAG_DEFAULT, &page);
	if (error != 0)
		return (error);

	error = page_map_direct(&kernel_vm, page, &vaddr);
	if (error != 0) {
		page_release(page);
		return (error);
	}

	memcpy((void *)vaddr, p, len);
	/*
	 * Clear any trailing data so we don't leak kernel information.
	 */
	if (len != PAGE_SIZE)
		memset((void *)(vaddr + len), 0, PAGE_SIZE - len);

	error = page_unmap_direct(&kernel_vm, page, vaddr);
	if (error != 0)
		panic("%s: page_unmap_direct failed: %m", __func__, error);

	error = ipc_port_send_page(ipch, page);
	if (error != 0) {
		page_release(page);
		return (error);
	}

	return (0);
}
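As a usage sketch only: the helper below sends a small record through ipc_port_send_data(), mirroring the length limits asserted above. `struct record`, the wrapper name and the `reclen` parameter are assumptions, not part of the routine above.

struct record;				/* hypothetical payload type */

static int
example_send_record(const struct ipc_header *ipch, const struct record *rec,
		    size_t reclen)
{
	/* ipc_port_send_data() copies at most one page into a fresh page. */
	if (rec == NULL || reclen == 0 || reclen > PAGE_SIZE)
		return (ERROR_INVALID);
	return (ipc_port_send_data(ipch, rec, reclen));
}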
Example 3
0
int
ipc_port_send(const struct ipc_header *ipch, void *vpage)
{
	struct vm_page *page;
	struct task *task;
	struct vm *vm;
	int error;

	task = current_task();

	ASSERT(task != NULL, "Must have a running task.");
	ASSERT(ipch != NULL, "Must have a header.");

	/*
	 * Extract the vm_page for this page.
	 */
	if (vpage == NULL) {
		page = NULL;
	} else {
		if ((task->t_flags & TASK_KERNEL) == 0)
			vm = task->t_vm;
		else
			vm = &kernel_vm;
		error = page_extract(vm, (vaddr_t)vpage, &page);
		if (error != 0)
			return (error);
		if (vm == &kernel_vm) {
			error = page_unmap_direct(vm, page, (vaddr_t)vpage);
			if (error != 0)
				panic("%s: could not unmap direct page: %m", __func__, error);
		} else {
			error = page_unmap(vm, (vaddr_t)vpage, page);
			if (error != 0)
				panic("%s: could not unmap source page: %m", __func__, error);
			error = vm_free_address(vm, (vaddr_t)vpage);
			if (error != 0)
				panic("%s: could not free source page address: %m", __func__, error);
		}
	}

	error = ipc_port_send_page(ipch, page);
	if (error != 0) {
		if (page != NULL)
			page_release(page);
		return (error);
	}

	return (0);
}
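For the page-less case, a hypothetical header-only wrapper is just ipc_port_send() with a NULL page, taking the vpage == NULL branch above.

static int
example_send_header_only(const struct ipc_header *ipch)
{
	/* vpage == NULL: nothing to extract or unmap, header only. */
	return (ipc_port_send(ipch, NULL));
}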
Example 4
0
/*
 * Entry point to be used by file system getpage subr's and
 * other such routines which either want to unlock pages (B_ASYNC
 * request) or destroy a list of pages if an error occurred.
 */
void
pvn_read_done(page_t *plist, int flags)
{
    page_t *pp;

    while (plist != NULL) {
        pp = plist;
        page_sub(&plist, pp);
        page_io_unlock(pp);
        if (flags & B_ERROR) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else {
            (void) page_release(pp, 0);
        }
    }
}
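A minimal sketch of the kind of read-completion path the comment describes; the handler name is hypothetical and `err` stands in for whatever status the caller's i/o layer reports.

static void
example_read_iodone(page_t *plist, int err)
{
    /* On error the whole list is destroyed; otherwise unlock/release it. */
    pvn_read_done(plist, err != 0 ? B_ERROR : 0);
}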
Example 5
0
/*
 * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED, B_DELWRI,
 * B_TRUNC, B_FORCE}.  B_DELWRI indicates that this page is part of a kluster
 * operation and is only to be considered if it doesn't involve any
 * waiting here.  B_TRUNC indicates that the file is being truncated
 * and so no i/o needs to be done. B_FORCE indicates that the page
 * must be destroyed so don't try writing it out.
 *
 * The caller must ensure that the page is locked.  Returns 1 if
 * the page should be written back (the "iolock" is held in this
 * case), or 0 if the page has been dealt with or has been
 * unlocked.
 */
int
pvn_getdirty(page_t *pp, int flags)
{
    ASSERT((flags & (B_INVAL | B_FREE)) ?
           PAGE_EXCL(pp) : PAGE_SHARED(pp));
    ASSERT(PP_ISFREE(pp) == 0);

    /*
     * If trying to invalidate or free a logically `locked' page,
     * forget it.  Don't need page_struct_lock to check p_lckcnt and
     * p_cowcnt as the page is exclusively locked.
     */
    if ((flags & (B_INVAL | B_FREE)) && !(flags & (B_TRUNC|B_FORCE)) &&
            (pp->p_lckcnt != 0 || pp->p_cowcnt != 0)) {
        page_unlock(pp);
        return (0);
    }

    /*
     * Now acquire the i/o lock so we can add it to the dirty
     * list (if necessary).  We avoid blocking on the i/o lock
     * in the following cases:
     *
     *	If B_DELWRI is set, which implies that this request is
     *	due to a klustering operation.
     *
     *	If this is an async (B_ASYNC) operation and we are not doing
     *	invalidation (B_INVAL) [The current i/o or fsflush will ensure
     *	that the page is written out].
     */
    if ((flags & B_DELWRI) || ((flags & (B_INVAL | B_ASYNC)) == B_ASYNC)) {
        if (!page_io_trylock(pp)) {
            page_unlock(pp);
            return (0);
        }
    } else {
        page_io_lock(pp);
    }

    /*
     * If we want to free or invalidate the page then
     * we need to unload it so that anyone who wants
     * it will have to take a minor fault to get it.
     * Otherwise, we're just writing the page back so we
     * need to sync up the hardware and software mod bit to
     * detect any future modifications.  We clear the
     * software mod bit when we put the page on the dirty
     * list.
     */
    if (flags & (B_INVAL | B_FREE)) {
        (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
    } else {
        (void) hat_pagesync(pp, HAT_SYNC_ZERORM);
    }

    if (!hat_ismod(pp) || (flags & B_TRUNC)) {
        /*
         * Don't need to add it to the
         * list after all.
         */
        page_io_unlock(pp);
        if (flags & B_INVAL) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_INVAL, 0, kcred);
        } else if (flags & B_FREE) {
            /*LINTED: constant in conditional context*/
            VN_DISPOSE(pp, B_FREE, (flags & B_DONTNEED), kcred);
        } else {
            /*
             * This is advisory path for the callers
             * of VOP_PUTPAGE() who prefer freeing the
             * page _only_ if no one else is accessing it.
             * E.g. segmap_release()
             *
             * The above hat_ismod() check is useless because:
             * (1) we may not be holding SE_EXCL lock;
             * (2) we've not unloaded _all_ translations
             *
             * Let page_release() do the heavy-lifting.
             */
            (void) page_release(pp, 1);
        }
        return (0);
    }

    /*
     * Page is dirty, get it ready for the write back
     * and add page to the dirty list.
     */
    hat_clrrefmod(pp);

    /*
     * If we're going to free the page when we're done
     * then we can let others try to use it starting now.
     * We'll detect the fact that they used it when the
     * i/o is done and avoid freeing the page.
     */
    if (flags & B_FREE)
        page_downgrade(pp);


    TRACE_1(TR_FAC_VM, TR_PVN_GETDIRTY, "pvn_getdirty:pp %p", pp);

    return (1);
}
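A sketch of how a putpage path might consult pvn_getdirty() before issuing a write; example_write_page() is a hypothetical stand-in for the file system's own writer.

static int example_write_page(page_t *, int);	/* hypothetical writer */

static int
example_putapage(page_t *pp, int flags)
{
    /*
     * pvn_getdirty() returns 0 when the page has already been dealt
     * with (and unlocked), or 1 with the i/o lock held when the page
     * still needs to be written back.
     */
    if (pvn_getdirty(pp, flags) == 0)
        return (0);
    return (example_write_page(pp, flags));
}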
Example 6
0
/*
 * Handles common work of the VOP_GETPAGE routines by iterating page by page
 * calling the getpage helper for each.
 */
int
pvn_getpages(
    int (*getpage)(vnode_t *, u_offset_t, size_t, uint_t *, page_t *[],
                   size_t, struct seg *, caddr_t, enum seg_rw, cred_t *),
    struct vnode *vp,
    u_offset_t off,
    size_t len,
    uint_t *protp,
    page_t *pl[],
    size_t plsz,
    struct seg *seg,
    caddr_t addr,
    enum seg_rw rw,
    struct cred *cred)
{
    page_t **ppp;
    u_offset_t o, eoff;
    size_t sz, xlen;
    int err;

    /* ensure that we have enough space */
    ASSERT(pl == NULL || plsz >= len);

    /*
     * Loop one page at a time and let the getapage function fill
     * in the next page in the array.  We only allow one page to be
     * returned at a time (except for the last page) so that we
     * don't have any problems with duplicates and other such
     * painful problems.  This is a very simple-minded algorithm,
     * but it does the job correctly.  We hope that a getapage call
     * for a resident page that we might have been able to get from
     * an earlier call doesn't cost too much.
     */
    ppp = pl;
    sz = (pl != NULL) ? PAGESIZE : 0;
    eoff = off + len;
    xlen = len;
    for (o = off; o < eoff; o += PAGESIZE, addr += PAGESIZE,
            xlen -= PAGESIZE) {
        if (o + PAGESIZE >= eoff && pl != NULL) {
            /*
             * Last time through - allow all of
             * what's left of the pl[] array to be used.
             */
            sz = plsz - (o - off);
        }
        err = (*getpage)(vp, o, xlen, protp, ppp, sz, seg, addr,
                         rw, cred);
        if (err) {
            /*
             * Release any pages we already got.
             */
            if (o > off && pl != NULL) {
                for (ppp = pl; *ppp != NULL; *ppp++ = NULL)
                    (void) page_release(*ppp, 1);
            }
            break;
        }
        if (pl != NULL)
            ppp++;
    }
    return (err);
}
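The sketch below shows how a VOP_GETPAGE-style entry point might wrap pvn_getpages() around its per-page helper; example_getapage() is hypothetical and only needs to match the function-pointer type expected by pvn_getpages().

static int example_getapage(vnode_t *, u_offset_t, size_t, uint_t *,
    page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, cred_t *);

static int
example_getpage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
    page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
    enum seg_rw rw, struct cred *cr)
{
    /* Iterate page by page, letting the helper fill pl[] one entry at a time. */
    return (pvn_getpages(example_getapage, vp, off, len, protp, pl, plsz,
        seg, addr, rw, cr));
}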
Example 7
0
/*
 * Scan page_t's and issue I/O's for modified pages.
 *
 * Also coalesces consecutive small sized free pages into the next larger
 * pagesize. This costs a tiny bit of time in fsflush, but will reduce time
 * spent scanning on later passes and for anybody allocating large pages.
 */
static void
fsflush_do_pages()
{
	vnode_t		*vp;
	ulong_t		pcount;
	hrtime_t	timer = gethrtime();
	ulong_t		releases = 0;
	ulong_t		nexamined = 0;
	ulong_t		nlocked = 0;
	ulong_t		nmodified = 0;
	ulong_t		ncoalesce = 0;
	ulong_t		cnt;
	int		mod;
	int		fspage = 1;
	u_offset_t	offset;
	uint_t		szc;

	page_t		*coal_page = NULL;  /* 1st page in group to coalesce */
	uint_t		coal_szc = 0;	    /* size code, coal_page->p_szc */
	uint_t		coal_cnt = 0;	    /* count of pages seen */

	static ulong_t	nscan = 0;
	static pgcnt_t	last_total_pages = 0;
	static page_t	*pp = NULL;

	/*
	 * Check to see if total_pages has changed.
	 */
	if (total_pages != last_total_pages) {
		last_total_pages = total_pages;
		nscan = (last_total_pages * (tune.t_fsflushr))/v.v_autoup;
	}

	if (pp == NULL)
		pp = memsegs->pages;

	pcount = 0;
	while (pcount < nscan) {

		/*
		 * move to the next page, skipping over large pages
		 * and issuing prefetches.
		 */
		if (pp->p_szc && fspage == 0) {
			pfn_t pfn;

			pfn  = page_pptonum(pp);
			cnt = page_get_pagecnt(pp->p_szc);
			cnt -= pfn & (cnt - 1);
		} else
			cnt = 1;

		pp = page_nextn(pp, cnt);
		prefetch_page_r((void *)pp);
		ASSERT(pp != NULL);
		pcount += cnt;

		/*
		 * Do a bunch of dirty tests (ie. no locking) to determine
		 * if we can quickly skip this page. These tests are repeated
		 * after acquiring the page lock.
		 */
		++nexamined;
		if (PP_ISSWAP(pp)) {
			fspage = 0;
			coal_page = NULL;
			continue;
		}

		/*
		 * skip free pages too, but try coalescing them into larger
		 * pagesizes
		 */
		if (PP_ISFREE(pp)) {
			/*
			 * skip pages with a file system identity or that
			 * are already maximum size
			 */
			fspage = 0;
			szc = pp->p_szc;
			if (pp->p_vnode != NULL || szc == fsf_npgsz - 1) {
				coal_page = NULL;
				continue;
			}

			/*
			 * If not in a coalescing candidate page or the size
			 * codes are different, start a new candidate.
			 */
			if (coal_page == NULL || coal_szc != szc) {

				/*
				 * page must be properly aligned
				 */
				if ((page_pptonum(pp) & fsf_mask[szc]) != 0) {
					coal_page = NULL;
					continue;
				}
				coal_page = pp;
				coal_szc = szc;
				coal_cnt = 1;
				continue;
			}

			/*
			 * acceptable to add this to existing candidate page
			 */
			++coal_cnt;
			if (coal_cnt < fsf_pgcnt[coal_szc])
				continue;

			/*
			 * We've got enough pages to coalesce, so do it.
			 * After promoting, we clear coal_page, so it will
			 * take another pass to promote this to an even
			 * larger page.
			 */
			++ncoalesce;
			(void) page_promote_size(coal_page, coal_szc);
			coal_page = NULL;
			continue;
		} else {
			coal_page = NULL;
		}

		if (PP_ISKAS(pp) ||
		    PAGE_LOCKED(pp) ||
		    pp->p_lckcnt != 0 ||
		    pp->p_cowcnt != 0) {
			fspage = 0;
			continue;
		}


		/*
		 * Reject pages that can't be "exclusively" locked.
		 */
		if (!page_trylock(pp, SE_EXCL))
			continue;
		++nlocked;


		/*
		 * After locking the page, redo the above checks.
		 * Since we locked the page, leave out the PAGE_LOCKED() test.
		 */
		vp = pp->p_vnode;
		if (PP_ISSWAP(pp) ||
		    PP_ISFREE(pp) ||
		    vp == NULL ||
		    PP_ISKAS(pp) ||
		    (vp->v_flag & VISSWAP) != 0) {
			page_unlock(pp);
			fspage = 0;
			continue;
		}
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		fspage = 1;
		ASSERT(vp->v_type != VCHR);

		/*
		 * Check the modified bit, but leave the bit alone in hardware;
		 * it will be cleared if we do the putpage.
		 */
		if (IS_VMODSORT(vp))
			mod = hat_ismod(pp);
		else
			mod = hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD;

		if (mod) {
			++nmodified;
			offset = pp->p_offset;

			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);

			page_unlock(pp);

			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_ASYNC,
			    kcred, NULL);

			VN_RELE(vp);
		} else {

			/*
			 * Catch any pages which should be on the cache list,
			 * but aren't yet.
			 */
			if (hat_page_is_mapped(pp) == 0) {
				++releases;
				(void) page_release(pp, 1);
			} else {
				page_unlock(pp);
			}
		}
	}

	/*
	 * maintain statistics
	 * reset every million wakeups, just to avoid overflow
	 */
	if (++fsf_cycles == 1000000) {
		fsf_cycles = 0;
		fsf_total.fsf_scan = 0;
		fsf_total.fsf_examined = 0;
		fsf_total.fsf_locked = 0;
		fsf_total.fsf_modified = 0;
		fsf_total.fsf_coalesce = 0;
		fsf_total.fsf_time = 0;
		fsf_total.fsf_releases = 0;
	} else {
		fsf_total.fsf_scan += fsf_recent.fsf_scan = nscan;
		fsf_total.fsf_examined += fsf_recent.fsf_examined = nexamined;
		fsf_total.fsf_locked += fsf_recent.fsf_locked = nlocked;
		fsf_total.fsf_modified += fsf_recent.fsf_modified = nmodified;
		fsf_total.fsf_coalesce += fsf_recent.fsf_coalesce = ncoalesce;
		fsf_total.fsf_time += fsf_recent.fsf_time = gethrtime() - timer;
		fsf_total.fsf_releases += fsf_recent.fsf_releases = releases;
	}
}
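The scan budget above is plain arithmetic: one wakeup every tune.t_fsflushr seconds, with the goal of covering all of memory every v.v_autoup seconds, gives roughly total_pages * t_fsflushr / v_autoup pages per pass. The helper below restates that as a sketch, with ordinary parameters standing in for the real tunables.

static ulong_t
example_scan_budget(pgcnt_t pages, int fsflushr, int autoup)
{
	/* Pages to examine per wakeup so all of memory is covered in time. */
	return ((ulong_t)((pages * fsflushr) / autoup));
}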
Example 8
0
/*
 * XXX
 * receive could take a task-local port number like a fd and speed lookup and
 * minimize locking.
 */
int
ipc_port_receive(ipc_port_t port, struct ipc_header *ipch, void **vpagep)
{
	struct ipc_message *ipcmsg;
	struct ipc_port *ipcp;
	struct task *task;
	vaddr_t vaddr;
	int error, error2;

	task = current_task();

	ASSERT(task != NULL, "Must have a running task.");
	ASSERT(ipch != NULL, "Must be able to copy out header.");

	IPC_PORTS_LOCK();
	ipcp = ipc_port_lookup(port);
	if (ipcp == NULL) {
		IPC_PORTS_UNLOCK();
		return (ERROR_NOT_FOUND);
	}
	IPC_PORTS_UNLOCK();

	if (!ipc_port_right_check(ipcp, task, IPC_PORT_RIGHT_RECEIVE)) {
		IPC_PORT_UNLOCK(ipcp);
		return (ERROR_NO_RIGHT);
	}

	if (TAILQ_EMPTY(&ipcp->ipcp_msgs)) {
		IPC_PORT_UNLOCK(ipcp);
		return (ERROR_AGAIN);
	}

	ipcmsg = TAILQ_FIRST(&ipcp->ipcp_msgs);
	ASSERT(ipcmsg != NULL, "Queue must not change out from under us.");
	ASSERT(ipcmsg->ipcmsg_header.ipchdr_dst == ipcp->ipcp_port,
	       "Destination must be this port.");
	TAILQ_REMOVE(&ipcp->ipcp_msgs, ipcmsg, ipcmsg_link);
	IPC_PORT_UNLOCK(ipcp);

	/*
	 * Insert any passed rights.
	 */
	if (ipcmsg->ipcmsg_header.ipchdr_right != IPC_PORT_RIGHT_NONE) {
		ipcp = ipc_port_lookup(ipcmsg->ipcmsg_header.ipchdr_src);
		if (ipcp == NULL)
			panic("%s: port disappeared.", __func__);
		error = ipc_port_right_insert(ipcp, task, ipcmsg->ipcmsg_header.ipchdr_right);
		if (error != 0)
			panic("%s: granting rights failed: %m", __func__,
			      error);
		IPC_PORT_UNLOCK(ipcp);
	}

	if (ipcmsg->ipcmsg_page == NULL) {
		if (vpagep != NULL)
			*vpagep = NULL;
	} else {
		if (vpagep == NULL) {
			/*
			 * A task may refuse a page flip for any number of reasons.
			 */
			page_release(ipcmsg->ipcmsg_page);
		} else {
			/*
			 * Map this page into the receiving task.
			 */
			if ((task->t_flags & TASK_KERNEL) == 0) {
				/*
				 * User task.
				 */
				error = vm_alloc_address(task->t_vm, &vaddr, 1, false);
				if (error != 0) {
					page_release(ipcmsg->ipcmsg_page);
					free(ipcmsg);
					return (error);
				}

				error = page_map(task->t_vm, vaddr, ipcmsg->ipcmsg_page);
				if (error != 0) {
					error2 = vm_free_address(task->t_vm, vaddr);
					if (error2 != 0)
						panic("%s: vm_free_address failed: %m", __func__, error2);
					page_release(ipcmsg->ipcmsg_page);
					free(ipcmsg);
					return (error);
				}
			} else {
				/*
				 * Kernel task.
				 */
				error = page_map_direct(&kernel_vm, ipcmsg->ipcmsg_page, &vaddr);
				if (error != 0) {
					page_release(ipcmsg->ipcmsg_page);
					free(ipcmsg);
					return (error);
				}
			}
			*vpagep = (void *)vaddr;
		}
	}

	*ipch = ipcmsg->ipcmsg_header;

	free(ipcmsg);

	return (0);
}
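Finally, a hypothetical polling consumer: an empty queue is reported as ERROR_AGAIN rather than by blocking, so a simple caller can retry; real code would sleep or yield instead of spinning.

static int
example_receive_wait(ipc_port_t port, struct ipc_header *ipch, void **vpagep)
{
	int error;

	for (;;) {
		error = ipc_port_receive(port, ipch, vpagep);
		if (error != ERROR_AGAIN)
			return (error);
		/* Busy-wait for illustration only. */
	}
}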