Example 1
void
cpu_set_upcall(struct thread *td, struct thread *td0)
{
	struct pcb *pcb;
	struct trapframe *tf;

	ia64_highfp_save(td0);

	tf = td->td_frame;
	KASSERT(tf != NULL, ("foo"));
	bcopy(td0->td_frame, tf, sizeof(*tf));
	tf->tf_length = sizeof(struct trapframe);
	tf->tf_flags = FRAME_SYSCALL;
	tf->tf_special.ndirty = 0;
	tf->tf_special.bspstore &= ~0x1ffUL;
	tf->tf_scratch.gr8 = 0;
	tf->tf_scratch.gr9 = 1;
	tf->tf_scratch.gr10 = 0;

	pcb = td->td_pcb;
	KASSERT(pcb != NULL, ("foo"));
	bcopy(td0->td_pcb, pcb, sizeof(*pcb));
	pcb->pcb_special.bspstore = td->td_kstack;
	pcb->pcb_special.pfs = 0;
	pcb->pcb_current_pmap = vmspace_pmap(td->td_proc->p_vmspace);
	pcb->pcb_special.sp = (uintptr_t)tf - 16;
	pcb->pcb_special.rp = FDESC_FUNC(fork_trampoline);
	cpu_set_fork_handler(td, (void (*)(void*))fork_return, td);

	/* Setup to release the spin count in fork_exit(). */
	td->td_md.md_spinlock_count = 1;
	td->td_md.md_saved_intr = 1;
}
Example 2
/*
 * vm_fault_prefault provides a quick way of clustering
 * page faults into a process's address space.  It is a "cousin"
 * of vm_map_pmap_enter, except it runs at page fault time instead
 * of mmap time.
 */
static void
vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
{
	int i;
	vm_offset_t addr, starta;
	vm_pindex_t pindex;
	vm_page_t m;
	vm_object_t object;

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))
		return;

	object = entry->object.vm_object;

	starta = addra - PFBAK * PAGE_SIZE;
	if (starta < entry->start) {
		starta = entry->start;
	} else if (starta > addra) {
		starta = 0;
	}

	for (i = 0; i < PAGEORDER_SIZE; i++) {
		vm_object_t backing_object, lobject;

		addr = addra + prefault_pageorder[i];
		if (addr > addra + (PFFOR * PAGE_SIZE))
			addr = 0;

		if (addr < starta || addr >= entry->end)
			continue;

		if (!pmap_is_prefaultable(pmap, addr))
			continue;

		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
		lobject = object;
		VM_OBJECT_WLOCK(lobject);
		while ((m = vm_page_lookup(lobject, pindex)) == NULL &&
		    lobject->type == OBJT_DEFAULT &&
		    (backing_object = lobject->backing_object) != NULL) {
			KASSERT((lobject->backing_object_offset & PAGE_MASK) ==
			    0, ("vm_fault_prefault: unaligned object offset"));
			pindex += lobject->backing_object_offset >> PAGE_SHIFT;
			VM_OBJECT_WLOCK(backing_object);
			VM_OBJECT_WUNLOCK(lobject);
			lobject = backing_object;
		}
		/*
		 * Give up when a page is not in memory.
		 */
		if (m == NULL) {
			VM_OBJECT_WUNLOCK(lobject);
			break;
		}
		if (m->valid == VM_PAGE_BITS_ALL &&
		    (m->flags & PG_FICTITIOUS) == 0)
			pmap_enter_quick(pmap, addr, m, entry->protection);
		VM_OBJECT_WUNLOCK(lobject);
	}
}
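The prefault loop above depends on PFBAK, PFFOR, PAGEORDER_SIZE, and
prefault_pageorder[], which are defined elsewhere in the same file.  For
reference, a reconstruction of how the contemporaneous FreeBSD vm_fault.c
defines them (an assumption, not part of the example itself): the table
alternates pages behind and ahead of the faulting address so the nearest
neighbors are tried first.

#define PFBAK 4
#define PFFOR 4
#define PAGEORDER_SIZE (PFBAK + PFFOR)

/* Byte offsets applied to the faulting address, nearest pages first. */
static int prefault_pageorder[] = {
	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
	-4 * PAGE_SIZE, 4 * PAGE_SIZE
};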
Example 3
/*
 * Passively intercepts the thread switch function to increase
 * the thread priority from a user priority to a kernel priority, reducing
 * syscall and trap overhead for the case where no switch occurs.
 *
 * Synchronizes td_ucred with p_ucred.  This is used by system calls,
 * signal handling, faults, AST traps, and anything else that enters the
 * kernel from userland and provides the kernel with a stable read-only
 * copy of the process ucred.
 *
 * To avoid races with another thread updating p_ucred we obtain p_spin.
 * The other thread doing the update will obtain both p_token and p_spin.
 * In the case where the cached cred pointer matches, we will already have
 * the ref and we don't have to do one blessed thing.
 */
static __inline void
userenter(struct thread *curtd, struct proc *curp)
{
	struct ucred *ocred;
	struct ucred *ncred;

	curtd->td_release = lwkt_passive_release;

	if (curtd->td_ucred != curp->p_ucred) {
		spin_lock(&curp->p_spin);
		ncred = crhold(curp->p_ucred);
		spin_unlock(&curp->p_spin);
		ocred = curtd->td_ucred;
		curtd->td_ucred = ncred;
		if (ocred)
			crfree(ocred);
	}

#ifdef DDB
	/*
	 * Debugging, remove top two user stack pages to catch kernel faults
	 */
	if (freeze_on_seg_fault > 1 && curtd->td_lwp) {
		pmap_remove(vmspace_pmap(curtd->td_lwp->lwp_vmspace),
			    0x00007FFFFFFFD000LU,
			    0x0000800000000000LU);
	}
#endif
}
Example 4
/*
 * Get user stack from the thread.
 * This assumes the thread is unlocked, idle,
 * and 64-bit.
 */
static struct ksample_stack *
stack_capture_user(struct thread *thread)
{
	struct ksample_stack *retval = NULL;
	struct amd64_frame frame = { 0 };
	size_t depth = 0;
	static const size_t MAXDEPTH = 4096 / sizeof(vm_offset_t);
	caddr_t *pcs = NULL;
	int error = 0;
	pmap_t pmap = vmspace_pmap(thread->td_proc->p_vmspace);

	frame.f_frame = (void *)thread->td_frame->tf_rbp;
	pcs = malloc(sizeof(*pcs) * MAXDEPTH, M_TEMP, M_WAITOK | M_ZERO);
	pcs[depth++] = (caddr_t)thread->td_frame->tf_rip;

	while (frame.f_frame && depth < MAXDEPTH) {
		struct iovec iov;
		struct uio uio;

		iov.iov_base = (caddr_t)&frame;
		iov.iov_len = sizeof(frame);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(uintptr_t)frame.f_frame;
		uio.uio_resid = sizeof(frame);
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = curthread;

		/* If the frame pointer is not mapped in, just stop. */
		if (pmap_extract(pmap, (vm_offset_t)frame.f_frame) == 0)
			break;
		/* Read the next frame from the target process's memory. */
		error = proc_rwmem(thread->td_proc, &uio);
		if (error)
			break;
		pcs[depth++] = (caddr_t)frame.f_retaddr;
	}
	retval = malloc(sizeof(struct ksample_stack) + depth * sizeof(caddr_t),
	    M_TEMP, M_WAITOK);
	if (retval != NULL) {
		retval->depth = depth;
		bcopy(pcs, retval->pcs, depth * sizeof(pcs[0]));
	}
	free(pcs, M_TEMP);
	return (retval);
}
Example 5
/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
	struct pcb *pcb2;
	struct trapframe *tf;

	if ((flags & RFPROC) == 0)
		return;

	if (td1 == curthread) {
		/*
		 * Save tpidr_el0, tpidrro_el0, and the VFP state.  These are
		 * normally saved in cpu_switch(), but if userland changed
		 * them and then forked, that may not have happened yet.
		 */
		td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0);
		td1->td_pcb->pcb_tpidrro_el0 = READ_SPECIALREG(tpidrro_el0);
#ifdef VFP
		if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0)
			vfp_save_state(td1, td1->td_pcb);
#endif
	}

	pcb2 = (struct pcb *)(td2->td_kstack +
	    td2->td_kstack_pages * PAGE_SIZE) - 1;

	td2->td_pcb = pcb2;
	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));

	td2->td_proc->p_md.md_l0addr =
	    vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l0);

	tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
	bcopy(td1->td_frame, tf, sizeof(*tf));
	tf->tf_x[0] = 0;
	tf->tf_x[1] = 0;
	tf->tf_spsr = td1->td_frame->tf_spsr & PSR_M_32;

	td2->td_frame = tf;

	/* Set the return value registers for fork() */
	td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return;
	td2->td_pcb->pcb_x[9] = (uintptr_t)td2;
	td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline;
	td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame;
	td2->td_pcb->pcb_fpusaved = &td2->td_pcb->pcb_fpustate;
	td2->td_pcb->pcb_vfpcpu = UINT_MAX;

	/* Setup to release spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_daif = td1->td_md.md_saved_daif & ~DAIF_I_MASKED;
}
Example 6
/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
{
	struct pcb *pcb2;
	struct trapframe *tf;

	if ((flags & RFPROC) == 0)
		return;

	pcb2 = (struct pcb *)(td2->td_kstack +
	    td2->td_kstack_pages * PAGE_SIZE) - 1;

	td2->td_pcb = pcb2;
	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));

	td2->td_pcb->pcb_l1addr =
	    vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1);

	tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1);
	bcopy(td1->td_frame, tf, sizeof(*tf));

	/* Clear syscall error flag */
	tf->tf_t[0] = 0;

	/* Arguments for child */
	tf->tf_a[0] = 0;
	tf->tf_a[1] = 0;
	tf->tf_sstatus |= (SSTATUS_SPIE); /* Enable interrupts. */
	tf->tf_sstatus |= (SSTATUS_SUM); /* Supervisor can access userspace. */
	tf->tf_sstatus &= ~(SSTATUS_SPP); /* User mode. */

	td2->td_frame = tf;

	/* Set the return value registers for fork() */
	td2->td_pcb->pcb_s[0] = (uintptr_t)fork_return;
	td2->td_pcb->pcb_s[1] = (uintptr_t)td2;
	td2->td_pcb->pcb_ra = (uintptr_t)fork_trampoline;
	td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame;

	/* Setup to release spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_sstatus_ie = (SSTATUS_SIE);
}
Example 7
/*
 * Try to remove FS references in the specified process.  This function
 * is used during shutdown.
 */
static
void
shutdown_cleanup_proc(struct proc *p)
{
	struct filedesc *fdp;
	struct vmspace *vm;

	if (p == NULL)
		return;
	if ((fdp = p->p_fd) != NULL) {
		kern_closefrom(0);
		if (fdp->fd_cdir) {
			cache_drop(&fdp->fd_ncdir);
			vrele(fdp->fd_cdir);
			fdp->fd_cdir = NULL;
		}
		if (fdp->fd_rdir) {
			cache_drop(&fdp->fd_nrdir);
			vrele(fdp->fd_rdir);
			fdp->fd_rdir = NULL;
		}
		if (fdp->fd_jdir) {
			cache_drop(&fdp->fd_njdir);
			vrele(fdp->fd_jdir);
			fdp->fd_jdir = NULL;
		}
	}
	if (p->p_vkernel)
		vkernel_exit(p);
	if (p->p_textvp) {
		vrele(p->p_textvp);
		p->p_textvp = NULL;
	}
	vm = p->p_vmspace;
	if (vm != NULL) {
		pmap_remove_pages(vmspace_pmap(vm),
				  VM_MIN_USER_ADDRESS,
				  VM_MAX_USER_ADDRESS);
		vm_map_remove(&vm->vm_map,
			      VM_MIN_USER_ADDRESS,
			      VM_MAX_USER_ADDRESS);
	}
}
Example 8
int
vm_create(const char *name, struct vm **retvm)
{
	int i;
	struct vm *vm;
	struct vmspace *vmspace;

	const int BSP = 0;

	/*
	 * If vmm.ko could not be successfully initialized then don't attempt
	 * to create the virtual machine.
	 */
	if (!vmm_initialized)
		return (ENXIO);

	if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
		return (EINVAL);

	vmspace = VMSPACE_ALLOC(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	if (vmspace == NULL)
		return (ENOMEM);

	vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
	strcpy(vm->name, name);
	vm->vmspace = vmspace;
	mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
	vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
	vm->vioapic = vioapic_init(vm);
	vm->vhpet = vhpet_init(vm);
	vm->vatpic = vatpic_init(vm);

	for (i = 0; i < VM_MAXCPU; i++) {
		vcpu_init(vm, i);
		guest_msrs_init(vm, i);
	}

	vm_activate_cpu(vm, BSP);

	*retvm = vm;
	return (0);
}
Example 9
/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 *
 * No requirements.
 */
void
vm_fork(struct proc *p1, struct proc *p2, int flags)
{
	if ((flags & RFPROC) == 0) {
		/*
		 * Divorce the memory, if it is shared; essentially this
		 * converts memory shared amongst threads into local
		 * copy-on-write.
		 */
		if ((flags & RFMEM) == 0) {
			if (p1->p_vmspace->vm_sysref.refcnt > 1) {
				vmspace_unshare(p1);
			}
		}
		cpu_fork(ONLY_LWP_IN_PROC(p1), NULL, flags);
		return;
	}

	if (flags & RFMEM) {
		vmspace_ref(p1->p_vmspace);
		p2->p_vmspace = p1->p_vmspace;
	}

	while (vm_page_count_severe()) {
		vm_wait(0);
	}

	if ((flags & RFMEM) == 0) {
		p2->p_vmspace = vmspace_fork(p1->p_vmspace);

		pmap_pinit2(vmspace_pmap(p2->p_vmspace));

		if (p1->p_vmspace->vm_shm)
			shmfork(p1, p2);
	}

	pmap_init_proc(p2);
}
Example 10
/*
 * Load a uio.
 */
static int
_bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
                     int *nsegs, int flags)
{
    bus_size_t resid;
    bus_size_t minlen;
    struct iovec *iov;
    pmap_t pmap;
    caddr_t addr;
    int error, i;

    if (uio->uio_segflg == UIO_USERSPACE) {
        KASSERT(uio->uio_td != NULL,
                ("bus_dmamap_load_uio: USERSPACE but no proc"));
        pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace);
    } else
        pmap = kernel_pmap;
    resid = uio->uio_resid;
    iov = uio->uio_iov;
    error = 0;

    for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) {
        /*
         * Now at the first iovec to load.  Load each iovec
         * until we have exhausted the residual count.
         */

        addr = (caddr_t) iov[i].iov_base;
        minlen = resid < iov[i].iov_len ? resid : iov[i].iov_len;
        if (minlen > 0) {
            error = _bus_dmamap_load_buffer(dmat, map, addr,
                                            minlen, pmap, flags, NULL, nsegs);
            resid -= minlen;
        }
    }

    return (error);
}
Example 11
static int
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
	int rv, ftype;
	struct vm_map *map;
	struct vcpu *vcpu;
	struct vm_exit *vme;

	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;

	ftype = vme->u.paging.fault_type;
	KASSERT(ftype == VM_PROT_READ ||
	    ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
	    ("vm_handle_paging: invalid fault_type %d", ftype));

	if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
		rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
		    vme->u.paging.gpa, ftype);
		if (rv == 0)
			goto done;
	}

	map = &vm->vmspace->vm_map;
	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);

	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

	if (rv != KERN_SUCCESS)
		return (EFAULT);
done:
	/* restart execution at the faulting instruction */
	vme->inst_length = 0;

	return (0);
}
Example 12
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
	int error, vcpuid;
	struct vcpu *vcpu;
	struct pcb *pcb;
	uint64_t tscval, rip;
	struct vm_exit *vme;
	bool retu, intr_disabled;
	pmap_t pmap;

	vcpuid = vmrun->cpuid;

	if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
		return (EINVAL);

	pmap = vmspace_pmap(vm->vmspace);
	vcpu = &vm->vcpu[vcpuid];
	vme = &vcpu->exitinfo;
	rip = vmrun->rip;
restart:
	critical_enter();

	KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
	    ("vm_run: absurd pm_active"));

	tscval = rdtsc();

	pcb = PCPU_GET(curpcb);
	set_pcb_flags(pcb, PCB_FULL_IRET);

	restore_guest_msrs(vm, vcpuid);	
	restore_guest_fpustate(vcpu);

	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
	error = VMRUN(vm->cookie, vcpuid, rip, pmap, &vm->rendezvous_func);
	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);

	save_guest_fpustate(vcpu);
	restore_host_msrs(vm, vcpuid);

	vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);

	critical_exit();

	if (error == 0) {
		retu = false;
		switch (vme->exitcode) {
		case VM_EXITCODE_IOAPIC_EOI:
			vioapic_process_eoi(vm, vcpuid,
			    vme->u.ioapic_eoi.vector);
			break;
		case VM_EXITCODE_RENDEZVOUS:
			vm_handle_rendezvous(vm, vcpuid);
			error = 0;
			break;
		case VM_EXITCODE_HLT:
			intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
			error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
			break;
		case VM_EXITCODE_PAGING:
			error = vm_handle_paging(vm, vcpuid, &retu);
			break;
		case VM_EXITCODE_INST_EMUL:
			error = vm_handle_inst_emul(vm, vcpuid, &retu);
			break;
		default:
			retu = true;	/* handled in userland */
			break;
		}
	}

	if (error == 0 && retu == false) {
		rip = vme->rip + vme->inst_length;
		goto restart;
	}

	/* copy the exit information */
	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
	return (error);
}
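vm_run() is reached from userland through the vmm device ioctl interface.
The sketch below shows the calling side under the assumption of the bhyve
ABI of this era (a /dev/vmm/<name> descriptor, the VM_RUN ioctl, and the
cpuid/rip/vm_exit fields of struct vm_run visible above); it is
illustrative, not the canonical monitor loop.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <stdint.h>
#include <string.h>

/* Run one vcpu once; returns the exitcode the kernel handed back, or -1. */
static int
run_vcpu_once(int vmfd, int vcpu, uint64_t rip)
{
	struct vm_run vmrun;

	memset(&vmrun, 0, sizeof(vmrun));
	vmrun.cpuid = vcpu;
	vmrun.rip = rip;
	if (ioctl(vmfd, VM_RUN, &vmrun) == -1)
		return (-1);
	/* vm_run() copied the exit information into vmrun.vm_exit. */
	return (vmrun.vm_exit.exitcode);
}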
Example 13
size_t
md_stack_capture_curthread(caddr_t *pcs, size_t size)
{
        struct trapframe *tf = curthread->td_intr_frame;
	pmap_t pmap = vmspace_pmap(curthread->td_proc->p_vmspace);
	
        size_t num_kstacks = 0, num_ustacks = 0;

#if SAMPLE_DEBUG > 2
	printf("%s(%d):  curthread pid %u, tid %u, name %s\n", __FUNCTION__, __LINE__, curthread->td_proc->p_pid, curthread->td_tid, curthread->td_name);
#endif
	
        if (tf == NULL) {
#if SAMPLE_DEBUG > 5
		printf("%s(%d):  intr frame is NULL?\n", __FUNCTION__, __LINE__);
#endif
//		tf = curthread->td_frame;
		/*
		 * I'm not sure what this means.  This should be done via interrupt,
		 * so if curthread wasn't interrupted, then I don't think we can get
		 * anything here?  Please tell me what I'm doing wrong.
		 */
		return 0;
	}

        if (!TRAPF_USERMODE(tf)) {
                // Start with kernel mode
                unsigned long callpc;
                struct amd64_frame *frame;

                frame = (struct amd64_frame*)tf->tf_rbp;

                while (num_kstacks < size) {
                        if (!INKERNEL((long)frame))
                                break;
                        callpc = frame->f_retaddr;
                        if (!INKERNEL(callpc))
                                break;
                        pcs[num_kstacks++] = (caddr_t)callpc;
                        if (frame->f_frame <= frame ||
                            (vm_offset_t)frame->f_frame >= (vm_offset_t)tf->tf_rbp + KSTACK_PAGES * PAGE_SIZE)
                                break;
                        frame = frame->f_frame;
                }
                tf = curthread->td_frame;
        }
        if (TRAPF_USERMODE(tf)) {
                caddr_t *start_pc = pcs + num_kstacks;
                struct amd64_frame frame;

#if SAMPLE_DEBUG > 3
		printf("%s(%d):  doing user mode crawl\n", __FUNCTION__, __LINE__);
#endif
                frame.f_frame = (struct amd64_frame*)tf->tf_rbp;
                if (tf != curthread->td_intr_frame) {
                        start_pc[num_ustacks++] = (caddr_t)tf->tf_rip;
                }
                while ((num_kstacks + num_ustacks) < size) {
                        void *bp = frame.f_frame;
			/*
			 * The calls to GET_WORD replace the non-functional:
			 * copyin_nofault((void*)frame.f_frame, &frame, sizeof(frame));
			 */
			
			frame.f_frame = (struct amd64_frame*)GET_WORD(pmap, (caddr_t)frame.f_frame);
			if (frame.f_frame == 0) {
#if SAMPLE_DEBUG > 4
				printf("%s(%d):  %s:  frame is 0\n", __FUNCTION__, __LINE__, curthread->td_name);
#endif
				break;
			}
			frame.f_retaddr = (long)GET_WORD(pmap, (caddr_t)frame.f_frame + offsetof(struct amd64_frame, f_retaddr));
			if (frame.f_retaddr == 0) {
#if SAMPLE_DEBUG > 4
				printf("%s(%d):  %s:  retaddr from frame %p is 0\n", __FUNCTION__, __LINE__, curthread->td_name, (void*)frame.f_frame);
#endif
				break;
			}
#if SAMPLE_DEBUG > 2
			printf("%s(%d):  f_frame = %ld, f_retaddr = %ld\n", __FUNCTION__, __LINE__, (long)frame.f_frame, (long)frame.f_retaddr);
#endif
			start_pc[num_ustacks++] = (caddr_t)frame.f_retaddr;
			if ((void*)frame.f_frame < bp) {
				break;
			}
                }
        } else {
Example 14
/*
 * The map entries can *almost* be read with programs like cat.  However,
 * large maps need special programs to read.  It is not easy to implement
 * a program that can sense the required size of the buffer, and then
 * subsequently do a read with the appropriate size.  This operation cannot
 * be atomic.  The best that we can do is to allow the program to do a read
 * with an arbitrarily large buffer, and return as much as we can.  We can
 * return an error code if the buffer is too small (EFBIG), then the program
 * can try a bigger buffer.
 */
int
procfs_domap(struct proc *curp, struct lwp *lp, struct pfsnode *pfs,
	     struct uio *uio)
{
	struct proc *p = lp->lwp_proc;
	int len;
	struct vnode *vp;
	char *fullpath, *freepath;
	int error;
	vm_map_t map = &p->p_vmspace->vm_map;
	pmap_t pmap = vmspace_pmap(p->p_vmspace);
	vm_map_entry_t entry;
	char mebuffer[MEBUFFERSIZE];

	if (uio->uio_rw != UIO_READ)
		return (EOPNOTSUPP);

	if (uio->uio_offset != 0)
		return (0);
	
	error = 0;
	vm_map_lock_read(map);
	for (entry = map->header.next;
		((uio->uio_resid > 0) && (entry != &map->header));
		entry = entry->next) {
		vm_object_t obj, tobj, lobj;
		int ref_count, shadow_count, flags;
		vm_offset_t addr;
		vm_offset_t ostart;
		int resident, privateresident;
		char *type;

		if (entry->maptype != VM_MAPTYPE_NORMAL &&
		    entry->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}

		obj = entry->object.vm_object;
		if (obj)
			vm_object_hold(obj);

		if (obj && (obj->shadow_count == 1))
			privateresident = obj->resident_page_count;
		else
			privateresident = 0;

		/*
		 * Use map->hint as a poor man's ripout detector.
		 */
		map->hint = entry;
		ostart = entry->start;

		/*
		 * Count resident pages (XXX can be horrible on 64-bit)
		 */
		resident = 0;
		addr = entry->start;
		while (addr < entry->end) {
			if (pmap_extract(pmap, addr))
				resident++;
			addr += PAGE_SIZE;
		}
		if (obj) {
			lobj = obj;
			while ((tobj = lobj->backing_object) != NULL) {
				KKASSERT(tobj != obj);
				vm_object_hold(tobj);
				if (tobj == lobj->backing_object) {
					if (lobj != obj) {
						vm_object_lock_swap();
						vm_object_drop(lobj);
					}
					lobj = tobj;
				} else {
					vm_object_drop(tobj);
				}
			}
		} else {
			lobj = NULL;
		}

		freepath = NULL;
		fullpath = "-";
		if (lobj) {
			switch(lobj->type) {
			default:
			case OBJT_DEFAULT:
				type = "default";
				vp = NULL;
				break;
			case OBJT_VNODE:
				type = "vnode";
				vp = lobj->handle;
				vref(vp);
				break;
			case OBJT_SWAP:
				type = "swap";
				vp = NULL;
				break;
			case OBJT_DEVICE:
				type = "device";
				vp = NULL;
				break;
			}
			
			flags = obj->flags;
			ref_count = obj->ref_count;
			shadow_count = obj->shadow_count;
			if (vp != NULL) {
				vn_fullpath(p, vp, &fullpath, &freepath, 1);
				vrele(vp);
			}
			if (lobj != obj)
				vm_object_drop(lobj);
		} else {
			type = "none";
			flags = 0;
			ref_count = 0;
			shadow_count = 0;
		}

		/*
		 * format:
		 *  start, end, res, priv res, cow, access, type, (fullpath).
		 */
		ksnprintf(mebuffer, sizeof(mebuffer),
#if LONG_BIT == 64
			  "0x%016lx 0x%016lx %d %d %p %s%s%s %d %d "
#else
			  "0x%08lx 0x%08lx %d %d %p %s%s%s %d %d "
#endif
			  "0x%04x %s %s %s %s\n",
			(u_long)entry->start, (u_long)entry->end,
			resident, privateresident, obj,
			(entry->protection & VM_PROT_READ)?"r":"-",
			(entry->protection & VM_PROT_WRITE)?"w":"-",
			(entry->protection & VM_PROT_EXECUTE)?"x":"-",
			ref_count, shadow_count, flags,
			(entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW",
			(entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
			type, fullpath);

		if (obj)
			vm_object_drop(obj);

		if (freepath != NULL) {
			kfree(freepath, M_TEMP);
			freepath = NULL;
		}

		len = strlen(mebuffer);
		if (len > uio->uio_resid) {
			error = EFBIG;
			break;
		}

		/*
		 * We cannot safely hold the map locked while accessing
		 * userspace as a VM fault might recurse the locked map.
		 */
		vm_map_unlock_read(map);
		error = uiomove(mebuffer, len, uio);
		vm_map_lock_read(map);
		if (error)
			break;

		/*
		 * We use map->hint as a poor man's ripout detector.  If
		 * it does not match the entry we set it to prior to
		 * unlocking the map the entry MIGHT now be stale.  In
		 * this case we do an expensive lookup to find our place
		 * in the iteration again.
		 */
		if (map->hint != entry) {
			vm_map_entry_t reentry;

			vm_map_lookup_entry(map, ostart, &reentry);
			entry = reentry;
		}
	}
	vm_map_unlock_read(map);

	return error;
}
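The header comment describes the userland side of the protocol: issue one
large read, and retry with a bigger buffer whenever the kernel answers
EFBIG.  A minimal sketch of such a reader (the /proc/curproc/map path is an
assumption about where procfs is mounted):

#include <errno.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t bufsz = 64 * 1024;
	char *buf = NULL;
	ssize_t n;
	int fd;

	fd = open("/proc/curproc/map", O_RDONLY);
	if (fd == -1)
		err(1, "open");
	for (;;) {
		if ((buf = realloc(buf, bufsz)) == NULL)
			err(1, "realloc");
		/* A single read at offset 0 returns the whole map, or fails. */
		n = pread(fd, buf, bufsz, 0);
		if (n >= 0)
			break;
		if (errno != EFBIG)
			err(1, "read");
		bufsz *= 2;	/* buffer too small; try a bigger one */
	}
	fwrite(buf, 1, (size_t)n, stdout);
	free(buf);
	close(fd);
	return (0);
}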
Example 15
void
abort_handler(struct trapframe *tf, int type)
{
	struct vm_map *map;
	struct pcb *pcb;
	struct thread *td;
	u_int user, far, fsr;
	vm_prot_t ftype;
	void *onfault;
	vm_offset_t va;
	int error = 0;
	struct ksig ksig;
	struct proc *p;

	if (type == 1)
		return (prefetch_abort_handler(tf));

	/* Grab FAR/FSR before enabling interrupts */
	far = cpu_faultaddress();
	fsr = cpu_faultstatus();
#if 0
	printf("data abort: fault address=%p (from pc=%p lr=%p)\n",
	       (void*)far, (void*)tf->tf_pc, (void*)tf->tf_svc_lr);
#endif

	/* Update vmmeter statistics */
#if 0
	vmexp.traps++;
#endif

	td = curthread;
	p = td->td_proc;

	PCPU_INC(cnt.v_trap);
	/* Data abort came from user mode? */
	user = TRAP_USERMODE(tf);

	if (user) {
		td->td_pticks = 0;
		td->td_frame = tf;
		if (td->td_cowgen != td->td_proc->p_cowgen)
			thread_cow_update(td);

	}
	/* Grab the current pcb */
	pcb = td->td_pcb;
	/* Re-enable interrupts if they were enabled previously */
	if (td->td_md.md_spinlock_count == 0) {
		if (__predict_true((tf->tf_spsr & PSR_I) == 0))
			enable_interrupts(PSR_I);
		if (__predict_true((tf->tf_spsr & PSR_F) == 0))
			enable_interrupts(PSR_F);
	}


	/* Invoke the appropriate handler, if necessary */
	if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) {
		if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far,
		    td, &ksig)) {
			goto do_trapsignal;
		}
		goto out;
	}

	/*
	 * At this point, we're dealing with one of the following data aborts:
	 *
	 *  FAULT_TRANS_S  - Translation -- Section
	 *  FAULT_TRANS_P  - Translation -- Page
	 *  FAULT_DOMAIN_S - Domain -- Section
	 *  FAULT_DOMAIN_P - Domain -- Page
	 *  FAULT_PERM_S   - Permission -- Section
	 *  FAULT_PERM_P   - Permission -- Page
	 *
	 * These are the main virtual memory-related faults signalled by
	 * the MMU.
	 */

	/*
	 * Make sure the Program Counter is sane. We could fall foul of
	 * someone executing Thumb code, in which case the PC might not
	 * be word-aligned. This would cause a kernel alignment fault
	 * further down if we have to decode the current instruction.
	 * XXX: It would be nice to be able to support Thumb at some point.
	 */
	if (__predict_false((tf->tf_pc & 3) != 0)) {
		if (user) {
			/*
			 * Give the user an illegal instruction signal.
			 */
			/* Deliver a SIGILL to the process */
			ksig.signb = SIGILL;
			ksig.code = 0;
			goto do_trapsignal;
		}

		/*
		 * The kernel never executes Thumb code.
		 */
		printf("\ndata_abort_fault: Misaligned Kernel-mode "
		    "Program Counter\n");
		dab_fatal(tf, fsr, far, td, &ksig);
	}

	va = trunc_page((vm_offset_t)far);

	/*
	 * It is only a kernel address space fault iff:
	 *	1. user == 0  and
	 *	2. pcb_onfault not set or
	 *	3. pcb_onfault set and not LDRT/LDRBT/STRT/STRBT instruction.
	 */
	if (user == 0 && (va >= VM_MIN_KERNEL_ADDRESS ||
	    (va < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW)) &&
	    __predict_true((pcb->pcb_onfault == NULL ||
	     (ReadWord(tf->tf_pc) & 0x05200000) != 0x04200000))) {
		map = kernel_map;

		/* Was the fault due to the FPE/IPKDB ? */
		if (__predict_false((tf->tf_spsr & PSR_MODE)==PSR_UND32_MODE)) {

			/*
			 * Force exit via userret()
			 * This is necessary as the FPE is an extension to
			 * userland that actually runs in a privileged mode
			 * but uses USR mode permissions for its accesses.
			 */
			user = 1;
			ksig.signb = SIGSEGV;
			ksig.code = 0;
			goto do_trapsignal;
		}
	} else {
		map = &td->td_proc->p_vmspace->vm_map;
	}

	/*
	 * We need to know whether the page should be mapped as R or R/W.
	 * On armv4, the fault status register does not indicate whether
	 * the access was a read or write.  We know that a permission fault
	 * can only be the result of a write to a read-only location, so we
	 * can deal with those quickly.  Otherwise we need to disassemble
	 * the faulting instruction to determine if it was a write.
	 */
	if (IS_PERMISSION_FAULT(fsr))
		ftype = VM_PROT_WRITE;
	else {
		u_int insn = ReadWord(tf->tf_pc);

		if (((insn & 0x0c100000) == 0x04000000) ||	/* STR/STRB */
		    ((insn & 0x0e1000b0) == 0x000000b0) ||	/* STRH/STRD */
		    ((insn & 0x0a100000) == 0x08000000)) {	/* STM/CDT */
			ftype = VM_PROT_WRITE;
		} else {
			if ((insn & 0x0fb00ff0) == 0x01000090)	/* SWP */
				ftype = VM_PROT_READ | VM_PROT_WRITE;
			else
				ftype = VM_PROT_READ;
		}
	}

	/*
	 * See if the fault is as a result of ref/mod emulation,
	 * or domain mismatch.
	 */
#ifdef DEBUG
	last_fault_code = fsr;
#endif
	if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK,
	    NULL, "Kernel page fault") != 0)
		goto fatal_pagefault;

	if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
	    user)) {
		goto out;
	}

	onfault = pcb->pcb_onfault;
	pcb->pcb_onfault = NULL;
	error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	pcb->pcb_onfault = onfault;
	if (__predict_true(error == 0))
		goto out;
fatal_pagefault:
	if (user == 0) {
		if (pcb->pcb_onfault) {
			tf->tf_r0 = error;
			tf->tf_pc = (register_t)(intptr_t) pcb->pcb_onfault;
			return;
		}

		printf("\nvm_fault(%p, %x, %x, 0) -> %x\n", map, va, ftype,
		    error);
		dab_fatal(tf, fsr, far, td, &ksig);
	}


	if (error == ENOMEM) {
		printf("VM: pid %d (%s), uid %d killed: "
		    "out of swap\n", td->td_proc->p_pid, td->td_name,
		    (td->td_proc->p_ucred) ?
		     td->td_proc->p_ucred->cr_uid : -1);
		ksig.signb = SIGKILL;
	} else {
		ksig.signb = SIGSEGV;
	}
	ksig.code = 0;
do_trapsignal:
	call_trapsignal(td, ksig.signb, ksig.code);
out:
	/* If returning to user mode, make sure to invoke userret() */
	if (user)
		userret(td, tf);
}
Example 16
/*
 * Utility function to load a linear buffer.  lastaddrp holds state
 * between invocations (for multiple-buffer loads).  segp contains
 * the starting segment on entrance, and the ending segment on exit.
 * first indicates if this is the first invocation of this function.
 */
static int
bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dma_segment_t segs[],
    void *buf, bus_size_t buflen, struct thread *td, int flags,
    vm_offset_t *lastaddrp, int *segp, int first)
{
	bus_size_t sgsize;
	bus_addr_t curaddr, lastaddr, baddr, bmask;
	vm_offset_t vaddr = (vm_offset_t)buf;
	int seg;
	pmap_t pmap;

	if (td != NULL)
		pmap = vmspace_pmap(td->td_proc->p_vmspace);
	else
		pmap = NULL;

	lastaddr = *lastaddrp;
	bmask = ~(dmat->boundary - 1);

	for (seg = *segp; buflen > 0 ; ) {
		/*
		 * Get the physical address for this segment.
		 */
		if (pmap)
			curaddr = pmap_extract(pmap, vaddr);
		else
			curaddr = pmap_kextract(vaddr);

		/*
		 * Compute the segment size, and adjust counts.
		 */
		sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
		if (sgsize > dmat->maxsegsz)
			sgsize = dmat->maxsegsz;
		if (buflen < sgsize)
			sgsize = buflen;

		/*
		 * Make sure we don't cross any boundaries.
		 */
		if (dmat->boundary > 0) {
			baddr = (curaddr + dmat->boundary) & bmask;
			if (sgsize > (baddr - curaddr))
				sgsize = (baddr - curaddr);
		}

		/*
		 * Insert chunk into a segment, coalescing with
		 * the previous segment if possible.
		 */
		if (first) {
			segs[seg].ds_addr = curaddr;
			segs[seg].ds_len = sgsize;
			first = 0;
		} else {
			if (curaddr == lastaddr &&
			    (segs[seg].ds_len + sgsize) <= dmat->maxsegsz &&
			    (dmat->boundary == 0 ||
			     (segs[seg].ds_addr & bmask) == (curaddr & bmask)))
				segs[seg].ds_len += sgsize;
			else {
				if (++seg >= dmat->nsegments)
					break;
				segs[seg].ds_addr = curaddr;
				segs[seg].ds_len = sgsize;
			}
		}

		lastaddr = curaddr + sgsize;
		vaddr += sgsize;
		buflen -= sgsize;
	}

	*segp = seg;
	*lastaddrp = lastaddr;

	/*
	 * Did we fit?
	 */
	return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? */
}
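The boundary clamp in the middle of the loop is the subtle part, so here is
a standalone worked example of the arithmetic with made-up numbers: with a
4 KiB boundary, a segment starting at physical address 0x2ff0 may extend at
most 0x10 bytes before it would cross 0x3000.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t boundary = 0x1000;		/* dmat->boundary: 4 KiB */
	uint64_t bmask = ~(boundary - 1);
	uint64_t curaddr = 0x2ff0;		/* current physical address */
	uint64_t sgsize = 0x100;		/* candidate segment size */
	uint64_t baddr;

	/* Next boundary at or above curaddr: (0x2ff0 + 0x1000) & ~0xfff. */
	baddr = (curaddr + boundary) & bmask;	/* 0x3000 */
	if (sgsize > baddr - curaddr)
		sgsize = baddr - curaddr;	/* clamped to 0x10 */
	printf("segment clamped to 0x%jx bytes\n", (uintmax_t)sgsize);
	return (0);
}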
Example 17
/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(register struct thread *td1, register struct proc *p2,
    struct thread *td2, int flags)
{
	struct pcb *pcb2;
	struct trapframe *tf;
	struct mdproc *mdp2;

	if ((flags & RFPROC) == 0)
		return;

	/* Point the pcb to the top of the stack */
	pcb2 = (struct pcb *)
	    (td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1;
#ifdef __XSCALE__
#ifndef CPU_XSCALE_CORE3
	pmap_use_minicache(td2->td_kstack, td2->td_kstack_pages * PAGE_SIZE);
#endif
#endif
	td2->td_pcb = pcb2;
	
	/* Clone td1's pcb */
	bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
	
	/* Point to mdproc and then copy over td1's contents */
	mdp2 = &p2->p_md;
	bcopy(&td1->td_proc->p_md, mdp2, sizeof(*mdp2));

	/* Point the frame to the stack in front of pcb and copy td1's frame */
	td2->td_frame = (struct trapframe *)pcb2 - 1;
	*td2->td_frame = *td1->td_frame;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies most of the user mode register values.
	 */
	pmap_set_pcb_pagedir(vmspace_pmap(p2->p_vmspace), pcb2);
	pcb2->pcb_regs.sf_r4 = (register_t)fork_return;
	pcb2->pcb_regs.sf_r5 = (register_t)td2;
	pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline;
	pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame);

	pcb2->pcb_vfpcpu = -1;
	pcb2->pcb_vfpstate.fpscr = VFPSCR_DN | VFPSCR_FZ;
	
	tf = td2->td_frame;
	tf->tf_spsr &= ~PSR_C;
	tf->tf_r0 = 0;
	tf->tf_r1 = 0;


	/* Setup to release spin count in fork_exit(). */
	td2->td_md.md_spinlock_count = 1;
	td2->td_md.md_saved_cspr = PSR_SVC32_MODE;
#ifdef ARM_TP_ADDRESS
	td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS;
#else
	td2->td_md.md_tp = td1->td_md.md_tp;
#endif
}
Example 18
int
acpi_sleep_machdep(struct acpi_softc *sc, int state)
{
	ACPI_STATUS		status;
	vm_offset_t		oldphys;
	struct pmap		*pm;
	vm_page_t		page;
	static vm_page_t	opage = NULL;
	int			ret = 0;
	int			pteobj_allocated = 0;
	u_long			ef;
	struct proc		*p;

	if (sc->acpi_wakeaddr == 0) {
		return (0);
	}

	AcpiSetFirmwareWakingVector(sc->acpi_wakephys);

	ef = read_eflags();
	disable_intr();

	/* Create Identity Mapping */
	if ((p = curproc) == NULL)
		p = &proc0;
	pm = vmspace_pmap(p->p_vmspace);
	if (pm->pm_pteobj == NULL) {
		pm->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI + 1);
		pteobj_allocated = 1;
	}

	oldphys = pmap_extract(pm, sc->acpi_wakephys);
	if (oldphys) {
		opage = PHYS_TO_VM_PAGE(oldphys);
	}
	page = PHYS_TO_VM_PAGE(sc->acpi_wakephys);
	pmap_enter(pm, sc->acpi_wakephys, page,
		   VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE, 1);

	ret_addr = 0;
	if (acpi_savecpu()) {
		/* Execute Sleep */
		p_gdt = (struct region_descriptor *)(sc->acpi_wakeaddr + physical_gdt);
		p_gdt->rd_limit = r_gdt.rd_limit;
		p_gdt->rd_base = vtophys(r_gdt.rd_base);

		WAKECODE_FIXUP(physical_esp, u_int32_t, vtophys(r_esp));
		WAKECODE_FIXUP(previous_cr0, u_int32_t, r_cr0);
		WAKECODE_FIXUP(previous_cr2, u_int32_t, r_cr2);
		WAKECODE_FIXUP(previous_cr3, u_int32_t, r_cr3);
		WAKECODE_FIXUP(previous_cr4, u_int32_t, r_cr4);

		WAKECODE_FIXUP(previous_tr,  u_int16_t, r_tr);
		WAKECODE_BCOPY(previous_gdt, struct region_descriptor, r_gdt);
		WAKECODE_FIXUP(previous_ldt, u_int16_t, r_ldt);
		WAKECODE_BCOPY(previous_idt, struct region_descriptor, r_idt);

		WAKECODE_FIXUP(where_to_recover, void, acpi_restorecpu);

		WAKECODE_FIXUP(previous_ds,  u_int16_t, r_ds);
		WAKECODE_FIXUP(previous_es,  u_int16_t, r_es);
		WAKECODE_FIXUP(previous_fs,  u_int16_t, r_fs);
		WAKECODE_FIXUP(previous_gs,  u_int16_t, r_gs);
		WAKECODE_FIXUP(previous_ss,  u_int16_t, r_ss);

		if (acpi_get_verbose(sc)) {
			acpi_printcpu();
		}

		wbinvd(); 

		if (state == ACPI_STATE_S4 && sc->acpi_s4bios) {
			status = AcpiEnterSleepStateS4Bios();
		} else {
			status = AcpiEnterSleepState(state);
		}

		if (status != AE_OK) {
			device_printf(sc->acpi_dev,
				"AcpiEnterSleepState failed - %s\n",
				AcpiFormatException(status));
			ret = -1;
			goto out;
		}

		for (;;) ;
	} else {
		/* Execute Wakeup */
#if 0
		initializecpu();
#endif
		icu_reinit();

		if (acpi_get_verbose(sc)) {
			acpi_savecpu();
			acpi_printcpu();
		}
	}

out:
	vm_page_lock_queues();
	pmap_remove(pm, sc->acpi_wakephys, sc->acpi_wakephys + PAGE_SIZE);
	vm_page_unlock_queues();
	if (opage) {
		pmap_enter(pm, sc->acpi_wakephys, page,
			   VM_PROT_READ | VM_PROT_WRITE, 0);
	}

	if (pteobj_allocated) {
		vm_object_deallocate(pm->pm_pteobj);
		pm->pm_pteobj = NULL;
	}

	write_eflags(ef);

	return (ret);
}
Example 19
/*
 * Destroy old address space, and allocate a new stack.
 *	The new stack is only sgrowsiz large because it is grown
 *	automatically on a page fault.
 */
int
exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
{
	int error;
	struct proc *p = imgp->proc;
	struct vmspace *vmspace = p->p_vmspace;
	vm_object_t obj;
	struct rlimit rlim_stack;
	vm_offset_t sv_minuser, stack_addr;
	vm_map_t map;
	u_long ssiz;

	imgp->vmspace_destroyed = 1;
	imgp->sysent = sv;

	/* May be called with Giant held */
	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);

	/*
	 * Blow away entire process VM, if address space not shared,
	 * otherwise, create a new VM space so that other threads are
	 * not disrupted
	 */
	map = &vmspace->vm_map;
	if (map_at_zero)
		sv_minuser = sv->sv_minuser;
	else
		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
	    vm_map_max(map) == sv->sv_maxuser &&
	    cpu_exec_vmspace_reuse(p, map)) {
		shmexit(vmspace);
		pmap_remove_pages(vmspace_pmap(vmspace));
		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
		/*
		 * An exec terminates mlockall(MCL_FUTURE); the ASLR state
		 * must be re-evaluated.
		 */
		vm_map_lock(map);
		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
		    MAP_ASLR_IGNSTART);
		vm_map_unlock(map);
	} else {
		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
		if (error)
			return (error);
		vmspace = p->p_vmspace;
		map = &vmspace->vm_map;
	}
	map->flags |= imgp->map_flags;

	/* Map a shared page */
	obj = sv->sv_shared_page_obj;
	if (obj != NULL) {
		vm_object_reference(obj);
		error = vm_map_fixed(map, obj, 0,
		    sv->sv_shared_page_base, sv->sv_shared_page_len,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    VM_PROT_READ | VM_PROT_EXECUTE,
		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
		if (error != KERN_SUCCESS) {
			vm_object_deallocate(obj);
			return (vm_mmap_to_errno(error));
		}
	}

	/* Allocate a new stack */
	if (imgp->stack_sz != 0) {
		ssiz = trunc_page(imgp->stack_sz);
		PROC_LOCK(p);
		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
		PROC_UNLOCK(p);
		if (ssiz > rlim_stack.rlim_max)
			ssiz = rlim_stack.rlim_max;
		if (ssiz > rlim_stack.rlim_cur) {
			rlim_stack.rlim_cur = ssiz;
			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
		}
	} else if (sv->sv_maxssiz != NULL) {
		ssiz = *sv->sv_maxssiz;
	} else {
		ssiz = maxssiz;
	}
	stack_addr = sv->sv_usrstack - ssiz;
	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
	if (error != KERN_SUCCESS)
		return (vm_mmap_to_errno(error));

	/*
	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
	 * are still used to enforce the stack rlimit on the process stack.
	 */
	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
	vmspace->vm_maxsaddr = (char *)stack_addr;

	return (0);
}
Example 20
/*
 * Abort handler.
 *
 * FAR, FSR, and everything else that can be lost after enabling
 * interrupts must be grabbed before interrupts are enabled.  Note
 * that once interrupts are enabled, we could even migrate to
 * another CPU ...
 *
 * TODO: move quick cases to ASM
 */
void
abort_handler(struct trapframe *tf, int prefetch)
{
	struct thread *td;
	vm_offset_t far, va;
	int idx, rv;
	uint32_t fsr;
	struct ksig ksig;
	struct proc *p;
	struct pcb *pcb;
	struct vm_map *map;
	struct vmspace *vm;
	vm_prot_t ftype;
	bool usermode;
#ifdef INVARIANTS
	void *onfault;
#endif
	td = curthread;
	fsr = (prefetch) ? cp15_ifsr_get(): cp15_dfsr_get();
#if __ARM_ARCH >= 7
	far = (prefetch) ? cp15_ifar_get() : cp15_dfar_get();
#else
	far = (prefetch) ? TRAPF_PC(tf) : cp15_dfar_get();
#endif

	idx = FSR_TO_FAULT(fsr);
	usermode = TRAPF_USERMODE(tf);	/* Abort came from user mode? */
	if (usermode)
		td->td_frame = tf;

	CTR6(KTR_TRAP, "%s: fsr %#x (idx %u) far %#x prefetch %u usermode %d",
	    __func__, fsr, idx, far, prefetch, usermode);

	/*
	 * Firstly, handle aborts that are not directly related to mapping.
	 */
	if (__predict_false(idx == FAULT_EA_IMPREC)) {
		abort_imprecise(tf, fsr, prefetch, usermode);
		return;
	}

	if (__predict_false(idx == FAULT_DEBUG)) {
		abort_debug(tf, fsr, prefetch, usermode, far);
		return;
	}

	/*
	 * ARM has a set of unprivileged load and store instructions
	 * (LDRT/LDRBT/STRT/STRBT ...) which are supposed to be used from
	 * modes other than user mode, and the OS should recognize their
	 * aborts and behave appropriately.  However, there is no reasonable
	 * way to do that in general unless we restrict the handling somehow.
	 *
	 * For now, these instructions are used only in copyin()/copyout()
	 * like functions where usermode buffers are checked in advance that
	 * they are not from KVA space. Thus, no action is needed here.
	 */

#ifdef ARM_NEW_PMAP
	rv = pmap_fault(PCPU_GET(curpmap), far, fsr, idx, usermode);
	if (rv == 0) {
		return;
	} else if (rv == EFAULT) {
		call_trapsignal(td, SIGSEGV, SEGV_MAPERR, far);
		userret(td, tf);
		return;
	}
#endif
	/*
	 * Now, when we handled imprecise and debug aborts, the rest of
	 * aborts should be really related to mapping.
	 */

	PCPU_INC(cnt.v_trap);

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif
	if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
		/*
		 * Due to both processor errata and lazy TLB invalidation when
		 * access restrictions are removed from virtual pages, memory
		 * accesses that are allowed by the physical mapping layer may
		 * nonetheless cause one spurious page fault per virtual page.
		 * When the thread is executing a "no faulting" section that
		 * is bracketed by vm_fault_{disable,enable}_pagefaults(),
		 * every page fault is treated as a spurious page fault,
		 * unless it accesses the same virtual address as the most
		 * recent page fault within the same "no faulting" section.
		 */
		if (td->td_md.md_spurflt_addr != far ||
		    (td->td_pflags & TDP_RESETSPUR) != 0) {
			td->td_md.md_spurflt_addr = far;
			td->td_pflags &= ~TDP_RESETSPUR;

			tlb_flush_local(far & ~PAGE_MASK);
			return;
		}
	} else {
		/*
		 * If we get a page fault while in a critical section, then
		 * it is most likely a fatal kernel page fault.  The kernel
		 * is already going to panic trying to get a sleep lock to
		 * do the VM lookup, so just consider it a fatal trap so the
		 * kernel can print out a useful trap message and even get
		 * to the debugger.
		 *
		 * If we get a page fault while holding a non-sleepable
		 * lock, then it is most likely a fatal kernel page fault.
		 * If WITNESS is enabled, then it's going to whine about
		 * bogus LORs with various VM locks, so just skip to the
		 * fatal trap handling directly.
		 */
		if (td->td_critnest != 0 ||
		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
		    "Kernel page fault") != 0) {
			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
			return;
		}
	}

	/* Re-enable interrupts if they were enabled previously. */
	if (td->td_md.md_spinlock_count == 0) {
		if (__predict_true((tf->tf_spsr & PSR_I) == 0))
			enable_interrupts(PSR_I);
		if (__predict_true((tf->tf_spsr & PSR_F) == 0))
			enable_interrupts(PSR_F);
	}

	p = td->td_proc;
	if (usermode) {
		td->td_pticks = 0;
		if (td->td_cowgen != p->p_cowgen)
			thread_cow_update(td);
	}

	/* Invoke the appropriate handler, if necessary. */
	if (__predict_false(aborts[idx].func != NULL)) {
		if ((aborts[idx].func)(tf, idx, fsr, far, prefetch, td, &ksig))
			goto do_trapsignal;
		goto out;
	}

	/*
	 * Don't pass faulting cache operation to vm_fault(). We don't want
	 * to handle all vm stuff at this moment.
	 */
	pcb = td->td_pcb;
	if (__predict_false(pcb->pcb_onfault == cachebailout)) {
		tf->tf_r0 = far;		/* return failing address */
		tf->tf_pc = (register_t)pcb->pcb_onfault;
		return;
	}

	/* Handle remaining I-cache aborts. */
	if (idx == FAULT_ICACHE) {
		if (abort_icache(tf, idx, fsr, far, prefetch, td, &ksig))
			goto do_trapsignal;
		goto out;
	}

	/*
	 * At this point, we're dealing with one of the following aborts:
	 *
	 *  FAULT_TRAN_xx  - Translation
	 *  FAULT_PERM_xx  - Permission
	 *
	 * These are the main virtual memory-related faults signalled by
	 * the MMU.
	 */

	/* fusubailout is used by [fs]uswintr to avoid page faulting. */
	pcb = td->td_pcb;
	if (__predict_false(pcb->pcb_onfault == fusubailout)) {
		tf->tf_r0 = EFAULT;
		tf->tf_pc = (register_t)pcb->pcb_onfault;
		return;
	}

	va = trunc_page(far);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode)
			goto nogo;

		map = kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory. If curproc
		 * is NULL or curproc->p_vmspace is NULL the fault is fatal.
		 */
		vm = (p != NULL) ? p->p_vmspace : NULL;
		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
		if (!usermode && (td->td_intr_nesting_level != 0 ||
		    pcb->pcb_onfault == NULL)) {
			abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
			return;
		}
	}

	ftype = (fsr & FSR_WNR) ? VM_PROT_WRITE : VM_PROT_READ;
	if (prefetch)
		ftype |= VM_PROT_EXECUTE;

#ifdef DEBUG
	last_fault_code = fsr;
#endif

#ifndef ARM_NEW_PMAP
	if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
	    usermode)) {
		goto out;
	}
#endif

#ifdef INVARIANTS
	onfault = pcb->pcb_onfault;
	pcb->pcb_onfault = NULL;
#endif

	/* Fault in the page. */
	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);

#ifdef INVARIANTS
	pcb->pcb_onfault = onfault;
#endif

	if (__predict_true(rv == KERN_SUCCESS))
		goto out;
nogo:
	if (!usermode) {
		if (td->td_intr_nesting_level == 0 &&
		    pcb->pcb_onfault != NULL) {
			tf->tf_r0 = rv;
			tf->tf_pc = (int)pcb->pcb_onfault;
			return;
		}
		CTR2(KTR_TRAP, "%s: vm_fault() failed with %d", __func__, rv);
		abort_fatal(tf, idx, fsr, far, prefetch, td, &ksig);
		return;
	}

	ksig.sig = SIGSEGV;
	ksig.code = (rv == KERN_PROTECTION_FAILURE) ? SEGV_ACCERR : SEGV_MAPERR;
	ksig.addr = far;

do_trapsignal:
	call_trapsignal(td, ksig.sig, ksig.code, ksig.addr);
out:
	if (usermode)
		userret(td, tf);
}
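The TDP_NOFAULTING branch above refers to "no faulting" sections bracketed
by vm_fault_disable_pagefaults() and vm_fault_enable_pagefaults().  A
minimal sketch of such a consumer (the helper and its arguments are
hypothetical, not from the source):

/*
 * Hypothetical helper: fetch one word from userspace without taking a
 * real page fault; faults inside the bracket are treated as spurious.
 */
static int
fetch_word_nofault(const void *uaddr, long *valp)
{
	int save, error;

	save = vm_fault_disable_pagefaults();
	error = copyin_nofault(uaddr, valp, sizeof(*valp));
	vm_fault_enable_pagefaults(save);
	return (error);
}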
Example 21
/*
 * mincore system call handler
 *
 * mincore_args(const void *addr, size_t len, char *vec)
 *
 * No requirements
 */
int
sys_mincore(struct mincore_args *uap)
{
	struct proc *p = curproc;
	vm_offset_t addr, first_addr;
	vm_offset_t end, cend;
	pmap_t pmap;
	vm_map_t map;
	char *vec;
	int error;
	int vecindex, lastvecindex;
	vm_map_entry_t current;
	vm_map_entry_t entry;
	int mincoreinfo;
	unsigned int timestamp;

	/*
	 * Make sure that the addresses presented are valid for user
	 * mode.
	 */
	first_addr = addr = trunc_page((vm_offset_t) uap->addr);
	end = addr + (vm_size_t)round_page(uap->len);
	if (end < addr)
		return (EINVAL);
	if (VM_MAX_USER_ADDRESS > 0 && end > VM_MAX_USER_ADDRESS)
		return (EINVAL);

	/*
	 * Address of byte vector
	 */
	vec = uap->vec;

	map = &p->p_vmspace->vm_map;
	pmap = vmspace_pmap(p->p_vmspace);

	lwkt_gettoken(&map->token);
	vm_map_lock_read(map);
RestartScan:
	timestamp = map->timestamp;

	if (!vm_map_lookup_entry(map, addr, &entry))
		entry = entry->next;

	/*
	 * Do this on a map entry basis so that if the pages are not
	 * in the current processes address space, we can easily look
	 * up the pages elsewhere.
	 */
	lastvecindex = -1;
	for(current = entry;
		(current != &map->header) && (current->start < end);
		current = current->next) {

		/*
		 * ignore submaps (for now) or null objects
		 */
		if (current->maptype != VM_MAPTYPE_NORMAL &&
		    current->maptype != VM_MAPTYPE_VPAGETABLE) {
			continue;
		}
		if (current->object.vm_object == NULL)
			continue;
		
		/*
		 * limit this scan to the current map entry and the
		 * limits for the mincore call
		 */
		if (addr < current->start)
			addr = current->start;
		cend = current->end;
		if (cend > end)
			cend = end;

		/*
		 * scan this entry one page at a time
		 */
		while (addr < cend) {
			/*
			 * Check pmap first, it is likely faster, also
			 * it can provide info as to whether we are the
			 * one referencing or modifying the page.
			 *
			 * If we have to check the VM object, only mess
			 * around with normal maps.  Do not mess around
			 * with virtual page tables (XXX).
			 */
			mincoreinfo = pmap_mincore(pmap, addr);
			if (mincoreinfo == 0 &&
			    current->maptype == VM_MAPTYPE_NORMAL) {
				vm_pindex_t pindex;
				vm_ooffset_t offset;
				vm_page_t m;

				/*
				 * calculate the page index into the object
				 */
				offset = current->offset + (addr - current->start);
				pindex = OFF_TO_IDX(offset);

				/*
				 * if the page is resident, then gather 
				 * information about it.  spl protection is
				 * required to maintain the object 
				 * association.  And XXX what if the page is
				 * busy?  What's the deal with that?
				 *
				 * XXX vm_token - legacy for pmap_ts_referenced
				 *     in i386 and vkernel pmap code.
				 */
				lwkt_gettoken(&vm_token);
				vm_object_hold(current->object.vm_object);
				m = vm_page_lookup(current->object.vm_object,
						    pindex);
				if (m && m->valid) {
					mincoreinfo = MINCORE_INCORE;
					if (m->dirty ||
						pmap_is_modified(m))
						mincoreinfo |= MINCORE_MODIFIED_OTHER;
					if ((m->flags & PG_REFERENCED) ||
						pmap_ts_referenced(m)) {
						vm_page_flag_set(m, PG_REFERENCED);
						mincoreinfo |= MINCORE_REFERENCED_OTHER;
					}
				}
				vm_object_drop(current->object.vm_object);
				lwkt_reltoken(&vm_token);
			}

			/*
			 * subyte may page fault.  In case it needs to modify
			 * the map, we release the lock.
			 */
			vm_map_unlock_read(map);

			/*
			 * calculate index into user supplied byte vector
			 */
			vecindex = OFF_TO_IDX(addr - first_addr);

			/*
			 * If we have skipped map entries, we need to make sure that
			 * the byte vector is zeroed for those skipped entries.
			 */
			while((lastvecindex + 1) < vecindex) {
				error = subyte( vec + lastvecindex, 0);
				if (error) {
					error = EFAULT;
					goto done;
				}
				++lastvecindex;
			}

			/*
			 * Pass the page information to the user
			 */
			error = subyte( vec + vecindex, mincoreinfo);
			if (error) {
				error = EFAULT;
				goto done;
			}

			/*
			 * If the map has changed, due to the subyte, the previous
			 * output may be invalid.
			 */
			vm_map_lock_read(map);
			if (timestamp != map->timestamp)
				goto RestartScan;

			lastvecindex = vecindex;
			addr += PAGE_SIZE;
		}
	}

	/*
	 * subyte may page fault.  In case it needs to modify
	 * the map, we release the lock.
	 */
	vm_map_unlock_read(map);

	/*
	 * Zero the last entries in the byte vector.
	 */
	vecindex = OFF_TO_IDX(end - first_addr);
	while((lastvecindex + 1) < vecindex) {
		error = subyte( vec + lastvecindex, 0);
		if (error) {
			error = EFAULT;
			goto done;
		}
		++lastvecindex;
	}
	
	/*
	 * If the map has changed, due to the subyte, the previous
	 * output may be invalid.
	 */
	vm_map_lock_read(map);
	if (timestamp != map->timestamp)
		goto RestartScan;
	vm_map_unlock_read(map);

	error = 0;
done:
	lwkt_reltoken(&map->token);
	return (error);
}
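For context, this handler is what a userland mincore(2) call lands in, with
one status byte produced per page.  A standalone usage sketch (illustrative
only):

#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
	size_t npages = 4;
	char vec[4];
	char *p;
	size_t i;

	/* Page-aligned anonymous mapping; no page is resident yet. */
	p = mmap(NULL, npages * pagesz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		return (1);
	p[0] = 1;			/* fault in the first page only */

	if (mincore(p, npages * pagesz, vec) == -1)
		return (1);
	for (i = 0; i < npages; i++)
		printf("page %zu: %sresident\n", i,
		    (vec[i] & MINCORE_INCORE) ? "" : "not ");
	munmap(p, npages * pagesz);
	return (0);
}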
Example 22
/*
 * Finish a fork operation, with lwp lp2 nearly set up.
 * Copy and update the pcb, and set up the stack so that the child is
 * ready to run and return to user mode.
 */
void
cpu_fork(struct lwp *lp1, struct lwp *lp2, int flags)
{
	struct pcb *pcb2;

	if ((flags & RFPROC) == 0) {
		if ((flags & RFMEM) == 0) {
			/* unshare user LDT */
			struct pcb *pcb1 = lp1->lwp_thread->td_pcb;
			struct pcb_ldt *pcb_ldt = pcb1->pcb_ldt;
			if (pcb_ldt && pcb_ldt->ldt_refcnt > 1) {
				pcb_ldt = user_ldt_alloc(pcb1,pcb_ldt->ldt_len);
				user_ldt_free(pcb1);
				pcb1->pcb_ldt = pcb_ldt;
				set_user_ldt(pcb1);
			}
		}
		return;
	}

#if NNPX > 0
	/* Ensure that lp1's pcb is up to date. */
	if (mdcpu->gd_npxthread == lp1->lwp_thread)
		npxsave(lp1->lwp_thread->td_savefpu);
#endif
	
	/*
	 * Copy lp1's PCB.  This really only applies to the
	 * debug registers and FP state, but it's faster to just copy the
	 * whole thing.  Because we only save the PCB at switchout time,
	 * the register state may not be current.
	 */
	pcb2 = lp2->lwp_thread->td_pcb;
	*pcb2 = *lp1->lwp_thread->td_pcb;

	/*
	 * Create a new fresh stack for the new process.
	 * Copy the trap frame for the return to user mode as if from a
	 * syscall.  This copies the user mode register values.  The
	 * 16 byte offset saves space for vm86, and must match 
	 * common_tss.esp0 (kernel stack pointer on entry from user mode)
	 *
	 * pcb_esp must allocate an additional call-return pointer below
	 * the trap frame which will be restored by cpu_restore from
	 * PCB_EIP, and the thread's td_sp pointer must allocate an
	 * additional two words below the pcb_esp call-return pointer to
	 * hold the LWKT restore function pointer and eflags.
	 *
	 * The LWKT restore function pointer must be set to cpu_restore,
	 * which is our standard heavy weight process switch-in function.
	 * YYY eventually we should shortcut fork_return and fork_trampoline
	 * to use the LWKT restore function directly so we can get rid of
	 * all the extra crap we are setting up.
	 */
	lp2->lwp_md.md_regs = (struct trapframe *)((char *)pcb2 - 16) - 1;
	bcopy(lp1->lwp_md.md_regs, lp2->lwp_md.md_regs, sizeof(*lp2->lwp_md.md_regs));

	/*
	 * Set registers for trampoline to user mode.  Leave space for the
	 * return address on stack.  These are the kernel mode register values.
	 */
	pcb2->pcb_cr3 = vtophys(vmspace_pmap(lp2->lwp_proc->p_vmspace)->pm_pdir);
	pcb2->pcb_edi = 0;
	pcb2->pcb_esi = (int)fork_return;	/* fork_trampoline argument */
	pcb2->pcb_ebp = 0;
	pcb2->pcb_esp = (int)lp2->lwp_md.md_regs - sizeof(void *);
	pcb2->pcb_ebx = (int)lp2;		/* fork_trampoline argument */
	pcb2->pcb_eip = (int)fork_trampoline;
	lp2->lwp_thread->td_sp = (char *)(pcb2->pcb_esp - sizeof(void *));
	*(u_int32_t *)lp2->lwp_thread->td_sp = PSL_USER;
	lp2->lwp_thread->td_sp -= sizeof(void *);
	*(void **)lp2->lwp_thread->td_sp = (void *)cpu_heavy_restore;

	/*
	 * pcb2->pcb_ldt:	duplicated below, if necessary.
	 * pcb2->pcb_savefpu:	cloned above.
	 * pcb2->pcb_flags:	cloned above (always 0 here).
	 * pcb2->pcb_onfault:	cloned above (always NULL here).
	 * pcb2->pcb_onfault_sp:cloned above (don't care)
	 */

	/*
	 * XXX don't copy the i/o pages.  this should probably be fixed.
	 */
	pcb2->pcb_ext = NULL;

        /* Copy the LDT, if necessary. */
        if (pcb2->pcb_ldt != NULL) {
		if (flags & RFMEM) {
			pcb2->pcb_ldt->ldt_refcnt++;
		} else {
			pcb2->pcb_ldt = user_ldt_alloc(pcb2,
				pcb2->pcb_ldt->ldt_len);
		}
        }
	bcopy(&lp1->lwp_thread->td_tls, &lp2->lwp_thread->td_tls,
	      sizeof(lp2->lwp_thread->td_tls));
	/*
	 * Now, cpu_switch() can schedule the new lwp.
	 * pcb_esp is loaded pointing to the cpu_switch() stack frame
	 * containing the return address when exiting cpu_switch.
	 * This will normally be to fork_trampoline(), which will have
	 * %ebx loaded with the new lwp's pointer.  fork_trampoline()
	 * will set up a stack to call fork_return(lp, frame); to complete
	 * the return to user-mode.
	 */
}
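The stack layout that the long comment above describes in prose is easier
to see as a picture; this is a reconstruction from the code, not a diagram
from the source:

/*
 *   +----------------------------+  <- top of kernel stack
 *   | struct pcb (pcb2)          |
 *   +----------------------------+
 *   | 16-byte vm86 save area     |  matches common_tss.esp0
 *   +----------------------------+
 *   | struct trapframe           |  lp2->lwp_md.md_regs
 *   +----------------------------+
 *   | call-return pointer slot   |  pcb2->pcb_esp points here
 *   +----------------------------+
 *   | eflags (PSL_USER)          |  popped by the LWKT switch-in path
 *   +----------------------------+
 *   | cpu_heavy_restore pointer  |  the LWKT restore function
 *   +----------------------------+  <- lp2->lwp_thread->td_sp
 */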
Example 23
static int
dma_memcpy(void *dst, void *src, int len, int flags)
{
	struct i80321_dma_softc *sc;
	i80321_dmadesc_t *desc;
	int ret;
	int csr;
	int descnb = 0;
	int tmplen = len;
	int to_nextpagesrc, to_nextpagedst;
	int min_hop;
	vm_paddr_t pa, pa2, tmppa;
	pmap_t pmap = vmspace_pmap(curthread->td_proc->p_vmspace);

	if (!softcs[0] || !softcs[1])
		return (-1);
	mtx_lock_spin(&softcs[0]->mtx);
	if (softcs[0]->flags & BUSY) {
		mtx_unlock_spin(&softcs[0]->mtx);
		mtx_lock_spin(&softcs[1]->mtx);
		if (softcs[1]->flags & BUSY) {
			mtx_unlock_spin(&softcs[1]->mtx);
			return (-1);
		}
		sc = softcs[1];
	} else
		sc = softcs[0];
	sc->flags |= BUSY;
	mtx_unlock_spin(&sc->mtx);
	desc = sc->dmaring[0].desc;
	if (flags & IS_PHYSICAL) {
		desc->next_desc = 0;
		desc->low_pciaddr = (vm_paddr_t)src;
		desc->high_pciaddr = 0;
		desc->local_addr = (vm_paddr_t)dst;
		desc->count = len;
		desc->descr_ctrl = 1 << 6; /* Local memory to local memory. */
		bus_dmamap_sync(sc->dmatag, 
		    sc->dmaring[0].map, 
		    BUS_DMASYNC_PREWRITE);
	} else {
		if (!virt_addr_is_valid(dst, len, 1, !(flags & DST_IS_USER)) || 
		    !virt_addr_is_valid(src, len, 0, !(flags & SRC_IS_USER))) {
			mtx_lock_spin(&sc->mtx);
			sc->flags &= ~BUSY;
			mtx_unlock_spin(&sc->mtx);
			return (-1);
		}
		cpu_dcache_wb_range((vm_offset_t)src, len);
		if ((vm_offset_t)dst & (31))
			cpu_dcache_wb_range((vm_offset_t)dst & ~31, 32);
		if (((vm_offset_t)dst + len) & 31)
			cpu_dcache_wb_range(((vm_offset_t)dst + len) & ~31,
			    32);
		cpu_dcache_inv_range((vm_offset_t)dst, len);
		while (tmplen > 0) {
			pa = (flags & SRC_IS_USER) ?
			    pmap_extract(pmap, (vm_offset_t)src) :
				    vtophys(src);
			pa2 = (flags & DST_IS_USER) ?
			    pmap_extract(pmap, (vm_offset_t)dst) :
				    vtophys(dst);
			to_nextpagesrc = ((vm_offset_t)src & ~PAGE_MASK) +
			    PAGE_SIZE - (vm_offset_t)src;
			to_nextpagedst = ((vm_offset_t)dst & ~PAGE_MASK) +
			    PAGE_SIZE - (vm_offset_t)dst;
			while (to_nextpagesrc < tmplen) {
				tmppa = (flags & SRC_IS_USER) ?
				    pmap_extract(pmap, (vm_offset_t)src +
				    to_nextpagesrc) :
					    vtophys((vm_offset_t)src +
						to_nextpagesrc);
				if (tmppa != pa + to_nextpagesrc)
					break;
				to_nextpagesrc += PAGE_SIZE;
			}
			while (to_nextpagedst < tmplen) {
				tmppa = (flags & DST_IS_USER) ?
				    pmap_extract(pmap, (vm_offset_t)dst +
				    to_nextpagedst) :
					    vtophys((vm_offset_t)dst +
						to_nextpagedst);
				if (tmppa != pa2 + to_nextpagedst)
					break;
				to_nextpagedst += PAGE_SIZE;
			}
			min_hop = to_nextpagedst > to_nextpagesrc ?
			    to_nextpagesrc : to_nextpagedst;
			if (min_hop < 64) {
				tmplen -= min_hop;
				memcpy(dst, src, min_hop);
				cpu_dcache_wbinv_range((vm_offset_t)dst,
				    min_hop);

				src = (void *)((vm_offset_t)src + min_hop);
				dst = (void *)((vm_offset_t)dst + min_hop);
				if (tmplen <= 0 && descnb > 0) {
					sc->dmaring[descnb - 1].desc->next_desc
					    = 0;
					bus_dmamap_sync(sc->dmatag, 
					    sc->dmaring[descnb - 1].map, 
					    BUS_DMASYNC_PREWRITE);
				}
				continue;
			}
			desc->low_pciaddr = pa;
			desc->high_pciaddr = 0;
			desc->local_addr = pa2;
			desc->count = tmplen > min_hop ? min_hop : tmplen;
			desc->descr_ctrl = 1 << 6;
			if (min_hop < tmplen) {
				tmplen -= min_hop;
				src = (void *)((vm_offset_t)src + min_hop);
				dst = (void *)((vm_offset_t)dst + min_hop);
			} else
				tmplen = 0;
			if (descnb + 1 >= DMA_RING_SIZE) {
				mtx_lock_spin(&sc->mtx);
				sc->flags &= ~BUSY;
				mtx_unlock_spin(&sc->mtx);
				return (-1);
			}
			if (tmplen > 0) {
				desc->next_desc = sc->dmaring[descnb + 1].
				    phys_addr;
				bus_dmamap_sync(sc->dmatag, 
				    sc->dmaring[descnb].map, 
				    BUS_DMASYNC_PREWRITE);
				desc = sc->dmaring[descnb + 1].desc;
				descnb++;
			} else {
				desc->next_desc = 0;
				bus_dmamap_sync(sc->dmatag,
				    sc->dmaring[descnb].map,
				    BUS_DMASYNC_PREWRITE);
			}
		}

	}
	DMA_REG_WRITE(sc, 4 /* Status register */,
	    DMA_REG_READ(sc, 4) | DMA_CLEAN_MASK);
	DMA_REG_WRITE(sc, 0x10 /* Descriptor addr */,
	    sc->dmaring[0].phys_addr);
	DMA_REG_WRITE(sc, 0 /* Control register */, 1 | 2/* Start transfer */);
	/* Wait until the transfer is done. */
	while ((csr = DMA_REG_READ(sc, 0x4)) & (1 << 10))
		;
	if (csr & 0x2e) /* error */
		ret = -1;
	else
		ret = 0;
	DMA_REG_WRITE(sc, 0, 0);
	mtx_lock_spin(&sc->mtx);
	sc->flags &= ~BUSY;
	mtx_unlock_spin(&sc->mtx);
	return (ret);
}