void cpu_idle_mwait_cycle(void) { struct cpu_info *ci = curcpu(); if ((read_rflags() & PSL_I) == 0) panic("idle with interrupts blocked!"); /* something already queued? */ if (!cpu_is_idle(ci)) return; /* * About to idle; setting the MWAIT_IN_IDLE bit tells * cpu_unidle() that it can't be a no-op and tells cpu_kick() * that it doesn't need to use an IPI. We also set the * MWAIT_KEEP_IDLING bit: those routines clear it to stop * the mwait. Once they're set, we do a final check of the * queue, in case another cpu called setrunqueue() and added * something to the queue and called cpu_unidle() between * the check in sched_idle() and here. */ atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY); if (cpu_is_idle(ci)) { monitor(&ci->ci_mwait, 0, 0); if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING) mwait(0, 0); } /* done idling; let cpu_kick() know that an IPI is required */ atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING); }
/* * Common function for DMA map synchronization. May be called * by bus-specific DMA map synchronization functions. */ void _dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t offset, bus_size_t len, int op) { int i; bus_size_t minlen, wlen; bus_addr_t pa, addr; struct vm_page *pg; for (i = 0; i < map->dm_nsegs && len != 0; i++) { /* Find the beginning segment. */ if (offset >= map->dm_segs[i].ds_len) { offset -= map->dm_segs[i].ds_len; continue; } minlen = len < map->dm_segs[i].ds_len - offset ? len : map->dm_segs[i].ds_len - offset; addr = map->dm_segs[i].ds_addr + offset; switch (op) { case BUS_DMASYNC_POSTWRITE: for (pa = trunc_page(addr), wlen = 0; pa < round_page(addr + minlen); pa += PAGE_SIZE) { pg = PHYS_TO_VM_PAGE(pa); if (pg != NULL) atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE); } } } }
static __inline void uvm_pageremove(struct vm_page *pg) { struct pglist *buck; int s; UVMHIST_FUNC("uvm_pageremove"); UVMHIST_CALLED(pghist); KASSERT(pg->pg_flags & PG_TABLED); buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; s = splvm(); simple_lock(&uvm.hashlock); TAILQ_REMOVE(buck, pg, hashq); simple_unlock(&uvm.hashlock); splx(s); #ifdef UBC if (pg->uobject->pgops == &uvm_vnodeops) { uvm_pgcnt_vnode--; } #endif /* object should be locked */ TAILQ_REMOVE(&pg->uobject->memq, pg, listq); atomic_clearbits_int(&pg->pg_flags, PG_TABLED); pg->uobject->uo_npages--; pg->uobject = NULL; pg->pg_version++; }
void dosoftint() { struct cpu_info *ci = curcpu(); int sir, q, mask; #ifdef MULTIPROCESSOR register_t sr; /* Enable interrupts */ sr = getsr(); ENABLEIPI(); __mp_lock(&kernel_lock); #endif while ((sir = ci->ci_softpending) != 0) { atomic_clearbits_int(&ci->ci_softpending, sir); for (q = SI_NQUEUES - 1; q >= 0; q--) { mask = SINTMASK(q); if (sir & mask) softintr_dispatch(q); } } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); setsr(sr); #endif }
int uvm_objwire(struct uvm_object *uobj, voff_t start, voff_t end, struct pglist *pageq) { int i, npages, left, error; struct vm_page *pgs[FETCH_PAGECOUNT]; voff_t offset = start; left = (end - start) >> PAGE_SHIFT; while (left) { npages = MIN(FETCH_PAGECOUNT, left); /* Get the pages */ memset(pgs, 0, sizeof(pgs)); error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, 0, PROT_READ | PROT_WRITE, MADV_SEQUENTIAL, PGO_ALLPAGES | PGO_SYNCIO); if (error) goto error; for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); KASSERT(!(pgs[i]->pg_flags & PG_RELEASED)); if (pgs[i]->pg_flags & PQ_AOBJ) { atomic_clearbits_int(&pgs[i]->pg_flags, PG_CLEAN); uao_dropswap(uobj, i); } } /* Wire the pages */ uvm_lock_pageq(); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); if (pageq != NULL) TAILQ_INSERT_TAIL(pageq, pgs[i], pageq); } uvm_unlock_pageq(); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); left -= npages; offset += (voff_t)npages << PAGE_SHIFT; } return 0; error: /* Unwire the pages which have been wired */ uvm_objunwire(uobj, start, offset); return error; }
int taskq_sleep(const volatile void *ident, struct mutex *mtx, int priority, const char *wmesg, int tmo) { u_int *flags = &curproc->p_flag; int rv; atomic_clearbits_int(flags, P_CANTSLEEP); rv = msleep(ident, mtx, priority, wmesg, tmo); atomic_setbits_int(flags, P_CANTSLEEP); return (tmo); }
void netintr(void *unused) /* ARGSUSED */ { int n, t = 0; while ((n = netisr) != 0) { atomic_clearbits_int(&netisr, n); #ifdef INET #if NETHER > 0 if (n & (1 << NETISR_ARP)) arpintr(); #endif if (n & (1 << NETISR_IP)) ipintr(); #endif #ifdef INET6 if (n & (1 << NETISR_IPV6)) ip6intr(); #endif #ifdef MPLS if (n & (1 << NETISR_MPLS)) mplsintr(); #endif #if NPPP > 0 if (n & (1 << NETISR_PPP)) pppintr(); #endif #if NBRIDGE > 0 if (n & (1 << NETISR_BRIDGE)) bridgeintr(); #endif #if NPPPOE > 0 if (n & (1 << NETISR_PPPOE)) pppoeintr(); #endif #if NBLUETOOTH > 0 if (n & (1 << NETISR_BT)) btintr(); #endif t |= n; } #if NPFSYNC > 0 if (t & (1 << NETISR_PFSYNC)) pfsyncintr(); #endif if (t & (1 << NETISR_TX)) nettxintr(); }
/* * Stop profiling on a process. */ void stopprofclock(struct process *pr) { int s; if (pr->ps_flags & PS_PROFIL) { atomic_clearbits_int(&pr->ps_flags, PS_PROFIL); if (--profprocs == 0 && stathz != 0) { s = splstatclock(); psdiv = pscnt = 1; setstatclockrate(stathz); splx(s); } } }
/* * Stop profiling on a process. */ void stopprofclock(struct proc *p) { int s; if (p->p_flag & P_PROFIL) { atomic_clearbits_int(&p->p_flag, P_PROFIL); if (--profprocs == 0 && stathz != 0) { s = splstatclock(); psdiv = pscnt = 1; setstatclockrate(stathz); splx(s); } } }
void qlw_update_target(struct qlw_softc *sc, int bus, int target) { struct scsi_link *link; int lun; if ((sc->sc_update_required[bus] & (1 << target)) == 0) return; atomic_clearbits_int(&sc->sc_update_required[bus], (1 << target)); link = scsi_get_link(sc->sc_scsibus[bus], target, 0); if (link == NULL) return; sc->sc_mbox[0] = QLW_MBOX_SET_TARGET_PARAMETERS; sc->sc_mbox[1] = (((bus << 7) | target) << 8); sc->sc_mbox[2] = sc->sc_target[bus][target].qt_params; sc->sc_mbox[2] |= QLW_TARGET_RENEG; sc->sc_mbox[2] &= ~QLW_TARGET_QFRZ; if (link->quirks & SDEV_NOSYNC) sc->sc_mbox[2] &= ~QLW_TARGET_SYNC; if (link->quirks & SDEV_NOWIDE) sc->sc_mbox[2] &= ~QLW_TARGET_WIDE; if (link->quirks & SDEV_NOTAGS) sc->sc_mbox[2] &= ~QLW_TARGET_TAGS; sc->sc_mbox[3] = sc->sc_target[bus][target].qt_sync_period; sc->sc_mbox[3] |= (sc->sc_target[bus][target].qt_sync_offset << 8); if (qlw_mbox(sc, 0x000f, 0x0001)) { printf("couldn't set target parameters: %x\n", sc->sc_mbox[0]); return; } /* XXX do PPR detection */ for (lun = 0; lun < QLW_MAX_LUNS; lun++) { sc->sc_mbox[0] = QLW_MBOX_SET_DEVICE_QUEUE; sc->sc_mbox[1] = (((bus << 7) | target) << 8) | lun; sc->sc_mbox[2] = sc->sc_max_queue_depth[bus]; sc->sc_mbox[3] = sc->sc_target[bus][target].qt_exec_throttle; if (qlw_mbox(sc, 0x000f, 0x0001)) { printf("couldn't set lun parameters: %x\n", sc->sc_mbox[0]); return; } } }
/*ARGSUSED*/ int cttyioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) { struct vnode *ttyvp = cttyvp(p); if (ttyvp == NULL) return (EIO); if (cmd == TIOCSCTTY) /* XXX */ return (EINVAL); if (cmd == TIOCNOTTY) { if (!SESS_LEADER(p)) { atomic_clearbits_int(&p->p_flag, P_CONTROLT); return (0); } else return (EINVAL); } return (VOP_IOCTL(ttyvp, cmd, addr, flag, NOCRED, p)); }
void netintr() { int n; while ((n = netisr) != 0) { atomic_clearbits_int(&netisr, n); #define DONETISR(bit, fn) \ do { \ if (n & (1 << (bit))) \ fn(); \ } while (0) #include <net/netisr_dispatch.h> #undef DONETISR } }
void dosoftint(int pcpl) { struct cpu_info *ci = curcpu(); int sir, q, mask; ppc_intr_enable(1); KERNEL_LOCK(); while ((sir = (ci->ci_ipending & ppc_smask[pcpl])) != 0) { atomic_clearbits_int(&ci->ci_ipending, sir); for (q = SI_NQUEUES - 1; q >= 0; q--) { mask = SI_TO_IRQBIT(q); if (sir & mask) softintr_dispatch(q); } } KERNEL_UNLOCK(); (void)ppc_intr_disable(); }
void taskq_thread(void *xtq) { sleepfn tqsleep = msleep; struct taskq *tq = xtq; struct task work; int last; if (ISSET(tq->tq_flags, TASKQ_MPSAFE)) KERNEL_UNLOCK(); if (ISSET(tq->tq_flags, TASKQ_CANTSLEEP)) { tqsleep = taskq_sleep; atomic_setbits_int(&curproc->p_flag, P_CANTSLEEP); } while (taskq_next_work(tq, &work, tqsleep)) { (*work.t_func)(work.t_arg); sched_pause(); } mtx_enter(&tq->tq_mtx); last = (--tq->tq_running == 0); mtx_leave(&tq->tq_mtx); if (ISSET(tq->tq_flags, TASKQ_MPSAFE)) KERNEL_LOCK(); if (ISSET(tq->tq_flags, TASKQ_CANTSLEEP)) atomic_clearbits_int(&curproc->p_flag, P_CANTSLEEP); if (last) wakeup_one(&tq->tq_running); kthread_exit(0); }
/* * Process IPIs for a CPU. */ static int mips64_ipi_intr(void *arg) { unsigned int pending_ipis, bit; unsigned int cpuid = (unsigned int)(unsigned long)arg; KASSERT (cpuid == cpu_number()); /* figure out which ipi are pending */ pending_ipis = ipi_mailbox[cpuid]; /* clear ipi interrupt */ hw_ipi_intr_clear(cpuid); if (pending_ipis > 0) { /* clear pending ipi, since we're about to handle them */ atomic_clearbits_int(&ipi_mailbox[cpuid], pending_ipis); for (bit = 0; bit < MIPS64_NIPIS; bit++) if (pending_ipis & (1UL << bit)) (*ipifuncs[bit])(); } return 1; }
vaddr_t uvm_km_kmemalloc(struct vm_map *map, struct uvm_object *obj, vsize_t size, int flags) { vaddr_t kva, loopva; voff_t offset; struct vm_page *pg; int mapflags; UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist," (map=%p, obj=%p, size=0x%lx, flags=%d)", map, obj, size, flags); KASSERT(vm_map_pmap(map) == pmap_kernel()); /* * we cannot yet make pmap_enter() not sleep * and thus demand that we are called with NOWAIT in that case */ KASSERT(!((flags & UVM_KMF_NOWAIT) && obj)); /* * setup for call */ mapflags = flags & UVM_KMF_NOWAIT? UVM_FLAG_NOWAIT : 0; mapflags |= flags & UVM_KMF_TRYLOCK; size = round_page(size); kva = vm_map_min(map); /* hint */ /* * allocate some virtual space */ if (__predict_false(uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE, UVM_ADV_RANDOM, mapflags)) != 0)) { UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0); return(0); } /* * if all we wanted was VA, return now */ if (flags & UVM_KMF_VALLOC) { UVMHIST_LOG(maphist,"<- done valloc (kva=0x%lx)", kva,0,0,0); return(kva); } /* * recover object offset from virtual address */ if (obj != NULL) offset = kva - vm_map_min(kernel_map); else offset = 0; UVMHIST_LOG(maphist, " kva=0x%lx, offset=0x%lx", kva, offset,0,0); /* * now allocate and map in the memory... note that we are the only ones * whom should ever get a handle on this area of VM. */ loopva = kva; while (loopva != kva + size) { pg = uvm_pagealloc(obj, offset, NULL, 0); if (pg) { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); } if (__predict_false(pg == NULL)) { if ((flags & UVM_KMF_NOWAIT) || ((flags & UVM_KMF_CANFAIL) && uvmexp.swpgonly == uvmexp.swpages)) { /* free everything! */ uvm_unmap(map, kva, kva + size); return (0); } else { uvm_wait("km_getwait2"); /* sleep here */ continue; } } /* * map it in: note that we call pmap_enter with the map and * object unlocked in case we are kmem_map. * * pager mappings that must not sleep here will incidently * be installed using pmap_kenter_pa() and thus not sleep! */ if (obj == NULL) { pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW); } else { pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), UVM_PROT_RW, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); } loopva += PAGE_SIZE; offset += PAGE_SIZE; } pmap_update(pmap_kernel()); UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0); return(kva); }
/* * Process debugging system call. */ int sys_ptrace(struct proc *p, void *v, register_t *retval) { struct sys_ptrace_args /* { syscallarg(int) req; syscallarg(pid_t) pid; syscallarg(caddr_t) addr; syscallarg(int) data; } */ *uap = v; struct proc *t; /* target thread */ struct process *tr; /* target process */ struct uio uio; struct iovec iov; struct ptrace_io_desc piod; struct ptrace_event pe; struct ptrace_thread_state pts; struct reg *regs; #if defined (PT_SETFPREGS) || defined (PT_GETFPREGS) struct fpreg *fpregs; #endif #if defined (PT_SETXMMREGS) || defined (PT_GETXMMREGS) struct xmmregs *xmmregs; #endif #ifdef PT_WCOOKIE register_t wcookie; #endif int error, write; int temp; int req = SCARG(uap, req); int s; /* "A foolish consistency..." XXX */ switch (req) { case PT_TRACE_ME: t = p; break; /* calls that only operate on the PID */ case PT_READ_I: case PT_READ_D: case PT_WRITE_I: case PT_WRITE_D: case PT_KILL: case PT_ATTACH: case PT_IO: case PT_SET_EVENT_MASK: case PT_GET_EVENT_MASK: case PT_GET_PROCESS_STATE: case PT_GET_THREAD_FIRST: case PT_GET_THREAD_NEXT: default: /* Find the process we're supposed to be operating on. */ if ((t = pfind(SCARG(uap, pid))) == NULL) return (ESRCH); if (t->p_flag & P_THREAD) return (ESRCH); break; /* calls that accept a PID or a thread ID */ case PT_CONTINUE: case PT_DETACH: #ifdef PT_STEP case PT_STEP: #endif case PT_GETREGS: case PT_SETREGS: #ifdef PT_GETFPREGS case PT_GETFPREGS: #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: #endif #ifdef PT_GETXMMREGS case PT_GETXMMREGS: #endif #ifdef PT_SETXMMREGS case PT_SETXMMREGS: #endif if (SCARG(uap, pid) > THREAD_PID_OFFSET) { t = pfind(SCARG(uap, pid) - THREAD_PID_OFFSET); if (t == NULL) return (ESRCH); } else { if ((t = pfind(SCARG(uap, pid))) == NULL) return (ESRCH); if (t->p_flag & P_THREAD) return (ESRCH); } break; } tr = t->p_p; if ((tr->ps_flags & PS_INEXEC) != 0) return (EAGAIN); /* Make sure we can operate on it. */ switch (req) { case PT_TRACE_ME: /* Saying that you're being traced is always legal. */ break; case PT_ATTACH: /* * You can't attach to a process if: * (1) it's the process that's doing the attaching, */ if (tr == p->p_p) return (EINVAL); /* * (2) it's a system process */ if (ISSET(tr->ps_flags, PS_SYSTEM)) return (EPERM); /* * (3) it's already being traced, or */ if (ISSET(tr->ps_flags, PS_TRACED)) return (EBUSY); /* * (4) it's not owned by you, or the last exec * gave us setuid/setgid privs (unless * you're root), or... * * [Note: once PS_SUGID or PS_SUGIDEXEC gets set in * execve(), they stay set until the process does * another execve(). Hence this prevents a setuid * process which revokes its special privileges using * setuid() from being traced. This is good security.] */ if ((tr->ps_ucred->cr_ruid != p->p_ucred->cr_ruid || ISSET(tr->ps_flags, PS_SUGIDEXEC | PS_SUGID)) && (error = suser(p, 0)) != 0) return (error); /* * (4.5) it's not a child of the tracing process. */ if (global_ptrace == 0 && !inferior(tr, p->p_p) && (error = suser(p, 0)) != 0) return (error); /* * (5) ...it's init, which controls the security level * of the entire system, and the system was not * compiled with permanently insecure mode turned * on. */ if ((tr->ps_pid == 1) && (securelevel > -1)) return (EPERM); /* * (6) it's an ancestor of the current process and * not init (because that would create a loop in * the process graph). */ if (tr->ps_pid != 1 && inferior(p->p_p, tr)) return (EINVAL); break; case PT_READ_I: case PT_READ_D: case PT_WRITE_I: case PT_WRITE_D: case PT_IO: case PT_CONTINUE: case PT_KILL: case PT_DETACH: #ifdef PT_STEP case PT_STEP: #endif case PT_SET_EVENT_MASK: case PT_GET_EVENT_MASK: case PT_GET_PROCESS_STATE: case PT_GETREGS: case PT_SETREGS: #ifdef PT_GETFPREGS case PT_GETFPREGS: #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: #endif #ifdef PT_GETXMMREGS case PT_GETXMMREGS: #endif #ifdef PT_SETXMMREGS case PT_SETXMMREGS: #endif #ifdef PT_WCOOKIE case PT_WCOOKIE: #endif /* * You can't do what you want to the process if: * (1) It's not being traced at all, */ if (!ISSET(tr->ps_flags, PS_TRACED)) return (EPERM); /* * (2) it's not being traced by _you_, or */ if (tr->ps_pptr != p->p_p) return (EBUSY); /* * (3) it's not currently stopped. */ if (t->p_stat != SSTOP || !ISSET(tr->ps_flags, PS_WAITED)) return (EBUSY); break; case PT_GET_THREAD_FIRST: case PT_GET_THREAD_NEXT: /* * You can't do what you want to the process if: * (1) It's not being traced at all, */ if (!ISSET(tr->ps_flags, PS_TRACED)) return (EPERM); /* * (2) it's not being traced by _you_, or */ if (tr->ps_pptr != p->p_p) return (EBUSY); /* * Do the work here because the request isn't actually * associated with 't' */ if (SCARG(uap, data) != sizeof(pts)) return (EINVAL); if (req == PT_GET_THREAD_NEXT) { error = copyin(SCARG(uap, addr), &pts, sizeof(pts)); if (error) return (error); t = pfind(pts.pts_tid - THREAD_PID_OFFSET); if (t == NULL || ISSET(t->p_flag, P_WEXIT)) return (ESRCH); if (t->p_p != tr) return (EINVAL); t = TAILQ_NEXT(t, p_thr_link); } else { t = TAILQ_FIRST(&tr->ps_threads); } if (t == NULL) pts.pts_tid = -1; else pts.pts_tid = t->p_pid + THREAD_PID_OFFSET; return (copyout(&pts, SCARG(uap, addr), sizeof(pts))); default: /* It was not a legal request. */ return (EINVAL); } /* Do single-step fixup if needed. */ FIX_SSTEP(t); /* Now do the operation. */ write = 0; *retval = 0; switch (req) { case PT_TRACE_ME: /* Just set the trace flag. */ atomic_setbits_int(&tr->ps_flags, PS_TRACED); tr->ps_oppid = tr->ps_pptr->ps_pid; if (tr->ps_ptstat == NULL) tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat), M_SUBPROC, M_WAITOK); memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat)); return (0); case PT_WRITE_I: /* XXX no separate I and D spaces */ case PT_WRITE_D: write = 1; temp = SCARG(uap, data); case PT_READ_I: /* XXX no separate I and D spaces */ case PT_READ_D: /* write = 0 done above. */ iov.iov_base = (caddr_t)&temp; iov.iov_len = sizeof(int); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(vaddr_t)SCARG(uap, addr); uio.uio_resid = sizeof(int); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_procp = p; error = process_domem(p, t, &uio, write ? PT_WRITE_I : PT_READ_I); if (write == 0) *retval = temp; return (error); case PT_IO: error = copyin(SCARG(uap, addr), &piod, sizeof(piod)); if (error) return (error); iov.iov_base = piod.piod_addr; iov.iov_len = piod.piod_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(vaddr_t)piod.piod_offs; uio.uio_resid = piod.piod_len; uio.uio_segflg = UIO_USERSPACE; uio.uio_procp = p; switch (piod.piod_op) { case PIOD_READ_I: req = PT_READ_I; uio.uio_rw = UIO_READ; break; case PIOD_READ_D: req = PT_READ_D; uio.uio_rw = UIO_READ; break; case PIOD_WRITE_I: req = PT_WRITE_I; uio.uio_rw = UIO_WRITE; break; case PIOD_WRITE_D: req = PT_WRITE_D; uio.uio_rw = UIO_WRITE; break; case PIOD_READ_AUXV: req = PT_READ_D; uio.uio_rw = UIO_READ; temp = tr->ps_emul->e_arglen * sizeof(char *); if (uio.uio_offset > temp) return (EIO); if (uio.uio_resid > temp - uio.uio_offset) uio.uio_resid = temp - uio.uio_offset; piod.piod_len = iov.iov_len = uio.uio_resid; error = process_auxv_offset(p, t, &uio); if (error) return (error); break; default: return (EINVAL); } error = process_domem(p, t, &uio, req); piod.piod_len -= uio.uio_resid; (void) copyout(&piod, SCARG(uap, addr), sizeof(piod)); return (error); #ifdef PT_STEP case PT_STEP: /* * From the 4.4BSD PRM: * "Execution continues as in request PT_CONTINUE; however * as soon as possible after execution of at least one * instruction, execution stops again. [ ... ]" */ #endif case PT_CONTINUE: /* * From the 4.4BSD PRM: * "The data argument is taken as a signal number and the * child's execution continues at location addr as if it * incurred that signal. Normally the signal number will * be either 0 to indicate that the signal that caused the * stop should be ignored, or that value fetched out of * the process's image indicating which signal caused * the stop. If addr is (int *)1 then execution continues * from where it stopped." */ if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single) t = tr->ps_single; /* Check that the data is a valid signal number or zero. */ if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) return (EINVAL); /* If the address parameter is not (int *)1, set the pc. */ if ((int *)SCARG(uap, addr) != (int *)1) if ((error = process_set_pc(t, SCARG(uap, addr))) != 0) goto relebad; #ifdef PT_STEP /* * Arrange for a single-step, if that's requested and possible. */ error = process_sstep(t, req == PT_STEP); if (error) goto relebad; #endif goto sendsig; case PT_DETACH: /* * From the 4.4BSD PRM: * "The data argument is taken as a signal number and the * child's execution continues at location addr as if it * incurred that signal. Normally the signal number will * be either 0 to indicate that the signal that caused the * stop should be ignored, or that value fetched out of * the process's image indicating which signal caused * the stop. If addr is (int *)1 then execution continues * from where it stopped." */ if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single) t = tr->ps_single; /* Check that the data is a valid signal number or zero. */ if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) return (EINVAL); #ifdef PT_STEP /* * Arrange for a single-step, if that's requested and possible. */ error = process_sstep(t, req == PT_STEP); if (error) goto relebad; #endif /* give process back to original parent or init */ if (tr->ps_oppid != tr->ps_pptr->ps_pid) { struct process *ppr; ppr = prfind(tr->ps_oppid); proc_reparent(tr, ppr ? ppr : initprocess); } /* not being traced any more */ tr->ps_oppid = 0; atomic_clearbits_int(&tr->ps_flags, PS_TRACED|PS_WAITED); sendsig: memset(tr->ps_ptstat, 0, sizeof(*tr->ps_ptstat)); /* Finally, deliver the requested signal (or none). */ if (t->p_stat == SSTOP) { t->p_xstat = SCARG(uap, data); SCHED_LOCK(s); setrunnable(t); SCHED_UNLOCK(s); } else { if (SCARG(uap, data) != 0) psignal(t, SCARG(uap, data)); } return (0); relebad: return (error); case PT_KILL: if (SCARG(uap, pid) < THREAD_PID_OFFSET && tr->ps_single) t = tr->ps_single; /* just send the process a KILL signal. */ SCARG(uap, data) = SIGKILL; goto sendsig; /* in PT_CONTINUE, above. */ case PT_ATTACH: /* * As was done in procfs: * Go ahead and set the trace flag. * Save the old parent (it's reset in * _DETACH, and also in kern_exit.c:wait4() * Reparent the process so that the tracing * proc gets to see all the action. * Stop the target. */ atomic_setbits_int(&tr->ps_flags, PS_TRACED); tr->ps_oppid = tr->ps_pptr->ps_pid; if (tr->ps_pptr != p->p_p) proc_reparent(tr, p->p_p); if (tr->ps_ptstat == NULL) tr->ps_ptstat = malloc(sizeof(*tr->ps_ptstat), M_SUBPROC, M_WAITOK); SCARG(uap, data) = SIGSTOP; goto sendsig; case PT_GET_EVENT_MASK: if (SCARG(uap, data) != sizeof(pe)) return (EINVAL); memset(&pe, 0, sizeof(pe)); pe.pe_set_event = tr->ps_ptmask; return (copyout(&pe, SCARG(uap, addr), sizeof(pe))); case PT_SET_EVENT_MASK: if (SCARG(uap, data) != sizeof(pe)) return (EINVAL); if ((error = copyin(SCARG(uap, addr), &pe, sizeof(pe)))) return (error); tr->ps_ptmask = pe.pe_set_event; return (0); case PT_GET_PROCESS_STATE: if (SCARG(uap, data) != sizeof(*tr->ps_ptstat)) return (EINVAL); if (tr->ps_single) tr->ps_ptstat->pe_tid = tr->ps_single->p_pid + THREAD_PID_OFFSET; return (copyout(tr->ps_ptstat, SCARG(uap, addr), sizeof(*tr->ps_ptstat))); case PT_SETREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK); error = copyin(SCARG(uap, addr), regs, sizeof(*regs)); if (error == 0) { error = process_write_regs(t, regs); } free(regs, M_TEMP, sizeof(*regs)); return (error); case PT_GETREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); regs = malloc(sizeof(*regs), M_TEMP, M_WAITOK); error = process_read_regs(t, regs); if (error == 0) error = copyout(regs, SCARG(uap, addr), sizeof (*regs)); free(regs, M_TEMP, sizeof(*regs)); return (error); #ifdef PT_SETFPREGS case PT_SETFPREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK); error = copyin(SCARG(uap, addr), fpregs, sizeof(*fpregs)); if (error == 0) { error = process_write_fpregs(t, fpregs); } free(fpregs, M_TEMP, sizeof(*fpregs)); return (error); #endif #ifdef PT_GETFPREGS case PT_GETFPREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); fpregs = malloc(sizeof(*fpregs), M_TEMP, M_WAITOK); error = process_read_fpregs(t, fpregs); if (error == 0) error = copyout(fpregs, SCARG(uap, addr), sizeof(*fpregs)); free(fpregs, M_TEMP, sizeof(*fpregs)); return (error); #endif #ifdef PT_SETXMMREGS case PT_SETXMMREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK); error = copyin(SCARG(uap, addr), xmmregs, sizeof(*xmmregs)); if (error == 0) { error = process_write_xmmregs(t, xmmregs); } free(xmmregs, M_TEMP, sizeof(*xmmregs)); return (error); #endif #ifdef PT_GETXMMREGS case PT_GETXMMREGS: KASSERT((p->p_flag & P_SYSTEM) == 0); if ((error = process_checkioperm(p, tr)) != 0) return (error); xmmregs = malloc(sizeof(*xmmregs), M_TEMP, M_WAITOK); error = process_read_xmmregs(t, xmmregs); if (error == 0) error = copyout(xmmregs, SCARG(uap, addr), sizeof(*xmmregs)); free(xmmregs, M_TEMP, sizeof(*xmmregs)); return (error); #endif #ifdef PT_WCOOKIE case PT_WCOOKIE: wcookie = process_get_wcookie (t); return (copyout(&wcookie, SCARG(uap, addr), sizeof (register_t))); #endif } #ifdef DIAGNOSTIC panic("ptrace: impossible"); #endif return 0; }
vaddr_t uvm_km_alloc1(struct vm_map *map, vsize_t size, vsize_t align, boolean_t zeroit) { vaddr_t kva, loopva; voff_t offset; struct vm_page *pg; UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist,"(map=%p, size=0x%lx)", map, size,0,0); KASSERT(vm_map_pmap(map) == pmap_kernel()); size = round_page(size); kva = vm_map_min(map); /* hint */ /* * allocate some virtual space */ if (__predict_false(uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, align, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != 0)) { UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0); return(0); } /* * recover object offset from virtual address */ offset = kva - vm_map_min(kernel_map); UVMHIST_LOG(maphist," kva=0x%lx, offset=0x%lx", kva, offset,0,0); /* * now allocate the memory. we must be careful about released pages. */ loopva = kva; while (size) { simple_lock(&uvm.kernel_object->vmobjlock); pg = uvm_pagelookup(uvm.kernel_object, offset); /* * if we found a page in an unallocated region, it must be * released */ if (pg) { if ((pg->pg_flags & PG_RELEASED) == 0) panic("uvm_km_alloc1: non-released page"); atomic_setbits_int(&pg->pg_flags, PG_WANTED); UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock, FALSE, "km_alloc", 0); continue; /* retry */ } /* allocate ram */ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL, 0); if (pg) { atomic_clearbits_int(&pg->pg_flags, PG_BUSY); UVM_PAGE_OWN(pg, NULL); } simple_unlock(&uvm.kernel_object->vmobjlock); if (__predict_false(pg == NULL)) { if (curproc == uvm.pagedaemon_proc) { /* * It is unfeasible for the page daemon to * sleep for memory, so free what we have * allocated and fail. */ uvm_unmap(map, kva, loopva - kva); return (NULL); } else { uvm_wait("km_alloc1w"); /* wait for memory */ continue; } } /* * map it in; note we're never called with an intrsafe * object, so we always use regular old pmap_enter(). */ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), UVM_PROT_ALL, PMAP_WIRED | VM_PROT_READ | VM_PROT_WRITE); loopva += PAGE_SIZE; offset += PAGE_SIZE; size -= PAGE_SIZE; } pmap_update(map->pmap); /* * zero on request (note that "size" is now zero due to the above loop * so we need to subtract kva from loopva to reconstruct the size). */ if (zeroit) memset((caddr_t)kva, 0, loopva - kva); UVMHIST_LOG(maphist,"<- done (kva=0x%lx)", kva,0,0,0); return(kva); }
/* ARGSUSED */ int sys_execve(struct proc *p, void *v, register_t *retval) { struct sys_execve_args /* { syscallarg(const char *) path; syscallarg(char *const *) argp; syscallarg(char *const *) envp; } */ *uap = v; int error; struct exec_package pack; struct nameidata nid; struct vattr attr; struct ucred *cred = p->p_ucred; char *argp; char * const *cpp, *dp, *sp; #ifdef KTRACE char *env_start; #endif struct process *pr = p->p_p; long argc, envc; size_t len, sgap; #ifdef MACHINE_STACK_GROWS_UP size_t slen; #endif char *stack; struct ps_strings arginfo; struct vmspace *vm = pr->ps_vmspace; char **tmpfap; extern struct emul emul_native; #if NSYSTRACE > 0 int wassugid = ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC); size_t pathbuflen; #endif char *pathbuf = NULL; struct vnode *otvp; /* get other threads to stop */ if ((error = single_thread_set(p, SINGLE_UNWIND, 1))) return (error); /* * Cheap solution to complicated problems. * Mark this process as "leave me alone, I'm execing". */ atomic_setbits_int(&pr->ps_flags, PS_INEXEC); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE)) { systrace_execve0(p); pathbuf = pool_get(&namei_pool, PR_WAITOK); error = copyinstr(SCARG(uap, path), pathbuf, MAXPATHLEN, &pathbuflen); if (error != 0) goto clrflag; } #endif if (pathbuf != NULL) { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_SYSSPACE, pathbuf, p); } else { NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); } /* * initialize the fields of the exec package. */ if (pathbuf != NULL) pack.ep_name = pathbuf; else pack.ep_name = (char *)SCARG(uap, path); pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK); pack.ep_hdrlen = exec_maxhdrsz; pack.ep_hdrvalid = 0; pack.ep_ndp = &nid; pack.ep_interp = NULL; pack.ep_emul_arg = NULL; VMCMDSET_INIT(&pack.ep_vmcmds); pack.ep_vap = &attr; pack.ep_emul = &emul_native; pack.ep_flags = 0; /* see if we can run it. */ if ((error = check_exec(p, &pack)) != 0) { goto freehdr; } /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */ /* allocate an argument buffer */ argp = km_alloc(NCARGS, &kv_exec, &kp_pageable, &kd_waitok); #ifdef DIAGNOSTIC if (argp == NULL) panic("execve: argp == NULL"); #endif dp = argp; argc = 0; /* copy the fake args list, if there's one, freeing it as we go */ if (pack.ep_flags & EXEC_HASARGL) { tmpfap = pack.ep_fa; while (*tmpfap != NULL) { char *cp; cp = *tmpfap; while (*cp) *dp++ = *cp++; *dp++ = '\0'; free(*tmpfap, M_EXEC, 0); tmpfap++; argc++; } free(pack.ep_fa, M_EXEC, 0); pack.ep_flags &= ~EXEC_HASARGL; } /* Now get argv & environment */ if (!(cpp = SCARG(uap, argp))) { error = EFAULT; goto bad; } if (pack.ep_flags & EXEC_SKIPARG) cpp++; while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; argc++; } /* must have at least one argument */ if (argc == 0) { error = EINVAL; goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECARGS)) ktrexec(p, KTR_EXECARGS, argp, dp - argp); #endif envc = 0; /* environment does not need to be there */ if ((cpp = SCARG(uap, envp)) != NULL ) { #ifdef KTRACE env_start = dp; #endif while (1) { len = argp + ARG_MAX - dp; if ((error = copyin(cpp, &sp, sizeof(sp))) != 0) goto bad; if (!sp) break; if ((error = copyinstr(sp, dp, len, &len)) != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto bad; } dp += len; cpp++; envc++; } #ifdef KTRACE if (KTRPOINT(p, KTR_EXECENV)) ktrexec(p, KTR_EXECENV, env_start, dp - env_start); #endif } dp = (char *)(((long)dp + _STACKALIGNBYTES) & ~_STACKALIGNBYTES); sgap = STACKGAPLEN; /* * If we have enabled random stackgap, the stack itself has already * been moved from a random location, but is still aligned to a page * boundary. Provide the lower bits of random placement now. */ if (stackgap_random != 0) { sgap += arc4random() & PAGE_MASK; sgap = (sgap + _STACKALIGNBYTES) & ~_STACKALIGNBYTES; } /* Now check if args & environ fit into new stack */ len = ((argc + envc + 2 + pack.ep_emul->e_arglen) * sizeof(char *) + sizeof(long) + dp + sgap + sizeof(struct ps_strings)) - argp; len = (len + _STACKALIGNBYTES) &~ _STACKALIGNBYTES; if (len > pack.ep_ssize) { /* in effect, compare to initial limit */ error = ENOMEM; goto bad; } /* adjust "active stack depth" for process VSZ */ pack.ep_ssize = len; /* maybe should go elsewhere, but... */ /* * we're committed: any further errors will kill the process, so * kill the other threads now. */ single_thread_set(p, SINGLE_EXIT, 0); /* * Prepare vmspace for remapping. Note that uvmspace_exec can replace * pr_vmspace! */ uvmspace_exec(p, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); vm = pr->ps_vmspace; /* Now map address space */ vm->vm_taddr = (char *)trunc_page(pack.ep_taddr); vm->vm_tsize = atop(round_page(pack.ep_taddr + pack.ep_tsize) - trunc_page(pack.ep_taddr)); vm->vm_daddr = (char *)trunc_page(pack.ep_daddr); vm->vm_dsize = atop(round_page(pack.ep_daddr + pack.ep_dsize) - trunc_page(pack.ep_daddr)); vm->vm_dused = 0; vm->vm_ssize = atop(round_page(pack.ep_ssize)); vm->vm_maxsaddr = (char *)pack.ep_maxsaddr; vm->vm_minsaddr = (char *)pack.ep_minsaddr; /* create the new process's VM space by running the vmcmds */ #ifdef DIAGNOSTIC if (pack.ep_vmcmds.evs_used == 0) panic("execve: no vmcmds"); #endif error = exec_process_vmcmds(p, &pack); /* if an error happened, deallocate and punt */ if (error) goto exec_abort; /* old "stackgap" is gone now */ pr->ps_stackgap = 0; #ifdef MACHINE_STACK_GROWS_UP pr->ps_strings = (vaddr_t)vm->vm_maxsaddr + sgap; if (uvm_map_protect(&vm->vm_map, (vaddr_t)vm->vm_maxsaddr, trunc_page(pr->ps_strings), PROT_NONE, TRUE)) goto exec_abort; #else pr->ps_strings = (vaddr_t)vm->vm_minsaddr - sizeof(arginfo) - sgap; if (uvm_map_protect(&vm->vm_map, round_page(pr->ps_strings + sizeof(arginfo)), (vaddr_t)vm->vm_minsaddr, PROT_NONE, TRUE)) goto exec_abort; #endif /* remember information about the process */ arginfo.ps_nargvstr = argc; arginfo.ps_nenvstr = envc; #ifdef MACHINE_STACK_GROWS_UP stack = (char *)vm->vm_maxsaddr + sizeof(arginfo) + sgap; slen = len - sizeof(arginfo) - sgap; #else stack = (char *)(vm->vm_minsaddr - len); #endif /* Now copy argc, args & environ to new stack */ if (!(*pack.ep_emul->e_copyargs)(&pack, &arginfo, stack, argp)) goto exec_abort; /* copy out the process's ps_strings structure */ if (copyout(&arginfo, (char *)pr->ps_strings, sizeof(arginfo))) goto exec_abort; stopprofclock(pr); /* stop profiling */ fdcloseexec(p); /* handle close on exec */ execsigs(p); /* reset caught signals */ TCB_SET(p, NULL); /* reset the TCB address */ pr->ps_kbind_addr = 0; /* reset the kbind bits */ pr->ps_kbind_cookie = 0; /* set command name & other accounting info */ memset(p->p_comm, 0, sizeof(p->p_comm)); len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN); memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len); pr->ps_acflag &= ~AFORK; /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; vref(pack.ep_vp); pr->ps_textvp = pack.ep_vp; if (otvp) vrele(otvp); atomic_setbits_int(&pr->ps_flags, PS_EXEC); if (pr->ps_flags & PS_PPWAIT) { atomic_clearbits_int(&pr->ps_flags, PS_PPWAIT); atomic_clearbits_int(&pr->ps_pptr->ps_flags, PS_ISPWAIT); wakeup(pr->ps_pptr); } /* * If process does execve() while it has a mismatched real, * effective, or saved uid/gid, we set PS_SUGIDEXEC. */ if (cred->cr_uid != cred->cr_ruid || cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_rgid || cred->cr_gid != cred->cr_svgid) atomic_setbits_int(&pr->ps_flags, PS_SUGIDEXEC); else atomic_clearbits_int(&pr->ps_flags, PS_SUGIDEXEC); atomic_clearbits_int(&pr->ps_flags, PS_TAMED); tame_dropwpaths(pr); /* * deal with set[ug]id. * MNT_NOEXEC has already been used to disable s[ug]id. */ if ((attr.va_mode & (VSUID | VSGID)) && proc_cansugid(p)) { int i; atomic_setbits_int(&pr->ps_flags, PS_SUGID|PS_SUGIDEXEC); #ifdef KTRACE /* * If process is being ktraced, turn off - unless * root set it. */ if (pr->ps_tracevp && !(pr->ps_traceflag & KTRFAC_ROOT)) ktrcleartrace(pr); #endif p->p_ucred = cred = crcopy(cred); if (attr.va_mode & VSUID) cred->cr_uid = attr.va_uid; if (attr.va_mode & VSGID) cred->cr_gid = attr.va_gid; /* * For set[ug]id processes, a few caveats apply to * stdin, stdout, and stderr. */ error = 0; fdplock(p->p_fd); for (i = 0; i < 3; i++) { struct file *fp = NULL; /* * NOTE - This will never return NULL because of * immature fds. The file descriptor table is not * shared because we're suid. */ fp = fd_getfile(p->p_fd, i); /* * Ensure that stdin, stdout, and stderr are already * allocated. We do not want userland to accidentally * allocate descriptors in this range which has implied * meaning to libc. */ if (fp == NULL) { short flags = FREAD | (i == 0 ? 0 : FWRITE); struct vnode *vp; int indx; if ((error = falloc(p, &fp, &indx)) != 0) break; #ifdef DIAGNOSTIC if (indx != i) panic("sys_execve: falloc indx != i"); #endif if ((error = cdevvp(getnulldev(), &vp)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); break; } if ((error = VOP_OPEN(vp, flags, cred, p)) != 0) { fdremove(p->p_fd, indx); closef(fp, p); vrele(vp); break; } if (flags & FWRITE) vp->v_writecount++; fp->f_flag = flags; fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; FILE_SET_MATURE(fp, p); } } fdpunlock(p->p_fd); if (error) goto exec_abort; } else atomic_clearbits_int(&pr->ps_flags, PS_SUGID); /* * Reset the saved ugids and update the process's copy of the * creds if the creds have been changed */ if (cred->cr_uid != cred->cr_svuid || cred->cr_gid != cred->cr_svgid) { /* make sure we have unshared ucreds */ p->p_ucred = cred = crcopy(cred); cred->cr_svuid = cred->cr_uid; cred->cr_svgid = cred->cr_gid; } if (pr->ps_ucred != cred) { struct ucred *ocred; ocred = pr->ps_ucred; crhold(cred); pr->ps_ucred = cred; crfree(ocred); } if (pr->ps_flags & PS_SUGIDEXEC) { int i, s = splclock(); timeout_del(&pr->ps_realit_to); for (i = 0; i < nitems(pr->ps_timer); i++) { timerclear(&pr->ps_timer[i].it_interval); timerclear(&pr->ps_timer[i].it_value); } splx(s); } /* reset CPU time usage for the thread, but not the process */ timespecclear(&p->p_tu.tu_runtime); p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0; km_free(argp, NCARGS, &kv_exec, &kp_pageable); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); /* * notify others that we exec'd */ KNOTE(&pr->ps_klist, NOTE_EXEC); /* setup new registers and do misc. setup. */ if (pack.ep_emul->e_fixup != NULL) { if ((*pack.ep_emul->e_fixup)(p, &pack) != 0) goto free_pack_abort; } #ifdef MACHINE_STACK_GROWS_UP (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack + slen, retval); #else (*pack.ep_emul->e_setregs)(p, &pack, (u_long)stack, retval); #endif /* map the process's signal trampoline code */ if (exec_sigcode_map(pr, pack.ep_emul)) goto free_pack_abort; #ifdef __HAVE_EXEC_MD_MAP /* perform md specific mappings that process might need */ if (exec_md_map(p, &pack)) goto free_pack_abort; #endif if (pr->ps_flags & PS_TRACED) psignal(p, SIGTRAP); free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); /* * Call emulation specific exec hook. This can setup per-process * p->p_emuldata or do any other per-process stuff an emulation needs. * * If we are executing process of different emulation than the * original forked process, call e_proc_exit() of the old emulation * first, then e_proc_exec() of new emulation. If the emulation is * same, the exec hook code should deallocate any old emulation * resources held previously by this process. */ if (pr->ps_emul && pr->ps_emul->e_proc_exit && pr->ps_emul != pack.ep_emul) (*pr->ps_emul->e_proc_exit)(p); p->p_descfd = 255; if ((pack.ep_flags & EXEC_HASFD) && pack.ep_fd < 255) p->p_descfd = pack.ep_fd; /* * Call exec hook. Emulation code may NOT store reference to anything * from &pack. */ if (pack.ep_emul->e_proc_exec) (*pack.ep_emul->e_proc_exec)(p, &pack); #if defined(KTRACE) && defined(COMPAT_LINUX) /* update ps_emul, but don't ktrace it if native-execing-native */ if (pr->ps_emul != pack.ep_emul || pack.ep_emul != &emul_native) { pr->ps_emul = pack.ep_emul; if (KTRPOINT(p, KTR_EMUL)) ktremul(p); } #else /* update ps_emul, the old value is no longer needed */ pr->ps_emul = pack.ep_emul; #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); #if NSYSTRACE > 0 if (ISSET(p->p_flag, P_SYSTRACE) && wassugid && !ISSET(pr->ps_flags, PS_SUGID | PS_SUGIDEXEC)) systrace_execve1(pathbuf, p); #endif if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (0); bad: /* free the vmspace-creation commands, and release their references */ kill_vmcmds(&pack.ep_vmcmds); /* kill any opened file descriptor, if necessary */ if (pack.ep_flags & EXEC_HASFD) { pack.ep_flags &= ~EXEC_HASFD; fdplock(p->p_fd); (void) fdrelease(p, pack.ep_fd); fdpunlock(p->p_fd); } if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); /* close and put the exec'd file */ vn_close(pack.ep_vp, FREAD, cred, p); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); km_free(argp, NCARGS, &kv_exec, &kp_pageable); freehdr: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); #if NSYSTRACE > 0 clrflag: #endif atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); single_thread_clear(p, P_SUSPSIG); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); return (error); exec_abort: /* * the old process doesn't exist anymore. exit gracefully. * get rid of the (new) address space we have created, if any, get rid * of our namei data and vnode, and exit noting failure */ uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS); if (pack.ep_interp != NULL) pool_put(&namei_pool, pack.ep_interp); if (pack.ep_emul_arg != NULL) free(pack.ep_emul_arg, M_TEMP, pack.ep_emul_argsize); pool_put(&namei_pool, nid.ni_cnd.cn_pnbuf); vn_close(pack.ep_vp, FREAD, cred, p); km_free(argp, NCARGS, &kv_exec, &kp_pageable); free_pack_abort: free(pack.ep_hdr, M_EXEC, pack.ep_hdrlen); if (pathbuf != NULL) pool_put(&namei_pool, pathbuf); exit1(p, W_EXITCODE(0, SIGABRT), EXIT_NORMAL); /* NOTREACHED */ atomic_clearbits_int(&pr->ps_flags, PS_INEXEC); return (0); }
int uvm_objwire(struct uvm_object *uobj, off_t start, off_t end, struct pglist *pageq) { int i, npages, error; struct vm_page *pgs[FETCH_PAGECOUNT]; off_t offset = start, left; left = (end - start) >> PAGE_SHIFT; mtx_enter(&uobj->vmobjlock); while (left) { npages = MIN(FETCH_PAGECOUNT, left); /* Get the pages */ memset(pgs, 0, sizeof(pgs)); error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, 0, VM_PROT_READ | VM_PROT_WRITE, UVM_ADV_SEQUENTIAL, PGO_ALLPAGES | PGO_SYNCIO); if (error) goto error; mtx_enter(&uobj->vmobjlock); for (i = 0; i < npages; i++) { KASSERT(pgs[i] != NULL); KASSERT(!(pgs[i]->pg_flags & PG_RELEASED)); #if 0 /* * Loan break */ if (pgs[i]->loan_count) { while (pgs[i]->loan_count) { pg = uvm_loanbreak(pgs[i]); if (!pg) { mtx_leave(&uobj->vmobjlock); uvm_wait("uobjwirepg"); mtx_enter(&uobj->vmobjlock); continue; } } pgs[i] = pg; } #endif if (pgs[i]->pg_flags & PQ_AOBJ) { atomic_clearbits_int(&pgs[i]->pg_flags, PG_CLEAN); uao_dropswap(uobj, i); } } /* Wire the pages */ uvm_lock_pageq(); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); if (pageq != NULL) TAILQ_INSERT_TAIL(pageq, pgs[i], pageq); } uvm_unlock_pageq(); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); left -= npages; offset += (off_t)npages << PAGE_SHIFT; } mtx_leave(&uobj->vmobjlock); return 0; error: /* Unwire the pages which have been wired */ uvm_objunwire(uobj, start, offset); return error; }
/* * The CPU ends up here when its ready to run * This is called from code in mptramp.s; at this point, we are running * in the idle pcb/idle stack of the new cpu. When this function returns, * this processor will enter the idle loop and start looking for work. * * XXX should share some of this with init386 in machdep.c */ void cpu_hatch(void *v) { struct cpu_info *ci = (struct cpu_info *)v; int s; cpu_init_msrs(ci); #ifdef DEBUG if (ci->ci_flags & CPUF_PRESENT) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif ci->ci_flags |= CPUF_PRESENT; lapic_enable(); lapic_startclock(); if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) { /* * We need to wait until we can identify, otherwise dmesg * output will be messy. */ while ((ci->ci_flags & CPUF_IDENTIFY) == 0) delay(10); identifycpu(ci); /* Signal we're done */ atomic_clearbits_int(&ci->ci_flags, CPUF_IDENTIFY); /* Prevent identifycpu() from running again */ atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED); } while ((ci->ci_flags & CPUF_GO) == 0) delay(10); #ifdef DEBUG if (ci->ci_flags & CPUF_RUNNING) panic("%s: already running!?", ci->ci_dev->dv_xname); #endif lcr0(ci->ci_idle_pcb->pcb_cr0); cpu_init_idt(); lapic_set_lvt(); gdt_init_cpu(ci); fpuinit(ci); lldt(0); cpu_init(ci); s = splhigh(); lcr8(0); enable_intr(); microuptime(&ci->ci_schedstate.spc_runtime); splx(s); SCHED_LOCK(s); cpu_switchto(NULL, sched_chooseproc()); }
int uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, struct vm_anon *anon) { boolean_t we_own; /* we own anon's page? */ boolean_t locked; /* did we relock? */ struct vm_page *pg; int result; result = 0; /* XXX shut up gcc */ uvmexp.fltanget++; /* bump rusage counters */ if (anon->an_page) curproc->p_ru.ru_minflt++; else curproc->p_ru.ru_majflt++; /* * loop until we get it, or fail. */ while (1) { we_own = FALSE; /* TRUE if we set PG_BUSY on a page */ pg = anon->an_page; /* * if there is a resident page and it is loaned, then anon * may not own it. call out to uvm_anon_lockpage() to ensure * the real owner of the page has been identified and locked. */ if (pg && pg->loan_count) pg = uvm_anon_lockloanpg(anon); /* * page there? make sure it is not busy/released. */ if (pg) { /* * at this point, if the page has a uobject [meaning * we have it on loan], then that uobject is locked * by us! if the page is busy, we drop all the * locks (including uobject) and try again. */ if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0) { return (VM_PAGER_OK); } atomic_setbits_int(&pg->pg_flags, PG_WANTED); uvmexp.fltpgwait++; /* * the last unlock must be an atomic unlock+wait on * the owner of page */ if (pg->uobject) { /* owner is uobject ? */ uvmfault_unlockall(ufi, amap, NULL, anon); UVM_UNLOCK_AND_WAIT(pg, &pg->uobject->vmobjlock, FALSE, "anonget1",0); } else { /* anon owns page */ uvmfault_unlockall(ufi, amap, NULL, NULL); UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0, "anonget2",0); } /* ready to relock and try again */ } else { /* * no page, we must try and bring it in. */ pg = uvm_pagealloc(NULL, 0, anon, 0); if (pg == NULL) { /* out of RAM. */ uvmfault_unlockall(ufi, amap, NULL, anon); uvmexp.fltnoram++; uvm_wait("flt_noram1"); /* ready to relock and try again */ } else { /* we set the PG_BUSY bit */ we_own = TRUE; uvmfault_unlockall(ufi, amap, NULL, anon); /* * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN * page into the uvm_swap_get function with * all data structures unlocked. note that * it is ok to read an_swslot here because * we hold PG_BUSY on the page. */ uvmexp.pageins++; result = uvm_swap_get(pg, anon->an_swslot, PGO_SYNCIO); /* * we clean up after the i/o below in the * "we_own" case */ /* ready to relock and try again */ } } /* * now relock and try again */ locked = uvmfault_relock(ufi); if (locked || we_own) simple_lock(&anon->an_lock); /* * if we own the page (i.e. we set PG_BUSY), then we need * to clean up after the I/O. there are three cases to * consider: * [1] page released during I/O: free anon and ReFault. * [2] I/O not OK. free the page and cause the fault * to fail. * [3] I/O OK! activate the page and sync with the * non-we_own case (i.e. drop anon lock if not locked). */ if (we_own) { if (pg->pg_flags & PG_WANTED) { /* still holding object lock */ wakeup(pg); } /* un-busy! */ atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); /* * if we were RELEASED during I/O, then our anon is * no longer part of an amap. we need to free the * anon and try again. */ if (pg->pg_flags & PG_RELEASED) { pmap_page_protect(pg, VM_PROT_NONE); simple_unlock(&anon->an_lock); uvm_anfree(anon); /* frees page for us */ if (locked) uvmfault_unlockall(ufi, amap, NULL, NULL); uvmexp.fltpgrele++; return (VM_PAGER_REFAULT); /* refault! */ } if (result != VM_PAGER_OK) { KASSERT(result != VM_PAGER_PEND); /* remove page from anon */ anon->an_page = NULL; /* * remove the swap slot from the anon * and mark the anon as having no real slot. * don't free the swap slot, thus preventing * it from being used again. */ uvm_swap_markbad(anon->an_swslot, 1); anon->an_swslot = SWSLOT_BAD; /* * note: page was never !PG_BUSY, so it * can't be mapped and thus no need to * pmap_page_protect it... */ uvm_lock_pageq(); uvm_pagefree(pg); uvm_unlock_pageq(); if (locked) uvmfault_unlockall(ufi, amap, NULL, anon); else simple_unlock(&anon->an_lock); return (VM_PAGER_ERROR); } /* * must be OK, clear modify (already PG_CLEAN) * and activate */ pmap_clear_modify(pg); uvm_lock_pageq(); uvm_pageactivate(pg); uvm_unlock_pageq(); if (!locked) simple_unlock(&anon->an_lock); } /* * we were not able to relock. restart fault. */ if (!locked) return (VM_PAGER_REFAULT); /* * verify no one has touched the amap and moved the anon on us. */ if (ufi != NULL && amap_lookup(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start) != anon) { uvmfault_unlockall(ufi, amap, NULL, anon); return (VM_PAGER_REFAULT); } /* * try it again! */ uvmexp.fltanretry++; continue; } /* while (1) */ /*NOTREACHED*/ }