void create_init(void) { uint8_t *j; init_process = ptab_alloc(); udata.u_ptab = init_process; udata.u_top = PROGLOAD + 4096; /* Plenty for the boot */ init_process->p_top = udata.u_top; map_init(); newproc(init_process); udata.u_insys = 1; init_process->p_status = P_RUNNING; /* wipe file table */ for (j = udata.u_files; j < (udata.u_files + UFTSIZE); ++j) { *j = NO_FILE; } /* Poke the execve arguments into user data space so _execve() can read them back */ argptr = PROGLOAD; progptr = PROGLOAD + 2048; uzero((void *)progptr, 32); add_argument("/init"); }
void create_init(void) { uint8_t *j, *e; udata.u_top = PROGLOAD + 512; /* Plenty for the boot */ init_process = ptab_alloc(); udata.u_ptab = init_process; init_process->p_top = udata.u_top; map_init(); /* wipe file table */ e = udata.u_files + UFTSIZE; for (j = udata.u_files; j < e; ++j) *j = NO_FILE; makeproc(init_process, &udata); init_process->p_status = P_RUNNING; udata.u_insys = 1; init_process->p_status = P_RUNNING; /* Poke the execve arguments into user data space so _execve() can read them back */ /* Some systems only have a tiny window we can use at boot as most of this space is loaded with common memory */ argptr = PROGLOAD; progptr = PROGLOAD + 256; uzero((void *)progptr, 32); add_argument("/init"); }
/*
 * Read handler for the system character devices, selected by minor:
 *   0 - always reports 0 bytes
 *   1 - copies u_count bytes starting at the address held in u_offset
 *   2 - fills the caller's buffer with zeros
 *   3 - copies out of the process table, starting at byte u_offset
 * Any other minor fails with ENXIO.
 *
 * Returns the value produced by the selected case, or -1 on error.
 */
int sys_read(uint8_t minor, uint8_t rawflag, uint8_t flag)
{
	/* Referenced only so the parameters do not show up as unused */
	rawflag;
	flag;

	if (minor == 0)
		return 0;

	if (minor == 1)
		return uputsys((unsigned char *)udata.u_offset, udata.u_count);

	if (minor == 2) {
		/* u_sysio selects a plain memset over the uzero() helper */
		if (!udata.u_sysio)
			uzero(udata.u_base, udata.u_count);
		else
			memset(udata.u_base, 0, udata.u_count);
		return udata.u_count;
	}

	if (minor == 3) {
		unsigned char *table = (unsigned char *) ptab;

		/* Offsets at or past the end of the table read as empty */
		if (udata.u_offset >= PTABSIZE * sizeof(struct p_tab))
			return 0;
		return uputsys(table + udata.u_offset, udata.u_count);
	}

	udata.u_error = ENXIO;
	return -1;
}
arg_t _time(void) { time_t t; switch (type) { case 0: rdtime(&t); uput(&t, tvec, sizeof(t)); return (0); case 1: uput(&t.low, &ticks.full, sizeof(ticks)); uzero(&t.high, sizeof(t.high)); return 0; default: udata.u_error = EINVAL; return -1; } }
/*
 * brk system call: move the program break to addr.
 *
 * Returns 0 on success. If addr is at or beyond brk_limit() the request
 * is refused: a message is logged, u_error is set to ENOMEM and -1 is
 * returned.
 */
arg_t _brk(void)
{
	/*
	 * Only allow the break to be set inside the range the platform
	 * permits. For most platforms this is within 512 bytes of the
	 * stack pointer.
	 * FIXME: if we get more complex mapping rule types then we may
	 * need to make this something like if (brk_valid(addr)) so we can
	 * keep it portable.
	 */
	if (addr < brk_limit()) {
		/*
		 * Growing the break exposes memory that is no longer
		 * guaranteed to be clear, so zero the newly gained part.
		 */
		if (udata.u_break < addr)
			uzero((void *)udata.u_break, addr - udata.u_break);
		udata.u_break = addr;
		return 0;
	}

	kprintf("%d: out of memory\n", udata.u_ptab->p_pid);
	udata.u_error = ENOMEM;
	return -1;
}
/*
 * Set up the current lwp to run the user signal handler `hdlr` for
 * signal `sig`.
 *
 * A frame is built on the user stack (or on the alternate signal stack
 * when one is requested and usable) holding, from the top down: an
 * optional siginfo_t, the saved ucontext_t and a struct sigframe with
 * the handler arguments. The user registers are then redirected so the
 * lwp resumes in the handler.
 *
 *   sig  - signal number being delivered
 *   sip  - kernel siginfo to copy out, or NULL for none
 *   hdlr - user address of the handler
 *
 * Returns 1 when the frame was built, 0 when the stack address is out of
 * range or a fault occurred while writing to it.
 *
 * NOTE(review): the locals read after the badstack label are declared
 * volatile, presumably because on_fault() returns a second time after a
 * fault - confirm against the on_fault() contract.
 */
int
sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
{
	volatile int minstacksz;
	int newstack;
	label_t ljb;
	volatile caddr_t sp;
	caddr_t fp;
	struct regs *rp;
	volatile greg_t upc;
	volatile proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	ucontext_t *volatile tuc = NULL;
	ucontext_t *uc;
	siginfo_t *sip_addr;
	volatile int watched;

	rp = lwptoregs(lwp);
	upc = rp->r_pc;

	/* Total space needed: sigframe + ucontext, plus siginfo if present */
	minstacksz = SA(sizeof (struct sigframe)) + SA(sizeof (*uc));
	if (sip != NULL)
		minstacksz += SA(sizeof (siginfo_t));
	ASSERT((minstacksz & (STACK_ALIGN - 1ul)) == 0);

	/*
	 * Figure out whether we will be handling this signal on
	 * an alternate stack specified by the user. Then allocate
	 * and validate the stack requirements for the signal handler
	 * context. on_fault will catch any faults.
	 */
	newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
	    !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));

	if (newstack) {
		fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
		    SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN);
	} else if ((rp->r_ss & 0xffff) != UDS_SEL) {
		user_desc_t *ldt;
		/*
		 * If the stack segment selector is -not- pointing at
		 * the UDS_SEL descriptor and we have an LDT entry for
		 * it instead, add the base address to find the effective va.
		 */
		if ((ldt = p->p_ldt) != NULL)
			fp = (caddr_t)rp->r_sp +
			    USEGD_GETBASE(&ldt[SELTOIDX(rp->r_ss)]);
		else
			fp = (caddr_t)rp->r_sp;
	} else
		fp = (caddr_t)rp->r_sp;

	/*
	 * Force proper stack pointer alignment, even in the face of a
	 * misaligned stack pointer from user-level before the signal.
	 * Don't use the SA() macro because that rounds up, not down.
	 */
	fp = (caddr_t)((uintptr_t)fp & ~(STACK_ALIGN - 1ul));
	sp = fp - minstacksz;

	/*
	 * Make sure lwp hasn't trashed its stack: both ends of the frame
	 * must lie below USERLIMIT.
	 */
	if (sp >= (caddr_t)USERLIMIT || fp >= (caddr_t)USERLIMIT) {
#ifdef DEBUG
		printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
		    PTOU(p)->u_comm, p->p_pid, sig);
		printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
		    (void *)sp, (void *)hdlr, (uintptr_t)upc);
		printf("sp above USERLIMIT\n");
#endif
		return (0);
	}

	/* Watchpoints on the frame are suspended while the kernel writes it */
	watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE);

	/* From here on a fault on the user stack lands at badstack */
	if (on_fault(&ljb))
		goto badstack;

	if (sip != NULL) {
		zoneid_t zoneid;

		/* Clear the whole user siginfo_t before copying into it */
		fp -= SA(sizeof (siginfo_t));
		uzero(fp, sizeof (siginfo_t));
		if (SI_FROMUSER(sip) &&
		    (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID &&
		    zoneid != sip->si_zoneid) {
			/*
			 * User-sent signal from a different zone delivered
			 * into a non-global zone: copy out a version with
			 * the sender's identity replaced.
			 */
			k_siginfo_t sani_sip = *sip;

			sani_sip.si_pid = p->p_zone->zone_zsched->p_pid;
			sani_sip.si_uid = 0;
			sani_sip.si_ctid = -1;
			sani_sip.si_zoneid = zoneid;
			copyout_noerr(&sani_sip, fp, sizeof (sani_sip));
		} else
			copyout_noerr(sip, fp, sizeof (*sip));
		sip_addr = (siginfo_t *)fp;

		if (sig == SIGPROF &&
		    curthread->t_rprof != NULL &&
		    curthread->t_rprof->rp_anystate) {
			/*
			 * We stand on our head to deal with
			 * the real time profiling signal.
			 * Fill in the stuff that doesn't fit
			 * in a normal k_siginfo structure.
			 */
			int i = sip->si_nsysarg;

			while (--i >= 0)
				suword32_noerr(&(sip_addr->si_sysarg[i]),
				    (uint32_t)lwp->lwp_arg[i]);
			copyout_noerr(curthread->t_rprof->rp_state,
			    sip_addr->si_mstate,
			    sizeof (curthread->t_rprof->rp_state));
		}
	} else
		sip_addr = NULL;

	/* save the current context on the user stack */
	fp -= SA(sizeof (*tuc));
	uc = (ucontext_t *)fp;
	/* The context is assembled in a kernel buffer, then copied out */
	tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
	savecontext(tuc, &lwp->lwp_sigoldmask);
	copyout_noerr(tuc, uc, sizeof (*tuc));
	kmem_free(tuc, sizeof (*tuc));
	/* Cleared so the badstack path does not free it a second time */
	tuc = NULL;

	lwp->lwp_oldcontext = (uintptr_t)uc;

	if (newstack) {
		lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK;
		if (lwp->lwp_ustack)
			copyout_noerr(&lwp->lwp_sigaltstack,
			    (stack_t *)lwp->lwp_ustack, sizeof (stack_t));
	}

	/*
	 * Set up signal handler arguments
	 */
	{
		struct sigframe frame;

		frame.sip = sip_addr;
		frame.ucp = uc;
		frame.signo = sig;
		frame.retaddr = (void (*)())0xffffffff;	/* never return! */
		copyout_noerr(&frame, sp, sizeof (frame));
	}

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);

	/* Redirect the lwp: new stack pointer, handler as program counter */
	rp->r_sp = (greg_t)sp;
	rp->r_pc = (greg_t)hdlr;
	rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL);

	/* Force the standard user code and stack selectors if either differs */
	if ((rp->r_cs & 0xffff) != UCS_SEL ||
	    (rp->r_ss & 0xffff) != UDS_SEL) {
		rp->r_cs = UCS_SEL;
		rp->r_ss = UDS_SEL;
	}

	/*
	 * Don't set lwp_eosys here.  sendsig() is called via psig() after
	 * lwp_eosys is handled, so setting it here would affect the next
	 * system call.
	 */
	return (1);

badstack:
	/* Undo fault handling and watchpoint changes, free any kernel copy */
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
	if (tuc)
		kmem_free(tuc, sizeof (*tuc));
#ifdef DEBUG
	printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
	    PTOU(p)->u_comm, p->p_pid, sig);
	printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
	    (void *)sp, (void *)hdlr, (uintptr_t)upc);
#endif
	return (0);
}
/*
 * Set up the current lwp to run the user signal handler `hdlr` for
 * signal `sig` (amd64 variant).
 *
 * A struct sigframe is placed at the new stack pointer with the saved
 * ucontext_t directly after it; an optional siginfo_t sits at the top of
 * the reserved area. The handler arguments are passed in %rdi, %rsi and
 * %rdx.
 *
 *   sig  - signal number being delivered
 *   sip  - kernel siginfo to copy out, or NULL for none
 *   hdlr - user address of the handler
 *
 * Returns 1 when the frame was built, 0 when the stack address is out of
 * range or a fault occurred while writing to it.
 *
 * NOTE(review): the locals read after the badstack label are declared
 * volatile, presumably because on_fault() returns a second time after a
 * fault - confirm against the on_fault() contract.
 */
int
sendsig(int sig, k_siginfo_t *sip, void (*hdlr)())
{
	volatile int minstacksz;
	int newstack;
	label_t ljb;
	volatile caddr_t sp;
	caddr_t fp;
	volatile struct regs *rp;
	volatile greg_t upc;
	volatile proc_t *p = ttoproc(curthread);
	struct as *as = p->p_as;
	klwp_t *lwp = ttolwp(curthread);
	ucontext_t *volatile tuc = NULL;
	ucontext_t *uc;
	siginfo_t *sip_addr;
	volatile int watched;

	/*
	 * This routine is utterly dependent upon STACK_ALIGN being
	 * 16 and STACK_ENTRY_ALIGN being 8. Let's just acknowledge
	 * that and require it.
	 */

#if STACK_ALIGN != 16 || STACK_ENTRY_ALIGN != 8
#error "sendsig() amd64 did not find the expected stack alignments"
#endif

	rp = lwptoregs(lwp);
	upc = rp->r_pc;

	/*
	 * Since we're setting up to run the signal handler we have to
	 * arrange that the stack at entry to the handler is (only)
	 * STACK_ENTRY_ALIGN (i.e. 8) byte aligned so that when the handler
	 * executes its push of %rbp, the stack realigns to STACK_ALIGN
	 * (i.e. 16) correctly.
	 *
	 * The new sp will point to the sigframe and the ucontext_t. The
	 * above means that sp (and thus sigframe) will be 8-byte aligned,
	 * but not 16-byte aligned. ucontext_t, however, contains %xmm regs
	 * which must be 16-byte aligned. Because of this, for correct
	 * alignment, sigframe must be a multiple of 8-bytes in length, but
	 * not 16-bytes. This will place ucontext_t at a nice 16-byte boundary.
	 */

	/* LINTED: logical expression always true: op "||" */
	ASSERT((sizeof (struct sigframe) % 16) == 8);

	/* Total space needed: sigframe + ucontext, plus siginfo if present */
	minstacksz = sizeof (struct sigframe) + SA(sizeof (*uc));
	if (sip != NULL)
		minstacksz += SA(sizeof (siginfo_t));
	ASSERT((minstacksz & (STACK_ENTRY_ALIGN - 1ul)) == 0);

	/*
	 * Figure out whether we will be handling this signal on
	 * an alternate stack specified by the user.  Then allocate
	 * and validate the stack requirements for the signal handler
	 * context.  on_fault will catch any faults.
	 */
	newstack = sigismember(&PTOU(curproc)->u_sigonstack, sig) &&
	    !(lwp->lwp_sigaltstack.ss_flags & (SS_ONSTACK|SS_DISABLE));

	if (newstack) {
		fp = (caddr_t)(SA((uintptr_t)lwp->lwp_sigaltstack.ss_sp) +
		    SA(lwp->lwp_sigaltstack.ss_size) - STACK_ALIGN);
	} else {
		/*
		 * Drop below the 128-byte reserved region of the stack frame
		 * we're interrupting.
		 */
		fp = (caddr_t)rp->r_sp - STACK_RESERVE;
	}

	/*
	 * Force proper stack pointer alignment, even in the face of a
	 * misaligned stack pointer from user-level before the signal.
	 */
	fp = (caddr_t)((uintptr_t)fp & ~(STACK_ENTRY_ALIGN - 1ul));

	/*
	 * Most of the time during normal execution, the stack pointer
	 * is aligned on a STACK_ALIGN (i.e. 16 byte) boundary.  However,
	 * (for example) just after a call instruction (which pushes
	 * the return address), the callers stack misaligns until the
	 * 'push %rbp' happens in the callee prolog.  So while we should
	 * expect the stack pointer to be always at least STACK_ENTRY_ALIGN
	 * aligned, we should -not- expect it to always be STACK_ALIGN aligned.
	 * We now adjust to ensure that the new sp is aligned to
	 * STACK_ENTRY_ALIGN but not to STACK_ALIGN.
	 */
	sp = fp - minstacksz;
	if (((uintptr_t)sp & (STACK_ALIGN - 1ul)) == 0) {
		/* Grow the frame by one entry so sp ends up 8-not-16 aligned */
		sp -= STACK_ENTRY_ALIGN;
		minstacksz = fp - sp;
	}

	/*
	 * Now, make sure the resulting signal frame address is sane
	 */
	if (sp >= as->a_userlimit || fp >= as->a_userlimit) {
#ifdef DEBUG
		printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
		    PTOU(p)->u_comm, p->p_pid, sig);
		printf("sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
		    (void *)sp, (void *)hdlr, (uintptr_t)upc);
		printf("sp above USERLIMIT\n");
#endif
		return (0);
	}

	/* Watchpoints on the frame are suspended while the kernel writes it */
	watched = watch_disable_addr((caddr_t)sp, minstacksz, S_WRITE);

	/* From here on a fault on the user stack lands at badstack */
	if (on_fault(&ljb))
		goto badstack;

	if (sip != NULL) {
		zoneid_t zoneid;

		/* Clear the whole user siginfo_t before copying into it */
		fp -= SA(sizeof (siginfo_t));
		uzero(fp, sizeof (siginfo_t));
		if (SI_FROMUSER(sip) &&
		    (zoneid = p->p_zone->zone_id) != GLOBAL_ZONEID &&
		    zoneid != sip->si_zoneid) {
			/*
			 * User-sent signal from a different zone delivered
			 * into a non-global zone: copy out a version with
			 * the sender's identity replaced.
			 */
			k_siginfo_t sani_sip = *sip;

			sani_sip.si_pid = p->p_zone->zone_zsched->p_pid;
			sani_sip.si_uid = 0;
			sani_sip.si_ctid = -1;
			sani_sip.si_zoneid = zoneid;
			copyout_noerr(&sani_sip, fp, sizeof (sani_sip));
		} else
			copyout_noerr(sip, fp, sizeof (*sip));
		sip_addr = (siginfo_t *)fp;

		if (sig == SIGPROF &&
		    curthread->t_rprof != NULL &&
		    curthread->t_rprof->rp_anystate) {
			/*
			 * We stand on our head to deal with
			 * the real time profiling signal.
			 * Fill in the stuff that doesn't fit
			 * in a normal k_siginfo structure.
			 */
			int i = sip->si_nsysarg;

			while (--i >= 0)
				sulword_noerr(
				    (ulong_t *)&(sip_addr->si_sysarg[i]),
				    (ulong_t)lwp->lwp_arg[i]);
			copyout_noerr(curthread->t_rprof->rp_state,
			    sip_addr->si_mstate,
			    sizeof (curthread->t_rprof->rp_state));
		}
	} else
		sip_addr = NULL;

	/*
	 * save the current context on the user stack directly after the
	 * sigframe. Since sigframe is 8-byte-but-not-16-byte aligned,
	 * and since sizeof (struct sigframe) is 24, this guarantees
	 * 16-byte alignment for ucontext_t and its %xmm registers.
	 */
	uc = (ucontext_t *)(sp + sizeof (struct sigframe));
	tuc = kmem_alloc(sizeof (*tuc), KM_SLEEP);
	/* Fault catching is dropped around savecontext() and re-armed after */
	no_fault();
	savecontext(tuc, &lwp->lwp_sigoldmask);
	if (on_fault(&ljb))
		goto badstack;
	copyout_noerr(tuc, uc, sizeof (*tuc));
	kmem_free(tuc, sizeof (*tuc));
	/* Cleared so the badstack path does not free it a second time */
	tuc = NULL;

	lwp->lwp_oldcontext = (uintptr_t)uc;

	if (newstack) {
		lwp->lwp_sigaltstack.ss_flags |= SS_ONSTACK;
		if (lwp->lwp_ustack)
			copyout_noerr(&lwp->lwp_sigaltstack,
			    (stack_t *)lwp->lwp_ustack, sizeof (stack_t));
	}

	/*
	 * Set up signal handler return and stack linkage
	 */
	{
		struct sigframe frame;

		/*
		 * ensure we never return "normally"
		 */
		frame.retaddr = (caddr_t)(uintptr_t)-1L;
		frame.signo = sig;
		frame.sip = sip_addr;
		copyout_noerr(&frame, sp, sizeof (frame));
	}

	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);

	/*
	 * Set up user registers for execution of signal handler.
	 */
	rp->r_sp = (greg_t)sp;
	rp->r_pc = (greg_t)hdlr;
	rp->r_ps = PSL_USER | (rp->r_ps & PS_IOPL);

	/* Handler arguments: signal number, siginfo address, ucontext address */
	rp->r_rdi = sig;
	rp->r_rsi = (uintptr_t)sip_addr;
	rp->r_rdx = (uintptr_t)uc;

	if ((rp->r_cs & 0xffff) != UCS_SEL ||
	    (rp->r_ss & 0xffff) != UDS_SEL) {
		/*
		 * Try our best to deliver the signal.
		 */
		rp->r_cs = UCS_SEL;
		rp->r_ss = UDS_SEL;
	}

	/*
	 * Don't set lwp_eosys here.  sendsig() is called via psig() after
	 * lwp_eosys is handled, so setting it here would affect the next
	 * system call.
	 */
	return (1);

badstack:
	/* Undo fault handling and watchpoint changes, free any kernel copy */
	no_fault();
	if (watched)
		watch_enable_addr((caddr_t)sp, minstacksz, S_WRITE);
	if (tuc)
		kmem_free(tuc, sizeof (*tuc));
#ifdef DEBUG
	printf("sendsig: bad signal stack cmd=%s, pid=%d, sig=%d\n",
	    PTOU(p)->u_comm, p->p_pid, sig);
	printf("on fault, sigsp = 0x%p, action = 0x%p, upc = 0x%lx\n",
	    (void *)sp, (void *)hdlr, (uintptr_t)upc);
#endif
	return (0);
}
arg_t _execve(void) { /* We aren't re-entrant where this matters */ uint8_t hdr[16]; staticfast inoptr ino; char **nargv; /* In user space */ char **nenvp; /* In user space */ struct s_argblk *abuf, *ebuf; int argc; uint16_t progptr; uint16_t progload; staticfast uint16_t top; uint16_t bin_size; /* Will need to be bigger on some cpus */ uint16_t bss; top = ramtop; if (!(ino = n_open_lock(name, NULLINOPTR))) return (-1); if (!((getperm(ino) & OTH_EX) && (ino->c_node.i_mode & F_REG) && (ino->c_node.i_mode & (OWN_EX | OTH_EX | GRP_EX)))) { udata.u_error = EACCES; goto nogood; } setftime(ino, A_TIME); udata.u_offset = 0; udata.u_count = 16; udata.u_base = hdr; udata.u_sysio = true; readi(ino, 0); if (udata.u_done != 16) { udata.u_error = ENOEXEC; goto nogood; } if (!header_ok(hdr)) { udata.u_error = ENOEXEC; goto nogood2; } progload = hdr[7] << 8; if (progload == 0) progload = PROGLOAD; top = *(uint16_t *)(hdr + 8); if (top == 0) /* Legacy 'all space' binary */ top = ramtop; else /* Requested an amount, so adjust for the base */ top += progload; bss = *(uint16_t *)(hdr + 14); /* Binary doesn't fit */ /* FIXME: review overflows */ bin_size = ino->c_node.i_size; progptr = bin_size + 1024 + bss; if (progload < PROGLOAD || top - progload < progptr || progptr < bin_size) { udata.u_error = ENOMEM; goto nogood2; } udata.u_ptab->p_status = P_NOSLEEP; /* If we made pagemap_realloc keep hold of some defined area we could in theory just move the arguments up or down as part of the process - that would save us all this hassle but replace it with new hassle */ /* Gather the arguments, and put them in temporary buffers. */ abuf = (struct s_argblk *) tmpbuf(); /* Put environment in another buffer. 
*/ ebuf = (struct s_argblk *) tmpbuf(); /* Read args and environment from process memory */ if (rargs(argv, abuf) || rargs(envp, ebuf)) goto nogood3; /* SN */ /* This must be the last test as it makes changes if it works */ /* FIXME: once we sort out chmem we can make stack and data two elements. We never allocate 'code' as there is no split I/D */ /* This is only safe from deadlocks providing pagemap_realloc doesn't sleep */ if (pagemap_realloc(0, top - MAPBASE, 0)) goto nogood3; /* From this point on we are commmited to the exec() completing */ /* Core dump and ptrace permission logic */ #ifdef CONFIG_LEVEL_2 /* Q: should uid == 0 mean we always allow core */ if ((!(getperm(ino) & OTH_RD)) || (ino->c_node.i_mode & (SET_UID | SET_GID))) udata.u_flags |= U_FLAG_NOCORE; else udata.u_flags &= ~U_FLAG_NOCORE; #endif udata.u_top = top; udata.u_ptab->p_top = top; /* setuid, setgid if executable requires it */ if (ino->c_node.i_mode & SET_UID) udata.u_euid = ino->c_node.i_uid; if (ino->c_node.i_mode & SET_GID) udata.u_egid = ino->c_node.i_gid; /* FIXME: In the execve case we may on some platforms have space below PROGLOAD to clear... */ /* We are definitely going to succeed with the exec, * so we can start writing over the old program */ uput(hdr, (uint8_t *)progload, 16); /* At this point, we are committed to reading in and * executing the program. This call must not block. */ close_on_exec(); /* * Read in the rest of the program, block by block. We rely upon * the optimization path in readi to spot this is a big move to user * space and move it directly. */ progptr = progload + 16; if (bin_size > 16) { bin_size -= 16; udata.u_base = (uint8_t *)progptr; /* We copied the first block already */ udata.u_count = bin_size; udata.u_sysio = false; readi(ino, 0); if (udata.u_done != bin_size) goto nogood4; progptr += bin_size; } /* Wipe the memory in the BSS. 
We don't wipe the memory above that on 8bit boxes, but defer it to brk/sbrk() */ uzero((uint8_t *)progptr, bss); /* Set initial break for program */ udata.u_break = (int)ALIGNUP(progptr + bss); /* Turn off caught signals */ memset(udata.u_sigvec, 0, sizeof(udata.u_sigvec)); // place the arguments, environment and stack at the top of userspace memory, // Write back the arguments and the environment nargv = wargs(((char *) top - 2), abuf, &argc); nenvp = wargs((char *) (nargv), ebuf, NULL); // Fill in udata.u_name with program invocation name uget((void *) ugetw(nargv), udata.u_name, 8); memcpy(udata.u_ptab->p_name, udata.u_name, 8); tmpfree(abuf); tmpfree(ebuf); i_deref(ino); /* Shove argc and the address of argv just below envp FIXME: should flip them in crt0.S of app for R2L setups so we can get rid of the ifdefs */ #ifdef CONFIG_CALL_R2L /* Arguments are stacked the 'wrong' way around */ uputw((uint16_t) nargv, nenvp - 2); uputw((uint16_t) argc, nenvp - 1); #else uputw((uint16_t) nargv, nenvp - 1); uputw((uint16_t) argc, nenvp - 2); #endif /* Set stack pointer for the program */ udata.u_isp = nenvp - 2; /* Start execution (never returns) */ udata.u_ptab->p_status = P_RUNNING; doexec(progload); /* tidy up in various failure modes */ nogood4: /* Must not run userspace */ ssig(udata.u_ptab, SIGKILL); nogood3: udata.u_ptab->p_status = P_RUNNING; tmpfree(abuf); tmpfree(ebuf); nogood2: nogood: i_unlock_deref(ino); return (-1); }
/*
 * Common read/write routine for the memory pseudo-devices.
 *
 * Walks the iovecs of `uio` and services each one according to the
 * minor device of `dev`:
 *   M_MEM              - physical memory, validated against phys_install
 *   M_KMEM / M_ALLKMEM - kernel virtual memory
 *   M_ZERO             - reads zero-fill the buffer, writes are discarded
 *   M_NULL             - reads return immediately, writes are discarded
 *
 *   dev  - device being accessed (only its minor number is used)
 *   uio  - describes the caller's buffers; advanced as data is moved
 *   rw   - UIO_READ or UIO_WRITE
 *   cred - unused
 *
 * Returns 0 if any data was transferred, otherwise the error that
 * stopped the transfer (0 if there was none).
 */
/*ARGSUSED3*/
static int
mmrw(dev_t dev, struct uio *uio, enum uio_rw rw, cred_t *cred)
{
	pfn_t v;
	struct iovec *iov;
	int error = 0;
	size_t c;
	ssize_t oresid = uio->uio_resid;
	minor_t minor = getminor(dev);

	while (uio->uio_resid > 0 && error == 0) {
		iov = uio->uio_iov;
		/* Step over iovecs that have been fully consumed */
		if (iov->iov_len == 0) {
			uio->uio_iov++;
			uio->uio_iovcnt--;
			if (uio->uio_iovcnt < 0)
				panic("mmrw");
			continue;
		}
		switch (minor) {

		case M_MEM:
			/* The target must be installed physical memory */
			memlist_read_lock();
			if (!address_in_memlist(phys_install,
			    (uint64_t)uio->uio_loffset, 1)) {
				memlist_read_unlock();
				error = EFAULT;
				break;
			}
			memlist_read_unlock();

			v = BTOP((u_offset_t)uio->uio_loffset);
			error = mmio(uio, rw, v,
			    uio->uio_loffset & PAGEOFFSET, 0, NULL);
			break;

		case M_KMEM:
		case M_ALLKMEM:
			{
			page_t **ppp = NULL;
			caddr_t vaddr = (caddr_t)uio->uio_offset;
			int try_lock = NEED_LOCK_KVADDR(vaddr);
			int locked = 0;

			/* Give the platform first refusal on this access */
			if ((error = plat_mem_do_mmio(uio, rw)) != ENOTSUP)
				break;

			/*
			 * If vaddr does not map a valid page, as_pagelock()
			 * will return failure. Hence we can't check the
			 * return value and return EFAULT here as we'd like.
			 * seg_kp and seg_kpm do not properly support
			 * as_pagelock() for this context so we avoid it
			 * using the try_lock set check above.  Some day when
			 * the kernel page locking gets redesigned all this
			 * muck can be cleaned up.
			 */
			if (try_lock)
				locked = (as_pagelock(&kas, &ppp, vaddr,
				    PAGESIZE, S_WRITE) == 0);

			v = hat_getpfnum(kas.a_hat,
			    (caddr_t)(uintptr_t)uio->uio_loffset);
			if (v == PFN_INVALID) {
				if (locked)
					as_pageunlock(&kas, ppp, vaddr,
					    PAGESIZE, S_WRITE);
				error = EFAULT;
				break;
			}

			error = mmio(uio, rw, v, uio->uio_loffset & PAGEOFFSET,
			    minor == M_ALLKMEM || mm_kmem_io_access,
			    (locked && ppp) ? *ppp : NULL);
			if (locked)
				as_pageunlock(&kas, ppp, vaddr, PAGESIZE,
				    S_WRITE);
			}

			break;

		case M_ZERO:
			if (rw == UIO_READ) {
				label_t ljb;

				if (on_fault(&ljb)) {
					no_fault();
					error = EFAULT;
					break;
				}
				uzero(iov->iov_base, iov->iov_len);
				no_fault();
				/*
				 * Consume the iovec as well as the residual
				 * count. Without this the loop came back to
				 * the same iovec on a multi-buffer read,
				 * zeroed it again and never reached the
				 * remaining buffers.
				 */
				c = iov->iov_len;
				iov->iov_base += c;
				iov->iov_len -= c;
				uio->uio_loffset += c;
				uio->uio_resid -= c;
				break;
			}
			/* else it's a write, fall through to NULL case */
			/*FALLTHROUGH*/

		case M_NULL:
			if (rw == UIO_READ)
				return (0);
			/* Writes are discarded: mark the iovec as consumed */
			c = iov->iov_len;
			iov->iov_base += c;
			iov->iov_len -= c;
			uio->uio_loffset += c;
			uio->uio_resid -= c;
			break;

		}
	}
	/* A partial transfer counts as success; report the error otherwise */
	return (uio->uio_resid == oresid ? error : 0);
}
/*
 * execve system call (32-bit flat binary variant).
 *
 * Opens and locks the executable named by the first syscall argument,
 * reads and validates its binfmt_flat header, saves argv/envp in
 * temporary buffers, resizes the process memory to hold the image plus
 * stack, loads and relocates the image, clears BSS and stack, writes
 * the arguments back at the top of memory and enters the program via
 * doexec().
 *
 * Only returns on failure: -1 with udata.u_error set by the failing
 * check (EACCES, ENOEXEC), by pagemap_realloc() or by the helper that
 * failed. If the image cannot be read after the old program has been
 * overwritten the process is sent SIGKILL.
 */
arg_t _execve(void)
{
	/* Not ideal on stack */
	struct binfmt_flat binflat;
	inoptr ino;
	char **nargv;		/* In user space */
	char **nenvp;		/* In user space */
	struct s_argblk *abuf, *ebuf;
	int argc;
	uint32_t bin_size;	/* Will need to be bigger on some cpus */
	uaddr_t progbase, top;
	uaddr_t go;
	uint32_t true_brk;

	/* The inode comes back locked; every exit path unlocks it */
	if (!(ino = n_open_lock(name, NULLINOPTR)))
		return (-1);

	/* Must be a regular file with execute permission for the caller */
	if (!((getperm(ino) & OTH_EX) &&
	      (ino->c_node.i_mode & F_REG) &&
	      (ino->c_node.i_mode & (OWN_EX | OTH_EX | GRP_EX)))) {
		udata.u_error = EACCES;
		goto nogood;
	}

	setftime(ino, A_TIME);

	/* Read the header into the kernel-side binflat structure */
	udata.u_offset = 0;
	udata.u_count = sizeof(struct binfmt_flat);
	udata.u_base = (void *)&binflat;
	udata.u_sysio = true;

	readi(ino, 0);
	if (udata.u_done != sizeof(struct binfmt_flat)) {
		udata.u_error = ENOEXEC;
		goto nogood;
	}

	/* FIXME: ugly - save this as valid_hdr modifies it */
	true_brk = binflat.bss_end;

	/* Hard coded for our 68K format. We don't quite use the ucLinux
	   names, we don't want to load a ucLinux binary in error! */
	if (memcmp(binflat.magic, "bFLT", 4) || !valid_hdr(ino, &binflat)) {
		udata.u_error = ENOEXEC;
		goto nogood2;
	}

	/* Memory needed */
	bin_size = binflat.bss_end + binflat.stack_size;

	/* Overflow ? */
	if (bin_size < binflat.bss_end) {
		udata.u_error = ENOEXEC;
		goto nogood2;
	}

	/* Gather the arguments, and put them in temporary buffers. */
	abuf = (struct s_argblk *) tmpbuf();
	/* Put environment in another buffer. */
	ebuf = (struct s_argblk *) tmpbuf();

	/* Read args and environment from process memory */
	if (rargs(argv, abuf) || rargs(envp, ebuf))
		goto nogood3;

	/* This must be the last test as it makes changes if it works */
	/* FIXME: need to update this to support split code/data and to fix
	   stack handling nicely */
	/* FIXME: ENOMEM fix needs to go to 16bit ? */
	if ((udata.u_error = pagemap_realloc(0, bin_size, 0)) != 0)
		goto nogood3;

	/* Core dump and ptrace permission logic */
#ifdef CONFIG_LEVEL_2
	/* Q: should uid == 0 mean we always allow core */
	if ((!(getperm(ino) & OTH_RD)) ||
		(ino->c_node.i_mode & (SET_UID | SET_GID)))
		udata.u_flags |= U_FLAG_NOCORE;
	else
		udata.u_flags &= ~U_FLAG_NOCORE;
#endif

	udata.u_codebase = progbase = pagemap_base();
	/* From this point on we are committed to the exec() completing
	   so we can start writing over the old program */
	uput(&binflat, (uint8_t *)progbase, sizeof(struct binfmt_flat));

	/* setuid, setgid if executable requires it */
	if (ino->c_node.i_mode & SET_UID)
		udata.u_euid = ino->c_node.i_uid;

	if (ino->c_node.i_mode & SET_GID)
		udata.u_egid = ino->c_node.i_gid;

	top = progbase + bin_size;

	udata.u_top = top;
	udata.u_ptab->p_top = top;

//	kprintf("user space at %p\n", progbase);
//	kprintf("top at %p\n", progbase + bin_size);

	/* bin_size is reused: it now covers the file up to the end of the
	   relocation table (4 bytes per entry) */
	bin_size = binflat.reloc_start + 4 * binflat.reloc_count;
	go = (uint32_t)progbase + binflat.entry;

	close_on_exec();

	/*
	 *  Read in the rest of the program, block by block. We rely upon
	 *  the optimization path in readi to spot this is a big move to user
	 *  space and move it directly.
	 */

	if (bin_size > sizeof(struct binfmt_flat)) {
		/* We copied the header already */
		bin_size -= sizeof(struct binfmt_flat);
		udata.u_base = (uint8_t *)progbase +
					sizeof(struct binfmt_flat);
		udata.u_count = bin_size;
		udata.u_sysio = false;
		readi(ino, 0);
		if (udata.u_done != bin_size)
			goto nogood4;
	}

	/* Header isn't counted in relocations */
	relocate(&binflat, progbase, bin_size);
	/* This may wipe the relocations */
	uzero((uint8_t *)progbase + binflat.data_end,
		binflat.bss_end - binflat.data_end + binflat.stack_size);

	/* Use of brk eats into the stack allocation */

	/* Use the temporary we saved (hack) as we mangled bss_end */
	udata.u_break = udata.u_codebase + true_brk;

	/* Turn off caught signals */
	memset(udata.u_sigvec, 0, sizeof(udata.u_sigvec));

	/* place the arguments, environment and stack at the top of userspace memory. */

	/* Write back the arguments and the environment */
	nargv = wargs(((char *) top - 4), abuf, &argc);
	nenvp = wargs((char *) (nargv), ebuf, NULL);

	/* Fill in udata.u_name with Program invocation name */
	uget((void *) ugetl(nargv, NULL), udata.u_name, 8);
	memcpy(udata.u_ptab->p_name, udata.u_name, 8);

	tmpfree(abuf);
	tmpfree(ebuf);
	/* Done with the file: unlock and release the inode */
	i_unlock_deref(ino);

	/* Shove argc and the address of argv just below envp */
	uputl((uint32_t) nargv, nenvp - 1);
	uputl((uint32_t) argc, nenvp - 2);

	/* Set stack pointer for the program */
	udata.u_isp = nenvp - 2;

	/*
	 * Sort of - it's a good way to deal with all the stupidity of
	 * random 68K platforms we will have to handle, and a nice place
	 * to stuff the signal trampoline 8)
	 */
	install_vdso();

//	kprintf("Go = %p ISP = %p\n", go, udata.u_isp);

	doexec(go);

	/* Failure exits, each label releasing what was acquired before it */
nogood4:
	/* Must not run userspace */
	ssig(udata.u_ptab, SIGKILL);
nogood3:
	tmpfree(abuf);
	tmpfree(ebuf);
nogood2:
nogood:
	i_unlock_deref(ino);
	return (-1);
}