/*===========================================================================*
 *				pm_fork					     *
 *===========================================================================*/
void pm_fork(endpoint_t pproc, endpoint_t cproc, pid_t cpid)
{
/* Handle the file-system side of fork(): the child inherits the parent's
 * open file descriptors, root and working directory.  'pproc' and 'cproc'
 * identify parent and child; VFS uses the same slot numbers as the kernel.
 * Only PM makes this call.
 */
  struct fproc *child, *parent;
  int fd, parent_slot, child_slot;
  mutex_t saved_lock;

  /* Verify the parent endpoint and obtain its fproc slot. */
  okendpt(pproc, &parent_slot);

  /* The child endpoint encodes its slot directly; isokendpt() would reject
   * it because the fproc entry is not yet filled in for the child.
   */
  child_slot = _ENDPOINT_P(cproc);
  if (child_slot < 0 || child_slot >= NR_PROCS) {
	panic("VFS: bogus child for forking: %d", cproc);
  }
  if (fproc[child_slot].fp_pid != PID_FREE) {
	panic("VFS: forking on top of in-use child: %d", child_slot);
  }

  /* Clone the parent's fproc entry wholesale, but preserve the mutex that
   * belongs to the child's slot: locks are per-slot, not per-process.
   */
  saved_lock = fproc[child_slot].fp_lock;
  fproc[child_slot] = fproc[parent_slot];
  fproc[child_slot].fp_lock = saved_lock;

  child = &fproc[child_slot];
  parent = &fproc[parent_slot];

  /* Every filp the child now shares gains one more reference. */
  for (fd = 0; fd < OPEN_MAX; fd++) {
	if (child->fp_filp[fd] == NULL) continue;
	child->fp_filp[fd]->filp_count++;
  }

  /* Record the child's own identity. */
  child->fp_pid = cpid;
  child->fp_endpoint = cproc;

  /* A forking process cannot be blocked on I/O, so neither side may hold an
   * outstanding grant at this point.
   */
  if (GRANT_VALID(parent->fp_grant)) {
	panic("VFS: fork: pp (endpoint %d) has grant %d\n",
		parent->fp_endpoint, parent->fp_grant);
  }
  if (GRANT_VALID(child->fp_grant)) {
	panic("VFS: fork: cp (endpoint %d) has grant %d\n",
		child->fp_endpoint, child->fp_grant);
  }

  /* The child starts with a clean slate: not a session leader, not being
   * revived, etc.
   */
  child->fp_flags = FP_NOFLAGS;

  /* Root and working directory now have one more user each. */
  if (child->fp_rd) dup_vnode(child->fp_rd);
  if (child->fp_wd) dup_vnode(child->fp_wd);
}
/*===========================================================================* * sys_task * *===========================================================================*/ PUBLIC void sys_task() { /* Main entry point of sys_task. Get the message and dispatch on type. */ static message m; register int result; register struct proc *caller_ptr; unsigned int call_nr; int s; /* Initialize the system task. */ initialize(); while (TRUE) { /* Get work. Block and wait until a request message arrives. */ receive(ANY, &m); call_nr = (unsigned) m.m_type - KERNEL_CALL; who_e = m.m_source; okendpt(who_e, &who_p); caller_ptr = proc_addr(who_p); /* See if the caller made a valid request and try to handle it. */ if (! (priv(caller_ptr)->s_call_mask & (1<<call_nr))) { #if DEBUG_ENABLE_IPC_WARNINGS kprintf("SYSTEM: request %d from %d denied.\n", call_nr,m.m_source); #endif result = ECALLDENIED; /* illegal message type */ } else if (call_nr >= NR_SYS_CALLS) { /* check call number */ #if DEBUG_ENABLE_IPC_WARNINGS kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source); #endif result = EBADREQUEST; /* illegal message type */ } else { result = (*call_vec[call_nr])(&m); /* handle the system call */ } /* Send a reply, unless inhibited by a handler function. Use the kernel * function lock_send() to prevent a system call trap. The destination * is known to be blocked waiting for a message. */ if (result != EDONTREPLY) { m.m_type = result; /* report status of call */ if (OK != (s=lock_send(m.m_source, &m))) { kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s); } } } }
/*===========================================================================*
 *				do_vmctl				     *
 *===========================================================================*/
int do_vmctl(struct proc * caller, message * m_ptr)
{
/* Handle the VMCTL kernel call, used by VM to control processes: clear page
 * faults, fetch/answer pending memory requests, manage kernel mappings, and
 * set/clear run-state inhibit bits.  Returns OK, a request type, or an error.
 */
  int proc_nr;
  endpoint_t ep = m_ptr->SVMCTL_WHO;
  struct proc *p, *rp, **rpp, *target;

  if(ep == SELF) { ep = caller->p_endpoint; }

  if(!isokendpt(ep, &proc_nr)) {
	printf("do_vmctl: unexpected endpoint %d from VM\n", ep);
	return EINVAL;
  }

  p = proc_addr(proc_nr);

  switch(m_ptr->SVMCTL_PARAM) {
	case VMCTL_CLEAR_PAGEFAULT:
		assert(RTS_ISSET(p,RTS_PAGEFAULT));
		RTS_UNSET(p, RTS_PAGEFAULT);
		return OK;
	case VMCTL_MEMREQ_GET:
		/* Send VM the information about the memory request.  We can
		 * not simply send the first request on the list, because IPC
		 * filters may forbid VM from getting requests for particular
		 * sources.  However, IPC filters are used only in rare cases.
		 */
		for (rpp = &vmrequest; *rpp != NULL;
		    rpp = &(*rpp)->p_vmrequest.nextrequestor) {
			rp = *rpp;

			assert(RTS_ISSET(rp, RTS_VMREQUEST));

			okendpt(rp->p_vmrequest.target, &proc_nr);
			target = proc_addr(proc_nr);

			/* Check against IPC filters. */
			if (!allow_ipc_filtered_memreq(rp, target))
				continue;

			/* Reply with request fields. */
			if (rp->p_vmrequest.req_type != VMPTYPE_CHECK)
				panic("VMREQUEST wrong type");

			m_ptr->SVMCTL_MRG_TARGET	=
				rp->p_vmrequest.target;
			m_ptr->SVMCTL_MRG_ADDR		=
				rp->p_vmrequest.params.check.start;
			m_ptr->SVMCTL_MRG_LENGTH	=
				rp->p_vmrequest.params.check.length;
			m_ptr->SVMCTL_MRG_FLAG		=
				rp->p_vmrequest.params.check.writeflag;
			m_ptr->SVMCTL_MRG_REQUESTOR	=
				(void *) rp->p_endpoint;

			/* Requestor stays suspended until VM replies. */
			rp->p_vmrequest.vmresult = VMSUSPEND;

			/* Remove from request chain. */
			*rpp = rp->p_vmrequest.nextrequestor;

			return rp->p_vmrequest.req_type;
		}

		return ENOENT;

	case VMCTL_MEMREQ_REPLY:
		assert(RTS_ISSET(p, RTS_VMREQUEST));
		assert(p->p_vmrequest.vmresult == VMSUSPEND);
		okendpt(p->p_vmrequest.target, &proc_nr);
		target = proc_addr(proc_nr);
		p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE;
		assert(p->p_vmrequest.vmresult != VMSUSPEND);

		switch(p->p_vmrequest.type) {
		case VMSTYPE_KERNELCALL:
			/*
			 * we will have to resume execution of the kernel call
			 * as soon the scheduler picks up this process again
			 */
			p->p_misc_flags |= MF_KCALL_RESUME;
			break;
		case VMSTYPE_DELIVERMSG:
			assert(p->p_misc_flags & MF_DELIVERMSG);
			assert(p == target);
			assert(RTS_ISSET(p, RTS_VMREQUEST));
			break;
		case VMSTYPE_MAP:
			assert(RTS_ISSET(p, RTS_VMREQUEST));
			break;
		default:
			panic("strange request type: %d",
				p->p_vmrequest.type);
		}

		RTS_UNSET(p, RTS_VMREQUEST);
		return OK;

	case VMCTL_KERN_PHYSMAP:
	{
		int i = m_ptr->SVMCTL_VALUE;
		return arch_phys_map(i,
			(phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_ADDR,
			(phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_LEN,
			&m_ptr->SVMCTL_MAP_FLAGS);
	}
	case VMCTL_KERN_MAP_REPLY:
	{
		return arch_phys_map_reply(m_ptr->SVMCTL_VALUE,
			(vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR);
	}
	case VMCTL_VMINHIBIT_SET:
		/* check if we must stop a process on a different CPU */
#if CONFIG_SMP
		if (p->p_cpu != cpuid) {
			smp_schedule_vminhibit(p);
		} else
#endif
			RTS_SET(p, RTS_VMINHIBIT);
#if CONFIG_SMP
		p->p_misc_flags |= MF_FLUSH_TLB;
#endif
		return OK;
	case VMCTL_VMINHIBIT_CLEAR:
		assert(RTS_ISSET(p, RTS_VMINHIBIT));
		/*
		 * the processes is certainly not runnable, no need to tell its
		 * cpu
		 */
		RTS_UNSET(p, RTS_VMINHIBIT);
		/* BUGFIX: was "#ifdef CONFIG_SMP", inconsistent with the
		 * "#if CONFIG_SMP" guards above; with CONFIG_SMP defined to 0
		 * the #ifdef form would wrongly compile SMP-only code
		 * (p_stale_tlb, MF_SENDA_VM_MISS) into a uniprocessor build.
		 */
#if CONFIG_SMP
		if (p->p_misc_flags & MF_SENDA_VM_MISS) {
			struct priv *privp;
			p->p_misc_flags &= ~MF_SENDA_VM_MISS;
			privp = priv(p);
			try_deliver_senda(p, (asynmsg_t *) privp->s_asyntab,
					privp->s_asynsize);
		}
		/*
		 * We don't know whether kernel has the changed mapping
		 * installed to access userspace memory. And if so, on what CPU.
		 * More over we don't know what mapping has changed and how and
		 * therefore we must invalidate all mappings we have anywhere.
		 * Next time we map memory, we map it fresh.
		 */
		bits_fill(p->p_stale_tlb, CONFIG_MAX_CPUS);
#endif
		return OK;
	case VMCTL_CLEARMAPCACHE:
		/* VM says: forget about old mappings we have cached. */
		mem_clear_mapcache();
		return OK;
	case VMCTL_BOOTINHIBIT_CLEAR:
		RTS_UNSET(p, RTS_BOOTINHIBIT);
		return OK;
  }

  /* Try architecture-specific vmctls. */
  return arch_do_vmctl(m_ptr, p);
}
/*===========================================================================*
 *				service_pm				     *
 *===========================================================================*/
static void service_pm()
{
/* Dispatch a request from PM (in the global job_m_in, selected by the global
 * job_call_nr), perform it, and send a synchronous reply in m_out — except
 * for per-process requests (EXEC/EXIT/DUMPCORE) which are queued on the
 * target's fproc slot and answered later by a worker thread.
 */
  int r, slot;

  switch (job_call_nr) {
    case PM_SETUID:
	{
		endpoint_t proc_e;
		uid_t euid, ruid;

		/* Unpack endpoint plus effective/real uid from the message. */
		proc_e = job_m_in.PM_PROC;
		euid = job_m_in.PM_EID;
		ruid = job_m_in.PM_RID;

		pm_setuid(proc_e, euid, ruid);

		m_out.m_type = PM_SETUID_REPLY;
		m_out.PM_PROC = proc_e;
	}
	break;

    case PM_SETGID:
	{
		endpoint_t proc_e;
		gid_t egid, rgid;

		/* Same shape as PM_SETUID, but for group ids. */
		proc_e = job_m_in.PM_PROC;
		egid = job_m_in.PM_EID;
		rgid = job_m_in.PM_RID;

		pm_setgid(proc_e, egid, rgid);

		m_out.m_type = PM_SETGID_REPLY;
		m_out.PM_PROC = proc_e;
	}
	break;

    case PM_SETSID:
	{
		endpoint_t proc_e;

		proc_e = job_m_in.PM_PROC;
		pm_setsid(proc_e);

		m_out.m_type = PM_SETSID_REPLY;
		m_out.PM_PROC = proc_e;
	}
	break;

    case PM_EXEC:
    case PM_EXIT:
    case PM_DUMPCORE:
	{
		endpoint_t proc_e = job_m_in.PM_PROC;
		okendpt(proc_e, &slot);
		fp = &fproc[slot];

		if (fp->fp_flags & FP_PENDING) {
			/* This process has a request pending, but PM wants it
			 * gone. Forget about the pending request and satisfy
			 * PM's request instead. Note that a pending request
			 * AND an EXEC request are mutually exclusive. Also, PM
			 * should send only one request/process at a time.
			 */
			assert(fp->fp_job.j_m_in.m_source != PM_PROC_NR);
		}

		/* PM requests on behalf of a proc are handled after the
		 * system call that might be in progress for that proc has
		 * finished. If the proc is not busy, we start a dummy call.
		 * (The trylock probes whether the slot is currently locked by
		 * an active worker; if not, a dummy worker is started so the
		 * PM request gets picked up.)
		 */
		if (!(fp->fp_flags & FP_PENDING) &&
				mutex_trylock(&fp->fp_lock) == 0) {
			mutex_unlock(&fp->fp_lock);
			worker_start(do_dummy);
			fp->fp_flags |= FP_DROP_WORK;
		}

		/* Park PM's message on the slot; a worker replies later. */
		fp->fp_job.j_m_in = job_m_in;
		fp->fp_flags |= FP_PM_PENDING;

		/* No synchronous reply for these requests. */
		return;
	}
    case PM_FORK:
    case PM_SRV_FORK:
	{
		endpoint_t pproc_e, proc_e;
		pid_t child_pid;
		uid_t reuid;
		gid_t regid;

		pproc_e = job_m_in.PM_PPROC;
		proc_e = job_m_in.PM_PROC;
		child_pid = job_m_in.PM_CPID;
		reuid = job_m_in.PM_REUID;
		regid = job_m_in.PM_REGID;

		pm_fork(pproc_e, proc_e, child_pid);
		m_out.m_type = PM_FORK_REPLY;

		if (job_call_nr == PM_SRV_FORK) {
			/* System-service fork: also apply the requested
			 * credentials to the child.
			 */
			m_out.m_type = PM_SRV_FORK_REPLY;
			pm_setuid(proc_e, reuid, reuid);
			pm_setgid(proc_e, regid, regid);
		}

		m_out.PM_PROC = proc_e;
	}
	break;
    case PM_SETGROUPS:
	{
		endpoint_t proc_e;
		int group_no;
		gid_t *group_addr;

		proc_e = job_m_in.PM_PROC;
		group_no = job_m_in.PM_GROUP_NO;
		group_addr = (gid_t *) job_m_in.PM_GROUP_ADDR;

		pm_setgroups(proc_e, group_no, group_addr);

		m_out.m_type = PM_SETGROUPS_REPLY;
		m_out.PM_PROC = proc_e;
	}
	break;

    case PM_UNPAUSE:
	{
		endpoint_t proc_e;

		proc_e = job_m_in.PM_PROC;

		unpause(proc_e);

		m_out.m_type = PM_UNPAUSE_REPLY;
		m_out.PM_PROC = proc_e;
	}
	break;

    case PM_REBOOT:
	pm_reboot();

	/* Reply dummy status to PM for synchronization */
	m_out.m_type = PM_REBOOT_REPLY;

	break;

    default:
	printf("VFS: don't know how to handle PM request %d\n", job_call_nr);

	return;
  }

  /* Synchronous reply back to PM. */
  r = send(PM_PROC_NR, &m_out);
  if (r != OK)
	panic("service_pm: send failed: %d", r);
}
/*===========================================================================*
 *				do_vmctl				     *
 *===========================================================================*/
PUBLIC int do_vmctl(struct proc * caller, message * m_ptr)
{
/* Handle the VMCTL kernel call from VM: clear page faults, hand out and
 * answer pending memory requests, enable paging, manage kernel mappings,
 * and set/clear the VMINHIBIT run-state bit.
 * Returns OK, a request type (for VMCTL_MEMREQ_GET), or an error code.
 */
  int proc_nr;
  endpoint_t ep = m_ptr->SVMCTL_WHO;
  struct proc *p, *rp, *target;
  /* NOTE: removed unused local 'int err;' (was never referenced). */

  if(ep == SELF) { ep = caller->p_endpoint; }

  if(!isokendpt(ep, &proc_nr)) {
	printf("do_vmctl: unexpected endpoint %d from VM\n", ep);
	return EINVAL;
  }

  p = proc_addr(proc_nr);

  switch(m_ptr->SVMCTL_PARAM) {
	case VMCTL_CLEAR_PAGEFAULT:
		assert(RTS_ISSET(p,RTS_PAGEFAULT));
		RTS_UNSET(p, RTS_PAGEFAULT);
		return OK;
	case VMCTL_MEMREQ_GET:
		/* Send VM the information about the memory request. */
		if(!(rp = vmrequest))
			return ESRCH;
		assert(RTS_ISSET(rp, RTS_VMREQUEST));

		okendpt(rp->p_vmrequest.target, &proc_nr);
		target = proc_addr(proc_nr);

		/* Reply with request fields. */
		switch(rp->p_vmrequest.req_type) {
		case VMPTYPE_CHECK:
			m_ptr->SVMCTL_MRG_TARGET	=
				rp->p_vmrequest.target;
			m_ptr->SVMCTL_MRG_ADDR		=
				rp->p_vmrequest.params.check.start;
			m_ptr->SVMCTL_MRG_LENGTH	=
				rp->p_vmrequest.params.check.length;
			m_ptr->SVMCTL_MRG_FLAG		=
				rp->p_vmrequest.params.check.writeflag;
			m_ptr->SVMCTL_MRG_REQUESTOR	=
				(void *) rp->p_endpoint;
			break;
		case VMPTYPE_SMAP:
		case VMPTYPE_SUNMAP:
		case VMPTYPE_COWMAP:
			/* Map-style requests also release the target. */
			assert(RTS_ISSET(target,RTS_VMREQTARGET));
			RTS_UNSET(target, RTS_VMREQTARGET);
			m_ptr->SVMCTL_MRG_TARGET	=
				rp->p_vmrequest.target;
			m_ptr->SVMCTL_MRG_ADDR		=
				rp->p_vmrequest.params.map.vir_d;
			m_ptr->SVMCTL_MRG_EP2		=
				rp->p_vmrequest.params.map.ep_s;
			m_ptr->SVMCTL_MRG_ADDR2		=
				rp->p_vmrequest.params.map.vir_s;
			m_ptr->SVMCTL_MRG_LENGTH	=
				rp->p_vmrequest.params.map.length;
			m_ptr->SVMCTL_MRG_FLAG		=
				rp->p_vmrequest.params.map.writeflag;
			m_ptr->SVMCTL_MRG_REQUESTOR	=
				(void *) rp->p_endpoint;
			break;
		default:
			panic("VMREQUEST wrong type");
		}

		/* Requestor stays suspended until VM replies. */
		rp->p_vmrequest.vmresult = VMSUSPEND;

		/* Remove from request chain. */
		vmrequest = vmrequest->p_vmrequest.nextrequestor;

		return rp->p_vmrequest.req_type;
	case VMCTL_MEMREQ_REPLY:
		assert(RTS_ISSET(p, RTS_VMREQUEST));
		assert(p->p_vmrequest.vmresult == VMSUSPEND);
		okendpt(p->p_vmrequest.target, &proc_nr);
		target = proc_addr(proc_nr);
		p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE;
		assert(p->p_vmrequest.vmresult != VMSUSPEND);

		switch(p->p_vmrequest.type) {
		case VMSTYPE_KERNELCALL:
			/*
			 * we will have to resume execution of the kernel call
			 * as soon the scheduler picks up this process again
			 */
			p->p_misc_flags |= MF_KCALL_RESUME;
			break;
		case VMSTYPE_DELIVERMSG:
			assert(p->p_misc_flags & MF_DELIVERMSG);
			assert(p == target);
			assert(RTS_ISSET(p, RTS_VMREQUEST));
			break;
		case VMSTYPE_MAP:
			assert(RTS_ISSET(p, RTS_VMREQUEST));
			break;
		default:
			panic("strange request type: %d",
				p->p_vmrequest.type);
		}

		RTS_UNSET(p, RTS_VMREQUEST);
		return OK;

	case VMCTL_ENABLE_PAGING:
		if(vm_running)
			panic("do_vmctl: paging already enabled");
		if (arch_enable_paging(caller, m_ptr) != OK)
			panic("do_vmctl: paging enabling failed");
		return OK;

	case VMCTL_KERN_PHYSMAP:
	{
		int i = m_ptr->SVMCTL_VALUE;
		return arch_phys_map(i,
			(phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_ADDR,
			(phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_LEN,
			&m_ptr->SVMCTL_MAP_FLAGS);
	}
	case VMCTL_KERN_MAP_REPLY:
	{
		return arch_phys_map_reply(m_ptr->SVMCTL_VALUE,
			(vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR);
	}
	case VMCTL_VMINHIBIT_SET:
		/* check if we must stop a process on a different CPU */
#if CONFIG_SMP
		if (p->p_cpu != cpuid) {
			smp_schedule_vminhibit(p);
		} else
#endif
			RTS_SET(p, RTS_VMINHIBIT);
#if CONFIG_SMP
		p->p_misc_flags |= MF_FLUSH_TLB;
#endif
		return OK;
	case VMCTL_VMINHIBIT_CLEAR:
		assert(RTS_ISSET(p, RTS_VMINHIBIT));
		/*
		 * the processes is certainly not runnable, no need to tell its
		 * cpu
		 */
		RTS_UNSET(p, RTS_VMINHIBIT);
		return OK;
  }

  /* Try architecture-specific vmctls. */
  return arch_do_vmctl(m_ptr, p);
}
/*===========================================================================* * sys_task * *===========================================================================*/ PUBLIC void sys_task() { /* Main entry point of sys_task. Get the message and dispatch on type. */ static message m; register int result; register struct proc *caller_ptr; int s; int call_nr; int n = 0; /* Initialize the system task. */ initialize(); while (TRUE) { struct proc *restarting; restarting = vmrestart_check(&m); if(!restarting) { int r; /* Get work. Block and wait until a request message arrives. */ if((r=receive(ANY, &m)) != OK) minix_panic("receive() failed", r); } sys_call_code = (unsigned) m.m_type; call_nr = sys_call_code - KERNEL_CALL; who_e = m.m_source; okendpt(who_e, &who_p); caller_ptr = proc_addr(who_p); /* See if the caller made a valid request and try to handle it. */ if (call_nr < 0 || call_nr >= NR_SYS_CALLS) { /* check call number */ kprintf("SYSTEM: illegal request %d from %d.\n", call_nr,m.m_source); result = EBADREQUEST; /* illegal message type */ } else if (!GET_BIT(priv(caller_ptr)->s_k_call_mask, call_nr)) { result = ECALLDENIED; /* illegal message type */ } else { result = (*call_vec[call_nr])(&m); /* handle the system call */ } if(result == VMSUSPEND) { /* Special case: message has to be saved for handling * until VM tells us it's allowed. VM has been notified * and we must wait for its reply to restart the call. */ vmassert(RTS_ISSET(caller_ptr, VMREQUEST)); vmassert(caller_ptr->p_vmrequest.type == VMSTYPE_KERNELCALL); memcpy(&caller_ptr->p_vmrequest.saved.reqmsg, &m, sizeof(m)); } else if (result != EDONTREPLY) { /* Send a reply, unless inhibited by a handler function. * Use the kernel function lock_send() to prevent a system * call trap. 
*/ if(restarting) { vmassert(!RTS_ISSET(restarting, VMREQUEST)); #if 0 vmassert(!RTS_ISSET(restarting, VMREQTARGET)); #endif } m.m_type = result; /* report status of call */ if(WILLRECEIVE(caller_ptr, SYSTEM)) { if (OK != (s=lock_send(m.m_source, &m))) { kprintf("SYSTEM, reply to %d failed: %d\n", m.m_source, s); } } else { kprintf("SYSTEM: not replying to %d; not ready\n", caller_ptr->p_endpoint); } } } }
/*===========================================================================*
 *				pm_exec					     *
 *===========================================================================*/
int pm_exec(endpoint_t proc_e, vir_bytes path, size_t path_len, vir_bytes frame,
	size_t frame_len, vir_bytes *pc, vir_bytes *newsp, int user_exec_flags)
{
/* Perform the execve(name, argv, envp) call.  The user library builds a
 * complete stack image, including pointers, args, environ, etc.  The stack
 * is copied to a buffer inside VFS, and then to the new core image.
 * On success *pc and *newsp receive the entry point and stack pointer of the
 * new image.  Returns OK or a negative error code; all error paths go
 * through the pm_execfinal label (via the FAILCHECK macro) to release locks
 * and references.
 */
  int r, slot;
  vir_bytes vsp;
  struct fproc *rfp;
  int extrabase = 0;
  static char mbuf[ARG_MAX];	/* buffer for stack and zeroes */
  struct vfs_exec_info execi;
  int i;
  static char fullpath[PATH_MAX],
	elf_interpreter[PATH_MAX],
	firstexec[PATH_MAX],
	finalexec[PATH_MAX];
  struct lookup resolve;
  struct fproc *vmfp = &fproc[VM_PROC_NR];
  stackhook_t makestack = NULL;
  struct filp *newfilp = NULL;
  /* NOTE: removed dead debug counter 'static int n; n++;' — it was never
   * read anywhere in this function.
   */

  lock_exec();
  lock_proc(vmfp, 0);

  /* unset execi values are 0. */
  memset(&execi, 0, sizeof(execi));
  execi.vmfd = -1;

  /* passed from exec() libc code */
  execi.userflags = user_exec_flags;
  execi.args.stack_high = kinfo.user_sp;
  execi.args.stack_size = DEFAULT_STACK_LIMIT;

  okendpt(proc_e, &slot);
  rfp = fp = &fproc[slot];

  lookup_init(&resolve, fullpath, PATH_NOFLAGS, &execi.vmp, &execi.vp);
  resolve.l_vmnt_lock = VMNT_READ;
  resolve.l_vnode_lock = VNODE_READ;

  /* Fetch the stack from the user before destroying the old core image. */
  if (frame_len > ARG_MAX)
	FAILCHECK(ENOMEM);	/* stack too big */

  r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf,
		(size_t) frame_len);
  if (r != OK) {	/* can't fetch stack (e.g. bad virtual addr) */
        printf("VFS: pm_exec: sys_datacopy failed\n");
	FAILCHECK(r);
  }

  /* The default is to keep the original user and group IDs */
  execi.args.new_uid = rfp->fp_effuid;
  execi.args.new_gid = rfp->fp_effgid;

  /* Get the exec file name. */
  FAILCHECK(fetch_name(path, path_len, fullpath));
  strlcpy(finalexec, fullpath, PATH_MAX);
  strlcpy(firstexec, fullpath, PATH_MAX);

  /* Get_read_vp will return an opened vn in execi.
   * if necessary it releases the existing vp so we can
   * switch after we find out what's inside the file.
   * It reads the start of the file.
   */
  Get_read_vp(execi, fullpath, 1, 1, &resolve, fp);

  /* If this is a script (i.e. has a #!/interpreter line),
   * retrieve the name of the interpreter and open that
   * executable instead.
   */
  if(is_script(&execi)) {
	/* patch_stack will add interpreter name and
	 * args to stack and retrieve the new binary
	 * name into fullpath.
	 */
	FAILCHECK(fetch_name(path, path_len, fullpath));
	FAILCHECK(patch_stack(execi.vp, mbuf, &frame_len, fullpath));
	strlcpy(finalexec, fullpath, PATH_MAX);
	strlcpy(firstexec, fullpath, PATH_MAX);
	Get_read_vp(execi, fullpath, 1, 0, &resolve, fp);
  }

  /* If this is a dynamically linked executable, retrieve
   * the name of that interpreter in elf_interpreter and open that
   * executable instead. But open the current executable in an
   * fd for the current process.
   */
  if(elf_has_interpreter(execi.args.hdr, execi.args.hdr_len,
	elf_interpreter, sizeof(elf_interpreter))) {
	/* Switch the executable vnode to the interpreter */
	execi.is_dyn = 1;

	/* The interpreter (loader) needs an fd to the main program,
	 * which is currently in finalexec
	 */
	if((r = execi.elf_main_fd =
	    common_open(finalexec, O_RDONLY, 0)) < 0) {
		printf("VFS: exec: dynamic: open main exec failed %s (%d)\n",
			fullpath, r);
		FAILCHECK(r);
	}

	/* ld.so is linked at 0, but it can relocate itself; we
	 * want it higher to trap NULL pointer dereferences.
	 */
	execi.args.load_offset = 0x10000;

	/* Remember it */
	strlcpy(execi.execname, finalexec, PATH_MAX);

	/* The executable we need to execute first (loader)
	 * is in elf_interpreter, and has to be in fullpath to
	 * be looked up
	 */
	strlcpy(fullpath, elf_interpreter, PATH_MAX);
	strlcpy(firstexec, elf_interpreter, PATH_MAX);
	Get_read_vp(execi, fullpath, 0, 0, &resolve, fp);
  }

  /* We also want an FD for VM to mmap() the process in if possible. */
  {
	struct vnode *vp = execi.vp;
	assert(vp);
	if(vp->v_vmnt->m_haspeek && major(vp->v_dev) != MEMORY_MAJOR) {
		int newfd = -1;
		if(get_fd(vmfp, 0, R_BIT, &newfd, &newfilp) == OK) {
			assert(newfd >= 0 && newfd < OPEN_MAX);
			assert(!vmfp->fp_filp[newfd]);
			newfilp->filp_count = 1;
			newfilp->filp_vno = vp;
			newfilp->filp_flags = O_RDONLY;
			FD_SET(newfd, &vmfp->fp_filp_inuse);
			vmfp->fp_filp[newfd] = newfilp;
			/* dup_vnode(vp); */
			execi.vmfd = newfd;
			execi.args.memmap = vfs_memmap;
		}
	}
  }

  /* callback functions and data */
  execi.args.copymem = read_seg;
  execi.args.clearproc = libexec_clearproc_vm_procctl;
  execi.args.clearmem = libexec_clear_sys_memset;
  execi.args.allocmem_prealloc_cleared = libexec_alloc_mmap_prealloc_cleared;
  execi.args.allocmem_prealloc_junk = libexec_alloc_mmap_prealloc_junk;
  execi.args.allocmem_ondemand = libexec_alloc_mmap_ondemand;
  execi.args.opaque = &execi;

  execi.args.proc_e = proc_e;
  execi.args.frame_len = frame_len;
  execi.args.filesize = execi.vp->v_size;

  for (i = 0; exec_loaders[i].load_object != NULL; i++) {
      r = (*exec_loaders[i].load_object)(&execi.args);
      /* Loaded successfully, so no need to try other loaders */
      if (r == OK) { makestack = exec_loaders[i].setup_stack; break; }
  }

  FAILCHECK(r);

  /* Inform PM */
  FAILCHECK(libexec_pm_newexec(proc_e, &execi.args));

  /* Save off PC */
  *pc = execi.args.pc;

  /* call a stack-setup function if this executable type wants it */
  vsp = execi.args.stack_high - frame_len;
  if(makestack) FAILCHECK(makestack(&execi, mbuf, &frame_len, &vsp, &extrabase));

  /* Patch up stack and copy it from VFS to new core image. */
  libexec_patch_ptr(mbuf, vsp + extrabase);
  FAILCHECK(sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp,
		   (phys_bytes)frame_len));

  /* Return new stack pointer to caller */
  *newsp = vsp;

  clo_exec(rfp);

  if (execi.args.allow_setuid) {
	/* If after loading the image we're still allowed to run with
	 * setuid or setgid, change credentials now */
	rfp->fp_effuid = execi.args.new_uid;
	rfp->fp_effgid = execi.args.new_gid;
  }

  /* Remember the new name of the process */
  strlcpy(rfp->fp_name, execi.args.progname, PROC_NAME_LEN);

pm_execfinal:
  /* Common exit: release the filp or vnode reference we still hold. */
  if(newfilp) unlock_filp(newfilp);
  else if (execi.vp != NULL) {
	unlock_vnode(execi.vp);
	put_vnode(execi.vp);
  }

  /* Close VM's fd if it was opened but never handed to VM. */
  if(execi.vmfd >= 0 && !execi.vmfd_used) {
	if(OK != close_fd(vmfp, execi.vmfd)) {
		printf("VFS: unexpected close fail of vm fd\n");
	}
  }

  unlock_proc(vmfp);
  unlock_exec();

  return(r);
}
/*===========================================================================*
 *				do_vumap				     *
 *===========================================================================*/
int do_vumap(struct proc *caller, message *m_ptr)
{
/* Map a vector of grants or local virtual addresses to physical addresses.
 * Designed to be used by drivers to perform an efficient lookup of physical
 * addresses for the purpose of direct DMA from/to a remote process.
 */
  endpoint_t endpt, source, granter;
  struct proc *procp;
  struct vumap_vir vvec[MAPVEC_NR];	/* input: virtual ranges/grants */
  struct vumap_phys pvec[MAPVEC_NR];	/* output: physical ranges */
  vir_bytes vaddr, paddr, vir_addr;
  phys_bytes phys_addr;
  int i, r, proc_nr, vcount, pcount, pmax, access;
  size_t size, chunk, offset;

  endpt = caller->p_endpoint;

  /* Retrieve and check input parameters. */
  source = m_ptr->VUMAP_ENDPT;
  vaddr = (vir_bytes) m_ptr->VUMAP_VADDR;
  vcount = m_ptr->VUMAP_VCOUNT;
  offset = m_ptr->VUMAP_OFFSET;	/* applies to the first input entry only */
  access = m_ptr->VUMAP_ACCESS;
  paddr = (vir_bytes) m_ptr->VUMAP_PADDR;
  pmax = m_ptr->VUMAP_PMAX;

  if (vcount <= 0 || pmax <= 0)
	return EINVAL;

  /* Silently clamp both vector sizes to the kernel's buffer capacity. */
  if (vcount > MAPVEC_NR) vcount = MAPVEC_NR;
  if (pmax > MAPVEC_NR) pmax = MAPVEC_NR;

  /* Convert access to safecopy access flags. */
  switch (access) {
  case VUA_READ:		access = CPF_READ; break;
  case VUA_WRITE:		access = CPF_WRITE; break;
  case VUA_READ|VUA_WRITE:	access = CPF_READ|CPF_WRITE; break;
  default:			return EINVAL;
  }

  /* Copy in the vector of virtual addresses. */
  size = vcount * sizeof(vvec[0]);

  if (data_copy(endpt, vaddr, KERNEL, (vir_bytes) vvec, size) != OK)
	return EFAULT;

  pcount = 0;

  /* Go through the input entries, one at a time. Stop early in case the output
   * vector has filled up.
   */
  for (i = 0; i < vcount && pcount < pmax; i++) {
	size = vvec[i].vv_size;
	if (size <= offset)
		return EINVAL;	/* offset must leave something to map */
	size -= offset;

	if (source != SELF) {
		/* Grant-based entry: validate the grant and translate it to
		 * the granter's virtual address.
		 */
		r = verify_grant(source, endpt, vvec[i].vv_grant, size,
		    access, offset, &vir_addr, &granter);
		if (r != OK)
			return r;
	} else {
		/* Direct entry: the caller's own virtual address. */
		vir_addr = vvec[i].vv_addr + offset;
		granter = endpt;
	}

	okendpt(granter, &proc_nr);
	procp = proc_addr(proc_nr);

	/* Each virtual range is made up of one or more physical ranges. */
	while (size > 0 && pcount < pmax) {
		chunk = vm_lookup_range(procp, vir_addr, &phys_addr, size);

		if (!chunk) {
			/* Try to get the memory allocated, unless the memory
			 * is supposed to be there to be read from.
			 */
			if (access & CPF_READ)
				return EFAULT;

			/* This call may suspend the current call, or return an
			 * error for a previous invocation.
			 */
			return vm_check_range(caller, procp, vir_addr, size);
		}

		pvec[pcount].vp_addr = phys_addr;
		pvec[pcount].vp_size = chunk;
		pcount++;

		vir_addr += chunk;
		size -= chunk;
	}

	/* The caller-supplied offset is consumed by the first entry. */
	offset = 0;
  }

  /* Copy out the resulting vector of physical addresses. */
  assert(pcount > 0);

  size = pcount * sizeof(pvec[0]);

  r = data_copy_vmcheck(caller, KERNEL, (vir_bytes) pvec, endpt, paddr,
    size);

  if (r == OK)
	m_ptr->VUMAP_PCOUNT = pcount;

  return r;
}
/*===========================================================================*
 *				pm_exec					     *
 *===========================================================================*/
PUBLIC int pm_exec(int proc_e, char *path, vir_bytes path_len, char *frame,
	vir_bytes frame_len, vir_bytes *pc)
{
/* Perform the execve(name, argv, envp) call.  The user library builds a
 * complete stack image, including pointers, args, environ, etc.  The stack
 * is copied to a buffer inside VFS, and then to the new core image.
 * On success, *pc receives the entry point of the new image.  Round 0 tries
 * the named file directly; if it is a '#!' script, round 1 retries with the
 * interpreter named on its first line.
 */
  int r, r1, round, proc_s;
  vir_bytes vsp;
  struct fproc *rfp;
  struct vnode *vp;
  char *cp;
  static char mbuf[ARG_MAX];	/* buffer for stack and zeroes */
  struct exec_info execi;
  int i;

  okendpt(proc_e, &proc_s);
  rfp = fp = &fproc[proc_s];
  who_e = proc_e;
  who_p = proc_s;
  super_user = (fp->fp_effuid == SU_UID ? TRUE : FALSE);   /* su? */

  /* Get the exec file name. */
  if ((r = fetch_name(path, path_len, 0)) != OK)
	return(r);	/* file name not in user data segment */

  /* Fetch the stack from the user before destroying the old core image. */
  if (frame_len > ARG_MAX) {
	printf("VFS: pm_exec: stack too big\n");
	return(ENOMEM);	/* stack too big */
  }

  r = sys_datacopy(proc_e, (vir_bytes) frame, SELF, (vir_bytes) mbuf,
		   (phys_bytes) frame_len);
  if (r != OK) {	/* can't fetch stack (e.g. bad virtual addr) */
	printf("pm_exec: sys_datacopy failed\n");
	return(r);
  }

  /* The default is to keep the original user and group IDs */
  execi.new_uid = rfp->fp_effuid;
  execi.new_gid = rfp->fp_effgid;

  for (round= 0; round < 2; round++) {
	/* round = 0 (first attempt), or 1 (interpreted script) */

	/* Save the name of the program */
	(cp= strrchr(user_fullpath, '/')) ? cp++ : (cp= user_fullpath);

	strncpy(execi.progname, cp, PROC_NAME_LEN-1);
	execi.progname[PROC_NAME_LEN-1] = '\0';
	execi.setugid = 0;

	/* Open executable */
	if ((vp = eat_path(PATH_NOFLAGS, fp)) == NULL)
		return(err_code);
	execi.vp = vp;

	/* Must be a regular, executable, stat-able file. */
	if ((vp->v_mode & I_TYPE) != I_REGULAR)
		r = ENOEXEC;
	else if ((r1 = forbidden(vp, X_BIT)) != OK)
		r = r1;
	else
		r = req_stat(vp->v_fs_e, vp->v_inode_nr, VFS_PROC_NR,
			     (char *) &(execi.sb), 0, 0);
	if (r != OK) {
	    put_vnode(vp);
	    return(r);
	}

	if (round == 0) {
	    /* Deal with setuid/setgid executables.  Only the file named by
	     * the user may confer privileges, never the interpreter.
	     */
	    if (vp->v_mode & I_SET_UID_BIT) {
		execi.new_uid = vp->v_uid;
		execi.setugid = 1;
	    }
	    if (vp->v_mode & I_SET_GID_BIT) {
		execi.new_gid = vp->v_gid;
		execi.setugid = 1;
	    }
	}

	r = map_header(&execi.hdr, execi.vp);
	if (r != OK) {
	    put_vnode(vp);
	    return(r);
	}

	if (!is_script(execi.hdr, execi.vp->v_size) || round != 0)
		break;

	/* Get fresh copy of the file name, then splice the interpreter and
	 * its arguments into the stack image.
	 */
	if ((r = fetch_name(path, path_len, 0)) != OK) 
		printf("VFS pm_exec: 2nd fetch_name failed\n");
	else if ((r = patch_stack(vp, mbuf, &frame_len)) != OK) 
		printf("VFS pm_exec: patch_stack failed\n");
	put_vnode(vp);
	if (r != OK) return(r);
  }

  execi.proc_e = proc_e;
  execi.frame_len = frame_len;

  for(i = 0; exec_loaders[i].load_object != NULL; i++) {
      r = (*exec_loaders[i].load_object)(&execi);
      /* Loaded successfully, so no need to try other loaders */
      if (r == OK) break;
  }

  put_vnode(vp);

  /* No exec loader could load the object */
  if (r != OK) {
	return(ENOEXEC);
  }

  /* Save off PC */
  *pc = execi.pc;

  /* Patch up stack and copy it from VFS to new core image. */
  vsp = execi.stack_top;
  vsp -= frame_len;
  patch_ptr(mbuf, vsp);
  if ((r = sys_datacopy(SELF, (vir_bytes) mbuf, proc_e, (vir_bytes) vsp,
		   (phys_bytes)frame_len)) != OK) {
	printf("VFS: datacopy failed (%d) trying to copy to %lu\n", r, vsp);
	return(r);
  }
  /* BUGFIX: removed a duplicate dead check "if (r != OK) return(r);" here —
   * the block above already returned on any r != OK, so r is provably OK.
   */

  clo_exec(rfp);

  if (execi.setugid) {
	/* If after loading the image we're still allowed to run with
	 * setuid or setgid, change the credentials now */
	rfp->fp_effuid = execi.new_uid;
	rfp->fp_effgid = execi.new_gid;
  }

  /* This child has now exec()ced. */
  rfp->fp_execced = 1;

  return(OK);
}