void smp_sched_handler(void) { unsigned flgs; unsigned cpu = cpuid; flgs = sched_ipi_data[cpu].flags; if (flgs) { struct proc * p; p = (struct proc *)sched_ipi_data[cpu].data; if (flgs & SCHED_IPI_STOP_PROC) { RTS_SET(p, RTS_PROC_STOP); } if (flgs & SCHED_IPI_SAVE_CTX) { /* all context has been saved already, FPU remains */ if (proc_used_fpu(p) && get_cpulocal_var(fpu_owner) == p) { disable_fpu_exception(); save_local_fpu(p, FALSE /*retain*/); /* we're preparing to migrate somewhere else */ release_fpu(p); } } if (flgs & SCHED_IPI_VM_INHIBIT) { RTS_SET(p, RTS_VMINHIBIT); } } __insn_barrier(); sched_ipi_data[cpu].flags = 0; }
void smp_schedule_vminhibit(struct proc * p) { if (proc_is_runnable(p)) smp_schedule_sync(p, SCHED_IPI_VM_INHIBIT); else RTS_SET(p, RTS_VMINHIBIT); assert(RTS_ISSET(p, RTS_VMINHIBIT)); }
void smp_schedule_stop_proc(struct proc * p) { if (proc_is_runnable(p)) smp_schedule_sync(p, SCHED_IPI_STOP_PROC); else RTS_SET(p, RTS_PROC_STOP); assert(RTS_ISSET(p, RTS_PROC_STOP)); }
/*===========================================================================* * do_runctl * *===========================================================================*/ int do_runctl(struct proc * caller, message * m_ptr) { /* Control a process's RTS_PROC_STOP flag. Used for process management. * If the process is queued sending a message or stopped for system call * tracing, and the RC_DELAY request flag is given, set MF_SIG_DELAY instead * of RTS_PROC_STOP, and send a SIGSNDELAY signal later when the process is done * sending (ending the delay). Used by PM for safe signal delivery. */ int proc_nr, action, flags; register struct proc *rp; /* Extract the message parameters and do sanity checking. */ if (!isokendpt(m_ptr->RC_ENDPT, &proc_nr)) return(EINVAL); if (iskerneln(proc_nr)) return(EPERM); rp = proc_addr(proc_nr); action = m_ptr->RC_ACTION; flags = m_ptr->RC_FLAGS; /* Is the target sending or syscall-traced? Then set MF_SIG_DELAY instead. * Do this only when the RC_DELAY flag is set in the request flags field. * The process will not become runnable before PM has called SYS_ENDKSIG. * Note that asynchronous messages are not covered: a process using SENDA * should not also install signal handlers *and* expect POSIX compliance. */ if (action == RC_STOP && (flags & RC_DELAY)) { if (RTS_ISSET(rp, RTS_SENDING) || (rp->p_misc_flags & MF_SC_DEFER)) rp->p_misc_flags |= MF_SIG_DELAY; if (rp->p_misc_flags & MF_SIG_DELAY) return (EBUSY); } /* Either set or clear the stop flag. */ switch (action) { case RC_STOP: #if CONFIG_SMP /* check if we must stop a process on a different CPU */ if (rp->p_cpu != cpuid) { smp_schedule_stop_proc(rp); break; } #endif RTS_SET(rp, RTS_PROC_STOP); break; case RC_RESUME: assert(RTS_ISSET(rp, RTS_PROC_STOP)); RTS_UNSET(rp, RTS_PROC_STOP); break; default: return(EINVAL); } return(OK); }
/* * This function gets always called only after smp_sched_handler() has been * already called. It only serves the purpose of acknowledging the IPI and * preempting the current process if the CPU was not idle. */ void smp_ipi_sched_handler(void) { struct proc * curr; ipi_ack(); curr = get_cpulocal_var(proc_ptr); if (curr->p_endpoint != IDLE) { RTS_SET(curr, RTS_PREEMPTED); } }
/*===========================================================================* * do_vmctl * *===========================================================================*/ int do_vmctl(struct proc * caller, message * m_ptr) { int proc_nr; endpoint_t ep = m_ptr->SVMCTL_WHO; struct proc *p, *rp, **rpp, *target; if(ep == SELF) { ep = caller->p_endpoint; } if(!isokendpt(ep, &proc_nr)) { printf("do_vmctl: unexpected endpoint %d from VM\n", ep); return EINVAL; } p = proc_addr(proc_nr); switch(m_ptr->SVMCTL_PARAM) { case VMCTL_CLEAR_PAGEFAULT: assert(RTS_ISSET(p,RTS_PAGEFAULT)); RTS_UNSET(p, RTS_PAGEFAULT); return OK; case VMCTL_MEMREQ_GET: /* Send VM the information about the memory request. We can * not simply send the first request on the list, because IPC * filters may forbid VM from getting requests for particular * sources. However, IPC filters are used only in rare cases. */ for (rpp = &vmrequest; *rpp != NULL; rpp = &(*rpp)->p_vmrequest.nextrequestor) { rp = *rpp; assert(RTS_ISSET(rp, RTS_VMREQUEST)); okendpt(rp->p_vmrequest.target, &proc_nr); target = proc_addr(proc_nr); /* Check against IPC filters. */ if (!allow_ipc_filtered_memreq(rp, target)) continue; /* Reply with request fields. */ if (rp->p_vmrequest.req_type != VMPTYPE_CHECK) panic("VMREQUEST wrong type"); m_ptr->SVMCTL_MRG_TARGET = rp->p_vmrequest.target; m_ptr->SVMCTL_MRG_ADDR = rp->p_vmrequest.params.check.start; m_ptr->SVMCTL_MRG_LENGTH = rp->p_vmrequest.params.check.length; m_ptr->SVMCTL_MRG_FLAG = rp->p_vmrequest.params.check.writeflag; m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint; rp->p_vmrequest.vmresult = VMSUSPEND; /* Remove from request chain. */ *rpp = rp->p_vmrequest.nextrequestor; return rp->p_vmrequest.req_type; } return ENOENT; case VMCTL_MEMREQ_REPLY: assert(RTS_ISSET(p, RTS_VMREQUEST)); assert(p->p_vmrequest.vmresult == VMSUSPEND); okendpt(p->p_vmrequest.target, &proc_nr); target = proc_addr(proc_nr); p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; assert(p->p_vmrequest.vmresult != VMSUSPEND); switch(p->p_vmrequest.type) { case VMSTYPE_KERNELCALL: /* * we will have to resume execution of the kernel call * as soon the scheduler picks up this process again */ p->p_misc_flags |= MF_KCALL_RESUME; break; case VMSTYPE_DELIVERMSG: assert(p->p_misc_flags & MF_DELIVERMSG); assert(p == target); assert(RTS_ISSET(p, RTS_VMREQUEST)); break; case VMSTYPE_MAP: assert(RTS_ISSET(p, RTS_VMREQUEST)); break; default: panic("strange request type: %d",p->p_vmrequest.type); } RTS_UNSET(p, RTS_VMREQUEST); return OK; case VMCTL_KERN_PHYSMAP: { int i = m_ptr->SVMCTL_VALUE; return arch_phys_map(i, (phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_ADDR, (phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_LEN, &m_ptr->SVMCTL_MAP_FLAGS); } case VMCTL_KERN_MAP_REPLY: { return arch_phys_map_reply(m_ptr->SVMCTL_VALUE, (vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR); } case VMCTL_VMINHIBIT_SET: /* check if we must stop a process on a different CPU */ #if CONFIG_SMP if (p->p_cpu != cpuid) { smp_schedule_vminhibit(p); } else #endif RTS_SET(p, RTS_VMINHIBIT); #if CONFIG_SMP p->p_misc_flags |= MF_FLUSH_TLB; #endif return OK; case VMCTL_VMINHIBIT_CLEAR: assert(RTS_ISSET(p, RTS_VMINHIBIT)); /* * the processes is certainly not runnable, no need to tell its * cpu */ RTS_UNSET(p, RTS_VMINHIBIT); #ifdef CONFIG_SMP if (p->p_misc_flags & MF_SENDA_VM_MISS) { struct priv *privp; p->p_misc_flags &= ~MF_SENDA_VM_MISS; privp = priv(p); try_deliver_senda(p, (asynmsg_t *) privp->s_asyntab, privp->s_asynsize); } /* * We don't know whether kernel has the changed mapping * installed to access userspace memory. And if so, on what CPU. * More over we don't know what mapping has changed and how and * therefore we must invalidate all mappings we have anywhere. * Next time we map memory, we map it fresh. */ bits_fill(p->p_stale_tlb, CONFIG_MAX_CPUS); #endif return OK; case VMCTL_CLEARMAPCACHE: /* VM says: forget about old mappings we have cached. */ mem_clear_mapcache(); return OK; case VMCTL_BOOTINHIBIT_CLEAR: RTS_UNSET(p, RTS_BOOTINHIBIT); return OK; } /* Try architecture-specific vmctls. */ return arch_do_vmctl(m_ptr, p); }
/*===========================================================================* * do_fork * *===========================================================================*/ int do_fork(struct proc * caller, message * m_ptr) { /* Handle sys_fork(). * m_lsys_krn_sys_fork.endpt has forked. * The child is m_lsys_krn_sys_fork.slot. */ #if defined(__i386__) char *old_fpu_save_area_p; #endif register struct proc *rpc; /* child process pointer */ struct proc *rpp; /* parent process pointer */ int gen; int p_proc; int namelen; if(!isokendpt(m_ptr->m_lsys_krn_sys_fork.endpt, &p_proc)) return EINVAL; rpp = proc_addr(p_proc); rpc = proc_addr(m_ptr->m_lsys_krn_sys_fork.slot); if (isemptyp(rpp) || ! isemptyp(rpc)) return(EINVAL); assert(!(rpp->p_misc_flags & MF_DELIVERMSG)); /* needs to be receiving so we know where the message buffer is */ if(!RTS_ISSET(rpp, RTS_RECEIVING)) { printf("kernel: fork not done synchronously?\n"); return EINVAL; } /* make sure that the FPU context is saved in parent before copy */ save_fpu(rpp); /* Copy parent 'proc' struct to child. And reinitialize some fields. */ gen = _ENDPOINT_G(rpc->p_endpoint); #if defined(__i386__) old_fpu_save_area_p = rpc->p_seg.fpu_state; #endif *rpc = *rpp; /* copy 'proc' struct */ #if defined(__i386__) rpc->p_seg.fpu_state = old_fpu_save_area_p; if(proc_used_fpu(rpp)) memcpy(rpc->p_seg.fpu_state, rpp->p_seg.fpu_state, FPU_XFP_SIZE); #endif if(++gen >= _ENDPOINT_MAX_GENERATION) /* increase generation */ gen = 1; /* generation number wraparound */ rpc->p_nr = m_ptr->m_lsys_krn_sys_fork.slot; /* this was obliterated by copy */ rpc->p_endpoint = _ENDPOINT(gen, rpc->p_nr); /* new endpoint of slot */ rpc->p_reg.retreg = 0; /* child sees pid = 0 to know it is child */ rpc->p_user_time = 0; /* set all the accounting times to 0 */ rpc->p_sys_time = 0; rpc->p_misc_flags &= ~(MF_VIRT_TIMER | MF_PROF_TIMER | MF_SC_TRACE | MF_SPROF_SEEN | MF_STEP); rpc->p_virt_left = 0; /* disable, clear the process-virtual timers */ rpc->p_prof_left = 0; /* Mark process name as being a forked copy */ namelen = strlen(rpc->p_name); #define FORKSTR "*F" if(namelen+strlen(FORKSTR) < sizeof(rpc->p_name)) strcat(rpc->p_name, FORKSTR); /* the child process is not runnable until it's scheduled. */ RTS_SET(rpc, RTS_NO_QUANTUM); reset_proc_accounting(rpc); rpc->p_cpu_time_left = 0; rpc->p_cycles = 0; rpc->p_kcall_cycles = 0; rpc->p_kipc_cycles = 0; rpc->p_signal_received = 0; /* If the parent is a privileged process, take away the privileges from the * child process and inhibit it from running by setting the NO_PRIV flag. * The caller should explicitly set the new privileges before executing. */ if (priv(rpp)->s_flags & SYS_PROC) { rpc->p_priv = priv_addr(USER_PRIV_ID); rpc->p_rts_flags |= RTS_NO_PRIV; } /* Calculate endpoint identifier, so caller knows what it is. */ m_ptr->m_krn_lsys_sys_fork.endpt = rpc->p_endpoint; m_ptr->m_krn_lsys_sys_fork.msgaddr = rpp->p_delivermsg_vir; /* Don't schedule process in VM mode until it has a new pagetable. */ if(m_ptr->m_lsys_krn_sys_fork.flags & PFF_VMINHIBIT) { RTS_SET(rpc, RTS_VMINHIBIT); } /* * Only one in group should have RTS_SIGNALED, child doesn't inherit tracing. */ RTS_UNSET(rpc, (RTS_SIGNALED | RTS_SIG_PENDING | RTS_P_STOP)); (void) sigemptyset(&rpc->p_pending); #if defined(__i386__) rpc->p_seg.p_cr3 = 0; rpc->p_seg.p_cr3_v = NULL; #elif defined(__arm__) rpc->p_seg.p_ttbr = 0; rpc->p_seg.p_ttbr_v = NULL; #endif return OK; }
/*===========================================================================* * do_privctl * *===========================================================================*/ PUBLIC int do_privctl(struct proc * caller, message * m_ptr) { /* Handle sys_privctl(). Update a process' privileges. If the process is not * yet a system process, make sure it gets its own privilege structure. */ struct proc *rp; proc_nr_t proc_nr; sys_id_t priv_id; int ipc_to_m, kcalls; int i, r; struct io_range io_range; struct mem_range mem_range; struct priv priv; int irq; /* Check whether caller is allowed to make this call. Privileged proceses * can only update the privileges of processes that are inhibited from * running by the RTS_NO_PRIV flag. This flag is set when a privileged process * forks. */ if (! (priv(caller)->s_flags & SYS_PROC)) return(EPERM); if(m_ptr->CTL_ENDPT == SELF) proc_nr = _ENDPOINT_P(caller->p_endpoint); else if(!isokendpt(m_ptr->CTL_ENDPT, &proc_nr)) return(EINVAL); rp = proc_addr(proc_nr); switch(m_ptr->CTL_REQUEST) { case SYS_PRIV_ALLOW: /* Allow process to run. Make sure its privilege structure has already * been set. */ if (!RTS_ISSET(rp, RTS_NO_PRIV) || priv(rp)->s_proc_nr == NONE) { return(EPERM); } RTS_UNSET(rp, RTS_NO_PRIV); return(OK); case SYS_PRIV_YIELD: /* Allow process to run and suspend the caller. */ if (!RTS_ISSET(rp, RTS_NO_PRIV) || priv(rp)->s_proc_nr == NONE) { return(EPERM); } RTS_SET(caller, RTS_NO_PRIV); RTS_UNSET(rp, RTS_NO_PRIV); return(OK); case SYS_PRIV_DISALLOW: /* Disallow process from running. */ if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); RTS_SET(rp, RTS_NO_PRIV); return(OK); case SYS_PRIV_SET_SYS: /* Set a privilege structure of a blocked system process. */ if (! RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); /* Check whether a static or dynamic privilege id must be allocated. */ priv_id = NULL_PRIV_ID; if (m_ptr->CTL_ARG_PTR) { /* Copy privilege structure from caller */ if((r=data_copy(caller->p_endpoint, (vir_bytes) m_ptr->CTL_ARG_PTR, KERNEL, (vir_bytes) &priv, sizeof(priv))) != OK) return r; /* See if the caller wants to assign a static privilege id. */ if(!(priv.s_flags & DYN_PRIV_ID)) { priv_id = priv.s_id; } } /* Make sure this process has its own privileges structure. This may * fail, since there are only a limited number of system processes. * Then copy privileges from the caller and restore some defaults. */ if ((i=get_priv(rp, priv_id)) != OK) { printf("do_privctl: unable to allocate priv_id %d: %d\n", priv_id, i); return(i); } priv_id = priv(rp)->s_id; /* backup privilege id */ *priv(rp) = *priv(caller); /* copy from caller */ priv(rp)->s_id = priv_id; /* restore privilege id */ priv(rp)->s_proc_nr = proc_nr; /* reassociate process nr */ for (i=0; i< NR_SYS_CHUNKS; i++) /* remove pending: */ priv(rp)->s_notify_pending.chunk[i] = 0; /* - notifications */ priv(rp)->s_int_pending = 0; /* - interrupts */ (void) sigemptyset(&priv(rp)->s_sig_pending); /* - signals */ reset_timer(&priv(rp)->s_alarm_timer); /* - alarm */ priv(rp)->s_asyntab= -1; /* - asynsends */ priv(rp)->s_asynsize= 0; /* Set defaults for privilege bitmaps. */ priv(rp)->s_flags= DSRV_F; /* privilege flags */ priv(rp)->s_trap_mask= DSRV_T; /* allowed traps */ ipc_to_m = DSRV_M; /* allowed targets */ fill_sendto_mask(rp, ipc_to_m); kcalls = DSRV_KC; /* allowed kernel calls */ for(i = 0; i < SYS_CALL_MASK_SIZE; i++) { priv(rp)->s_k_call_mask[i] = (kcalls == NO_C ? 0 : (~0)); } /* Set the default signal managers. */ priv(rp)->s_sig_mgr = DSRV_SM; priv(rp)->s_bak_sig_mgr = NONE; /* Set defaults for resources: no I/O resources, no memory resources, * no IRQs, no grant table */ priv(rp)->s_nr_io_range= 0; priv(rp)->s_nr_mem_range= 0; priv(rp)->s_nr_irq= 0; priv(rp)->s_grant_table= 0; priv(rp)->s_grant_entries= 0; /* Override defaults if the caller has supplied a privilege structure. */ if (m_ptr->CTL_ARG_PTR) { if((r = update_priv(rp, &priv)) != OK) { return r; } } return(OK); case SYS_PRIV_SET_USER: /* Set a privilege structure of a blocked user process. */ if (!RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); /* Link the process to the privilege structure of the root user * process all the user processes share. */ priv(rp) = priv_addr(USER_PRIV_ID); return(OK); case SYS_PRIV_ADD_IO: if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); /* Only system processes get I/O resources? */ if (!(priv(rp)->s_flags & SYS_PROC)) return EPERM; #if 0 /* XXX -- do we need a call for this? */ if (strcmp(rp->p_name, "fxp") == 0 || strcmp(rp->p_name, "rtl8139") == 0) { printf("setting ipc_stats_target to %d\n", rp->p_endpoint); ipc_stats_target= rp->p_endpoint; } #endif /* Get the I/O range */ data_copy(caller->p_endpoint, (vir_bytes) m_ptr->CTL_ARG_PTR, KERNEL, (vir_bytes) &io_range, sizeof(io_range)); priv(rp)->s_flags |= CHECK_IO_PORT; /* Check I/O accesses */ i= priv(rp)->s_nr_io_range; if (i >= NR_IO_RANGE) { printf("do_privctl: %d already has %d i/o ranges.\n", rp->p_endpoint, i); return ENOMEM; } priv(rp)->s_io_tab[i].ior_base= io_range.ior_base; priv(rp)->s_io_tab[i].ior_limit= io_range.ior_limit; priv(rp)->s_nr_io_range++; return OK; case SYS_PRIV_ADD_MEM: if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); /* Only system processes get memory resources? */ if (!(priv(rp)->s_flags & SYS_PROC)) return EPERM; /* Get the memory range */ if((r=data_copy(caller->p_endpoint, (vir_bytes) m_ptr->CTL_ARG_PTR, KERNEL, (vir_bytes) &mem_range, sizeof(mem_range))) != OK) return r; priv(rp)->s_flags |= CHECK_MEM; /* Check memory mappings */ i= priv(rp)->s_nr_mem_range; if (i >= NR_MEM_RANGE) { printf("do_privctl: %d already has %d mem ranges.\n", rp->p_endpoint, i); return ENOMEM; } priv(rp)->s_mem_tab[i].mr_base= mem_range.mr_base; priv(rp)->s_mem_tab[i].mr_limit= mem_range.mr_limit; priv(rp)->s_nr_mem_range++; return OK; case SYS_PRIV_ADD_IRQ: if (RTS_ISSET(rp, RTS_NO_PRIV)) return(EPERM); /* Only system processes get IRQs? */ if (!(priv(rp)->s_flags & SYS_PROC)) return EPERM; data_copy(caller->p_endpoint, (vir_bytes) m_ptr->CTL_ARG_PTR, KERNEL, (vir_bytes) &irq, sizeof(irq)); priv(rp)->s_flags |= CHECK_IRQ; /* Check IRQs */ i= priv(rp)->s_nr_irq; if (i >= NR_IRQ) { printf("do_privctl: %d already has %d irq's.\n", rp->p_endpoint, i); return ENOMEM; } priv(rp)->s_irq_tab[i]= irq; priv(rp)->s_nr_irq++; return OK; case SYS_PRIV_QUERY_MEM: { phys_bytes addr, limit; struct priv *sp; /* See if a certain process is allowed to map in certain physical * memory. */ addr = (phys_bytes) m_ptr->CTL_PHYSSTART; limit = addr + (phys_bytes) m_ptr->CTL_PHYSLEN - 1; if(limit < addr) return EPERM; if(!(sp = priv(rp))) return EPERM; if (!(sp->s_flags & SYS_PROC)) return EPERM; for(i = 0; i < sp->s_nr_mem_range; i++) { if(addr >= sp->s_mem_tab[i].mr_base && limit <= sp->s_mem_tab[i].mr_limit) return OK; } return EPERM; } case SYS_PRIV_UPDATE_SYS: /* Update the privilege structure of a system process. */ if(!m_ptr->CTL_ARG_PTR) return EINVAL; /* Copy privilege structure from caller */ if((r=data_copy(caller->p_endpoint, (vir_bytes) m_ptr->CTL_ARG_PTR, KERNEL, (vir_bytes) &priv, sizeof(priv))) != OK) return r; /* Override settings in existing privilege structure. */ if((r = update_priv(rp, &priv)) != OK) { return r; } return(OK); default: printf("do_privctl: bad request %d\n", m_ptr->CTL_REQUEST); return EINVAL; } }
/*===========================================================================* * kmain * *===========================================================================*/ void kmain(kinfo_t *local_cbi) { /* Start the ball rolling. */ struct boot_image *ip; /* boot image pointer */ register struct proc *rp; /* process pointer */ register int i, j; /* save a global copy of the boot parameters */ memcpy(&kinfo, local_cbi, sizeof(kinfo)); memcpy(&kmess, kinfo.kmess, sizeof(kmess)); #ifdef __arm__ /* We want to initialize serial before we do any output */ omap3_ser_init(); #endif /* We can talk now */ printf("MINIX booting\n"); /* Kernel may use bits of main memory before VM is started */ kernel_may_alloc = 1; assert(sizeof(kinfo.boot_procs) == sizeof(image)); memcpy(kinfo.boot_procs, image, sizeof(kinfo.boot_procs)); cstart(); BKL_LOCK(); DEBUGEXTRA(("main()\n")); proc_init(); if(NR_BOOT_MODULES != kinfo.mbi.mods_count) panic("expecting %d boot processes/modules, found %d", NR_BOOT_MODULES, kinfo.mbi.mods_count); /* Set up proc table entries for processes in boot image. */ for (i=0; i < NR_BOOT_PROCS; ++i) { int schedulable_proc; proc_nr_t proc_nr; int ipc_to_m, kcalls; sys_map_t map; ip = &image[i]; /* process' attributes */ DEBUGEXTRA(("initializing %s... ", ip->proc_name)); rp = proc_addr(ip->proc_nr); /* get process pointer */ ip->endpoint = rp->p_endpoint; /* ipc endpoint */ make_zero64(rp->p_cpu_time_left); if(i < NR_TASKS) /* name (tasks only) */ strlcpy(rp->p_name, ip->proc_name, sizeof(rp->p_name)); if(i >= NR_TASKS) { /* Remember this so it can be passed to VM */ multiboot_module_t *mb_mod = &kinfo.module_list[i - NR_TASKS]; ip->start_addr = mb_mod->mod_start; ip->len = mb_mod->mod_end - mb_mod->mod_start; } reset_proc_accounting(rp); /* See if this process is immediately schedulable. * In that case, set its privileges now and allow it to run. * Only kernel tasks and the root system process get to run immediately. * All the other system processes are inhibited from running by the * RTS_NO_PRIV flag. They can only be scheduled once the root system * process has set their privileges. */ proc_nr = proc_nr(rp); schedulable_proc = (iskerneln(proc_nr) || isrootsysn(proc_nr) || proc_nr == VM_PROC_NR); if(schedulable_proc) { /* Assign privilege structure. Force a static privilege id. */ (void) get_priv(rp, static_priv_id(proc_nr)); /* Priviliges for kernel tasks. */ if(proc_nr == VM_PROC_NR) { priv(rp)->s_flags = VM_F; priv(rp)->s_trap_mask = SRV_T; ipc_to_m = SRV_M; kcalls = SRV_KC; priv(rp)->s_sig_mgr = SELF; rp->p_priority = SRV_Q; rp->p_quantum_size_ms = SRV_QT; } else if(iskerneln(proc_nr)) { /* Privilege flags. */ priv(rp)->s_flags = (proc_nr == IDLE ? IDL_F : TSK_F); /* Allowed traps. */ priv(rp)->s_trap_mask = (proc_nr == CLOCK || proc_nr == SYSTEM ? CSK_T : TSK_T); ipc_to_m = TSK_M; /* allowed targets */ kcalls = TSK_KC; /* allowed kernel calls */ } /* Priviliges for the root system process. */ else { assert(isrootsysn(proc_nr)); priv(rp)->s_flags= RSYS_F; /* privilege flags */ priv(rp)->s_trap_mask= SRV_T; /* allowed traps */ ipc_to_m = SRV_M; /* allowed targets */ kcalls = SRV_KC; /* allowed kernel calls */ priv(rp)->s_sig_mgr = SRV_SM; /* signal manager */ rp->p_priority = SRV_Q; /* priority queue */ rp->p_quantum_size_ms = SRV_QT; /* quantum size */ } /* Fill in target mask. */ memset(&map, 0, sizeof(map)); if (ipc_to_m == ALL_M) { for(j = 0; j < NR_SYS_PROCS; j++) set_sys_bit(map, j); } fill_sendto_mask(rp, &map); /* Fill in kernel call mask. */ for(j = 0; j < SYS_CALL_MASK_SIZE; j++) { priv(rp)->s_k_call_mask[j] = (kcalls == NO_C ? 0 : (~0)); } } else { /* Don't let the process run for now. */ RTS_SET(rp, RTS_NO_PRIV | RTS_NO_QUANTUM); } /* Arch-specific state initialization. */ arch_boot_proc(ip, rp); /* scheduling functions depend on proc_ptr pointing somewhere. */ if(!get_cpulocal_var(proc_ptr)) get_cpulocal_var(proc_ptr) = rp; /* Process isn't scheduled until VM has set up a pagetable for it. */ if(rp->p_nr != VM_PROC_NR && rp->p_nr >= 0) { rp->p_rts_flags |= RTS_VMINHIBIT; rp->p_rts_flags |= RTS_BOOTINHIBIT; } rp->p_rts_flags |= RTS_PROC_STOP; rp->p_rts_flags &= ~RTS_SLOT_FREE; DEBUGEXTRA(("done\n")); } /* update boot procs info for VM */ memcpy(kinfo.boot_procs, image, sizeof(kinfo.boot_procs)); #define IPCNAME(n) { \ assert((n) >= 0 && (n) <= IPCNO_HIGHEST); \ assert(!ipc_call_names[n]); \ ipc_call_names[n] = #n; \ } arch_post_init(); IPCNAME(SEND); IPCNAME(RECEIVE); IPCNAME(SENDREC); IPCNAME(NOTIFY); IPCNAME(SENDNB); IPCNAME(SENDA); /* System and processes initialization */ memory_init(); DEBUGEXTRA(("system_init()... ")); system_init(); DEBUGEXTRA(("done\n")); /* The bootstrap phase is over, so we can add the physical * memory used for it to the free list. */ add_memmap(&kinfo, kinfo.bootstrap_start, kinfo.bootstrap_len); #ifdef CONFIG_SMP if (config_no_apic) { BOOT_VERBOSE(printf("APIC disabled, disables SMP, using legacy PIC\n")); smp_single_cpu_fallback(); } else if (config_no_smp) { BOOT_VERBOSE(printf("SMP disabled, using legacy PIC\n")); smp_single_cpu_fallback(); } else { smp_init(); /* * if smp_init() returns it means that it failed and we try to finish * single CPU booting */ bsp_finish_booting(); } #else /* * if configured for a single CPU, we are already on the kernel stack which we * are going to use everytime we execute kernel code. We finish booting and we * never return here */ bsp_finish_booting(); #endif NOT_REACHABLE; }
/*===========================================================================* * do_vmctl * *===========================================================================*/ PUBLIC int do_vmctl(struct proc * caller, message * m_ptr) { int proc_nr; endpoint_t ep = m_ptr->SVMCTL_WHO; struct proc *p, *rp, *target; int err; if(ep == SELF) { ep = caller->p_endpoint; } if(!isokendpt(ep, &proc_nr)) { printf("do_vmctl: unexpected endpoint %d from VM\n", ep); return EINVAL; } p = proc_addr(proc_nr); switch(m_ptr->SVMCTL_PARAM) { case VMCTL_CLEAR_PAGEFAULT: assert(RTS_ISSET(p,RTS_PAGEFAULT)); RTS_UNSET(p, RTS_PAGEFAULT); return OK; case VMCTL_MEMREQ_GET: /* Send VM the information about the memory request. */ if(!(rp = vmrequest)) return ESRCH; assert(RTS_ISSET(rp, RTS_VMREQUEST)); okendpt(rp->p_vmrequest.target, &proc_nr); target = proc_addr(proc_nr); /* Reply with request fields. */ switch(rp->p_vmrequest.req_type) { case VMPTYPE_CHECK: m_ptr->SVMCTL_MRG_TARGET = rp->p_vmrequest.target; m_ptr->SVMCTL_MRG_ADDR = rp->p_vmrequest.params.check.start; m_ptr->SVMCTL_MRG_LENGTH = rp->p_vmrequest.params.check.length; m_ptr->SVMCTL_MRG_FLAG = rp->p_vmrequest.params.check.writeflag; m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint; break; case VMPTYPE_SMAP: case VMPTYPE_SUNMAP: case VMPTYPE_COWMAP: assert(RTS_ISSET(target,RTS_VMREQTARGET)); RTS_UNSET(target, RTS_VMREQTARGET); m_ptr->SVMCTL_MRG_TARGET = rp->p_vmrequest.target; m_ptr->SVMCTL_MRG_ADDR = rp->p_vmrequest.params.map.vir_d; m_ptr->SVMCTL_MRG_EP2 = rp->p_vmrequest.params.map.ep_s; m_ptr->SVMCTL_MRG_ADDR2 = rp->p_vmrequest.params.map.vir_s; m_ptr->SVMCTL_MRG_LENGTH = rp->p_vmrequest.params.map.length; m_ptr->SVMCTL_MRG_FLAG = rp->p_vmrequest.params.map.writeflag; m_ptr->SVMCTL_MRG_REQUESTOR = (void *) rp->p_endpoint; break; default: panic("VMREQUEST wrong type"); } rp->p_vmrequest.vmresult = VMSUSPEND; /* Remove from request chain. */ vmrequest = vmrequest->p_vmrequest.nextrequestor; return rp->p_vmrequest.req_type; case VMCTL_MEMREQ_REPLY: assert(RTS_ISSET(p, RTS_VMREQUEST)); assert(p->p_vmrequest.vmresult == VMSUSPEND); okendpt(p->p_vmrequest.target, &proc_nr); target = proc_addr(proc_nr); p->p_vmrequest.vmresult = m_ptr->SVMCTL_VALUE; assert(p->p_vmrequest.vmresult != VMSUSPEND); switch(p->p_vmrequest.type) { case VMSTYPE_KERNELCALL: /* * we will have to resume execution of the kernel call * as soon the scheduler picks up this process again */ p->p_misc_flags |= MF_KCALL_RESUME; break; case VMSTYPE_DELIVERMSG: assert(p->p_misc_flags & MF_DELIVERMSG); assert(p == target); assert(RTS_ISSET(p, RTS_VMREQUEST)); break; case VMSTYPE_MAP: assert(RTS_ISSET(p, RTS_VMREQUEST)); break; default: panic("strange request type: %d",p->p_vmrequest.type); } RTS_UNSET(p, RTS_VMREQUEST); return OK; case VMCTL_ENABLE_PAGING: if(vm_running) panic("do_vmctl: paging already enabled"); if (arch_enable_paging(caller, m_ptr) != OK) panic("do_vmctl: paging enabling failed"); return OK; case VMCTL_KERN_PHYSMAP: { int i = m_ptr->SVMCTL_VALUE; return arch_phys_map(i, (phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_ADDR, (phys_bytes *) &m_ptr->SVMCTL_MAP_PHYS_LEN, &m_ptr->SVMCTL_MAP_FLAGS); } case VMCTL_KERN_MAP_REPLY: { return arch_phys_map_reply(m_ptr->SVMCTL_VALUE, (vir_bytes) m_ptr->SVMCTL_MAP_VIR_ADDR); } case VMCTL_VMINHIBIT_SET: /* check if we must stop a process on a different CPU */ #if CONFIG_SMP if (p->p_cpu != cpuid) { smp_schedule_vminhibit(p); } else #endif RTS_SET(p, RTS_VMINHIBIT); #if CONFIG_SMP p->p_misc_flags |= MF_FLUSH_TLB; #endif return OK; case VMCTL_VMINHIBIT_CLEAR: assert(RTS_ISSET(p, RTS_VMINHIBIT)); /* * the processes is certainly not runnable, no need to tell its * cpu */ RTS_UNSET(p, RTS_VMINHIBIT); return OK; } /* Try architecture-specific vmctls. */ return arch_do_vmctl(m_ptr, p); }
/*===========================================================================* * main * *===========================================================================*/ PUBLIC int main(void) { /* Start the ball rolling. */ struct boot_image *ip; /* boot image pointer */ register struct proc *rp; /* process pointer */ register int i, j; size_t argsz; /* size of arguments passed to crtso on stack */ BKL_LOCK(); /* Global value to test segment sanity. */ magictest = MAGICTEST; DEBUGEXTRA(("main()\n")); proc_init(); /* Set up proc table entries for processes in boot image. The stacks * of the servers have been added to the data segment by the monitor, so * the stack pointer is set to the end of the data segment. */ for (i=0; i < NR_BOOT_PROCS; ++i) { int schedulable_proc; proc_nr_t proc_nr; int ipc_to_m, kcalls; sys_map_t map; ip = &image[i]; /* process' attributes */ DEBUGEXTRA(("initializing %s... ", ip->proc_name)); rp = proc_addr(ip->proc_nr); /* get process pointer */ ip->endpoint = rp->p_endpoint; /* ipc endpoint */ make_zero64(rp->p_cpu_time_left); strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */ reset_proc_accounting(rp); /* See if this process is immediately schedulable. * In that case, set its privileges now and allow it to run. * Only kernel tasks and the root system process get to run immediately. * All the other system processes are inhibited from running by the * RTS_NO_PRIV flag. They can only be scheduled once the root system * process has set their privileges. */ proc_nr = proc_nr(rp); schedulable_proc = (iskerneln(proc_nr) || isrootsysn(proc_nr)); if(schedulable_proc) { /* Assign privilege structure. Force a static privilege id. */ (void) get_priv(rp, static_priv_id(proc_nr)); /* Priviliges for kernel tasks. */ if(iskerneln(proc_nr)) { /* Privilege flags. */ priv(rp)->s_flags = (proc_nr == IDLE ? IDL_F : TSK_F); /* Allowed traps. */ priv(rp)->s_trap_mask = (proc_nr == CLOCK || proc_nr == SYSTEM ? CSK_T : TSK_T); ipc_to_m = TSK_M; /* allowed targets */ kcalls = TSK_KC; /* allowed kernel calls */ } /* Priviliges for the root system process. */ else if(isrootsysn(proc_nr)) { priv(rp)->s_flags= RSYS_F; /* privilege flags */ priv(rp)->s_trap_mask= SRV_T; /* allowed traps */ ipc_to_m = SRV_M; /* allowed targets */ kcalls = SRV_KC; /* allowed kernel calls */ priv(rp)->s_sig_mgr = SRV_SM; /* signal manager */ rp->p_priority = SRV_Q; /* priority queue */ rp->p_quantum_size_ms = SRV_QT; /* quantum size */ } /* Priviliges for ordinary process. */ else { NOT_REACHABLE; } /* Fill in target mask. */ memset(&map, 0, sizeof(map)); if (ipc_to_m == ALL_M) { for(j = 0; j < NR_SYS_PROCS; j++) set_sys_bit(map, j); } fill_sendto_mask(rp, &map); /* Fill in kernel call mask. */ for(j = 0; j < SYS_CALL_MASK_SIZE; j++) { priv(rp)->s_k_call_mask[j] = (kcalls == NO_C ? 0 : (~0)); } } else { /* Don't let the process run for now. */ RTS_SET(rp, RTS_NO_PRIV | RTS_NO_QUANTUM); } rp->p_memmap[T].mem_vir = ABS2CLICK(ip->memmap.text_vaddr); rp->p_memmap[T].mem_phys = ABS2CLICK(ip->memmap.text_paddr); rp->p_memmap[T].mem_len = ABS2CLICK(ip->memmap.text_bytes); rp->p_memmap[D].mem_vir = ABS2CLICK(ip->memmap.data_vaddr); rp->p_memmap[D].mem_phys = ABS2CLICK(ip->memmap.data_paddr); rp->p_memmap[D].mem_len = ABS2CLICK(ip->memmap.data_bytes); rp->p_memmap[S].mem_phys = ABS2CLICK(ip->memmap.data_paddr + ip->memmap.data_bytes + ip->memmap.stack_bytes); rp->p_memmap[S].mem_vir = ABS2CLICK(ip->memmap.data_vaddr + ip->memmap.data_bytes + ip->memmap.stack_bytes); rp->p_memmap[S].mem_len = 0; /* Set initial register values. The processor status word for tasks * is different from that of other processes because tasks can * access I/O; this is not allowed to less-privileged processes */ rp->p_reg.pc = ip->memmap.entry; rp->p_reg.psw = (iskerneln(proc_nr)) ? INIT_TASK_PSW : INIT_PSW; /* Initialize the server stack pointer. Take it down three words * to give crtso.s something to use as "argc", "argv" and "envp". */ if (isusern(proc_nr)) { /* user-space process? */ rp->p_reg.sp = (rp->p_memmap[S].mem_vir + rp->p_memmap[S].mem_len) << CLICK_SHIFT; argsz = 3 * sizeof(reg_t); rp->p_reg.sp -= argsz; phys_memset(rp->p_reg.sp - (rp->p_memmap[S].mem_vir << CLICK_SHIFT) + (rp->p_memmap[S].mem_phys << CLICK_SHIFT), 0, argsz); } /* scheduling functions depend on proc_ptr pointing somewhere. */ if(!get_cpulocal_var(proc_ptr)) get_cpulocal_var(proc_ptr) = rp; /* If this process has its own page table, VM will set the * PT up and manage it. VM will signal the kernel when it has * done this; until then, don't let it run. */ if(ip->flags & PROC_FULLVM) rp->p_rts_flags |= RTS_VMINHIBIT; rp->p_rts_flags |= RTS_PROC_STOP; rp->p_rts_flags &= ~RTS_SLOT_FREE; alloc_segments(rp); DEBUGEXTRA(("done\n")); } #define IPCNAME(n) { \ assert((n) >= 0 && (n) <= IPCNO_HIGHEST); \ assert(!ipc_call_names[n]); \ ipc_call_names[n] = #n; \ } IPCNAME(SEND); IPCNAME(RECEIVE); IPCNAME(SENDREC); IPCNAME(NOTIFY); IPCNAME(SENDNB); IPCNAME(SENDA); /* Architecture-dependent initialization. */ DEBUGEXTRA(("arch_init()... ")); arch_init(); DEBUGEXTRA(("done\n")); /* System and processes initialization */ DEBUGEXTRA(("system_init()... ")); system_init(); DEBUGEXTRA(("done\n")); #ifdef CONFIG_SMP if (config_no_apic) { BOOT_VERBOSE(printf("APIC disabled, disables SMP, using legacy PIC\n")); smp_single_cpu_fallback(); } else if (config_no_smp) { BOOT_VERBOSE(printf("SMP disabled, using legacy PIC\n")); smp_single_cpu_fallback(); } else { smp_init(); /* * if smp_init() returns it means that it failed and we try to finish * single CPU booting */ bsp_finish_booting(); } #else /* * if configured for a single CPU, we are already on the kernel stack which we * are going to use everytime we execute kernel code. We finish booting and we * never return here */ bsp_finish_booting(); #endif NOT_REACHABLE; return 1; }
void main(void) { /* Start the ball rolling. */ struct boot_image *ip; /* boot image pointer */ register struct proc *rp; /* process pointer */ register struct priv *sp; /* privilege structure pointer */ register int i, j; int hdrindex; /* index to array of a.out headers */ phys_clicks text_base; vir_clicks text_clicks, data_clicks, st_clicks; reg_t ktsb; /* kernel task stack base */ struct exec *e_hdr = 0; /* for a copy of an a.out header */ /* Global value to test segment sanity. */ magictest = MAGICTEST; /* Clear the process table. Anounce each slot as empty and set up mappings * for proc_addr() and proc_nr() macros. Do the same for the table with * privilege structures for the system processes. */ for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) { rp->p_rts_flags = RTS_SLOT_FREE; /* initialize free slot */ #ifdef CONFIG_DEBUG_KERNEL_SCHED_CHECK rp->p_magic = PMAGIC; #endif rp->p_nr = i; /* proc number from ptr */ rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */ } for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) { sp->s_proc_nr = ENDPT_NONE; /* initialize as free */ sp->s_id = i; /* priv structure index */ ppriv_addr[i] = sp; /* priv ptr from number */ } /* Set up proc table entries for processes in boot image. The stacks of the * kernel tasks are initialized to an array in data space. The stacks * of the servers have been added to the data segment by the monitor, so * the stack pointer is set to the end of the data segment. All the * processes are in low memory on the 8086. On the 386 only the kernel * is in low memory, the rest is loaded in extended memory. */ /* Task stacks. */ ktsb = (reg_t) t_stack; for (i=0; i < NR_BOOT_PROCS; ++i) { int schedulable_proc, proc_nr; int ipc_to_m, kcalls; ip = &image[i]; /* process' attributes */ rp = proc_addr(ip->proc_nr); /* get process pointer */ ip->endpoint = rp->p_endpoint; /* ipc endpoint */ rp->p_max_priority = ip->priority; /* max scheduling priority */ rp->p_priority = ip->priority; /* current priority */ rp->p_quantum_size = ip->quantum; /* quantum size in ticks */ rp->p_ticks_left = ip->quantum; /* current credit */ strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */ /* See if this process is immediately schedulable. * In that case, set its privileges now and allow it to run. * Only kernel tasks and the root system process get to run immediately. * All the other system processes are inhibited from running by the * RTS_NO_PRIV flag. They can only be scheduled once the root system * process has set their privileges. */ proc_nr = proc_nr(rp); schedulable_proc = (iskerneln(proc_nr) || isrootsysn(proc_nr)); if(schedulable_proc) { /* Assign privilege structure. Force a static privilege id. */ (void) get_priv(rp, static_priv_id(proc_nr)); /* Priviliges for kernel tasks. */ if(iskerneln(proc_nr)) { /* Privilege flags. */ priv(rp)->s_flags = (proc_nr == IDLE ? IDL_F : TSK_F); /* Allowed traps. */ priv(rp)->s_trap_mask = (proc_nr == CLOCK || proc_nr == SYSTEM ? CSK_T : TSK_T); ipc_to_m = TSK_M; /* allowed targets */ kcalls = TSK_KC; /* allowed kernel calls */ } else if(isrootsysn(proc_nr)) { /* Priviliges for the root system process. */ priv(rp)->s_flags= RSYS_F; /* privilege flags */ priv(rp)->s_trap_mask= RSYS_T; /* allowed traps */ ipc_to_m = RSYS_M; /* allowed targets */ kcalls = RSYS_KC; /* allowed kernel calls */ } /* Fill in target mask. */ for (j=0; j < NR_SYS_PROCS; j++) { if (ipc_to_m & (1 << j)) set_sendto_bit(rp, j); else unset_sendto_bit(rp, j); } /* Fill in kernel call mask. */ for(j = 0; j < CALL_MASK_SIZE; j++) { priv(rp)->s_k_call_mask[j] = (kcalls == NO_C ? 0 : (~0)); } } else { /*Don't let the process run for now. */ RTS_SET(rp, RTS_NO_PRIV); } if (iskerneln(proc_nr)) { /* part of the kernel? */ if (ip->stksize > 0) { /* HARDWARE stack size is 0 */ rp->p_priv->s_stack_guard = (reg_t *) ktsb; *rp->p_priv->s_stack_guard = STACK_GUARD; } ktsb += ip->stksize; /* point to high end of stack */ rp->p_reg.sp = ktsb; /* this task's initial stack ptr */ hdrindex = 0; /* all use the first a.out header */ } else { hdrindex = 1 + i-NR_TASKS; /* system/user processes */ } /* Architecture-specific way to find out aout header of this * boot process. */ e_hdr = arch_get_aout_header(hdrindex); /* Convert addresses to clicks and build process memory map */ text_base = e_hdr->a_syms >> CLICK_SHIFT; st_clicks= (e_hdr->a_total + CLICK_SIZE-1) >> CLICK_SHIFT; data_clicks = (e_hdr->a_text + e_hdr->a_data + e_hdr->a_bss + CLICK_SIZE-1) >> CLICK_SHIFT; text_clicks = 0; rp->p_memmap[T].mem_phys = text_base; rp->p_memmap[T].mem_len = text_clicks; rp->p_memmap[D].mem_phys = text_base + text_clicks; rp->p_memmap[D].mem_len = data_clicks; rp->p_memmap[S].mem_phys = text_base + text_clicks + st_clicks; rp->p_memmap[S].mem_vir = st_clicks; rp->p_memmap[S].mem_len = 0; /* Patch (override) the non-kernel process' entry points in image table. The * image table is located in kernel/kernel_syms.c. The kernel processes like * IDLE, SYSTEM, CLOCK, HARDWARE are not changed because they are part of kernel * and the entry points are set at compilation time. In case of IDLE or HARDWARE * the entry point can be ignored becasue they never run (set RTS_PROC_STOP). */ if (!iskerneln(proc_nr(rp))) ip->initial_pc = (task_t*)e_hdr->a_entry; /* Set initial register values. The processor status word for tasks * is different from that of other processes because tasks can * access I/O; this is not allowed to less-privileged processes */ rp->p_reg.pc = (reg_t) ip->initial_pc; rp->p_reg.psw = (iskerneln(proc_nr)) ? INIT_TASK_PSW : INIT_PSW; /* Initialize the server stack pointer. Take it down one word * to give crtso.s something to use as "argc","argv" and "envp". */ if (isusern(proc_nr)) { /* user-space process? */ rp->p_reg.sp = (rp->p_memmap[S].mem_vir + rp->p_memmap[S].mem_len) << CLICK_SHIFT; rp->p_reg.sp -= 3*sizeof(reg_t); } /* scheduling functions depend on proc_ptr pointing somewhere. */ if(!proc_ptr) proc_ptr = rp; /* If this process has its own page table, VM will set the * PT up and manage it. VM will signal the kernel when it has * done this; until then, don't let it run. */ if(ip->flags & PROC_FULLVM) RTS_SET(rp, RTS_VMINHIBIT); /* IDLE & HARDWARE task is never put on a run queue as it is * never ready to run. */ if (rp->p_nr == HARDWARE) RTS_SET(rp, RTS_PROC_STOP); if (rp->p_nr == IDLE) RTS_SET(rp, RTS_PROC_STOP); RTS_UNSET(rp, RTS_SLOT_FREE); /* remove RTS_SLOT_FREE and schedule */ alloc_segments(rp); } /* for */ /* Architecture-dependent initialization. */ arch_init(); #ifdef CONFIG_DEBUG_KERNEL_STATS_PROFILE sprofiling = 0; /* we're not profiling until instructed to */ #endif cprof_procs_no = 0; /* init nr of hash table slots used */ #ifdef CONFIG_IDLE_TSC idle_tsc = cvu64(0); #endif vm_running = 0; krandom.random_sources = RANDOM_SOURCES; krandom.random_elements = RANDOM_ELEMENTS; /* Nucleos is now ready. All boot image processes are on the ready queue. * Return to the assembly code to start running the current process. */ bill_ptr = proc_addr(IDLE); /* it has to point somewhere */ announce(); /* print Nucleos startup banner */ /* * enable timer interrupts and clock task on the boot CPU */ if (boot_cpu_init_timer(system_hz)) { kernel_panic("FATAL : failed to initialize timer interrupts, " "cannot continue without any clock source!", NO_NUM); } /* Warnings for sanity checks that take time. These warnings are printed * so it's a clear warning no full release should be done with them * enabled. */ #ifdef CONFIG_DEBUG_KERNEL_SCHED_CHECK FIXME("CONFIG_DEBUG_KERNEL_SCHED_CHECK enabled"); #endif #ifdef CONFIG_DEBUG_KERNEL_VMASSERT FIXME("CONFIG_DEBUG_KERNEL_VMASSERT enabled"); #endif #ifdef CONFIG_DEBUG_PROC_CHECK FIXME("PROC check enabled"); #endif restart(); }
PRIVATE void pagefault( struct proc *pr, struct exception_frame * frame, int is_nested) { int in_physcopy = 0; reg_t pagefaultcr2; message m_pagefault; int err; assert(frame); pagefaultcr2 = read_cr2(); #if 0 printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n", pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3()); #endif if(pr->p_seg.p_cr3) { assert(pr->p_seg.p_cr3 == read_cr3()); } in_physcopy = (frame->eip > (vir_bytes) phys_copy) && (frame->eip < (vir_bytes) phys_copy_fault); if((is_nested || iskernelp(pr)) && catch_pagefaults && in_physcopy) { #if 0 printf("pf caught! addr 0x%lx\n", pagefaultcr2); #endif if (is_nested) { frame->eip = (reg_t) phys_copy_fault_in_kernel; } else { pr->p_reg.pc = (reg_t) phys_copy_fault; pr->p_reg.retreg = pagefaultcr2; } return; } if(is_nested) { panic("pagefault in kernel at pc 0x%lx address 0x%lx", frame->eip, pagefaultcr2); } /* System processes that don't have their own page table can't * have page faults. VM does have its own page table but also * can't have page faults (because VM has to handle them). */ if((pr->p_endpoint <= INIT_PROC_NR && !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) { /* Page fault we can't / don't want to * handle. */ printf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n", pr->p_endpoint, pr->p_name, pr->p_reg.pc, pagefaultcr2, frame->errcode, is_nested); proc_stacktrace(pr); printf("pc of pagefault: 0x%lx\n", frame->eip); panic("page fault in system process: %d", pr->p_endpoint); return; } /* Don't schedule this process until pagefault is handled. */ assert(pr->p_seg.p_cr3 == read_cr3()); assert(!RTS_ISSET(pr, RTS_PAGEFAULT)); RTS_SET(pr, RTS_PAGEFAULT); /* tell Vm about the pagefault */ m_pagefault.m_source = pr->p_endpoint; m_pagefault.m_type = VM_PAGEFAULT; m_pagefault.VPF_ADDR = pagefaultcr2; m_pagefault.VPF_FLAGS = frame->errcode; if ((err = mini_send(pr, VM_PROC_NR, &m_pagefault, FROM_KERNEL))) { panic("WARNING: pagefault: mini_send returned %d\n", err); } return; }
/*===========================================================================* * main * *===========================================================================*/ PUBLIC void main() { /* Start the ball rolling. */ struct boot_image *ip; /* boot image pointer */ register struct proc *rp; /* process pointer */ register struct priv *sp; /* privilege structure pointer */ register int i, j, s; int hdrindex; /* index to array of a.out headers */ phys_clicks text_base; vir_clicks text_clicks, data_clicks, st_clicks; reg_t ktsb; /* kernel task stack base */ struct exec e_hdr; /* for a copy of an a.out header */ /* Architecture-dependent initialization. */ arch_init(); /* Global value to test segment sanity. */ magictest = MAGICTEST; /* Clear the process table. Anounce each slot as empty and set up mappings * for proc_addr() and proc_nr() macros. Do the same for the table with * privilege structures for the system processes. */ for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) { rp->p_rts_flags = SLOT_FREE; /* initialize free slot */ #if DEBUG_SCHED_CHECK rp->p_magic = PMAGIC; #endif rp->p_nr = i; /* proc number from ptr */ rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */ } for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) { sp->s_proc_nr = NONE; /* initialize as free */ sp->s_id = i; /* priv structure index */ ppriv_addr[i] = sp; /* priv ptr from number */ } /* Set up proc table entries for processes in boot image. The stacks of the * kernel tasks are initialized to an array in data space. The stacks * of the servers have been added to the data segment by the monitor, so * the stack pointer is set to the end of the data segment. All the * processes are in low memory on the 8086. On the 386 only the kernel * is in low memory, the rest is loaded in extended memory. */ /* Task stacks. */ ktsb = (reg_t) t_stack; for (i=0; i < NR_BOOT_PROCS; ++i) { int ci; bitchunk_t fv; ip = &image[i]; /* process' attributes */ rp = proc_addr(ip->proc_nr); /* get process pointer */ ip->endpoint = rp->p_endpoint; /* ipc endpoint */ rp->p_max_priority = ip->priority; /* max scheduling priority */ rp->p_priority = ip->priority; /* current priority */ rp->p_quantum_size = ip->quantum; /* quantum size in ticks */ rp->p_ticks_left = ip->quantum; /* current credit */ strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */ (void) get_priv(rp, (ip->flags & SYS_PROC)); /* assign structure */ priv(rp)->s_flags = ip->flags; /* process flags */ priv(rp)->s_trap_mask = ip->trap_mask; /* allowed traps */ /* Warn about violations of the boot image table order consistency. */ if (priv_id(rp) != s_nr_to_id(ip->proc_nr) && (ip->flags & SYS_PROC)) kprintf("Warning: boot image table has wrong process order\n"); /* Initialize call mask bitmap from unordered set. * A single SYS_ALL_CALLS is a special case - it * means all calls are allowed. */ if(ip->nr_k_calls == 1 && ip->k_calls[0] == SYS_ALL_CALLS) fv = ~0; /* fill call mask */ else fv = 0; /* clear call mask */ for(ci = 0; ci < CALL_MASK_SIZE; ci++) /* fill or clear call mask */ priv(rp)->s_k_call_mask[ci] = fv; if(!fv) /* not all full? enter calls bit by bit */ for(ci = 0; ci < ip->nr_k_calls; ci++) SET_BIT(priv(rp)->s_k_call_mask, ip->k_calls[ci]-KERNEL_CALL); for (j = 0; j < NR_SYS_PROCS && j < BITCHUNK_BITS; j++) if (ip->ipc_to & (1 << j)) set_sendto_bit(rp, j); /* restrict targets */ if (iskerneln(proc_nr(rp))) { /* part of the kernel? */ if (ip->stksize > 0) { /* HARDWARE stack size is 0 */ rp->p_priv->s_stack_guard = (reg_t *) ktsb; *rp->p_priv->s_stack_guard = STACK_GUARD; } ktsb += ip->stksize; /* point to high end of stack */ rp->p_reg.sp = ktsb; /* this task's initial stack ptr */ hdrindex = 0; /* all use the first a.out header */ } else { hdrindex = 1 + i-NR_TASKS; /* servers, drivers, INIT */ } /* Architecture-specific way to find out aout header of this * boot process. */ arch_get_aout_headers(hdrindex, &e_hdr); /* Convert addresses to clicks and build process memory map */ text_base = e_hdr.a_syms >> CLICK_SHIFT; text_clicks = (e_hdr.a_text + CLICK_SIZE-1) >> CLICK_SHIFT; data_clicks = (e_hdr.a_data+e_hdr.a_bss + CLICK_SIZE-1) >> CLICK_SHIFT; st_clicks= (e_hdr.a_total + CLICK_SIZE-1) >> CLICK_SHIFT; if (!(e_hdr.a_flags & A_SEP)) { data_clicks= (e_hdr.a_text+e_hdr.a_data+e_hdr.a_bss + CLICK_SIZE-1) >> CLICK_SHIFT; text_clicks = 0; /* common I&D */ } rp->p_memmap[T].mem_phys = text_base; rp->p_memmap[T].mem_len = text_clicks; rp->p_memmap[D].mem_phys = text_base + text_clicks; rp->p_memmap[D].mem_len = data_clicks; rp->p_memmap[S].mem_phys = text_base + text_clicks + st_clicks; rp->p_memmap[S].mem_vir = st_clicks; rp->p_memmap[S].mem_len = 0; /* Set initial register values. The processor status word for tasks * is different from that of other processes because tasks can * access I/O; this is not allowed to less-privileged processes */ rp->p_reg.pc = (reg_t) ip->initial_pc; rp->p_reg.psw = (iskernelp(rp)) ? INIT_TASK_PSW : INIT_PSW; /* Initialize the server stack pointer. Take it down one word * to give crtso.s something to use as "argc". */ if (isusern(proc_nr(rp))) { /* user-space process? */ rp->p_reg.sp = (rp->p_memmap[S].mem_vir + rp->p_memmap[S].mem_len) << CLICK_SHIFT; rp->p_reg.sp -= sizeof(reg_t); } /* scheduling functions depend on proc_ptr pointing somewhere. */ if(!proc_ptr) proc_ptr = rp; /* If this process has its own page table, VM will set the * PT up and manage it. VM will signal the kernel when it has * done this; until then, don't let it run. */ if(priv(rp)->s_flags & PROC_FULLVM) RTS_SET(rp, VMINHIBIT); /* Set ready. The HARDWARE task is never ready. */ if (rp->p_nr == HARDWARE) RTS_SET(rp, PROC_STOP); RTS_UNSET(rp, SLOT_FREE); /* remove SLOT_FREE and schedule */ alloc_segments(rp); }
/*==========================================================================* * do_trace * *==========================================================================*/ PUBLIC int do_trace(struct proc * caller, message * m_ptr) { /* Handle the debugging commands supported by the ptrace system call * The commands are: * T_STOP stop the process * T_OK enable tracing by parent for this process * T_GETINS return value from instruction space * T_GETDATA return value from data space * T_GETUSER return value from user process table * T_SETINS set value in instruction space * T_SETDATA set value in data space * T_SETUSER set value in user process table * T_RESUME resume execution * T_EXIT exit * T_STEP set trace bit * T_SYSCALL trace system call * T_ATTACH attach to an existing process * T_DETACH detach from a traced process * T_SETOPT set trace options * T_GETRANGE get range of values * T_SETRANGE set range of values * * The T_OK, T_ATTACH, T_EXIT, and T_SETOPT commands are handled completely by * the process manager. T_GETRANGE and T_SETRANGE use sys_vircopy(). All others * come here. */ register struct proc *rp; vir_bytes tr_addr = (vir_bytes) m_ptr->CTL_ADDRESS; long tr_data = m_ptr->CTL_DATA; int tr_request = m_ptr->CTL_REQUEST; int tr_proc_nr_e = m_ptr->CTL_ENDPT, tr_proc_nr; unsigned char ub; int i; #define COPYTOPROC(seg, addr, myaddr, length) { \ struct vir_addr fromaddr, toaddr; \ int r; \ fromaddr.proc_nr_e = KERNEL; \ toaddr.proc_nr_e = tr_proc_nr_e; \ fromaddr.offset = (myaddr); \ toaddr.offset = (addr); \ fromaddr.segment = D; \ toaddr.segment = (seg); \ if((r=virtual_copy_vmcheck(caller, &fromaddr, \ &toaddr, length)) != OK) { \ printf("Can't copy in sys_trace: %d\n", r);\ return r;\ } \ } #define COPYFROMPROC(seg, addr, myaddr, length) { \ struct vir_addr fromaddr, toaddr; \ int r; \ fromaddr.proc_nr_e = tr_proc_nr_e; \ toaddr.proc_nr_e = KERNEL; \ fromaddr.offset = (addr); \ toaddr.offset = (myaddr); \ fromaddr.segment = (seg); \ toaddr.segment = D; \ if((r=virtual_copy_vmcheck(caller, &fromaddr, \ &toaddr, length)) != OK) { \ printf("Can't copy in sys_trace: %d\n", r);\ return r;\ } \ } if(!isokendpt(tr_proc_nr_e, &tr_proc_nr)) return(EINVAL); if (iskerneln(tr_proc_nr)) return(EPERM); rp = proc_addr(tr_proc_nr); if (isemptyp(rp)) return(EINVAL); switch (tr_request) { case T_STOP: /* stop process */ RTS_SET(rp, RTS_P_STOP); rp->p_reg.psw &= ~TRACEBIT; /* clear trace bit */ rp->p_misc_flags &= ~MF_SC_TRACE; /* clear syscall trace flag */ return(OK); case T_GETINS: /* return value from instruction space */ COPYFROMPROC(T, tr_addr, (vir_bytes) &tr_data, sizeof(long)); m_ptr->CTL_DATA = tr_data; break; case T_GETDATA: /* return value from data space */ COPYFROMPROC(D, tr_addr, (vir_bytes) &tr_data, sizeof(long)); m_ptr->CTL_DATA= tr_data; break; case T_GETUSER: /* return value from process table */ if ((tr_addr & (sizeof(long) - 1)) != 0) return(EFAULT); if (tr_addr <= sizeof(struct proc) - sizeof(long)) { m_ptr->CTL_DATA = *(long *) ((char *) rp + (int) tr_addr); break; } /* The process's proc struct is followed by its priv struct. * The alignment here should be unnecessary, but better safe.. */ i = sizeof(long) - 1; tr_addr -= (sizeof(struct proc) + i) & ~i; if (tr_addr > sizeof(struct priv) - sizeof(long)) return(EFAULT); m_ptr->CTL_DATA = *(long *) ((char *) rp->p_priv + (int) tr_addr); break; case T_SETINS: /* set value in instruction space */ COPYTOPROC(T, tr_addr, (vir_bytes) &tr_data, sizeof(long)); m_ptr->CTL_DATA = 0; break; case T_SETDATA: /* set value in data space */ COPYTOPROC(D, tr_addr, (vir_bytes) &tr_data, sizeof(long)); m_ptr->CTL_DATA = 0; break; case T_SETUSER: /* set value in process table */ if ((tr_addr & (sizeof(reg_t) - 1)) != 0 || tr_addr > sizeof(struct stackframe_s) - sizeof(reg_t)) return(EFAULT); i = (int) tr_addr; #if (_MINIX_CHIP == _CHIP_INTEL) /* Altering segment registers might crash the kernel when it * tries to load them prior to restarting a process, so do * not allow it. */ if (i == (int) &((struct proc *) 0)->p_reg.cs || i == (int) &((struct proc *) 0)->p_reg.ds || i == (int) &((struct proc *) 0)->p_reg.es || #if _WORD_SIZE == 4 i == (int) &((struct proc *) 0)->p_reg.gs || i == (int) &((struct proc *) 0)->p_reg.fs || #endif i == (int) &((struct proc *) 0)->p_reg.ss) return(EFAULT); #endif if (i == (int) &((struct proc *) 0)->p_reg.psw) /* only selected bits are changeable */ SETPSW(rp, tr_data); else *(reg_t *) ((char *) &rp->p_reg + i) = (reg_t) tr_data; m_ptr->CTL_DATA = 0; break; case T_DETACH: /* detach tracer */ rp->p_misc_flags &= ~MF_SC_ACTIVE; /* fall through */ case T_RESUME: /* resume execution */ RTS_UNSET(rp, RTS_P_STOP); m_ptr->CTL_DATA = 0; break; case T_STEP: /* set trace bit */ rp->p_reg.psw |= TRACEBIT; RTS_UNSET(rp, RTS_P_STOP); m_ptr->CTL_DATA = 0; break; case T_SYSCALL: /* trace system call */ rp->p_misc_flags |= MF_SC_TRACE; RTS_UNSET(rp, RTS_P_STOP); m_ptr->CTL_DATA = 0; break; case T_READB_INS: /* get value from instruction space */ COPYFROMPROC(T, tr_addr, (vir_bytes) &ub, 1); m_ptr->CTL_DATA = ub; break; case T_WRITEB_INS: /* set value in instruction space */ ub = (unsigned char) (tr_data & 0xff); COPYTOPROC(T, tr_addr, (vir_bytes) &ub, 1); m_ptr->CTL_DATA = 0; break; default: return(EINVAL); } return(OK); }