/*===========================================================================* * stacktrace * *===========================================================================*/ PUBLIC void proc_stacktrace(struct proc *whichproc) { reg_t v_bp, v_pc, v_hbp; int iskernel; v_bp = whichproc->p_reg.fp; iskernel = iskernelp(whichproc); printf("%-8.8s %6d 0x%lx ", whichproc->p_name, whichproc->p_endpoint, whichproc->p_reg.pc); while(v_bp) { #define PRCOPY(pr, pv, v, n) \ (iskernel ? (memcpy((char *) v, (char *) pv, n), OK) : \ data_copy(pr->p_endpoint, pv, KERNEL, (vir_bytes) (v), n)) if(PRCOPY(whichproc, v_bp, &v_hbp, sizeof(v_hbp)) != OK) { printf("(v_bp 0x%lx ?)", v_bp); break; } if(PRCOPY(whichproc, v_bp + sizeof(v_pc), &v_pc, sizeof(v_pc)) != OK) { printf("(v_pc 0x%lx ?)", v_bp + sizeof(v_pc)); break; } printf("0x%lx ", (unsigned long) v_pc); if(v_hbp != 0 && v_hbp <= v_bp) { printf("(hbp %lx ?)", v_hbp); break; } v_bp = v_hbp; } printf("\n"); }
/*===========================================================================*
 *			proc_stacktrace_execute				     *
 *===========================================================================*/
PRIVATE void proc_stacktrace_execute(struct proc *whichproc, reg_t v_bp, reg_t pc)
{
/* Walk the frame-pointer chain of 'whichproc', starting at v_bp, and print
 * the return address stored in every frame, headed by the supplied 'pc'.
 * The walk ends at a NULL frame pointer, on a failed copy, on a frame
 * pointer that does not strictly increase, or after roughly 50 frames.
 */
  reg_t caller_bp, ret_pc;
  int in_kernel;
  int steps = 0;

  in_kernel = iskernelp(whichproc);

  printf("%-8.8s %6d 0x%lx ",
	whichproc->p_name, whichproc->p_endpoint, pc);

/* Fetch n bytes from the traced process: a plain memcpy suffices for the
 * kernel itself, any other process goes through data_copy().
 */
#define FETCH(pr, pv, v, n) \
  (in_kernel ? (memcpy((char *) v, (char *) pv, n), OK) : \
	data_copy(pr->p_endpoint, pv, KERNEL, (vir_bytes) (v), n))

  for (; v_bp != 0; v_bp = caller_bp) {
	/* The saved frame pointer lives at the base of the frame. */
	if (FETCH(whichproc, v_bp, &caller_bp, sizeof(caller_bp)) != OK) {
		printf("(v_bp 0x%lx ?)", v_bp);
		break;
	}
	/* The return address is the next word up. */
	if (FETCH(whichproc, v_bp + sizeof(ret_pc), &ret_pc,
			sizeof(ret_pc)) != OK) {
		printf("(v_pc 0x%lx ?)", v_bp + sizeof(ret_pc));
		break;
	}
	printf("0x%lx ", (unsigned long) ret_pc);
	/* A sane chain has strictly increasing frame pointers. */
	if (caller_bp != 0 && caller_bp <= v_bp) {
		printf("(hbp %lx ?)", caller_bp);
		break;
	}
	/* Cap the walk; equivalent to the original post-increment check. */
	steps += 1;
	if (steps > 51) {
		printf("(truncated after %d steps) ", steps);
		break;
	}
  }
#undef FETCH
  printf("\n");
}
/* This function sets up a mapping from within the kernel's address
 * space to any other area of memory, either straight physical
 * memory (pr == NULL) or a process view of memory, in 4MB windows.
 * I.e., it maps in 4MB chunks of virtual (or physical) address space
 * to 4MB chunks of kernel virtual address space.
 *
 * It recognizes pr already being in memory as a special case (no
 * mapping required).
 *
 * The target (i.e. in-kernel) mapping area is one of the freepdes[]
 * VM has earlier already told the kernel about that is available. It is
 * identified as the 'pde' parameter. This value can be chosen freely
 * by the caller, as long as it is in range (i.e. 0 or higher and corresponds
 * to a known freepde slot). It is up to the caller to keep track of which
 * freepde's are in use, and to determine which ones are free to use.
 *
 * The logical number supplied by the caller is translated into an actual
 * pde number to be used, and a pointer to it (linear address) is returned
 * for actual use by phys_copy or memset.
 */
static phys_bytes createpde(
	const struct proc *pr,	/* Requested process, NULL for physical. */
	const phys_bytes linaddr,/* Address after segment translation. */
	phys_bytes *bytes,	/* Size of chunk, function may truncate it. */
	int free_pde_idx,	/* index of the free slot to use */
	int *changed		/* If mapping is made, this is set to 1. */
	)
{
	u32_t pdeval;
	phys_bytes offset;
	int pde;

	/* Translate the caller's logical slot number into a real PDE index
	 * and sanity-check both against the known free-PDE table.
	 */
	assert(free_pde_idx >= 0 && free_pde_idx < nfreepdes);
	pde = freepdes[free_pde_idx];
	assert(pde >= 0 && pde < 1024);

	if(pr && ((pr == get_cpulocal_var(ptproc)) || iskernelp(pr))) {
		/* Process memory is requested, and
		 * it's a process that is already in current page table, or
		 * the kernel, which is always there.
		 * Therefore linaddr is valid directly, with the requested
		 * size.
		 */
		return linaddr;
	}

	if(pr) {
		/* Requested address is in a process that is not currently
		 * accessible directly. Grab the PDE entry of that process'
		 * page table that corresponds to the requested address.
		 */
		assert(pr->p_seg.p_cr3_v);
		pdeval = pr->p_seg.p_cr3_v[I386_VM_PDE(linaddr)];
	} else {
		/* Requested address is physical. Make up the PDE entry:
		 * a 4MB big page, present and writable.
		 */
		pdeval = (linaddr & I386_VM_ADDR_MASK_4MB) |
			I386_VM_BIGPAGE | I386_VM_PRESENT |
			I386_VM_WRITE | I386_VM_USER;
	}

	/* Write the pde value that we need into a pde that the kernel
	 * can access, into the currently loaded page table so it becomes
	 * visible.  The write is skipped (and *changed left alone) when
	 * the slot already holds the wanted value.
	 */
	assert(get_cpulocal_var(ptproc)->p_seg.p_cr3_v);
	if(get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] != pdeval) {
		get_cpulocal_var(ptproc)->p_seg.p_cr3_v[pde] = pdeval;
		*changed = 1;
	}

	/* Memory is now available, but only the 4MB window of virtual
	 * address space that we have mapped; calculate how much of
	 * the requested range is visible and return that in *bytes,
	 * if that is less than the requested range.
	 */
	offset = linaddr & I386_VM_OFFSET_MASK_4MB; /* Offset in 4MB window. */
	*bytes = MIN(*bytes, I386_BIG_PAGE_SIZE - offset);

	/* Return the linear address of the start of the new mapping. */
	return I386_BIG_PAGE_SIZE*pde + offset;
}
/*===========================================================================*
 *				main					     *
 *===========================================================================*/
PUBLIC void main()
{
/* Start the ball rolling.  Kernel entry point after assembly start-up:
 * initializes the interrupt controller, clears the process and privilege
 * tables, then builds a process-table entry for every program in the boot
 * image.  NOTE: this view of the function is truncated after the boot-image
 * loop; the remainder (and main's closing brace) is not visible here.
 */
  struct boot_image *ip;	/* boot image pointer */
  register struct proc *rp;	/* process pointer */
  register struct priv *sp;	/* privilege structure pointer */
  register int i, s;
  int hdrindex;			/* index to array of a.out headers */
  phys_clicks text_base;
  vir_clicks text_clicks, data_clicks;
  reg_t ktsb;			/* kernel task stack base */
  struct exec e_hdr;		/* for a copy of an a.out header */

  /* Initialize the interrupt controller. */
  intr_init(1);

  /* Clear the process table. Anounce each slot as empty and set up mappings
   * for proc_addr() and proc_nr() macros. Do the same for the table with
   * privilege structures for the system processes.
   */
  for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
	rp->p_rts_flags = SLOT_FREE;		/* initialize free slot */
	rp->p_nr = i;				/* proc number from ptr */
	(pproc_addr + NR_TASKS)[i] = rp;	/* proc ptr from number */
  }
  for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
	sp->s_proc_nr = NONE;			/* initialize as free */
	sp->s_id = i;				/* priv structure index */
	ppriv_addr[i] = sp;			/* priv ptr from number */
  }

  /* Set up proc table entries for processes in boot image.  The stacks of the
   * kernel tasks are initialized to an array in data space.  The stacks
   * of the servers have been added to the data segment by the monitor, so
   * the stack pointer is set to the end of the data segment.  All the
   * processes are in low memory on the 8086. On the 386 only the kernel
   * is in low memory, the rest is loaded in extended memory.
   */

  /* Task stacks. */
  ktsb = (reg_t) t_stack;

  for (i=0; i < NR_BOOT_PROCS; ++i) {
	ip = &image[i];				/* process' attributes */
	rp = proc_addr(ip->proc_nr);		/* get process pointer */
	rp->p_max_priority = ip->priority;	/* max scheduling priority */
	rp->p_priority = ip->priority;		/* current priority */
	rp->p_quantum_size = ip->quantum;	/* quantum size in ticks */
	rp->p_ticks_left = ip->quantum;		/* current credit */
	strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */
	(void) get_priv(rp, (ip->flags & SYS_PROC));    /* assign structure */
	priv(rp)->s_flags = ip->flags;			/* process flags */
	priv(rp)->s_trap_mask = ip->trap_mask;		/* allowed traps */
	priv(rp)->s_call_mask = ip->call_mask;		/* kernel call mask */
	priv(rp)->s_ipc_to.chunk[0] = ip->ipc_to;	/* restrict targets */
	if (iskerneln(proc_nr(rp))) {		/* part of the kernel? */
		if (ip->stksize > 0) {		/* HARDWARE stack size is 0 */
			/* Place a guard word at the low end of the stack. */
			rp->p_priv->s_stack_guard = (reg_t *) ktsb;
			*rp->p_priv->s_stack_guard = STACK_GUARD;
		}
		ktsb += ip->stksize;	/* point to high end of stack */
		rp->p_reg.sp = ktsb;	/* this task's initial stack ptr */
		/* NOTE(review): this assignment is overwritten below from
		 * e_hdr.a_syms for every process; presumably vestigial —
		 * verify against the original release.
		 */
		text_base = kinfo.code_base >> CLICK_SHIFT;
					/* processes that are in the kernel */
		hdrindex = 0;		/* all use the first a.out header */
	} else {
		hdrindex = 1 + i-NR_TASKS;	/* servers, drivers, INIT */
	}

	/* The bootstrap loader created an array of the a.out headers at
	 * absolute address 'aout'. Get one element to e_hdr.
	 */
	phys_copy(aout + hdrindex * A_MINHDR, vir2phys(&e_hdr),
		(phys_bytes) A_MINHDR);

	/* Convert addresses to clicks and build process memory map.
	 * NOTE(review): a_syms here appears to hold the process load
	 * address (patched by the boot monitor), not a symbol-table size —
	 * confirm against the boot monitor's header handling.
	 */
	text_base = e_hdr.a_syms >> CLICK_SHIFT;
	text_clicks = (e_hdr.a_text + CLICK_SIZE-1) >> CLICK_SHIFT;
	if (!(e_hdr.a_flags & A_SEP)) text_clicks = 0;	   /* common I&D */
	data_clicks = (e_hdr.a_total + CLICK_SIZE-1) >> CLICK_SHIFT;
	rp->p_memmap[T].mem_phys = text_base;
	rp->p_memmap[T].mem_len  = text_clicks;
	rp->p_memmap[D].mem_phys = text_base + text_clicks;
	rp->p_memmap[D].mem_len  = data_clicks;
	rp->p_memmap[S].mem_phys = text_base + text_clicks + data_clicks;
	rp->p_memmap[S].mem_vir  = data_clicks;	/* empty - stack is in data */

	/* Set initial register values.  The processor status word for tasks
	 * is different from that of other processes because tasks can
	 * access I/O; this is not allowed to less-privileged processes
	 */
	rp->p_reg.pc = (reg_t) ip->initial_pc;
	rp->p_reg.psw = (iskernelp(rp)) ? INIT_TASK_PSW : INIT_PSW;

	/* Initialize the server stack pointer. Take it down one word
	 * to give crtso.s something to use as "argc".
	 */
	if (isusern(proc_nr(rp))) {		/* user-space process? */
		rp->p_reg.sp = (rp->p_memmap[S].mem_vir +
				rp->p_memmap[S].mem_len) << CLICK_SHIFT;
		rp->p_reg.sp -= sizeof(reg_t);
	}

	/* Set ready. The HARDWARE task is never ready. */
	if (rp->p_nr != HARDWARE) {
		rp->p_rts_flags = 0;		/* runnable if no flags */
		lock_enqueue(rp);		/* add to scheduling queues */
	} else {
		rp->p_rts_flags = NO_MAP;	/* prevent from running */
	}

	/* Code and data segments must be allocated in protected mode. */
	alloc_segments(rp);
  }
/*===========================================================================*
 *				main					     *
 *===========================================================================*/
PUBLIC void main()
{
/* Start the ball rolling.  Annotated copy of the kernel entry point:
 * initialize the interrupt controller, clear the process and privilege
 * tables, and set up a process-table entry for every boot-image program.
 * NOTE: this view is truncated after the boot-image loop; the remainder
 * (and main's closing brace) is not visible here.
 */
  struct boot_image *ip;	/* boot image pointer */
  register struct proc *rp;	/* process pointer */
  register struct priv *sp;	/* privilege structure pointer */
  register int i, s;
  /* Index into the array of a.out headers. */
  int hdrindex;			/* index to array of a.out headers */
  phys_clicks text_base;
  vir_clicks text_clicks, data_clicks;
  /* Base (low end) of the kernel task stacks. */
  reg_t ktsb;			/* kernel task stack base */
  /* Holds a copy of one a.out header. */
  struct exec e_hdr;		/* for a copy of an a.out header */

  /* Initialize the interrupt controller (the 8259 chip). */
  intr_init(1);

  /* Clear the process table. Anounce each slot as empty and set up mappings
   * for proc_addr() and proc_nr() macros. Do the same for the table with
   * privilege structures for the system processes.
   */
  /* Initialize the process table and the process-pointer table.
   * BEG_PROC_ADDR is the address of the process table.
   */
  for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
	/* Mark every process-table slot as free. */
	rp->p_rts_flags = SLOT_FREE;		/* initialize free slot */
	/* Process number; i starts at -NR_TASKS, so system tasks have
	 * negative process numbers.
	 */
	rp->p_nr = i;				/* proc number from ptr */
	/* Establish the mapping between the process array and the
	 * process-pointer array.
	 */
	(pproc_addr + NR_TASKS)[i] = rp;	/* proc ptr from number */
  }
  /* Initialize the privilege table. */
  for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
	sp->s_proc_nr = NONE;			/* initialize as free */
	sp->s_id = i;				/* priv structure index */
	/* Establish the mapping between the privilege table and the
	 * privilege-pointer table.
	 */
	ppriv_addr[i] = sp;			/* priv ptr from number */
  }

  /* Set up proc table entries for tasks and servers.  The stacks of the
   * kernel tasks are initialized to an array in data space.  The stacks
   * of the servers have been added to the data segment by the monitor, so
   * the stack pointer is set to the end of the data segment.  All the
   * processes are in low memory on the 8086.  On the 386 only the kernel
   * is in low memory, the rest is loaded in extended memory.
   */

  /* Task stacks. */
  ktsb = (reg_t) t_stack;

  /* Allocate a process-table entry for each program contained in the
   * system boot image.
   */
  for (i=0; i < NR_BOOT_PROCS; ++i) {
	ip = &image[i];				/* process' attributes */
	/* Get the process pointer. */
	rp = proc_addr(ip->proc_nr);		/* get process pointer */
	/* Maximum scheduling priority. */
	rp->p_max_priority = ip->priority;	/* max scheduling priority */
	/* Current scheduling priority. */
	rp->p_priority = ip->priority;		/* current priority */
	/* Full quantum size. */
	rp->p_quantum_size = ip->quantum;	/* quantum size in ticks */
	/* Ticks left in the current quantum. */
	rp->p_ticks_left = ip->quantum;		/* current credit */
	/* Copy the program name into the process-table entry. */
	strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */
	/* Assign a privilege structure to the process, i.e. allocate one
	 * entry from the system privilege table.
	 */
	(void) get_priv(rp, (ip->flags & SYS_PROC));	/* assign structure */
	/* Initialize the privilege structure's flags. */
	priv(rp)->s_flags = ip->flags;			/* process flags */
	/* Initialize the allowed system-call traps. */
	priv(rp)->s_trap_mask = ip->trap_mask;		/* allowed traps */
	priv(rp)->s_call_mask = ip->call_mask;		/* kernel call mask */
	/* Initialize the process' message-send bitmap. */
	priv(rp)->s_ipc_to.chunk[0] = ip->ipc_to;	/* restrict targets */
	/* Is this process a kernel task? */
	if (iskerneln(proc_nr(rp))) {		/* part of the kernel? */
		/* If the task's stack size is greater than zero, plant a
		 * stack guard word.
		 */
		if (ip->stksize > 0) {		/* HARDWARE stack size is 0 */
			/* Set the kernel task's stack-guard pointer. */
			rp->p_priv->s_stack_guard = (reg_t *) ktsb;
			/* The arrow operator (->) binds tighter than the
			 * dereference operator (*), so this is equivalent to
			 *   *(rp->p_priv->s_stack_guard) = STACK_GUARD
			 * The effect is to place a special value — the stack
			 * guard word — at the very top (lowest address) of
			 * the stack.
			 */
			*rp->p_priv->s_stack_guard = STACK_GUARD;
		}
		ktsb += ip->stksize;	/* point to high end of stack */
		/* The task's initial stack pointer. */
		rp->p_reg.sp = ktsb;	/* this task's initial stack ptr */
		/* Shift the kernel code base right by CLICK_SHIFT and store
		 * it in text_base.  NOTE(review): overwritten below from
		 * e_hdr.a_syms — presumably vestigial; verify.
		 */
		text_base = kinfo.code_base >> CLICK_SHIFT;
					/* processes that are in the kernel */
		/* All kernel tasks share the first a.out header. */
		hdrindex = 0;		/* all use the first a.out header */
	} else {
		/* Not a kernel task: compute its a.out header index.  Entry
		 * 0 is reserved for the kernel tasks, hence the + 1.
		 */
		hdrindex = 1 + i-NR_TASKS;	/* servers, drivers, INIT */
	}

	/* The bootstrap loader created an array of the a.out headers at
	 * absolute address 'aout'.  Get one element to e_hdr.
	 */
	phys_copy(aout + hdrindex * A_MINHDR, vir2phys(&e_hdr),
		(phys_bytes) A_MINHDR);

	/* Convert addresses to clicks and build process memory map.
	 * NOTE(review): a_syms here appears to hold the process load address
	 * (patched by the boot monitor), not the symbol-table size — and it
	 * makes the earlier kinfo.code_base assignment redundant; confirm
	 * against the boot monitor's header handling.
	 */
	text_base = e_hdr.a_syms >> CLICK_SHIFT;
	/* Text segment size in clicks, rounded up. */
	text_clicks = (e_hdr.a_text + CLICK_SIZE-1) >> CLICK_SHIFT;
	/* If the a.out header says instruction and data space are combined,
	 * there is no separate text segment.
	 */
	if (!(e_hdr.a_flags & A_SEP)) text_clicks = 0;	   /* common I&D */
	/* Total memory taken by the program, in clicks, rounded up. */
	data_clicks = (e_hdr.a_total + CLICK_SIZE-1) >> CLICK_SHIFT;
	/* Initialize the process' memory-map structures. */
	rp->p_memmap[T].mem_phys = text_base;
	rp->p_memmap[T].mem_len  = text_clicks;
	rp->p_memmap[D].mem_phys = text_base + text_clicks;
	rp->p_memmap[D].mem_len  = data_clicks;
	rp->p_memmap[S].mem_phys = text_base + text_clicks + data_clicks;
	rp->p_memmap[S].mem_vir  = data_clicks;	/* empty - stack is in data */

	/* Set initial register values.  The processor status word for tasks
	 * is different from that of other processes because tasks can
	 * access I/O; this is not allowed to less-privileged processes.
	 */
	/* Initialize the process' program counter and processor status
	 * word.
	 */
	rp->p_reg.pc = (reg_t) ip->initial_pc;
	rp->p_reg.psw = (iskernelp(rp)) ? INIT_TASK_PSW : INIT_PSW;

	/* Initialize the server stack pointer.  Take it down one word
	 * to give crtso.s something to use as "argc".
	 */
	if (isusern(proc_nr(rp))) {		/* user-space process? */
		rp->p_reg.sp = (rp->p_memmap[S].mem_vir +
				rp->p_memmap[S].mem_len) << CLICK_SHIFT;
		rp->p_reg.sp -= sizeof(reg_t);
	}

	/* Set ready.  The HARDWARE task is never ready. */
	if (rp->p_nr != HARDWARE) {
		/* Not HARDWARE: clear the flags and enqueue for
		 * scheduling.
		 */
		rp->p_rts_flags = 0;		/* runnable if no flags */
		lock_enqueue(rp);		/* add to scheduling queues */
	} else {
		/* The HARDWARE task is prevented from ever running. */
		rp->p_rts_flags = NO_MAP;	/* prevent from running */
	}

	/* Code and data segments must be allocated in protected mode. */
	alloc_segments(rp);
  }
/* Handle a page fault.  Recover in-kernel faults that occur inside the
 * phys_copy() window, panic on any other nested (in-kernel) fault or on a
 * fault in a system process that cannot have one, and otherwise suspend the
 * faulting process and notify VM so it can resolve the fault.
 */
PRIVATE void pagefault( struct proc *pr,
			struct exception_frame * frame,
			int is_nested)
{
	int in_physcopy = 0;
	reg_t pagefaultcr2;
	message m_pagefault;
	int err;

	assert(frame);

	/* CR2 holds the faulting linear address. */
	pagefaultcr2 = read_cr2();

#if 0
	printf("kernel: pagefault in pr %d, addr 0x%lx, his cr3 0x%lx, actual cr3 0x%lx\n",
		pr->p_endpoint, pagefaultcr2, pr->p_seg.p_cr3, read_cr3());
#endif

	/* If the process has its own page table it must be the one that is
	 * currently loaded.
	 */
	if(pr->p_seg.p_cr3) {
		assert(pr->p_seg.p_cr3 == read_cr3());
	}

	/* Did the fault happen inside the phys_copy() code range?  That
	 * range is bracketed by the phys_copy and phys_copy_fault labels.
	 */
	in_physcopy = (frame->eip > (vir_bytes) phys_copy) &&
	   (frame->eip < (vir_bytes) phys_copy_fault);

	if((is_nested || iskernelp(pr)) &&
		catch_pagefaults && in_physcopy) {
#if 0
		printf("pf caught! addr 0x%lx\n", pagefaultcr2);
#endif
		/* Expected fault in phys_copy: redirect execution to the
		 * recovery label.  For a non-nested fault, also hand the
		 * faulting address back in the process' return register.
		 */
		if (is_nested) {
			frame->eip = (reg_t) phys_copy_fault_in_kernel;
		}
		else {
			pr->p_reg.pc = (reg_t) phys_copy_fault;
			pr->p_reg.retreg = pagefaultcr2;
		}

		return;
	}

	/* Any other nested fault is a kernel bug. */
	if(is_nested) {
		panic("pagefault in kernel at pc 0x%lx address 0x%lx",
			frame->eip, pagefaultcr2);
	}

	/* System processes that don't have their own page table can't
	 * have page faults. VM does have its own page table but also
	 * can't have page faults (because VM has to handle them).
	 */
	if((pr->p_endpoint <= INIT_PROC_NR &&
	 !(pr->p_misc_flags & MF_FULLVM)) || pr->p_endpoint == VM_PROC_NR) {
		/* Page fault we can't / don't want to
		 * handle.
		 */
		printf("pagefault for process %d ('%s'), pc = 0x%x, addr = 0x%x, flags = 0x%x, is_nested %d\n",
			pr->p_endpoint, pr->p_name, pr->p_reg.pc,
			pagefaultcr2, frame->errcode, is_nested);
		proc_stacktrace(pr);
		printf("pc of pagefault: 0x%lx\n", frame->eip);
		panic("page fault in system process: %d", pr->p_endpoint);

		/* Not reached: panic does not return. */
		return;
	}

	/* Don't schedule this process until pagefault is handled. */
	assert(pr->p_seg.p_cr3 == read_cr3());
	assert(!RTS_ISSET(pr, RTS_PAGEFAULT));
	RTS_SET(pr, RTS_PAGEFAULT);

	/* tell Vm about the pagefault */
	m_pagefault.m_source = pr->p_endpoint;
	m_pagefault.m_type = VM_PAGEFAULT;
	m_pagefault.VPF_ADDR = pagefaultcr2;
	m_pagefault.VPF_FLAGS = frame->errcode;

	if ((err = mini_send(pr, VM_PROC_NR,
				&m_pagefault, FROM_KERNEL))) {
		panic("WARNING: pagefault: mini_send returned %d\n", err);
	}

	return;
}
/*===========================================================================*
 *				exception				     *
 *===========================================================================*/
PUBLIC void exception_handler(int is_nested, struct exception_frame * frame)
{
/* An exception or unexpected interrupt has occurred.  Dispatch on the
 * vector number: recover expected nested faults in the user-copy routines,
 * route page faults to pagefault(), convert user-process exceptions into
 * signals, and dump state and panic for anything in kernel code.
 */
  struct ex_s {
	char *msg;
	int signum;
	int minprocessor;	/* first processor generation with this trap */
  };
  /* Exception descriptions, indexed by vector number. */
  static struct ex_s ex_data[] = {
	{ "Divide error", SIGFPE, 86 },
	{ "Debug exception", SIGTRAP, 86 },
	{ "Nonmaskable interrupt", SIGBUS, 86 },
	{ "Breakpoint", SIGEMT, 86 },
	{ "Overflow", SIGFPE, 86 },
	{ "Bounds check", SIGFPE, 186 },
	{ "Invalid opcode", SIGILL, 186 },
	{ "Coprocessor not available", SIGFPE, 186 },
	{ "Double fault", SIGBUS, 286 },
	{ "Coprocessor segment overrun", SIGSEGV, 286 },
	{ "Invalid TSS", SIGSEGV, 286 },
	{ "Segment not present", SIGSEGV, 286 },
	{ "Stack exception", SIGSEGV, 286 },	/* STACK_FAULT already used */
	{ "General protection", SIGSEGV, 286 },
	{ "Page fault", SIGSEGV, 386 },		/* not close */
	{ NULL, SIGILL, 0 },			/* probably software trap */
	{ "Coprocessor error", SIGFPE, 386 },
	{ "Alignment check", SIGBUS, 386 },
	{ "Machine check", SIGBUS, 386 },
	{ "SIMD exception", SIGFPE, 386 },
  };
  register struct ex_s *ep;
  struct proc *saved_proc;

  /* Save proc_ptr, because it may be changed by debug statements. */
  saved_proc = proc_ptr;

  ep = &ex_data[frame->vector];

  if (frame->vector == 2) {	/* spurious NMI on some machines */
	printf("got spurious NMI\n");
	return;
  }

  /*
   * handle special cases for nested problems as they might be tricky or filter
   * them out quickly if the traps are not nested
   */
  if (is_nested) {
	/*
	 * if a problem occured while copying a message from userspace because
	 * of a wrong pointer supplied by userland, handle it the only way we
	 * can handle it ...
	 */
	if (((void*)frame->eip >= (void*)copy_msg_to_user &&
			(void*)frame->eip <= (void*)__copy_msg_to_user_end) ||
			((void*)frame->eip >= (void*)copy_msg_from_user &&
			(void*)frame->eip <= (void*)__copy_msg_from_user_end)) {
		switch(frame->vector) {
		/* these error are expected */
		case PAGE_FAULT_VECTOR:
		case PROTECTION_VECTOR:
			/* Resume at the common failure label. */
			frame->eip = (reg_t) __user_copy_msg_pointer_failure;
			return;
		default:
			panic("Copy involving a user pointer failed unexpectedly!");
		}
	}
  }

  if(frame->vector == PAGE_FAULT_VECTOR) {
	pagefault(saved_proc, frame, is_nested);
	return;
  }

  /* If an exception occurs while running a process, the is_nested variable
   * will be zero. Exceptions in interrupt handlers or system traps will make
   * is_nested non-zero.
   */
  if (is_nested == 0 && ! iskernelp(saved_proc)) {
#if 0
	{
		printf(
		"vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n",
		frame->vector, (unsigned long)frame->errcode,
		(unsigned long)frame->eip, frame->cs,
		(unsigned long)frame->eflags);
		printseg("cs: ", 1, saved_proc, frame->cs);
		printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
		if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
			printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
		}
		proc_stacktrace(saved_proc);
	}
#endif
	/* Deliver the corresponding signal to the user process. */
	cause_sig(proc_nr(saved_proc), ep->signum);
	return;
  }

  /* Exception in system code. This is not supposed to happen. */
  if (ep->msg == NULL || machine.processor < ep->minprocessor)
	printf("\nIntel-reserved exception %d\n", frame->vector);
  else
	printf("\n%s\n", ep->msg);
  printf("is_nested = %d ", is_nested);

  printf("vec_nr= %d, trap_errno= 0x%x, eip= 0x%x, "
	"cs= 0x%x, eflags= 0x%x trap_esp 0x%08x\n",
	frame->vector, frame->errcode, frame->eip,
	frame->cs, frame->eflags, frame);
  printf("KERNEL registers :\n");
  /* The general registers were pushed just below the exception frame. */
  printf(
	"\t%%eax 0x%08x %%ebx 0x%08x %%ecx 0x%08x %%edx 0x%08x\n"
	"\t%%esp 0x%08x %%ebp 0x%08x %%esi 0x%08x %%edi 0x%08x\n",
	((u32_t *)frame)[-1],
	((u32_t *)frame)[-2],
	((u32_t *)frame)[-3],
	((u32_t *)frame)[-4],
	((u32_t *)frame)[-5],
	((u32_t *)frame)[-6],
	((u32_t *)frame)[-7],
	((u32_t *)frame)[-8]
  );
  printseg("ker cs: ", 1, NULL, frame->cs);
  printseg("ker ds: ", 0, NULL, DS_SELECTOR);
  /* TODO should we enable this only when compiled for some debug mode? */
  if (saved_proc) {
	printf("scheduled was: process %d (%s), ", proc_nr(saved_proc),
		saved_proc->p_name);
	printf("pc = %u:0x%x\n", (unsigned) saved_proc->p_reg.cs,
		(unsigned) saved_proc->p_reg.pc);
	proc_stacktrace(saved_proc);

	panic("Unhandled kernel exception");
  }
  else {
	/* in an early stage of boot process we don't have processes yet */
	panic("exception in kernel while booting");
  }
}
/*===========================================================================*
 *				exception				     *
 *===========================================================================*/
PUBLIC void exception_handler(int is_nested, struct exception_frame * frame)
{
/* An exception or unexpected interrupt has occurred.  Later variant of the
 * handler: recovers nested faults in the user-copy and FPU-restore code
 * ranges, routes page faults to pagefault(), signals user processes, and
 * delegates kernel-mode disasters to inkernel_disaster().  The ex_data
 * table is defined elsewhere in this file's full version.
 */
  register struct ex_s *ep;
  struct proc *saved_proc;

  /* Save proc_ptr, because it may be changed by debug statements. */
  /* NOTE: proc_ptr is a CPU-local variable in this (SMP-era) variant. */
  saved_proc = get_cpulocal_var(proc_ptr);

  ep = &ex_data[frame->vector];

  if (frame->vector == 2) {	/* spurious NMI on some machines */
	printf("got spurious NMI\n");
	return;
  }

  /*
   * handle special cases for nested problems as they might be tricky or filter
   * them out quickly if the traps are not nested
   */
  if (is_nested) {
	/*
	 * if a problem occured while copying a message from userspace because
	 * of a wrong pointer supplied by userland, handle it the only way we
	 * can handle it ...
	 */
	if (((void*)frame->eip >= (void*)copy_msg_to_user &&
			(void*)frame->eip <= (void*)__copy_msg_to_user_end) ||
			((void*)frame->eip >= (void*)copy_msg_from_user &&
			(void*)frame->eip <= (void*)__copy_msg_from_user_end)) {
		switch(frame->vector) {
		/* these error are expected */
		case PAGE_FAULT_VECTOR:
		case PROTECTION_VECTOR:
			/* Resume at the common failure label. */
			frame->eip = (reg_t) __user_copy_msg_pointer_failure;
			return;
		default:
			panic("Copy involving a user pointer failed unexpectedly!");
		}
	}

	/* Pass any error resulting from restoring FPU state, as a FPU
	 * exception to the process.
	 */
	if (((void*)frame->eip >= (void*)fxrstor &&
			(void *)frame->eip <= (void*)__fxrstor_end) ||
			((void*)frame->eip >= (void*)frstor &&
			(void *)frame->eip <= (void*)__frstor_end)) {
		frame->eip = (reg_t) __frstor_failure;
		return;
	}
  }

  if(frame->vector == PAGE_FAULT_VECTOR) {
	pagefault(saved_proc, frame, is_nested);
	return;
  }

  /* If an exception occurs while running a process, the is_nested variable
   * will be zero. Exceptions in interrupt handlers or system traps will make
   * is_nested non-zero.
   */
  if (is_nested == 0 && ! iskernelp(saved_proc)) {
#if 0
	{
		printf(
		"vec_nr= %d, trap_errno= 0x%lx, eip= 0x%lx, cs= 0x%x, eflags= 0x%lx\n",
		frame->vector, (unsigned long)frame->errcode,
		(unsigned long)frame->eip, frame->cs,
		(unsigned long)frame->eflags);
		printseg("cs: ", 1, saved_proc, frame->cs);
		printseg("ds: ", 0, saved_proc, saved_proc->p_reg.ds);
		if(saved_proc->p_reg.ds != saved_proc->p_reg.ss) {
			printseg("ss: ", 0, saved_proc, saved_proc->p_reg.ss);
		}
		proc_stacktrace(saved_proc);
	}
#endif
	/* Deliver the corresponding signal to the user process. */
	cause_sig(proc_nr(saved_proc), ep->signum);
	return;
  }

  /* Exception in system code. This is not supposed to happen. */
  inkernel_disaster(saved_proc, frame, ep, is_nested);

  /* inkernel_disaster() must not return. */
  panic("return from inkernel_disaster");
}
/*===========================================================================*
 *				main					     *
 *===========================================================================*/
PUBLIC void main()
{
/* Start the ball rolling.  Later (endpoint-era) variant of the kernel entry
 * point: architecture init, clearing of the process and privilege tables,
 * then per-boot-process setup including the per-call kernel call mask.
 * NOTE: this view is truncated after the boot-image loop; the remainder
 * (and main's closing brace) is not visible here.
 */
  struct boot_image *ip;	/* boot image pointer */
  register struct proc *rp;	/* process pointer */
  register struct priv *sp;	/* privilege structure pointer */
  register int i, j, s;
  int hdrindex;			/* index to array of a.out headers */
  phys_clicks text_base;
  vir_clicks text_clicks, data_clicks, st_clicks;
  reg_t ktsb;			/* kernel task stack base */
  struct exec e_hdr;		/* for a copy of an a.out header */

  /* Architecture-dependent initialization. */
  arch_init();

  /* Global value to test segment sanity. */
  magictest = MAGICTEST;

  /* Clear the process table. Anounce each slot as empty and set up mappings
   * for proc_addr() and proc_nr() macros. Do the same for the table with
   * privilege structures for the system processes.
   */
  for (rp = BEG_PROC_ADDR, i = -NR_TASKS; rp < END_PROC_ADDR; ++rp, ++i) {
	rp->p_rts_flags = SLOT_FREE;		/* initialize free slot */
#if DEBUG_SCHED_CHECK
	rp->p_magic = PMAGIC;
#endif
	rp->p_nr = i;				/* proc number from ptr */
	rp->p_endpoint = _ENDPOINT(0, rp->p_nr); /* generation no. 0 */
  }
  for (sp = BEG_PRIV_ADDR, i = 0; sp < END_PRIV_ADDR; ++sp, ++i) {
	sp->s_proc_nr = NONE;			/* initialize as free */
	sp->s_id = i;				/* priv structure index */
	ppriv_addr[i] = sp;			/* priv ptr from number */
  }

  /* Set up proc table entries for processes in boot image.  The stacks of the
   * kernel tasks are initialized to an array in data space.  The stacks
   * of the servers have been added to the data segment by the monitor, so
   * the stack pointer is set to the end of the data segment.  All the
   * processes are in low memory on the 8086. On the 386 only the kernel
   * is in low memory, the rest is loaded in extended memory.
   */

  /* Task stacks. */
  ktsb = (reg_t) t_stack;

  for (i=0; i < NR_BOOT_PROCS; ++i) {
	int ci;
	bitchunk_t fv;

	ip = &image[i];				/* process' attributes */
	rp = proc_addr(ip->proc_nr);		/* get process pointer */
	ip->endpoint = rp->p_endpoint;		/* ipc endpoint */
	rp->p_max_priority = ip->priority;	/* max scheduling priority */
	rp->p_priority = ip->priority;		/* current priority */
	rp->p_quantum_size = ip->quantum;	/* quantum size in ticks */
	rp->p_ticks_left = ip->quantum;		/* current credit */
	strncpy(rp->p_name, ip->proc_name, P_NAME_LEN); /* set process name */
	(void) get_priv(rp, (ip->flags & SYS_PROC));    /* assign structure */
	priv(rp)->s_flags = ip->flags;			/* process flags */
	priv(rp)->s_trap_mask = ip->trap_mask;		/* allowed traps */

	/* Warn about violations of the boot image table order consistency. */
	if (priv_id(rp) != s_nr_to_id(ip->proc_nr) && (ip->flags & SYS_PROC))
		kprintf("Warning: boot image table has wrong process order\n");

	/* Initialize call mask bitmap from unordered set.
	 * A single SYS_ALL_CALLS is a special case - it
	 * means all calls are allowed.
	 */
	if(ip->nr_k_calls == 1 && ip->k_calls[0] == SYS_ALL_CALLS)
		fv = ~0;		/* fill call mask */
	else
		fv = 0;			/* clear call mask */

	for(ci = 0; ci < CALL_MASK_SIZE; ci++)	/* fill or clear call mask */
		priv(rp)->s_k_call_mask[ci] = fv;
	if(!fv)			/* not all full? enter calls bit by bit */
		for(ci = 0; ci < ip->nr_k_calls; ci++)
			SET_BIT(priv(rp)->s_k_call_mask,
				ip->k_calls[ci]-KERNEL_CALL);

	for (j = 0; j < NR_SYS_PROCS && j < BITCHUNK_BITS; j++)
		if (ip->ipc_to & (1 << j))
			set_sendto_bit(rp, j);	/* restrict targets */

	if (iskerneln(proc_nr(rp))) {		/* part of the kernel? */
		if (ip->stksize > 0) {		/* HARDWARE stack size is 0 */
			/* Place a guard word at the low end of the stack. */
			rp->p_priv->s_stack_guard = (reg_t *) ktsb;
			*rp->p_priv->s_stack_guard = STACK_GUARD;
		}
		ktsb += ip->stksize;	/* point to high end of stack */
		rp->p_reg.sp = ktsb;	/* this task's initial stack ptr */
		hdrindex = 0;		/* all use the first a.out header */
	} else {
		hdrindex = 1 + i-NR_TASKS;	/* servers, drivers, INIT */
	}

	/* Architecture-specific way to find out aout header of this
	 * boot process.
	 */
	arch_get_aout_headers(hdrindex, &e_hdr);

	/* Convert addresses to clicks and build process memory map.
	 * NOTE(review): a_syms here appears to hold the process load
	 * address (patched by the boot monitor), not a symbol-table size —
	 * confirm against the boot monitor's header handling.
	 */
	text_base = e_hdr.a_syms >> CLICK_SHIFT;
	text_clicks = (e_hdr.a_text + CLICK_SIZE-1) >> CLICK_SHIFT;
	data_clicks = (e_hdr.a_data+e_hdr.a_bss + CLICK_SIZE-1) >> CLICK_SHIFT;
	st_clicks= (e_hdr.a_total + CLICK_SIZE-1) >> CLICK_SHIFT;
	if (!(e_hdr.a_flags & A_SEP)) {
		/* Common I&D: text is counted as part of data. */
		data_clicks= (e_hdr.a_text+e_hdr.a_data+e_hdr.a_bss +
			CLICK_SIZE-1) >> CLICK_SHIFT;
		text_clicks = 0;	   /* common I&D */
	}
	rp->p_memmap[T].mem_phys = text_base;
	rp->p_memmap[T].mem_len  = text_clicks;
	rp->p_memmap[D].mem_phys = text_base + text_clicks;
	rp->p_memmap[D].mem_len  = data_clicks;
	rp->p_memmap[S].mem_phys = text_base + text_clicks + st_clicks;
	rp->p_memmap[S].mem_vir  = st_clicks;
	rp->p_memmap[S].mem_len  = 0;

	/* Set initial register values.  The processor status word for tasks
	 * is different from that of other processes because tasks can
	 * access I/O; this is not allowed to less-privileged processes
	 */
	rp->p_reg.pc = (reg_t) ip->initial_pc;
	rp->p_reg.psw = (iskernelp(rp)) ? INIT_TASK_PSW : INIT_PSW;

	/* Initialize the server stack pointer. Take it down one word
	 * to give crtso.s something to use as "argc".
	 */
	if (isusern(proc_nr(rp))) {		/* user-space process? */
		rp->p_reg.sp = (rp->p_memmap[S].mem_vir +
				rp->p_memmap[S].mem_len) << CLICK_SHIFT;
		rp->p_reg.sp -= sizeof(reg_t);
	}

	/* scheduling functions depend on proc_ptr pointing somewhere. */
	if(!proc_ptr) proc_ptr = rp;

	/* If this process has its own page table, VM will set the
	 * PT up and manage it. VM will signal the kernel when it has
	 * done this; until then, don't let it run.
	 */
	if(priv(rp)->s_flags & PROC_FULLVM)
		RTS_SET(rp, VMINHIBIT);

	/* Set ready. The HARDWARE task is never ready. */
	if (rp->p_nr == HARDWARE) RTS_SET(rp, PROC_STOP);
	RTS_UNSET(rp, SLOT_FREE);	/* remove SLOT_FREE and schedule */
	alloc_segments(rp);
  }