Example #1
0
void
trap(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	vm_offset_t eva;
	ksiginfo_t ksi;
#ifdef POWERFAIL_NMI
	static int lastalert = 0;
#endif

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
	         if (ipi_nmi_handler() == 0)
	                   goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

#ifdef	HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI so we check for that first.
	 * If the HWPMC module is active, 'pmc_hook' will point to
	 * the function to be called.  A return value of '1' from the
	 * hook means that the NMI was handled by it and that we can
	 * return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
	    goto out;
#endif

	if (type == T_MCHK) {
		if (!mca_intr())
			trap_fatal(frame, 0);
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in it's per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if (type == T_DTRACE_PROBE || type == T_DTRACE_RET ||
	    type == T_BPTFLT) {
		struct reg regs;

		fill_frame_regs(frame, &regs);
		if (type == T_DTRACE_PROBE &&
		    dtrace_fasttrap_probe_ptr != NULL &&
		    dtrace_fasttrap_probe_ptr(&regs) == 0)
			goto out;
		if (type == T_BPTFLT &&
		    dtrace_pid_probe_ptr != NULL &&
		    dtrace_pid_probe_ptr(&regs) == 0)
			goto out;
		if (type == T_DTRACE_RET &&
		    dtrace_return_probe_ptr != NULL &&
		    dtrace_return_probe_ptr(&regs) == 0)
			goto out;
	}
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		goto out;
#endif

	if ((frame->tf_eflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL || (frame->tf_eflags & PSL_VM))
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_BPTFLT && type != T_TRCTRAP &&
			 frame->tf_eip != (int)cpu_switch_load_gs) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
			/*
			 * Page faults need interrupts disabled until later,
			 * and we shouldn't enable interrupts while holding
			 * a spin lock or if servicing an NMI.
			 */
			if (type != T_NMI && type != T_PAGEFLT &&
			    td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}
	eva = 0;
	code = frame->tf_err;
	if (type == T_PAGEFLT) {
		/*
		 * For some Cyrix CPUs, %cr2 is clobbered by
		 * interrupts.  This problem is worked around by using
		 * an interrupt gate for the pagefault handler.  We
		 * are finally ready to read %cr2 and then must
		 * reenable interrupts.
		 *
		 * If we get a page fault while in a critical section, then
		 * it is most likely a fatal kernel page fault.  The kernel
		 * is already going to panic trying to get a sleep lock to
		 * do the VM lookup, so just consider it a fatal trap so the
		 * kernel can print out a useful trap message and even get
		 * to the debugger.
		 *
		 * If we get a page fault while holding a non-sleepable
		 * lock, then it is most likely a fatal kernel page fault.
		 * If WITNESS is enabled, then it's going to whine about
		 * bogus LORs with various VM locks, so just skip to the
		 * fatal trap handling directly.
		 */
		eva = rcr2();
		if (td->td_critnest != 0 ||
		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
		    "Kernel page fault") != 0)
			trap_fatal(frame, eva);
		else
			enable_intr();
	}

        if ((ISPL(frame->tf_cs) == SEL_UPL) ||
	    ((frame->tf_eflags & PSL_VM) && 
		!(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL))) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_eip;
		if (td->td_ucred != p->p_ucred) 
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
			frame->tf_eflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
#ifdef DEV_NPX
			ucode = npxtrap();
			if (ucode == -1)
				goto userout;
#else
			ucode = 0;
#endif
			i = SIGFPE;
			break;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i == 0)
					goto user;
				break;
			}
			i = SIGBUS;
			ucode = (type == T_PROTFLT) ? BUS_OBJERR : BUS_ADRERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */

			i = trap_pfault(frame, TRUE, eva);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
			if (i == -2) {
				/*
				 * The f00f hack workaround has triggered, so
				 * treat the fault as an illegal instruction 
				 * (T_PRIVINFLT) instead of a page fault.
				 */
				type = frame->tf_trapno = T_PRIVINFLT;

				/* Proceed as in that case. */
				ucode = ILL_PRVOPC;
				i = SIGILL;
				break;
			}
#endif
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			addr = eva;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
#ifndef TIMER_FREQ
#  define TIMER_FREQ 1193182
#endif
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto userout;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			/* transparent fault (due to context switch "late") */
			if (npxdna())
				goto userout;
#endif
			uprintf("pid %d killed due to lack of floating point\n",
				p->p_pid);
			i = SIGKILL;
			ucode = 0;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE, eva);
			goto out;

		case T_DNA:
#ifdef DEV_NPX
			KASSERT(!PCB_USER_FPU(td->td_pcb),
			    ("Unregistered use of FPU in kernel"));
			if (npxdna())
				goto out;
#endif
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * XXXKIB for now disable any FPU traps in kernel
			 * handler registration seems to be overkill
			 */
			trap_fatal(frame, 0);
			goto out;

			/*
			 * The following two traps can happen in
			 * vm86 mode, and, if so, we want to handle
			 * them specially.
			 */
		case T_PROTFLT:		/* general protection fault */
		case T_STKFLT:		/* stack fault */
			if (frame->tf_eflags & PSL_VM) {
				i = vm86_emulate((struct vm86frame *)frame);
				if (i != 0)
					/*
					 * returns to original process
					 */
					vm86_trap((struct vm86frame *)frame);
				goto out;
			}
			if (type == T_STKFLT)
				break;

			/* FALL THROUGH */

		case T_SEGNPFLT:	/* segment not present fault */
			if (PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)
				break;

			/*
			 * Invalid %fs's and %gs's can be created using
			 * procfs or PT_SETREGS or by invalidating the
			 * underlying LDT entry.  This causes a fault
			 * in kernel mode when the kernel attempts to
			 * switch contexts.  Lose the bad context
			 * (XXX) so that we can continue, and generate
			 * a signal.
			 */
			if (frame->tf_eip == (int)cpu_switch_load_gs) {
				PCPU_GET(curpcb)->pcb_gs = 0;
#if 0				
				PROC_LOCK(p);
				kern_psignal(p, SIGBUS);
				PROC_UNLOCK(p);
#endif				
				goto out;
			}

			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %eip's and %esp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_eip == (int)doreti_iret) {
				frame->tf_eip = (int)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_ds) {
				frame->tf_eip = (int)doreti_popl_ds_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_es) {
				frame->tf_eip = (int)doreti_popl_es_fault;
				goto out;
			}
			if (frame->tf_eip == (int)doreti_popl_fs) {
				frame->tf_eip = (int)doreti_popl_fs_fault;
				goto out;
			}
			if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
				frame->tf_eip =
				    (int)PCPU_GET(curpcb)->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_eflags & PSL_NT) {
				frame->tf_eflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out;
			}
			if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_eflags &= ~PSL_T;
				goto out;
			}
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap() && 
			   !(PCPU_GET(curpcb)->pcb_flags & PCB_VM86CALL)) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
#ifdef POWERFAIL_NMI
			if (time_second - lastalert > 10) {
				log(LOG_WARNING, "NMI: power fail\n");
				sysbeep(880, hz);
				lastalert = time_second;
			}
			goto out;
#else /* !POWERFAIL_NMI */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* POWERFAIL_NMI */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, eva);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_addr = (void *)addr;
	ksi.ksi_trapno = type;
	trapsignal(td, &ksi);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", (u_long)eva);
		uprintf("\n");
	}
#endif

user:
	userret(td, frame);
	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}
Example #2
0
void
init_x86_64(paddr_t first_avail)
{
	extern void consinit(void);
	extern struct extent *iomem_ex;
	struct region_descriptor region;
	struct mem_segment_descriptor *ldt_segp;
	int x, first16q, ist;
	u_int64_t seg_start, seg_end;
	u_int64_t seg_start1, seg_end1;

	cpu_init_msrs(&cpu_info_primary);

	proc0.p_addr = proc0paddr;
	cpu_info_primary.ci_curpcb = &proc0.p_addr->u_pcb;

	x86_bus_space_init();

	consinit();	/* XXX SHOULD NOT BE DONE HERE */

	/*
	 * Initailize PAGE_SIZE-dependent variables.
	 */
	uvm_setpagesize();

#if 0
	uvmexp.ncolors = 2;
#endif
 
	/*
	 * Boot arguments are in a single page specified by /boot.
	 *
	 * We require the "new" vector form, as well as memory ranges
	 * to be given in bytes rather than KB.
	 *
	 * locore copies the data into bootinfo[] for us.
	 */
	if ((bootapiver & (BAPIV_VECTOR | BAPIV_BMEMMAP)) ==
	    (BAPIV_VECTOR | BAPIV_BMEMMAP)) {
		if (bootinfo_size >= sizeof(bootinfo))
			panic("boot args too big");

		getbootinfo(bootinfo, bootinfo_size);
	} else
		panic("invalid /boot");

	avail_start = PAGE_SIZE; /* BIOS leaves data in low memory */
				 /* and VM system doesn't work with phys 0 */
#ifdef MULTIPROCESSOR
	if (avail_start < MP_TRAMPOLINE + PAGE_SIZE)
		avail_start = MP_TRAMPOLINE + PAGE_SIZE;
#endif

	/*
	 * Call pmap initialization to make new kernel address space.
	 * We must do this before loading pages into the VM system.
	 */
	pmap_bootstrap(VM_MIN_KERNEL_ADDRESS,
	    IOM_END + trunc_page(KBTOB(biosextmem)));

	if (avail_start != PAGE_SIZE)
		pmap_prealloc_lowmem_ptps();

	if (mem_cluster_cnt == 0) {
		/*
		 * Allocate the physical addresses used by RAM from the iomem
		 * extent map.  This is done before the addresses are
		 * page rounded just to make sure we get them all.
		 */
		if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
		mem_clusters[0].start = 0;
		mem_clusters[0].size = trunc_page(KBTOB(biosbasemem));
		physmem += atop(mem_clusters[0].size);
		if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
		    EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
			    "IOMEM EXTENT MAP!\n");
		}
#if 0
#if NISADMA > 0
		/*
		 * Some motherboards/BIOSes remap the 384K of RAM that would
		 * normally be covered by the ISA hole to the end of memory
		 * so that it can be used.  However, on a 16M system, this
		 * would cause bounce buffers to be allocated and used.
		 * This is not desirable behaviour, as more than 384K of
		 * bounce buffers might be allocated.  As a work-around,
		 * we round memory down to the nearest 1M boundary if
		 * we're using any isadma devices and the remapped memory
		 * is what puts us over 16M.
		 */
		if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
			char pbuf[9];

			format_bytes(pbuf, sizeof(pbuf),
			    biosextmem - (15*1024));
			printf("Warning: ignoring %s of remapped memory\n",
			    pbuf);
			biosextmem = (15*1024);
		}
#endif
#endif
		mem_clusters[1].start = IOM_END;
		mem_clusters[1].size = trunc_page(KBTOB(biosextmem));
		physmem += atop(mem_clusters[1].size);

		mem_cluster_cnt = 2;

		avail_end = IOM_END + trunc_page(KBTOB(biosextmem));
	}

	/*
	 * If we have 16M of RAM or less, just put it all on
	 * the default free list.  Otherwise, put the first
	 * 16M of RAM on a lower priority free list (so that
	 * all of the ISA DMA'able memory won't be eaten up
	 * first-off).
	 */
	if (avail_end <= (16 * 1024 * 1024))
		first16q = VM_FREELIST_DEFAULT;
	else
		first16q = VM_FREELIST_FIRST16;

	/* Make sure the end of the space used by the kernel is rounded. */
	first_avail = round_page(first_avail);
	kern_end = KERNBASE + first_avail;

	/*
	 * Now, load the memory clusters (which have already been
	 * rounded and truncated) into the VM system.
	 *
	 * NOTE: WE ASSUME THAT MEMORY STARTS AT 0 AND THAT THE KERNEL
	 * IS LOADED AT IOM_END (1M).
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		seg_start = mem_clusters[x].start;
		seg_end = mem_clusters[x].start + mem_clusters[x].size;
		seg_start1 = 0;
		seg_end1 = 0;

		if (seg_start > 0xffffffffULL) {
			printf("skipping %lld bytes of memory above 4GB\n",
			    seg_end - seg_start);
			continue;
		}
		if (seg_end > 0x100000000ULL) {
			printf("skipping %lld bytes of memory above 4GB\n",
			    seg_end - 0x100000000ULL);
			seg_end = 0x100000000ULL;
		}

		/*
		 * Skip memory before our available starting point.
		 */
		if (seg_end <= avail_start)
			continue;

		if (avail_start >= seg_start && avail_start < seg_end) {
			if (seg_start != 0)
				panic("init_x86_64: memory doesn't start at 0");
			seg_start = avail_start;
			if (seg_start == seg_end)
				continue;
		}

		/*
		 * If this segment contains the kernel, split it
		 * in two, around the kernel.
		 */
		if (seg_start <= IOM_END && first_avail <= seg_end) {
			seg_start1 = first_avail;
			seg_end1 = seg_end;
			seg_end = IOM_END;
		}

		/* First hunk */
		if (seg_start != seg_end) {
			if (seg_start <= (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				u_int64_t tmp;

				if (seg_end > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end;
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start,
				    (unsigned long long)tmp,
				    atop(seg_start), atop(tmp));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(tmp), atop(seg_start),
				    atop(tmp), first16q);
				seg_start = tmp;
			}

			if (seg_start != seg_end) {
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start,
				    (unsigned long long)seg_end,
				    atop(seg_start), atop(seg_end));
#endif
				uvm_page_physload(atop(seg_start),
				    atop(seg_end), atop(seg_start),
				    atop(seg_end), VM_FREELIST_DEFAULT);
			}
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			if (seg_start1 <= (16 * 1024 * 1024) &&
			    first16q != VM_FREELIST_DEFAULT) {
				u_int64_t tmp;

				if (seg_end1 > (16 * 1024 * 1024))
					tmp = (16 * 1024 * 1024);
				else
					tmp = seg_end1;
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start1,
				    (unsigned long long)tmp,
				    atop(seg_start1), atop(tmp));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(tmp), atop(seg_start1),
				    atop(tmp), first16q);
				seg_start1 = tmp;
			}

			if (seg_start1 != seg_end1) {
#if DEBUG_MEMLOAD
				printf("loading 0x%qx-0x%qx (0x%lx-0x%lx)\n",
				    (unsigned long long)seg_start1,
				    (unsigned long long)seg_end1,
				    atop(seg_start1), atop(seg_end1));
#endif
				uvm_page_physload(atop(seg_start1),
				    atop(seg_end1), atop(seg_start1),
				    atop(seg_end1), VM_FREELIST_DEFAULT);
			}
		}
	}

	/*
	 * Steal memory for the message buffer (at end of core).
	 */
	{
		struct vm_physseg *vps = NULL;
		psize_t sz = round_page(MSGBUFSIZE);
		psize_t reqsz = sz;

		for (x = 0; x < vm_nphysseg; x++) {
			vps = &vm_physmem[x];
			if (ptoa(vps->avail_end) == avail_end)
				break;
		}
		if (x == vm_nphysseg)
			panic("init_x86_64: can't find end of memory");

		/* Shrink so it'll fit in the last segment. */
		if ((vps->avail_end - vps->avail_start) < atop(sz))
			sz = ptoa(vps->avail_end - vps->avail_start);

		vps->avail_end -= atop(sz);
		vps->end -= atop(sz);
		msgbuf_paddr = ptoa(vps->avail_end);

		/* Remove the last segment if it now has no pages. */
		if (vps->start == vps->end) {
			for (vm_nphysseg--; x < vm_nphysseg; x++)
				vm_physmem[x] = vm_physmem[x + 1];
		}

		/* Now find where the new avail_end is. */
		for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
			if (vm_physmem[x].avail_end > avail_end)
				avail_end = vm_physmem[x].avail_end;
		avail_end = ptoa(avail_end);

		/* Warn if the message buffer had to be shrunk. */
		if (sz != reqsz)
			printf("WARNING: %ld bytes not available for msgbuf "
			    "in last cluster (%ld used)\n", reqsz, sz);
	}

	/*
	 * XXXfvdl todo: acpi wakeup code.
	 */

	pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024);

	pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE);
	pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE,
	    VM_PROT_READ|VM_PROT_WRITE);

	pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE);

	idt = (struct gate_descriptor *)idt_vaddr;
	gdtstore = (char *)(idt + NIDT);
	ldtstore = gdtstore + DYNSEL_START;

	/* make gdt gates and memory segments */
	set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, 0xfffff, SDT_MEMERA,
	    SEL_KPL, 1, 0, 1);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, 0xfffff, SDT_MEMRWA,
	    SEL_KPL, 1, 0, 1);

	set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, LDT_SIZE - 1,
	    SDT_SYSLDT, SEL_KPL, 0);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1);

	/* make ldt gates and memory segments */
	setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	*(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) =
	    *GDT_ADDR_MEM(gdtstore, GUCODE_SEL);
	*(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) =
	    *GDT_ADDR_MEM(gdtstore, GUDATA_SEL);

	/*
	 * 32 bit GDT entries.
	 */

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);

	set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0,
	    atop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);

	/*
	 * 32 bit LDT entries.
	 */
	ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL);
	set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
	    SDT_MEMERA, SEL_UPL, 1, 1, 0);
	ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL);
	set_mem_segment(ldt_segp, 0, atop(VM_MAXUSER_ADDRESS32) - 1,
	    SDT_MEMRWA, SEL_UPL, 1, 1, 0);

	/*
	 * Other entries.
	 */
	memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL),
	    (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    sizeof (struct gate_descriptor));
	memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL),
	    (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
	    sizeof (struct gate_descriptor));

	/* exceptions */
	for (x = 0; x < 32; x++) {
		ist = (x == 8) ? 1 : 0;
		setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT,
		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
		    GSEL(GCODE_SEL, SEL_KPL));
		idt_allocmap[x] = 1;
	}

	/* new-style interrupt gate for syscalls */
	setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL,
	    GSEL(GCODE_SEL, SEL_KPL));
	idt_allocmap[128] = 1;

	setregion(&region, gdtstore, DYNSEL_START - 1);
	lgdt(&region);

	cpu_init_idt();

#ifdef DDB
	db_machine_init();
	ddb_init();
	if (boothowto & RB_KDB)
		Debugger();
#endif
#ifdef KGDB
	kgdb_port_init();
	if (boothowto & RB_KDB) {
		kgdb_debug_init = 1;
		kgdb_connect(1);
	}
#endif

	intr_default_setup();

	softintr_init();
	splraise(IPL_IPI);
	enable_intr();

        /* Make sure maxproc is sane */ 
        if (maxproc > cpu_maxproc())
                maxproc = cpu_maxproc();
}
void
hibernate_enable_intr_machdep(void)
{
	enable_intr();
}
Example #4
0
void
trap(struct trapframe *frame)
{
#ifdef KDTRACE_HOOKS
	struct reg regs;
#endif
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	ksiginfo_t ksi;

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
	         if (ipi_nmi_handler() == 0)
	                   goto out;
	}
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

	if (type == T_RESERVED) {
		trap_fatal(frame, 0);
		goto out;
	}

#ifdef	HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI.  If the PMC module is
	 * active, pass the 'rip' value to the PMC module's interrupt
	 * handler.  A return value of '1' from the handler means that
	 * the NMI was handled by it and we can return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
		goto out;
#endif

	if (type == T_MCHK) {
		mca_intr();
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 */
	if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, type))
		goto out;
#endif

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL)
			uprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curthread->td_name, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);

			/*
			 * We shouldn't enable interrupts while holding a
			 * spin lock.
			 */
			if (td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}

	code = frame->tf_err;

        if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_rip;
		if (td->td_ucred != p->p_ucred) 
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
#ifdef KDTRACE_HOOKS
			if (type == T_BPTFLT) {
				fill_frame_regs(frame, &regs);
				if (dtrace_pid_probe_ptr != NULL &&
				    dtrace_pid_probe_ptr(&regs) == 0)
					goto out;
			}
#endif
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = fputrap_x87();
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_ALIGNFLT:
			i = SIGBUS;
			ucode = BUS_ADRALN;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			addr = frame->tf_addr;
			i = trap_pfault(frame, TRUE);
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if (SV_CURPROC_ABI() == SV_ABI_FREEBSD
					    && p->p_osrel >= P_OSREL_SIGSEGV) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/* transparent fault (due to context switch "late") */
			KASSERT(PCB_USER_FPU(td->td_pcb),
			    ("kernel FPU ctx has leaked"));
			fpudna();
			goto userout;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = fputrap_sse();
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;
#ifdef KDTRACE_HOOKS
		case T_DTRACE_RET:
			enable_intr();
			fill_frame_regs(frame, &regs);
			if (dtrace_return_probe_ptr != NULL &&
			    dtrace_return_probe_ptr(&regs) == 0)
				goto out;
			break;
#endif
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE);
			goto out;

		case T_DNA:
			KASSERT(!PCB_USER_FPU(td->td_pcb),
			    ("Unregistered use of FPU in kernel"));
			fpudna();
			goto out;

		case T_ARITHTRAP:	/* arithmetic trap */
		case T_XMMFLT:		/* SIMD floating-point exception */
		case T_FPOPFLT:		/* FPU operand fetch fault */
			/*
			 * For now, supporting kernel handler
			 * registration for FPU traps is overkill.
			 */
			trap_fatal(frame, 0);
			goto out;

		case T_STKFLT:		/* stack fault */
		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_rip == (long)doreti_iret) {
				frame->tf_rip = (long)doreti_iret_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_ds) {
				frame->tf_rip = (long)ds_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_es) {
				frame->tf_rip = (long)es_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fs) {
				frame->tf_rip = (long)fs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gs) {
				frame->tf_rip = (long)gs_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_gsbase) {
				frame->tf_rip = (long)gsbase_load_fault;
				goto out;
			}
			if (frame->tf_rip == (long)ld_fsbase) {
				frame->tf_rip = (long)fsbase_load_fault;
				goto out;
			}
			if (curpcb->pcb_onfault != NULL) {
				frame->tf_rip = (long)curpcb->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				/* XXX check upper bits here */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, 0);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_trapno = type;
	ksi.ksi_addr = (void *)addr;
	if (uprintf_signal) {
		uprintf("pid %d comm %s: signal %d err %lx code %d type %d "
		    "addr 0x%lx rsp 0x%lx rip 0x%lx "
		    "<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
		    p->p_pid, p->p_comm, i, frame->tf_err, ucode, type, addr,
		    frame->tf_rsp, frame->tf_rip,
		    fubyte((void *)(frame->tf_rip + 0)),
		    fubyte((void *)(frame->tf_rip + 1)),
		    fubyte((void *)(frame->tf_rip + 2)),
		    fubyte((void *)(frame->tf_rip + 3)),
		    fubyte((void *)(frame->tf_rip + 4)),
		    fubyte((void *)(frame->tf_rip + 5)),
		    fubyte((void *)(frame->tf_rip + 6)),
		    fubyte((void *)(frame->tf_rip + 7)));
	}
	KASSERT((read_rflags() & PSL_I) != 0, ("interrupts disabled"));
	trapsignal(td, &ksi);

user:
	userret(td, frame);
	KASSERT(PCB_USER_FPU(td->td_pcb),
	    ("Return from trap with kernel FPU ctx leaked"));
userout:
out:
	return;
}
Example #5
0
/* HVM mode suspension. */
static void
xctrl_suspend()
{
#ifdef SMP
	cpuset_t cpu_suspend_map;
#endif
	int suspend_cancelled;

	EVENTHANDLER_INVOKE(power_suspend);

	if (smp_started) {
		thread_lock(curthread);
		sched_bind(curthread, 0);
		thread_unlock(curthread);
	}
	KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));

	/*
	 * Clear our XenStore node so the toolstack knows we are
	 * responding to the suspend request.
	 */
	xs_write(XST_NIL, "control", "shutdown", "");

	/*
	 * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
	 * drivers need this.
	 */
	mtx_lock(&Giant);
	if (DEVICE_SUSPEND(root_bus) != 0) {
		mtx_unlock(&Giant);
		printf("%s: device_suspend failed\n", __func__);
		return;
	}
	mtx_unlock(&Giant);

#ifdef SMP
	CPU_ZERO(&cpu_suspend_map);	/* silence gcc */
	if (smp_started) {
		/*
		 * Suspend other CPUs. This prevents IPIs while we
		 * are resuming, and will allow us to reset per-cpu
		 * vcpu_info on resume.
		 */
		cpu_suspend_map = all_cpus;
		CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
		if (!CPU_EMPTY(&cpu_suspend_map))
			suspend_cpus(cpu_suspend_map);
	}
#endif

	/*
	 * Prevent any races with evtchn_interrupt() handler.
	 */
	disable_intr();
	intr_suspend();
	xen_hvm_suspend();

	suspend_cancelled = HYPERVISOR_suspend(0);

	xen_hvm_resume(suspend_cancelled != 0);
	intr_resume(suspend_cancelled != 0);
	enable_intr();

	/*
	 * Reset grant table info.
	 */
	gnttab_resume();

#ifdef SMP
	if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
		/*
		 * Now that event channels have been initialized,
		 * resume CPUs.
		 */
		resume_cpus(cpu_suspend_map);
	}
#endif

	/*
	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
	 * similar.
	 */
	mtx_lock(&Giant);
	DEVICE_RESUME(root_bus);
	mtx_unlock(&Giant);

	if (smp_started) {
		thread_lock(curthread);
		sched_unbind(curthread);
		thread_unlock(curthread);
	}

	EVENTHANDLER_INVOKE(power_resume);

	if (bootverbose)
		printf("System resumed after suspension\n");

}
Example #6
0
void
trap(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int i = 0, ucode = 0, code;
	u_int type;
	register_t addr = 0;
	ksiginfo_t ksi;

	PCPU_INC(cnt.v_trap);
	type = frame->tf_trapno;

#ifdef SMP
#ifdef STOP_NMI
	/* Handler for NMI IPIs used for stopping CPUs. */
	if (type == T_NMI) {
	         if (ipi_nmi_handler() == 0)
	                   goto out;
	}
#endif /* STOP_NMI */
#endif /* SMP */

#ifdef KDB
	if (kdb_active) {
		kdb_reenter();
		goto out;
	}
#endif

#ifdef	HWPMC_HOOKS
	/*
	 * CPU PMCs interrupt using an NMI.  If the PMC module is
	 * active, pass the 'rip' value to the PMC module's interrupt
	 * handler.  A return value of '1' from the handler means that
	 * the NMI was handled by it and we can return immediately.
	 */
	if (type == T_NMI && pmc_intr &&
	    (*pmc_intr)(PCPU_GET(cpuid), frame))
		goto out;
#endif

	if (type == T_MCHK) {
		if (!mca_intr())
			trap_fatal(frame, 0);
		goto out;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in it's per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if (dtrace_trap_func != NULL)
		if ((*dtrace_trap_func)(frame, type))
			goto out;
#endif

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled
		 * interrupts and then trapped.  Enabling interrupts
		 * now is wrong, but it is better than running with
		 * interrupts disabled until they are accidentally
		 * enabled later.
		 */
		if (ISPL(frame->tf_cs) == SEL_UPL)
			printf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			printf("kernel trap %d with interrupts disabled\n",
			    type);
			/*
			 * We shouldn't enable interrupts while holding a
			 * spin lock or servicing an NMI.
			 */
			if (type != T_NMI && td->td_md.md_spinlock_count == 0)
				enable_intr();
		}
	}

	code = frame->tf_err;
	if (type == T_PAGEFLT) {
		/*
		 * If we get a page fault while in a critical section, then
		 * it is most likely a fatal kernel page fault.  The kernel
		 * is already going to panic trying to get a sleep lock to
		 * do the VM lookup, so just consider it a fatal trap so the
		 * kernel can print out a useful trap message and even get
		 * to the debugger.
		 *
		 * If we get a page fault while holding a non-sleepable
		 * lock, then it is most likely a fatal kernel page fault.
		 * If WITNESS is enabled, then it's going to whine about
		 * bogus LORs with various VM locks, so just skip to the
		 * fatal trap handling directly.
		 */
		if (td->td_critnest != 0 ||
		    WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
		    "Kernel page fault") != 0)
			trap_fatal(frame, frame->tf_addr);
	}

        if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		td->td_pticks = 0;
		td->td_frame = frame;
		addr = frame->tf_rip;
		if (td->td_ucred != p->p_ucred) 
			cred_update_thread(td);

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			i = SIGILL;
			ucode = ILL_PRVOPC;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			enable_intr();
			frame->tf_rflags &= ~PSL_T;
			i = SIGTRAP;
			ucode = (type == T_TRCTRAP ? TRAP_TRACE : TRAP_BRKPT);
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = fputrap();
			if (ucode == -1)
				goto userout;
			i = SIGFPE;
			break;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_STKFLT:		/* stack fault */
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_DOUBLEFLT:	/* double fault */
		default:
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;

		case T_PAGEFLT:		/* page fault */
			addr = frame->tf_addr;
#ifdef KSE
			if (td->td_pflags & TDP_SA)
				thread_user_enter(td);
#endif
			i = trap_pfault(frame, TRUE);
			if (i == -1)
				goto userout;
			if (i == 0)
				goto user;

			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else {
				if (prot_fault_translation == 0) {
					/*
					 * Autodetect.
					 * This check also covers the images
					 * without the ABI-tag ELF note.
					 */
					if ((curproc->p_sysent ==
					    &elf64_freebsd_sysvec
#ifdef COMPAT_IA32
					    || curproc->p_sysent ==
					    &ia32_freebsd_sysvec
#endif
					    ) && p->p_osrel >= 700004) {
						i = SIGSEGV;
						ucode = SEGV_ACCERR;
					} else {
						i = SIGBUS;
						ucode = BUS_PAGE_FAULT;
					}
				} else if (prot_fault_translation == 1) {
					/*
					 * Always compat mode.
					 */
					i = SIGBUS;
					ucode = BUS_PAGE_FAULT;
				} else {
					/*
					 * Always SIGSEGV mode.
					 */
					i = SIGSEGV;
					ucode = SEGV_ACCERR;
				}
			}
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* machine/parity/power fail/"kitchen sink" faults */
			/* XXX Giant */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto userout;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* DEV_ISA */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/* transparent fault (due to context switch "late") */
			fpudna();
			goto userout;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0; /* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		KASSERT(cold || td->td_ucred != NULL,
		    ("kernel trap doesn't have ucred"));
		switch (type) {
		case T_PAGEFLT:			/* page fault */
			(void) trap_pfault(frame, FALSE);
			goto out;

		case T_DNA:
			/*
			 * The kernel is apparently using fpu for copying.
			 * XXX this should be fatal unless the kernel has
			 * registered such use.
			 */
			fpudna();
			printf("fpudna in kernel mode!\n");
			goto out;

		case T_STKFLT:		/* stack fault */
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			if (td->td_intr_nesting_level != 0)
				break;

			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (frame->tf_rip == (long)doreti_iret) {
				frame->tf_rip = (long)doreti_iret_fault;
				goto out;
			}
			if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
				frame->tf_rip =
				    (long)PCPU_GET(curpcb)->pcb_onfault;
				goto out;
			}
			break;

		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
				goto out;
			}
			break;

		case T_TRCTRAP:	 /* trace trap */
			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't
				 */
				/* XXX check upper bits here */
				load_dr6(rdr6() & 0xfffffff0);
				goto out;
			}
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If KDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
#ifdef KDB
			if (kdb_trap(type, 0, frame))
				goto out;
#endif
			break;

#ifdef DEV_ISA
		case T_NMI:
			/* XXX Giant */
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef KDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (kdb_on_nmi) {
					printf ("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* KDB */
				goto out;
			} else if (panic_on_nmi == 0)
				goto out;
			/* FALLTHROUGH */
#endif /* DEV_ISA */
		}

		trap_fatal(frame, 0);
		goto out;
	}

	/* Translate fault for emulators (e.g. Linux) */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	ksiginfo_init_trap(&ksi);
	ksi.ksi_signo = i;
	ksi.ksi_code = ucode;
	ksi.ksi_trapno = type;
	ksi.ksi_addr = (void *)addr;
	trapsignal(td, &ksi);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", frame->tf_addr);
		uprintf("\n");
	}
#endif

user:
	userret(td, frame);
	mtx_assert(&Giant, MA_NOTOWNED);
userout:
out:
	return;
}
Example #7
0
vmxnet3::vmxnet3(hw::PCI_Device& d, const uint16_t mtu) :
    Link(Link_protocol{{this, &vmxnet3::transmit}, mac()}, bufstore_),
    m_pcidev(d), m_mtu(mtu), bufstore_{1024, buffer_size_for_mtu(mtu)}
{
  INFO("vmxnet3", "Driver initializing (rev=%#x)", d.rev_id());
  assert(d.rev_id() == REVISION_ID);

  // find and store capabilities
  d.parse_capabilities();
  // find BARs etc.
  d.probe_resources();

  if (d.msix_cap())
  {
    d.init_msix();
    uint8_t msix_vectors = d.get_msix_vectors();
    INFO2("[x] Device has %u MSI-X vectors", msix_vectors);
    assert(msix_vectors >= 3);
    if (msix_vectors > 2 + NUM_RX_QUEUES) msix_vectors = 2 + NUM_RX_QUEUES;

    for (int i = 0; i < msix_vectors; i++)
    {
      auto irq = Events::get().subscribe(nullptr);
      this->irqs.push_back(irq);
      d.setup_msix_vector(SMP::cpu_id(), IRQ_BASE + irq);
    }

    Events::get().subscribe(irqs[0], {this, &vmxnet3::msix_evt_handler});
    Events::get().subscribe(irqs[1], {this, &vmxnet3::msix_xmit_handler});
    for (int q = 0; q < NUM_RX_QUEUES; q++)
    Events::get().subscribe(irqs[2 + q], {this, &vmxnet3::msix_recv_handler});
  }
  else {
    assert(0 && "This driver does not support legacy IRQs");
  }

  // dma areas
  this->iobase = d.get_bar(PCI_BAR_VD);
  assert(this->iobase);
  this->ptbase = d.get_bar(PCI_BAR_PT);
  assert(this->ptbase);

  // verify and select version
  bool ok = check_version();
  assert(ok);

  // reset device
  ok = reset();
  assert(ok);

  // get mac address
  retrieve_hwaddr();

  // check link status
  auto link_spd = check_link();
  if (link_spd) {
    INFO2("Link up at %u Mbps", link_spd);
  }
  else {
    INFO2("LINK DOWN! :(");
    return;
  }

  // set MAC
  set_hwaddr(this->hw_addr);

  // initialize DMA areas
  this->dma = (vmxnet3_dma*) memalign(VMXNET3_DMA_ALIGN, sizeof(vmxnet3_dma));
  memset(this->dma, 0, sizeof(vmxnet3_dma));

  auto& queues = dma->queues;
  // setup tx queues
  queues.tx.cfg.desc_address = (uintptr_t) &dma->tx_desc;
  queues.tx.cfg.comp_address = (uintptr_t) &dma->tx_comp;
  queues.tx.cfg.num_desc     = vmxnet3::NUM_TX_DESC;
  queues.tx.cfg.num_comp     = VMXNET3_NUM_TX_COMP;
  queues.tx.cfg.intr_index   = 1;
  // temp rxq buffer storage
  memset(tx.buffers, 0, sizeof(tx.buffers));

  // setup rx queues
  for (int q = 0; q < NUM_RX_QUEUES; q++)
  {
    memset(rx[q].buffers, 0, sizeof(rx[q].buffers));
    rx[q].desc0 = &dma->rx0_desc[0];
    rx[q].desc1 = &dma->rx1_desc[0];
    rx[q].comp  = &dma->rx_comp[0];
    rx[q].index = q;

    auto& queue = queues.rx[q];
    queue.cfg.desc_address[0] = (uintptr_t) rx[q].desc0;
    queue.cfg.desc_address[1] = (uintptr_t) rx[q].desc1;
    queue.cfg.comp_address    = (uintptr_t) rx[q].comp;
    queue.cfg.num_desc[0]  = vmxnet3::NUM_RX_DESC;
    queue.cfg.num_desc[1]  = vmxnet3::NUM_RX_DESC;
    queue.cfg.num_comp     = VMXNET3_NUM_RX_COMP;
    queue.cfg.driver_data_len = sizeof(vmxnet3_rx_desc)
                          + 2 * sizeof(vmxnet3_rx_desc);
    queue.cfg.intr_index = 2 + q;
  }

  auto& shared = dma->shared;
  // setup shared physical area
  shared.magic = VMXNET3_REV1_MAGIC;
  shared.misc.guest_info.arch =
      (sizeof(void*) == 4) ? GOS_BITS_32_BITS : GOS_BITS_64_BITS;
  shared.misc.guest_info.type = GOS_TYPE_LINUX;
  shared.misc.version         = VMXNET3_VERSION_MAGIC;
  shared.misc.version_support     = 1;
  shared.misc.upt_version_support = 1;
  shared.misc.upt_features        = UPT1_F_RXVLAN;
  shared.misc.driver_data_address = (uintptr_t) &dma;
  shared.misc.queue_desc_address  = (uintptr_t) &dma->queues;
  shared.misc.driver_data_len     = sizeof(vmxnet3_dma);
  shared.misc.queue_desc_len      = sizeof(vmxnet3_queues);
  shared.misc.mtu = packet_len(); // 60-9000
  shared.misc.num_tx_queues  = 1;
  shared.misc.num_rx_queues  = NUM_RX_QUEUES;
  shared.interrupt.mask_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
  shared.interrupt.num_intrs = 2 + NUM_RX_QUEUES;
  shared.interrupt.event_intr_index = 0;
  memset(shared.interrupt.moderation_level, UPT1_IML_ADAPTIVE, VMXNET3_MAX_INTRS);
  shared.interrupt.control   = 0x1; // disable all
  shared.rx_filter.mode =
      VMXNET3_RXM_UCAST | VMXNET3_RXM_BCAST | VMXNET3_RXM_ALL_MULTI;

  // location of shared area to device
  uintptr_t shabus = (uintptr_t) &shared;
  mmio_write32(this->iobase + 0x10, shabus); // shared low
  mmio_write32(this->iobase + 0x18, 0x0);    // shared high

  // activate device
  int status = command(VMXNET3_CMD_ACTIVATE_DEV);
  if (status) {
    assert(0 && "Failed to activate device");
  }

  // initialize and fill RX queue...
  for (int q = 0; q < NUM_RX_QUEUES; q++)
  {
    refill(rx[q]);
  }

  // deferred transmit
  this->deferred_irq = Events::get().subscribe(handle_deferred);

  // enable interrupts
  enable_intr(0);
  enable_intr(1);
  for (int q = 0; q < NUM_RX_QUEUES; q++)
      enable_intr(2 + q);
}
Example #8
0
void timer_init() {
  timer->c[1] = timer->clo + 0x100000;
  enable_intr(timer_intr);
};