Example #1
/*
 * Make sure any stale selectors are cleared from the segment registers
 * by putting KDS_SEL (the kernel's default %ds gdt selector) into them.
 * This is necessary because the kernel itself does not use %es, %fs, or
 * %ds. (%cs and %ss are necessary, and are set up by the kernel - along with
 * %gs - to point to the current cpu struct.) If we enter kmdb while in the
 * kernel and resume with a stale ldt or brandz selector sitting there in a
 * segment register, kmdb will #gp fault if the stale selector points to,
 * for example, an ldt in the context of another process.
 *
 * WARNING: Intel and AMD chips behave differently when storing
 * the null selector into %fs and %gs while in long mode. On AMD
 * chips fsbase and gsbase are not cleared. But on Intel chips, storing
 * a null selector into %fs or %gs has the side effect of clearing
 * fsbase or gsbase. For that reason we use KDS_SEL, which has
 * consistent behavior between AMD and Intel.
 *
 * Caller responsible for preventing cpu migration.
 */
void
reset_sregs(void)
{
	ulong_t kgsbase = (ulong_t)CPU;

	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);

	cli();
	__set_gs(KGS_SEL);

	/*
	 * restore kernel gsbase
	 */
#if defined(__xpv)
	xen_set_segment_base(SEGBASE_GS_KERNEL, kgsbase);
#else
	wrmsr(MSR_AMD_GSBASE, kgsbase);
#endif

	sti();

	__set_ds(KDS_SEL);
	__set_es(0 | SEL_KPL);	/* selector RPL not ring 0 on hypervisor */
	__set_fs(KFS_SEL);
}
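As a usage sketch: the block comment above says the caller must prevent cpu
migration, and in illumos that is what kpreempt_disable()/kpreempt_enable()
provide. The helper below is hypothetical (not from the tree), but the
primitives it calls are real:

static void
scrub_segment_regs(void)	/* hypothetical wrapper, for illustration */
{
	kpreempt_disable();	/* bumps curthread->t_preempt, which also */
				/* satisfies the ASSERT in reset_sregs() */
	reset_sregs();		/* %ds, %es and %fs now hold kernel selectors */
	kpreempt_enable();
}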
Example #2
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (appearing to come from the fsave or fwait itself) rather than
 * in the context of the wrong process. Lazy FP saving no longer
 * makes any sense with modern CPUs, and this simplifies a lot of
 * things (SMP and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is something of a red herring - this code is not
 * noticeably faster. However, there _is_ some room for improvement
 * here, so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = init_tss + cpu;
	fpu_switch_t fpu;

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/*
	 * Reload esp0: point the TSS's ring-0 stack pointer at the
	 * incoming task's kernel stack.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	lazy_save_gs(prev->gs);

#ifdef CONFIG_PAX_MEMORY_UDEREF
	__set_fs(task_thread_info(next_p)->addr_limit);
#endif

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	switch_kmaps(prev_p, next_p);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	this_cpu_write(current_task, next_p);
	this_cpu_write(current_tinfo, &next_p->tinfo);

	/*
	 * Restore %gs if needed (which is common). The bitwise OR lets
	 * us skip the reload only when both selectors are null.
	 */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	switch_fpu_finish(next_p, fpu);

	return prev_p;
}
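For reference, the lazy_save_gs()/lazy_load_gs() calls above boil down to
plain selector moves on builds without lazy %gs switching. The macros below
are a simplified sketch of that idea; the kernel's real loadsegment() in
asm/segment.h additionally carries exception-table fixup so that loading a
bogus selector zeroes the register instead of oopsing:

/* Simplified sketch only; no fault fixup, no lazy-%gs variant. */
#define savesegment(seg, value) \
	asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

#define loadsegment(seg, value) \
	asm volatile("mov %0,%%" #seg : : "rm" (value) : "memory")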
Example #3
/*
 * Update the segment registers with new values from the pcb.
 *
 * We have to do this carefully, and in the following order,
 * in case any of the selectors points at a bogus descriptor.
 * If they do, we'll catch the trap with on_trap() and return 1;
 * we return 0 on success.
 *
 * This is particularly tricky for %gs.
 * This routine must be executed under a cli.
 */
int
update_sregs(struct regs *rp, klwp_t *lwp)
{
	pcb_t *pcb = &lwp->lwp_pcb;
	ulong_t	kgsbase;
	on_trap_data_t	otd;
	int rc = 0;

	if (!on_trap(&otd, OT_SEGMENT_ACCESS)) {

#if defined(__xpv)
		/*
		 * On the hypervisor this is easy. The hypercall below will
		 * swapgs and load %gs with the user selector. If the user
		 * selector is bad the hypervisor will catch the fault and
		 * load %gs with the null selector instead. Either way the
		 * kernel's gsbase is not damaged.
		 */
		kgsbase = (ulong_t)CPU;
		if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
		    pcb->pcb_gs) != 0) {
			no_trap();
			return (1);
		}

		rp->r_gs = pcb->pcb_gs;
		ASSERT((cpu_t *)kgsbase == CPU);

#else	/* __xpv */

		/*
		 * Things are a little more complicated running native.
		 */
		kgsbase = (ulong_t)CPU;
		__set_gs(pcb->pcb_gs);

		/*
		 * If __set_gs fails it's because the new %gs is a bad %gs,
		 * and we'll be taking a trap, but with the original %gs and
		 * %gsbase undamaged (i.e. still pointing at curcpu).
		 *
		 * If it succeeds, we've just clobbered the kernel's gsbase
		 * with the base of the user segment, so in particular we
		 * can't take any traps at all. Stash the newly loaded user
		 * gsbase into the hidden gsbase via __swapgs, then put the
		 * kernel's gsbase back again. Later, when we return to
		 * userland, we'll swapgs once more, restoring the user
		 * gsbase loaded here.
		 */
		__swapgs();
		rp->r_gs = pcb->pcb_gs;

		/*
		 * restore kernel's gsbase
		 */
		wrmsr(MSR_AMD_GSBASE, kgsbase);

#endif	/* __xpv */

		/*
		 * Only override the descriptor base address if
		 * r_gs == LWPGS_SEL or if r_gs == NULL. A note on
		 * NULL descriptors -- 32-bit programs take faults
		 * if they dereference NULL descriptors; however,
		 * when 64-bit programs load them into %fs or %gs,
		 * they DON'T fault -- only the base address remains
		 * whatever it was from the last load.   Urk.
		 *
		 * XXX - note that lwp_setprivate now sets %fs/%gs to the
		 * null selector for 64-bit processes, whereas before
		 * %fs/%gs were set to LWP(FS|GS)_SEL regardless of
		 * the process's data model. For now we check for both
		 * values so that the kernel can also support the older
		 * libc. This should be ripped out at some point in the
		 * future.
		 */
		if (pcb->pcb_gs == LWPGS_SEL || pcb->pcb_gs == 0) {
#if defined(__xpv)
			if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER,
			    pcb->pcb_gsbase)) {
				no_trap();
				return (1);
			}
#else
			wrmsr(MSR_AMD_KGSBASE, pcb->pcb_gsbase);
#endif
		}

		__set_ds(pcb->pcb_ds);
		rp->r_ds = pcb->pcb_ds;

		__set_es(pcb->pcb_es);
		rp->r_es = pcb->pcb_es;

		__set_fs(pcb->pcb_fs);
		rp->r_fs = pcb->pcb_fs;

		/*
		 * Same as for %gs
		 */
		if (pcb->pcb_fs == LWPFS_SEL || pcb->pcb_fs == 0) {
#if defined(__xpv)
			if (HYPERVISOR_set_segment_base(SEGBASE_FS,
			    pcb->pcb_fsbase)) {
				no_trap();
				return (1);
			}
#else
			wrmsr(MSR_AMD_FSBASE, pcb->pcb_fsbase);
#endif
		}

	} else {
		/*
		 * One of the segment loads above faulted and on_trap()
		 * brought us here; the trap handler may have re-enabled
		 * interrupts, so disable them again and report failure.
		 */
		cli();
		rc = 1;
	}
	no_trap();
	return (rc);
}
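Tying the examples together, a hypothetical caller of update_sregs() might
look like the sketch below. restore_user_sregs() is invented for
illustration; cli()/sti(), update_sregs(), reset_sregs() and the kpreempt
pair are the real routines:

/*
 * Hypothetical caller sketch, not from the real tree.  Returns 0 if the
 * pcb's user selectors were loaded, nonzero if one of them was bogus.
 */
static int
restore_user_sregs(struct regs *rp, klwp_t *lwp)
{
	int bad;

	cli();			/* update_sregs() must run under cli */
	bad = update_sregs(rp, lwp);
	sti();

	if (bad) {
		/*
		 * A saved selector named a bogus descriptor (e.g. an ldt
		 * entry of an exited process).  Scrub the segment
		 * registers back to the kernel defaults; a real kernel
		 * would also post a signal to the lwp.
		 */
		kpreempt_disable();
		reset_sregs();		/* from Example #1 */
		kpreempt_enable();
	}
	return (bad);
}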