Example #1
0
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	int cpu;

	set_user_gs(regs, 0);

	regs->fs		= 0;
	set_fs(USER_DS);
	regs->ds		= __USER_DS;
	regs->es		= __USER_DS;
	regs->ss		= __USER_DS;
	regs->cs		= __USER_CS;
	regs->ip		= new_ip;
	regs->sp		= new_sp;

	cpu = get_cpu();
	load_user_cs_desc(cpu, current->mm);
	put_cpu();

	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
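
Everything in these examples revolves around keeping the per-CPU GDT's user code-segment slot in sync with the mm's exec-shield state. As a minimal sketch (assumed from the exec-shield patch rather than quoted from any particular tree; details vary by kernel version), the two helpers look roughly like this:

/*
 * Rough sketch of the exec-shield helpers used throughout these
 * examples; not verbatim from any tree.
 */
static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
{
	limit = (limit - 1) / PAGE_SIZE;		/* byte limit -> page granular */
	desc->a = limit & 0xffff;			/* limit bits 15:0 */
	desc->b = (limit & 0xf0000) | 0x00c0fb00;	/* limit 19:16, DPL-3 code, 4k gran */
}

static inline void load_user_cs_desc(int cpu, struct mm_struct *mm)
{
	/* Copy the mm's private user CS descriptor into this CPU's GDT slot. */
	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = mm->context.user_cs;
}
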
Example #2
0
fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
{
	unsigned long cpu;

	cpu = get_cpu();
	if (current->active_mm)
		load_user_cs_desc(cpu, current->active_mm);

	if (!cpu_isset(cpu, flush_cpumask))
		goto out;
		/* 
		 * This was a BUG() but until someone can quote me the
		 * line from the Intel manual that guarantees an IPI to
		 * multiple CPUs is retried _only_ on the erroring CPUs,
		 * it's staying as a return
		 *
		 * BUG();
		 */
		 
	if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
		if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
			if (flush_va == FLUSH_ALL)
				local_flush_tlb();
			else
				__flush_tlb_one(flush_va);
		} else
			leave_mm(cpu);
	}
	ack_APIC_irq();
	smp_mb__before_clear_bit();
	cpu_clear(cpu, flush_cpumask);
	smp_mb__after_clear_bit();
out:
	put_cpu_no_resched();
}
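
For context, this handler pairs with a sender that publishes flush_mm/flush_va, ORs the targets into flush_cpumask, sends the IPI and spins until every target has cleared its bit. A simplified sketch of that sender side (it assumes the same file-scope flush_mm, flush_va, flush_cpumask and tlbstate_lock used by the handler; corner cases are omitted):

static void example_flush_tlb_others(cpumask_t targets, struct mm_struct *mm,
				     unsigned long va)
{
	/* Only one flush may be in flight; the handler reads these globals. */
	spin_lock(&tlbstate_lock);

	flush_mm = mm;
	flush_va = va;
	cpus_or(flush_cpumask, targets, flush_cpumask);

	/* Kick the targets; each clears its bit in smp_invalidate_interrupt(). */
	send_IPI_mask(targets, INVALIDATE_TLB_VECTOR);

	while (!cpus_empty(flush_cpumask))
		cpu_relax();

	flush_mm = NULL;
	flush_va = 0;
	spin_unlock(&tlbstate_lock);
}
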
Example #3
0
static void modify_cs(struct mm_struct *mm, unsigned long limit)
{
	mm->context.exec_limit = limit;
	set_user_cs(&mm->context.user_cs, limit);
	if (mm == current->mm) {
		int cpu;

		cpu = get_cpu();
		load_user_cs_desc(cpu, mm);
		put_cpu();
	}
}
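
A hypothetical caller, to show when modify_cs() fires: any time an executable mapping ends above the mm's current exec_limit, the CS limit is raised to cover it. The function name below is made up for illustration; exec-shield drives this from its mmap/mprotect hooks.

static void example_note_exec_mapping(struct mm_struct *mm,
				      struct vm_area_struct *vma)
{
	/* Illustrative only: grow the CS limit to cover a new executable VMA. */
	if ((vma->vm_flags & VM_EXEC) && vma->vm_end > mm->context.exec_limit)
		modify_cs(mm, vma->vm_end);
}
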
Example #4
0
/*
 * lazy-check for CS validity on exec-shield binaries:
 *
 * the original non-exec stack patch was written by
 * Solar Designer <solar at openwall.com>. Thanks!
 */
static int
check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
{
	struct desc_struct *desc1, *desc2;
	struct vm_area_struct *vma;
	unsigned long limit;

	if (current->mm == NULL)
		return 0;

	limit = -1UL;
	if (current->mm->context.exec_limit != -1UL) {
		limit = PAGE_SIZE;
		spin_lock(&current->mm->page_table_lock);
		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
				limit = vma->vm_end;
		vma = get_gate_vma(current->mm);
		if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
			limit = vma->vm_end;
		spin_unlock(&current->mm->page_table_lock);
		if (limit >= TASK_SIZE)
			limit = -1UL;
		current->mm->context.exec_limit = limit;
	}
	set_user_cs(&current->mm->context.user_cs, limit);

	desc1 = &current->mm->context.user_cs;
	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;

	if (__compare_user_cs_desc(desc1, desc2)) {
		/*
		 * The CS was not in sync - reload it and retry the
		 * instruction. If the instruction still faults then
		 * we won't hit this branch next time around.
		 */
		if (print_fatal_signals >= 2) {
			printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
				error_code, error_code/8, regs->ip,
				smp_processor_id());
			printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
				current->mm->context.exec_limit,
				desc1->a, desc1->b, desc2->a, desc2->b);
		}

		load_user_cs_desc(cpu, current->mm);

		return 1;
	}

	return 0;
}
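
This function is only meaningful from the #GP path: recompute the exec limit, reload the stale GDT entry and retry the faulting instruction, otherwise let the signal be delivered. A hedged sketch of that call site (simplified; the real exec-shield hook sits in do_general_protection() and handles more cases):

static int example_gp_fixup(struct pt_regs *regs, long error_code)
{
	int cpu, fixed;

	if (!user_mode(regs))
		return 0;		/* kernel-mode faults are not handled here */

	cpu = get_cpu();
	fixed = check_lazy_exec_limit(cpu, regs, error_code);
	put_cpu();

	return fixed;			/* 1: descriptor reloaded, retry; 0: real fault */
}
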
Example #5
0
#ifdef CONFIG_X86_64
asmlinkage
#endif
void smp_invalidate_interrupt(struct pt_regs *regs)
{
	unsigned int cpu;
	unsigned int sender;
	union smp_flush_state *f;

	cpu = smp_processor_id();

#ifdef CONFIG_X86_32
	if (current->active_mm)
		load_user_cs_desc(cpu, current->active_mm);
#endif

	/*
	 * orig_rax contains the negated interrupt vector.
	 * Use that to determine where the sender put the data.
	 */
	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
	f = &flush_state[sender];

	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
		goto out;
		/*
		 * This was a BUG() but until someone can quote me the
		 * line from the Intel manual that guarantees an IPI to
		 * multiple CPUs is retried _only_ on the erroring CPUs,
		 * it's staying as a return
		 *
		 * BUG();
		 */

	if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
			if (f->flush_va == TLB_FLUSH_ALL)
				local_flush_tlb();
			else
				__flush_tlb_one(f->flush_va);
		} else
			leave_mm(cpu);
	}
out:
	ack_APIC_irq();
	smp_mb__before_clear_bit();
	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
	smp_mb__after_clear_bit();
	inc_irq_stat(irq_tlb_count);
}
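
The handler above recovers which of the NUM_INVALIDATE_TLB_VECTORS slots was used from the vector number left in orig_ax. A simplified sketch of the matching sender side (per-slot locking and the self-CPU exclusion done by the real code are omitted):

static void example_flush_tlb_others_ipi(const struct cpumask *targets,
					 struct mm_struct *mm, unsigned long va)
{
	unsigned int sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
	union smp_flush_state *f = &flush_state[sender];

	/* Publish what to flush in this sender's slot... */
	f->flush_mm = mm;
	f->flush_va = va;
	cpumask_copy(to_cpumask(f->flush_cpumask), targets);

	/* ...and interrupt the targets on the slot's private vector. */
	apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
			    INVALIDATE_TLB_VECTOR_START + sender);

	while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
		cpu_relax();		/* each handler clears its own bit */

	f->flush_mm = NULL;
	f->flush_va = 0;
}
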
Example #6
0
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %ax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */

	__unlazy_fpu(prev_p);
	if (next_p->mm)
		load_user_cs_desc(cpu, next_p->mm);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0.
	 */
	load_sp0(tss, next);

	/*
	 * Save away %gs. No need to save %fs, as it was saved on the
	 * stack on entry.  No need to save %es and %ds, as those are
	 * always kernel segments while inside the kernel.  Doing this
	 * before setting the new TLS descriptors avoids the situation
	 * where we temporarily have non-reloadable segments in %fs
	 * and %gs.  This could be an issue if the NMI handler ever
	 * used %fs or %gs (it does not today), or if the kernel is
	 * running inside of a hypervisor layer.
	 */
	lazy_save_gs(prev->gs);

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 */
	load_TLS(next, cpu);

	/*
	 * Restore IOPL if needed.  In normal use, the flags restore
	 * in the switch assembly will handle this.  But if the kernel
	 * is running virtualized at a non-zero CPL, the popf will
	 * not restore flags, so it must be done in a separate step.
	 */
	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
		set_iopl_mask(next->iopl);

	/*
	 * Now maybe handle debug registers and/or IO bitmaps
	 */
	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
		__switch_to_xtra(prev_p, next_p, tss);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_end_context_switch(next_p);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();

	/*
	 * Restore %gs if needed (which is common)
	 */
	if (prev->gs | next->gs)
		lazy_load_gs(next->gs);

	percpu_write(current_task, next_p);

	return prev_p;
}
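
Every load_user_cs_desc() call in these examples reads per-mm state that exec-shield adds to the i386 mm context. Roughly (sketch only; the real mm_context_t carries more fields and the layout differs between versions):

typedef struct {
	/* ... existing fields such as the LDT pointer ... */
	struct desc_struct user_cs;	/* per-mm user code segment descriptor */
	unsigned long exec_limit;	/* top of the executable region, or -1UL */
} mm_context_t;
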
Example #7
0
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %eax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
	struct physdev_set_iopl iopl_op;
	struct physdev_set_iobitmap iobmp_op;
	multicall_entry_t _mcl[8], *mcl = _mcl;

	/* XEN NOTE: FS/GS saved in switch_mm(), not here. */

	/*
	 * This is basically '__unlazy_fpu', except that we queue a
	 * multicall to indicate FPU task switch, rather than
	 * synchronously trapping to Xen.
	 */
	if (prev_p->thread_info->status & TS_USEDFPU) {
		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
		mcl->op      = __HYPERVISOR_fpu_taskswitch;
		mcl->args[0] = 1;
		mcl++;
	}
#if 0 /* lazy fpu sanity check */
	else BUG_ON(!(read_cr0() & 8));
#endif

	if (next_p->mm)
		load_user_cs_desc(cpu, next_p->mm);

	/*
	 * Reload esp0.
	 * This is load_esp0(tss, next) with a multicall.
	 */
	mcl->op      = __HYPERVISOR_stack_switch;
	mcl->args[0] = __KERNEL_DS;
	mcl->args[1] = next->esp0;
	mcl++;

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 * This is load_TLS(next, cpu) with multicalls.
	 */
#define C(i) do {							\
	if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||	\
		     next->tls_array[i].b != prev->tls_array[i].b)) {	\
		mcl->op = __HYPERVISOR_update_descriptor;		\
		*(u64 *)&mcl->args[0] =	virt_to_machine(		\
			&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
		*(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i];	\
		mcl++;							\
	}								\
} while (0)
	C(0); C(1); C(2);
#undef C

	if (unlikely(prev->iopl != next->iopl)) {
		iopl_op.iopl = (next->iopl == 0) ? 1 : (next->iopl >> 12) & 3;
		mcl->op      = __HYPERVISOR_physdev_op;
		mcl->args[0] = PHYSDEVOP_set_iopl;
		mcl->args[1] = (unsigned long)&iopl_op;
		mcl++;
	}
Example #8
0
/*
 *	switch_to(x,y) should switch tasks from x to y.
 *
 * We fsave/fwait so that an exception goes off at the right time
 * (as a call from the fsave or fwait in effect) rather than to
 * the wrong process. Lazy FP saving no longer makes any sense
 * with modern CPUs, and this simplifies a lot of things (SMP
 * and UP become the same).
 *
 * NOTE! We used to use the x86 hardware context switching. The
 * reason for not using it any more becomes apparent when you
 * try to recover gracefully from saved state that is no longer
 * valid (stale segment register values in particular). With the
 * hardware task-switch, there is no way to fix up bad state in
 * a reasonable manner.
 *
 * The fact that Intel documents the hardware task-switching to
 * be slow is a fairly red herring - this code is not noticeably
 * faster. However, there _is_ some room for improvement here,
 * so the performance issues may eventually be a valid point.
 * More important, however, is the fact that this allows us much
 * more flexibility.
 *
 * The return value (in %eax) will be the "prev" task after
 * the task-switch, and shows up in ret_from_fork in entry.S,
 * for example.
 */
struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
	struct tss_struct *tss = init_tss + cpu;
#endif
	struct physdev_set_iopl iopl_op;
	struct physdev_set_iobitmap iobmp_op;
	multicall_entry_t _mcl[8], *mcl = _mcl;

	/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
	if (next_p->mm)
		load_user_cs_desc(cpu, next_p->mm);

	/*
	 * This is basically '__unlazy_fpu', except that we queue a
	 * multicall to indicate FPU task switch, rather than
	 * synchronously trapping to Xen.
	 */
	if (prev_p->thread_info->status & TS_USEDFPU) {
		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
		mcl->op      = __HYPERVISOR_fpu_taskswitch;
		mcl->args[0] = 1;
		mcl++;
	}
#if 0 /* lazy fpu sanity check */
	else BUG_ON(!(read_cr0() & 8));
#endif
 
#ifdef CONFIG_X86_HIGH_ENTRY
{
	int i;
	/*
	 * Set the ptes of the virtual stack. (NOTE: a one-page TLB flush is
	 * needed because otherwise NMIs could interrupt the
	 * user-return code with a virtual stack and stale TLBs.)
	 */
	for (i = 0; i < ARRAY_SIZE(next->stack_page); i++) {
		__kunmap_atomic_type(KM_VSTACK_TOP-i);
		__kmap_atomic(next->stack_page[i], KM_VSTACK_TOP-i);
	}
	/*
	 * NOTE: here we rely on the task being the stack as well
	 */
	next_p->thread_info->virtual_stack =
			(void *)__kmap_atomic_vaddr(KM_VSTACK_TOP);
}
#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
	/*
	 * If next was preempted on entry from userspace to kernel,
	 * and now it's on a different cpu, we need to adjust %esp.
	 * This assumes that entry.S does not copy %esp while on the
	 * virtual stack (with interrupts enabled): which is so,
	 * except within __SWITCH_KERNELSPACE itself.
	 */
	if (unlikely(next->esp >= TASK_SIZE)) {
		next->esp &= THREAD_SIZE - 1;
		next->esp |= (unsigned long) next_p->thread_info->virtual_stack;
	}
#endif
#endif
	/*
	 * Reload esp0, LDT and the page table pointer:
	 * This is load_esp0(tss, next) with a multicall.
	 */
	mcl->op      = __HYPERVISOR_stack_switch;
	mcl->args[0] = __KERNEL_DS;
	mcl->args[1] = next->esp0;
	mcl++;

	/*
	 * Load the per-thread Thread-Local Storage descriptor.
	 * This is load_TLS(next, cpu) with multicalls.
	 */
#define C(i) do {							\
	if (unlikely(next->tls_array[i].a != prev->tls_array[i].a ||	\
		     next->tls_array[i].b != prev->tls_array[i].b)) {	\
		mcl->op = __HYPERVISOR_update_descriptor;		\
		*(u64 *)&mcl->args[0] = virt_to_machine(		\
			&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
		*(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i];	\
		mcl++;							\
	}								\
} while (0)
	C(0); C(1); C(2);
#undef C

	if (unlikely(prev->io_pl != next->io_pl)) {
		iopl_op.iopl = (next->io_pl == 0) ? 1 :
			(next->io_pl >> 12) & 3;
		mcl->op      = __HYPERVISOR_physdev_op;
		mcl->args[0] = PHYSDEVOP_set_iopl;
		mcl->args[1] = (unsigned long)&iopl_op;
		mcl++;
	}
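
Both Xen variants queue their stack, TLS and iopl updates into _mcl[] instead of trapping for each one, and the snippets end before the queue is submitted. As a hedged illustration of the batching pattern (the helper is hypothetical; only HYPERVISOR_multicall() itself is a real Xen interface, and the real __switch_to() submits its queue after the I/O-bitmap handling not shown above):

static void example_submit_multicalls(multicall_entry_t *batch,
				      multicall_entry_t *next_free)
{
	if (next_free == batch)
		return;			/* nothing was queued */

	/* One trap into Xen executes every queued operation in order. */
	if (HYPERVISOR_multicall(batch, next_free - batch))
		BUG();			/* submission of the batch itself failed */
}
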