Ejemplo n.º 1
0
/**
 * Builds the list of per-CPU offsets, one entry per CPU.
 *
 * The CPU count is taken from the "nr_cpu_ids" variable when the factory
 * knows it, otherwise a single CPU is assumed. Each entry is read from
 * consecutive long-sized slots starting at "__per_cpu_offset"; when that
 * variable cannot be resolved (null instance) the entry is set to -1ULL.
 *
 * @return vector holding one offset (or -1ULL) per CPU
 */
QVector<quint64> MemoryMap::perCpuOffsets()
{
    // Number of CPUs: one unless the symbol tells us otherwise.
    Variable *symbol = factory()->findVarByName("nr_cpu_ids");
    quint32 cpuCount = 1;
    if (symbol != 0)
        cpuCount = symbol->value<quint32>(_vmem);

    // Result, pre-sized and zero-filled.
    QVector<quint64> offsets(cpuCount, 0);

    // Cursor over the __per_cpu_offset array (null if the symbol is unknown).
    symbol = factory()->findVarByName("__per_cpu_offset");
    Instance cursor = symbol
            ? symbol->toInstance(_vmem, BaseType::trLexical, ksNone)
            : Instance();

    for (quint32 cpu = 0; cpu < cpuCount; ++cpu) {
        if (cursor.isNull()) {
            // No usable instance: mark the slot as invalid.
            offsets[cpu] = -1ULL;
            continue;
        }
        offsets[cpu] = cursor.toULong();
        // Advance by one long to reach the next array element.
        cursor.addToAddress(_vmem->memSpecs().sizeofLong);
    }

    return offsets;
}
Ejemplo n.º 2
0
/*
 * Write val into the per_cpu_sw_state instance belonging to cpu.
 *
 * The store is followed by dmb(), a sync_cache_w() on that CPU's copy of
 * the variable, and dsb_sev(). The statement order is significant and must
 * not be rearranged.
 * NOTE(review): presumably this makes the new state visible to a CPU that
 * is waiting on it (possibly with caches off) -- confirm against callers.
 */
static void per_cpu_sw_state_wr(u32 cpu, int val)
{
	per_cpu(per_cpu_sw_state, cpu) = val;
	dmb();
	/* Address of cpu's copy: the base symbol shifted by that CPU's offset. */
	sync_cache_w(SHIFT_PERCPU_PTR(&per_cpu_sw_state, per_cpu_offset(cpu)));
	dsb_sev();
}
Ejemplo n.º 3
0
/*
 * Entry point executed by a secondary CPU after it has been booted.
 *
 * Takes a reference on init_mm and makes it the active mm, installs this
 * CPU's per-cpu offset, runs the cpu_ops postboot hook when one is
 * registered, marks the CPU online, completes cpu_running, enables
 * debug/IRQ/FIQ and finally enters cpu_startup_entry(CPUHP_ONLINE).
 */
asmlinkage void __cpuinit secondary_start_kernel(void)
{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu = smp_processor_id();

	/* Grab a reference on init_mm and switch this CPU to it. */
	atomic_inc(&mm->mm_count);
	current->active_mm = mm;
	cpumask_set_cpu(cpu, mm_cpumask(mm));

	/*
	 * Install the per-cpu offset before the first printk(): printk()
	 * may touch per-cpu data, and until the offset is set such accesses
	 * would not resolve to this CPU's area.
	 */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
	printk("CPU%u: Booted secondary processor\n", cpu);

	cpu_set_reserved_ttbr0();
	flush_tlb_all();

	preempt_disable();
	trace_hardirqs_off();

	/* Optional per-implementation hook; only called when provided. */
	if (cpu_ops[cpu]->cpu_postboot)
		cpu_ops[cpu]->cpu_postboot();

	/* Mark the CPU online and signal whoever waits on cpu_running. */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	smp_store_cpu_info(cpu);

	notify_cpu_starting(cpu);

	local_dbg_enable();
	local_irq_enable();
	local_fiq_enable();

	cpu_startup_entry(CPUHP_ONLINE);
}
Ejemplo n.º 4
0
/*
 * cpu_suspend - suspend the calling CPU through __cpu_suspend_enter()
 *
 * arg: argument to pass to the finisher function
 * fn: finisher function pointer
 *
 * Returns whatever __cpu_suspend_enter() returned. When that value is 0,
 * the idmap is uninstalled, the per-cpu offset is re-installed and (if
 * available) the HW breakpoint registers are restored before returning.
 * Debug exceptions and graph tracing are disabled for the duration of the
 * call and restored on exit.
 */
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
	int ret;
	unsigned long flags;

	/*
	 * From this point debug exceptions are disabled to prevent
	 * updates to mdscr register (saved and restored along with
	 * general purpose registers) from kernel debuggers.
	 */
	local_dbg_save(flags);

	/*
	 * Function graph tracer state gets inconsistent when the kernel
	 * calls functions that never return (aka suspend finishers) hence
	 * disable graph tracing during their execution.
	 */
	pause_graph_tracing();

	/*
	 * mm context saved on the stack, it will be restored when
	 * the cpu comes out of reset through the identity mapped
	 * page tables, so that the thread address space is properly
	 * set-up on function return.
	 */
	ret = __cpu_suspend_enter(arg, fn);
	if (ret == 0) {
		/*
		 * We are resuming from reset with the idmap active in TTBR0_EL1.
		 * We must uninstall the idmap and restore the expected MMU
		 * state before we can possibly return to userspace.
		 */
		cpu_uninstall_idmap();

		/*
		 * Restore per-cpu offset before any kernel
		 * subsystem relying on it has a chance to run.
		 */
		set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

		/*
		 * Restore HW breakpoint registers to sane values
		 * before debug exceptions are possibly reenabled
		 * through local_dbg_restore.
		 */
		if (hw_breakpoint_restore)
			hw_breakpoint_restore(NULL);
	}

	/* Pairs with pause_graph_tracing() above. */
	unpause_graph_tracing();

	/*
	 * Restore pstate flags. OS lock and mdscr have been already
	 * restored, so from this point onwards, debugging is fully
	 * re-enabled if it was enabled when core started shutdown.
	 */
	local_dbg_restore(flags);

	return ret;
}
Ejemplo n.º 5
0
/*
 * This is the secondary CPU boot entry.  We're using this CPU's
 * idle thread stack, but a set of temporary page tables.
 *
 * The function does not return to its caller in the normal sense: its
 * last action is to call cpu_startup_entry(CPUHP_ONLINE).
 */
asmlinkage void __cpuinit secondary_start_kernel(void)
{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	atomic_inc(&mm->mm_count);
	current->active_mm = mm;
	cpumask_set_cpu(cpu, mm_cpumask(mm));

	/*
	 * The per-cpu offset is installed before the first printk() on
	 * this CPU.
	 */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
	printk("CPU%u: Booted secondary processor\n", cpu);

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_set_reserved_ttbr0();
	flush_tlb_all();

	preempt_disable();
	trace_hardirqs_off();

	/* Optional per-implementation hook; only called when provided. */
	if (cpu_ops[cpu]->cpu_postboot)
		cpu_ops[cpu]->cpu_postboot();

	/*
	 * Enable GIC and timers.
	 * NOTE(review): no GIC/timer call directly follows this comment in
	 * this version; the next statements store CPU info and send the
	 * "starting" notification -- confirm whether the comment is stale.
	 */

	smp_store_cpu_info(cpu);

	notify_cpu_starting(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue.  Wait for
	 * the CPU migration code to notice that the CPU is online
	 * before we continue.
	 */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	local_dbg_enable();
	/*
	 * Setup the percpu timer for this CPU.
	 */
	percpu_timer_setup();

	local_irq_enable();
	local_async_enable();

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_ONLINE);
}
Ejemplo n.º 6
0
/*
 * Write the per-cpu segment descriptor for cpu into that CPU's writable
 * GDT. Only does anything on CONFIG_X86_32; elsewhere it is a no-op.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
	/* Segment base is this CPU's per-cpu offset; limit field is 0xFFFFF. */
	struct desc_struct percpu_desc =
		GDT_ENTRY_INIT(0x8092, per_cpu_offset(cpu), 0xFFFFF);

	write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU,
			&percpu_desc, DESCTYPE_S);
#endif
}
Ejemplo n.º 7
0
/**
 * cpu_suspend - suspend the calling CPU through __cpu_suspend()
 *
 * @arg: argument to pass to the finisher function
 *
 * Return: -EOPNOTSUPP when no cpu_ops (or no cpu_suspend method) is
 * registered for the current CPU; otherwise the value returned by
 * __cpu_suspend(). When that value is 0 the saved mm is switched back in,
 * the TLB is flushed, the per-cpu offset is re-installed and (if available)
 * the HW breakpoint registers are restored before returning.
 */
int cpu_suspend(unsigned long arg)
{
	/* Captured up front so the same mm can be switched back in on resume. */
	struct mm_struct *mm = current->active_mm;
	int ret, cpu = smp_processor_id();
	unsigned long flags;

	/*
	 * If cpu_ops have not been registered or suspend
	 * has not been initialized, cpu_suspend call fails early.
	 */
	if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend)
		return -EOPNOTSUPP;

	/*
	 * From this point debug exceptions are disabled to prevent
	 * updates to mdscr register (saved and restored along with
	 * general purpose registers) from kernel debuggers.
	 */
	local_dbg_save(flags);

	/*
	 * mm context saved on the stack, it will be restored when
	 * the cpu comes out of reset through the identity mapped
	 * page tables, so that the thread address space is properly
	 * set-up on function return.
	 */
	ret = __cpu_suspend(arg);
	/*
	 * NOTE(review): pclog() is not defined in the visible code and runs
	 * before the per-cpu offset is restored below; it looks like debug
	 * instrumentation -- confirm it is intended and safe here.
	 */
        pclog();

	if (ret == 0) {
		/* Reinstall the mm that was active when we entered. */
		cpu_switch_mm(mm->pgd, mm);
		flush_tlb_all();

		/*
		 * Restore per-cpu offset before any kernel
		 * subsystem relying on it has a chance to run.
		 */
		set_my_cpu_offset(per_cpu_offset(cpu));

		/*
		 * Restore HW breakpoint registers to sane values
		 * before debug exceptions are possibly reenabled
		 * through local_dbg_restore.
		 */
		if (hw_breakpoint_restore)
			hw_breakpoint_restore(NULL);
	}

	/*
	 * Restore pstate flags. OS lock and mdscr have been already
	 * restored, so from this point onwards, debugging is fully
	 * re-enabled if it was enabled when core started shutdown.
	 */
	local_dbg_restore(flags);

	return ret;
}
Ejemplo n.º 8
0
/*
 * Install two descriptors based at cpu's per-cpu offset into that CPU's
 * GDT: one in slot GDT_ENTRY_PERCPU and one in slot GDT_MODULE_PERCPU.
 * Both use a limit field of 0xFFFFF and flags 0x8; the second descriptor's
 * type additionally has bit 0x40 set. No-op unless CONFIG_X86_32.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
	const unsigned long base = per_cpu_offset(cpu);
	struct desc_struct desc;

	/* Descriptor for the GDT_ENTRY_PERCPU slot. */
	pack_descriptor(&desc, base, 0xFFFFF, 0x2 | DESCTYPE_S, 0x8);
	desc.s = 1;
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU,
			&desc, DESCTYPE_S);

	/* Descriptor for the GDT_MODULE_PERCPU slot (type also has 0x40). */
	pack_descriptor(&desc, base, 0xFFFFF, 0x2 | DESCTYPE_S | 0x40, 0x8);
	desc.s = 1;
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_MODULE_PERCPU,
			&desc, DESCTYPE_S);
#endif
}
Ejemplo n.º 9
0
/*
 * Install the per-cpu segment descriptor for cpu in slot GDT_ENTRY_PERCPU
 * of that CPU's GDT. The segment starts at the CPU's per-cpu offset and
 * its limit is derived from the distance to VMALLOC_END, expressed in
 * pages. No-op unless CONFIG_X86_32.
 */
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
	unsigned long seg_base = per_cpu_offset(cpu);
	struct desc_struct desc;

	pack_descriptor(&desc, seg_base,
			(VMALLOC_END - seg_base - 1) >> PAGE_SHIFT,
			0x83 | DESCTYPE_S, 0xC);
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PERCPU,
			&desc, DESCTYPE_S);
#endif
}
Ejemplo n.º 10
0
/*
 * Variant of cpu_to_node() that can be called before the per_cpu areas
 * have been set up.
 *
 * Lookup order:
 *   1. the early x86_cpu_to_node_map array, while its pointer is non-NULL;
 *   2. the per-cpu x86_cpu_to_node_map, when cpu has a per-cpu offset;
 *   3. otherwise warn, dump the stack and return NUMA_NO_NODE.
 */
int early_cpu_to_node(int cpu)
{
	/* Early static array still available: use it. */
	if (early_per_cpu_ptr(x86_cpu_to_node_map))
		return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

	/* Per-cpu area exists for this CPU: read the per-cpu copy. */
	if (per_cpu_offset(cpu))
		return per_cpu(x86_cpu_to_node_map, cpu);

	/* Neither source is usable. */
	printk(KERN_WARNING
		"early_cpu_to_node(%d): no per_cpu area!\n", cpu);
	dump_stack();
	return NUMA_NO_NODE;
}
Ejemplo n.º 11
0
/**
 * acpi_suspend_lowlevel - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 *
 * Fills in the wakeup header located through real_mode_header with the
 * current video mode, control registers and (when readable) MSR values,
 * records the 64-bit/32-bit resume entry state, then calls
 * do_suspend_lowlevel().
 *
 * Return: -EINVAL when the wakeup header signature does not match,
 * 0 otherwise (after do_suspend_lowlevel() has returned).
 */
int acpi_suspend_lowlevel(void)
{
	struct wakeup_header *header =
		(struct wakeup_header *) __va(real_mode_header->wakeup_header);

	/* Refuse to continue if the header is not the one we expect. */
	if (header->signature != WAKEUP_HEADER_SIGNATURE) {
		printk(KERN_ERR "wakeup header does not match\n");
		return -EINVAL;
	}

	header->video_mode = saved_video_mode;

#ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

	/* If EFER cannot be read, record it as zero. */
	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
		       &header->pmode_efer_high))
		header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
	header->pmode_cr4 = read_cr4_safe();
	header->pmode_behavior = 0;
	/* Only request MISC_ENABLE restoration when the MSR read succeeded. */
	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
			&header->pmode_misc_en_low,
			&header->pmode_misc_en_high))
		header->pmode_behavior |=
			(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
	header->realmode_flags = acpi_realmode_flags;
	header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
	header->pmode_entry = (u32)&wakeup_pmode_return;
	header->pmode_cr3 = (u32)__pa(&initial_page_table);
	saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
	/* Resume on a temporary stack, with this CPU's GDT and per-cpu base. */
	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
	early_gdt_descr.address =
			(unsigned long)get_cpu_gdt_table(smp_processor_id());
	initial_gs = per_cpu_offset(smp_processor_id());
#endif
	initial_code = (unsigned long)wakeup_long64;
       saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

	do_suspend_lowlevel();
	return 0;
}
Ejemplo n.º 12
0
/*
 * Record that cpu belongs to node.
 *
 * The PDA's nodenumber is updated when the CPU has a PDA and node is not
 * NUMA_NO_NODE. The cpu->node map is then updated in the early array if
 * that still exists, else in the per-cpu copy if cpu has a per-cpu area;
 * if neither is available only a debug message is emitted.
 */
void __cpuinit numa_set_node(int cpu, int node)
{
	int *early_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	/* Early static array still in use. */
	if (early_map) {
		early_map[cpu] = node;
		return;
	}

	/* Per-cpu area is set up for this CPU. */
	if (per_cpu_offset(cpu)) {
		per_cpu(x86_cpu_to_node_map, cpu) = node;
		return;
	}

	Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
}
Ejemplo n.º 13
0
/*
 * Initialize the CPU's GDT.  This is either the boot CPU doing itself
 * (still using the master per-cpu area), or a CPU doing it for a
 * secondary which will soon come up.
 */
__cpuinit void init_gdt(int cpu)
{
	struct desc_struct d, *gdt = get_cpu_gdt_table(cpu);
	unsigned long base, limit;

	base = per_cpu_offset(cpu);
	limit = PERCPU_ENOUGH_ROOM - 1;
	if (limit < 64*1024)
		pack_descriptor(&d, base, limit, 0x80 | DESCTYPE_S | 0x3, 0x4);
	else
		pack_descriptor(&d, base, limit >> PAGE_SHIFT, 0x80 | DESCTYPE_S | 0x3, 0xC);

	write_gdt_entry(gdt, GDT_ENTRY_PERCPU, &d, DESCTYPE_S);

	per_cpu(this_cpu_off, cpu) = base;
	per_cpu(cpu_number, cpu) = cpu;
}
Ejemplo n.º 14
0
/*
 * Allocate and populate the per-cpu data area of every possible CPU.
 *
 * For each CPU a PERCPU_ENOUGH_ROOM-sized bootmem area is allocated
 * (node-local when the CPU's node is online and has NODE_DATA), the
 * CPU's per-cpu offset is set relative to __per_cpu_start and the
 * initial per-cpu section is copied in. Afterwards the per-cpu maps,
 * the node-to-cpumask map and the cpumask_of_cpu map are set up.
 *
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
	ssize_t size = PERCPU_ENOUGH_ROOM;
	char *ptr;
	int cpu;

	/* Setup cpu_pda map */
	setup_cpu_pda_map();

	/*
	 * Copy section for each CPU (we discard the original)
	 * NOTE(review): size was already initialised to the same value at
	 * its declaration; the reassignment below is redundant.
	 */
	size = PERCPU_ENOUGH_ROOM;
	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
			  size);

	for_each_possible_cpu(cpu) {
#ifndef CONFIG_NEED_MULTIPLE_NODES
		ptr = alloc_bootmem_pages(size);
#else
		int node = early_cpu_to_node(cpu);
		/* Fall back to a non-node-specific allocation if the node is unusable. */
		if (!node_online(node) || !NODE_DATA(node)) {
			ptr = alloc_bootmem_pages(size);
			printk(KERN_INFO
			       "cpu %d has no node %d or node-local memory\n",
				cpu, node);
		}
		else
			ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
#endif
		/* Offset = distance from the original section to this CPU's copy. */
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);

	}

	printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n",
		NR_CPUS, nr_cpu_ids, nr_node_ids);

	/* Setup percpu data maps */
	setup_per_cpu_maps();

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpumask_of_cpu map */
	setup_cpumask_of_cpu();
}
Ejemplo n.º 15
0
/**
 * acpi_save_state_mem - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 *
 * Note that this is too late to change acpi_wakeup_address.
 *
 * Copies the wakeup code to acpi_realmode, then fills in the wakeup
 * header inside that copy (video mode, jump segment, wakeup GDT, control
 * registers, magic values and the 32-bit or 64-bit resume entry state).
 *
 * Return: -ENOMEM when acpi_realmode is not set, -EINVAL when the copied
 * header's signature does not match, 0 on success.
 */
int acpi_save_state_mem(void)
{
	struct wakeup_header *header;

	if (!acpi_realmode) {
		printk(KERN_ERR "Could not allocate memory during boot, "
		       "S3 disabled\n");
		return -ENOMEM;
	}
	memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);

	/* The header lives at a fixed offset inside the copied wakeup code. */
	header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
	if (header->signature != 0x51ee1111) {
		printk(KERN_ERR "wakeup header does not match\n");
		return -EINVAL;
	}

	header->video_mode = saved_video_mode;

	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;

	/*
	 * Set up the wakeup GDT.  We set these up as Big Real Mode,
	 * that is, with limits set to 4 GB.  At least the Lenovo
	 * Thinkpad X61 is known to need this for the video BIOS
	 * initialization quirk to work; this is likely to also
	 * be the case for other laptops or integrated video devices.
	 */

	/* GDT[0]: GDT self-pointer */
	header->wakeup_gdt[0] =
		(u64)(sizeof(header->wakeup_gdt) - 1) +
		((u64)(acpi_wakeup_address +
			((char *)&header->wakeup_gdt - (char *)acpi_realmode))
				<< 16);
	/* GDT[1]: big real mode-like code segment */
	header->wakeup_gdt[1] =
		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
	/* GDT[2]: big real mode-like data segment */
	header->wakeup_gdt[2] =
		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);

#ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

	/* If EFER cannot be read, record it as zero. */
	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
		       &header->pmode_efer_high))
		header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
	header->pmode_cr4 = read_cr4_safe();
	header->realmode_flags = acpi_realmode_flags;
	header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
	header->pmode_entry = (u32)&wakeup_pmode_return;
	header->pmode_cr3 = (u32)__pa(&initial_page_table);
	saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
	header->trampoline_segment = setup_trampoline() >> 4;
#ifdef CONFIG_SMP
	/* Resume on a temporary stack, with this CPU's GDT and per-cpu base. */
	stack_start.sp = temp_stack + sizeof(temp_stack);
	early_gdt_descr.address =
			(unsigned long)get_cpu_gdt_table(smp_processor_id());
	initial_gs = per_cpu_offset(smp_processor_id());
#endif
	initial_code = (unsigned long)wakeup_long64;
       saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

	return 0;
}
Ejemplo n.º 16
0
/*
 * Build the initial vcpu_guest_context for cpu and register it with the
 * hypervisor through HYPERVISOR_vcpu_op(VCPUOP_initialise, ...).
 *
 * @cpu:  CPU whose context is being prepared
 * @idle: idle task of that CPU; supplies the initial stack pointers
 *
 * Returns 0 on success or when cpu is already marked in
 * xen_cpu_initialized_map, -ENOMEM when the context cannot be allocated.
 * A non-zero result from the hypercall triggers BUG().
 */
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	/* used to tell cpu_init() that it can proceed with initialization */
	cpumask_set_cpu(cpu, cpu_callout_mask);
	/* Nothing more to do if this CPU was initialized before. */
	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_rw(cpu);

#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#endif
	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	/*
	 * Bring up the CPU in cpu_bringup_and_idle() with the stack
	 * pointing just below where pt_regs would be if it were a normal
	 * kernel entry.
	 */
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.ss = __KERNEL_DS;
	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	/* The GDT must be page aligned. */
	BUG_ON((unsigned long)gdt & ~PAGE_MASK);

	/* Make both the GDT page and its mfn_to_virt() alias read-only. */
	gdt_mfn = arbitrary_virt_to_mfn(gdt);
	make_lowmem_page_readonly(gdt);
	make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

	ctxt->gdt_frames[0] = gdt_mfn;
	ctxt->gdt_ents      = GDT_ENTRIES;

	/*
	 * Set SS:SP that Xen will use when entering guest kernel mode
	 * from guest user mode.  Subsequent calls to load_sp0() can
	 * change this value.
	 */
	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = task_top_of_stack(idle);

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs     = __KERNEL_CS;
	ctxt->failsafe_callback_cs  = __KERNEL_CS;
#else
	/* On 64-bit the kernel GS base is this CPU's per-cpu offset. */
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->event_callback_eip    =
		(unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip =
		(unsigned long)xen_failsafe_callback;
	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
		BUG();

	/* The context is only needed for the hypercall above. */
	kfree(ctxt);
	return 0;
}
Ejemplo n.º 17
0
/*
 * cpu_suspend - suspend the calling CPU through __cpu_suspend_enter()
 *
 * arg: argument to pass to the finisher function
 * fn: finisher function pointer
 *
 * Returns whatever __cpu_suspend_enter() returned. When that value is 0,
 * the TTBR0 address space, the per-cpu offset and (if available) the HW
 * breakpoint registers are restored before returning. Debug exceptions
 * are disabled for the duration of the call and restored on exit.
 */
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
	/* Captured up front so the same mm can be switched back in on resume. */
	struct mm_struct *mm = current->active_mm;
	int ret;
	unsigned long flags;

	/*
	 * From this point debug exceptions are disabled to prevent
	 * updates to mdscr register (saved and restored along with
	 * general purpose registers) from kernel debuggers.
	 */
	local_dbg_save(flags);

	/*
	 * mm context saved on the stack, it will be restored when
	 * the cpu comes out of reset through the identity mapped
	 * page tables, so that the thread address space is properly
	 * set-up on function return.
	 */
	ret = __cpu_suspend_enter(arg, fn);
	if (ret == 0) {
		/*
		 * We are resuming from reset with TTBR0_EL1 set to the
		 * idmap to enable the MMU; set the TTBR0 to the reserved
		 * page tables to prevent speculative TLB allocations, flush
		 * the local tlb and set the default tcr_el1.t0sz so that
		 * the TTBR0 address space set-up is properly restored.
		 * If the current active_mm != &init_mm we entered cpu_suspend
		 * with mappings in TTBR0 that must be restored, so we switch
		 * them back to complete the address space configuration
		 * restoration before returning.
		 */
		cpu_set_reserved_ttbr0();
		local_flush_tlb_all();
		cpu_set_default_tcr_t0sz();

		if (mm != &init_mm)
			cpu_switch_mm(mm->pgd, mm);

		/*
		 * Restore per-cpu offset before any kernel
		 * subsystem relying on it has a chance to run.
		 */
		set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

		/*
		 * Restore HW breakpoint registers to sane values
		 * before debug exceptions are possibly reenabled
		 * through local_dbg_restore.
		 */
		if (hw_breakpoint_restore)
			hw_breakpoint_restore(NULL);
	}

	/*
	 * Restore pstate flags. OS lock and mdscr have been already
	 * restored, so from this point onwards, debugging is fully
	 * re-enabled if it was enabled when core started shutdown.
	 */
	local_dbg_restore(flags);

	return ret;
}
Ejemplo n.º 18
0
/*
 * x86 implementation of setup_per_cpu_areas(); this is the version that
 * runs on x86.
 *
 * Sets up the first percpu chunk (embed allocator first unless the page
 * allocator was chosen, page allocator as fallback; panics if both fail),
 * then initialises per-cpu bookkeeping for every possible CPU and finally
 * clears the early static array pointers.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned int cpu;
	unsigned long delta;
	int rc;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * Allocate percpu area.  Embedding allocator is our favorite;
	 * however, on NUMA configurations, it can result in very
	 * sparse unit mapping and vmalloc area isn't spacious enough
	 * on 32bit.  Use page in that case.
	 */
#ifdef CONFIG_X86_32
	/*
	 * 32bit only: if the first-chunk mode is auto and the system is
	 * NUMA, switch to page mode, because on 32bit the embed method
	 * (2MB units) behaves poorly in terms of memory allocation.
	 */
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	rc = -EINVAL;
	/*
	 * If the first-chunk mode is not PAGE it is either auto or embed;
	 * auto tries embed first and then page (so here PCPU_FC_EMBED and
	 * PCPU_FC_AUTO are effectively handled the same way).
	 */
	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
			PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; // 8KB + 20KB - 8KB
		size_t atom_size;

		/*
		 * On 64bit, use PMD_SIZE for atom_size so that embedded
		 * percpu areas are aligned to PMD.  This, in the future,
		 * can also allow using PMD mappings in vmalloc area.  Use
		 * PAGE_SIZE on 32bit as vmalloc space is highly contended
		 * and large vmalloc area allocs can easily fail.
		 */
#ifdef CONFIG_X86_64
		/*
		 * NOTE(review): annotator's interpretation -- on 64bit this
		 * appears to allocate in 2MB units (PS bit) so as to obtain
		 * contiguous, PMD-size-aligned space in vmalloc. On 32bit,
		 * 2MB requests apparently fail too often, so PAGE_SIZE is
		 * used instead.
		 */
		atom_size = PMD_SIZE; // 2MB
#else
		atom_size = PAGE_SIZE;
#endif
		/* Allocate the first chunk using the embed method. */
		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, // 8 << 10
					    dyn_size, atom_size,       // 20KB, 2MB
					    pcpu_cpu_distance,         // func
					    pcpu_fc_alloc, pcpu_fc_free);  // func, func
		if (rc < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		/* If the embed allocation failed (or was skipped), use the page method. */
		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					   pcpu_fc_alloc, pcpu_fc_free,
					   pcpup_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	/* alrighty, percpu areas up and running */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		/*
		 * per_cpu_offset() is the offset that has to be added to a
		 * percpu variable to reach a given processor's copy.
		 * NOTE(review): annotator's remark -- most architectures use
		 * the __per_cpu_offset array, x86_64 has its own mechanism.
		 */

		/*
		 * Add delta to the unit offset obtained while the first
		 * chunk was initialised to get this CPU's offset.
		 */
		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
		/* Store the offset in this_cpu_off as well. */
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		/* Also store the CPU number. */
		per_cpu(cpu_number, cpu) = cpu;

		/*
		 * NOTE(review): annotator's remark -- x86_64 does not use the
		 * percpu segment or the stack canary segment. An explanation
		 * of the canary is available at
		 * http://studyfoss.egloos.com/5279959
		 */
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);
		/*
		 * Copy data used in early init routines from the
		 * initial arrays to the per cpu data areas.  These
		 * arrays then become expendable and the *_early_ptr's
		 * are zeroed indicating that the static arrays are
		 * gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		/* Move the APIC ids gathered early into the per-cpu area. */
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/*
		 * Set the irq stack pointer of each CPU: 64 bytes below the
		 * end of the irq stack.
		 * NOTE(review): the annotator guessed that the 64 bytes are
		 * the gs+canary area (48 bytes) plus some padding protecting
		 * the irq stack, and marked that guess as unverified.
		 */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		/* Likewise move the NUMA info gathered early into the per-cpu area. */
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/*
		 * Ensure that the boot cpu numa_node is correct when the boot
		 * cpu is on a node that doesn't have memory installed.
		 * Also cpu_up() will call cpu_to_node() for APs when
		 * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
		 * up later with c_init aka intel_init/amd_init.
		 * So set them all (boot cpu and all APs).
		 */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/*
		 * Up to this point, the boot CPU has been using .init.data
		 * area.  Reload any changed state for the boot CPU.
		 */
		if (!cpu)
			switch_to_new_gdt(cpu);
	}

	/* indicate the early static arrays will soon be gone */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpu initialized, callin, callout masks */
	setup_cpu_local_masks();
}
Ejemplo n.º 19
0
Archivo: smp.c Proyecto: mbgg/linux
/*
 * Build the initial vcpu_guest_context for cpu and register it with the
 * hypervisor through HYPERVISOR_vcpu_op(VCPUOP_initialise, ...).
 *
 * Two set-ups are handled: when both XENFEAT_auto_translated_physmap and
 * XENFEAT_supervisor_mode_kernel are present (the PVH branch, 64-bit
 * only), and the classic PV path otherwise.
 *
 * @cpu:  CPU whose context is being prepared
 * @idle: idle task of that CPU; supplies the initial stack pointers
 *
 * Returns 0 on success or when cpu is already marked in
 * xen_cpu_initialized_map, -ENOMEM when the context cannot be allocated.
 * A non-zero result from the hypercall triggers BUG().
 */
static int __cpuinit
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	/* Nothing to do if this CPU was initialized before. */
	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

	/* Settings common to both branches below. */
	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
	/* On 64-bit the kernel GS base is this CPU's per-cpu offset. */
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	if (xen_feature(XENFEAT_auto_translated_physmap) &&
	    xen_feature(XENFEAT_supervisor_mode_kernel)) {
		/* Note: PVH is not supported on x86_32. */
#ifdef CONFIG_X86_64
		ctxt->user_regs.ds = __KERNEL_DS;
		ctxt->user_regs.es = 0;
		ctxt->user_regs.gs = 0;

		/* GUEST_GDTR_BASE and */
		ctxt->u.pvh.gdtaddr = (unsigned long)gdt;
		/* GUEST_GDTR_LIMIT in the VMCS. */
		ctxt->u.pvh.gdtsz = (unsigned long)(GDT_SIZE - 1);

		ctxt->gs_base_user = (unsigned long)
					per_cpu(irq_stack_union.gs_base, cpu);
#endif
	} else {
		ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
		ctxt->user_regs.ds = __USER_DS;
		ctxt->user_regs.es = __USER_DS;

		xen_copy_trap_info(ctxt->trap_ctxt);

		ctxt->ldt_ents = 0;

		/* The GDT must be page aligned. */
		BUG_ON((unsigned long)gdt & ~PAGE_MASK);

		/* Make both the GDT page and its mfn_to_virt() alias read-only. */
		gdt_mfn = arbitrary_virt_to_mfn(gdt);
		make_lowmem_page_readonly(gdt);
		make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

		ctxt->u.pv.gdt_frames[0] = gdt_mfn;
		ctxt->u.pv.gdt_ents      = GDT_ENTRIES;

		ctxt->kernel_ss = __KERNEL_DS;
		ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
		ctxt->event_callback_cs     = __KERNEL_CS;
		ctxt->failsafe_callback_cs  = __KERNEL_CS;
#endif
		ctxt->event_callback_eip    =
					(unsigned long)xen_hypervisor_callback;
		ctxt->failsafe_callback_eip =
					(unsigned long)xen_failsafe_callback;
	}
	ctxt->user_regs.cs = __KERNEL_CS;
	/* Initial stack pointer: one pt_regs below the idle task's sp0. */
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	/* The context is only needed for the hypercall above. */
	kfree(ctxt);
	return 0;
}
Ejemplo n.º 20
0
/*
 * Set up the static percpu area and per-cpu bookkeeping for every
 * possible CPU.
 *
 * The area is allocated by the first of setup_pcpu_remap(),
 * setup_pcpu_embed() and setup_pcpu_4k() that succeeds; if all fail the
 * kernel panics. The successful call's return value is used as the
 * per-cpu unit size.
 *
 * Great future plan:
 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
 * Always point %gs to its beginning
 */
void __init setup_per_cpu_areas(void)
{
	size_t static_size = __per_cpu_end - __per_cpu_start;
	unsigned int cpu;
	unsigned long delta;
	size_t pcpu_unit_size;
	ssize_t ret;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * Allocate percpu area.  If PSE is supported, try to make use
	 * of large page mappings.  Please read comments on top of
	 * each allocator for details.
	 */
	ret = setup_pcpu_remap(static_size);
	if (ret < 0)
		ret = setup_pcpu_embed(static_size);
	if (ret < 0)
		ret = setup_pcpu_4k(static_size);
	if (ret < 0)
		panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
		      static_size, ret);

	/* A non-negative return value is the size of one per-cpu unit. */
	pcpu_unit_size = ret;

	/* alrighty, percpu areas up and running */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		/* Units are laid out back to back, one per CPU. */
		per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);
		/*
		 * Copy data used in early init routines from the
		 * initial arrays to the per cpu data areas.  These
		 * arrays then become expendable and the *_early_ptr's
		 * are zeroed indicating that the static arrays are
		 * gone.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/* irq stack pointer: 64 bytes below the end of the irq stack. */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
#endif
#endif
		/*
		 * Up to this point, the boot CPU has been using .data.init
		 * area.  Reload any changed state for the boot CPU.
		 */
		if (cpu == boot_cpu_id)
			switch_to_new_gdt(cpu);
	}

	/* indicate the early static arrays will soon be gone */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup cpu initialized, callin, callout masks */
	setup_cpu_local_masks();
}
Ejemplo n.º 21
0
/**
 * x86_acpi_suspend_lowlevel - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 *
 * Fills in the wakeup header located through real_mode_header with the
 * current video mode, control registers and MSR values, setting a bit in
 * pmode_behavior for each item that should be restored on wakeup, records
 * the 32-bit or 64-bit resume entry state, then calls
 * do_suspend_lowlevel().
 *
 * Return: -EINVAL when the wakeup header signature does not match,
 * 0 otherwise (after do_suspend_lowlevel() has returned).
 */
int x86_acpi_suspend_lowlevel(void)
{
    struct wakeup_header *header =
        (struct wakeup_header *) __va(real_mode_header->wakeup_header);

    /* Refuse to continue if the header is not the one we expect. */
    if (header->signature != WAKEUP_HEADER_SIGNATURE) {
        printk(KERN_ERR "wakeup header does not match\n");
        return -EINVAL;
    }

    header->video_mode = saved_video_mode;

    /* Start with no restore actions requested; bits are OR-ed in below. */
    header->pmode_behavior = 0;

#ifndef CONFIG_64BIT
    native_store_gdt((struct desc_ptr *)&header->pmode_gdt);

    /*
     * We have to check that we can write back the value, and not
     * just read it.  At least on 90 nm Pentium M (Family 6, Model
     * 13), reading an invalid MSR is not guaranteed to trap, see
     * Erratum X4 in "Intel Pentium M Processor on 90 nm Process
     * with 2-MB L2 Cache and Intel® Processor A100 and A110 on 90
     * nm process with 512-KB L2 Cache Specification Update".
     */
    if (!rdmsr_safe(MSR_EFER,
                    &header->pmode_efer_low,
                    &header->pmode_efer_high) &&
            !wrmsr_safe(MSR_EFER,
                        header->pmode_efer_low,
                        header->pmode_efer_high))
        header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
#endif /* !CONFIG_64BIT */

    header->pmode_cr0 = read_cr0();
    /* CR4 is only read and flagged for restore when cpuid_level >= 0. */
    if (__this_cpu_read(cpu_info.cpuid_level) >= 0) {
        header->pmode_cr4 = read_cr4();
        header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4);
    }
    /* Same read-then-write-back check as for EFER above. */
    if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
                    &header->pmode_misc_en_low,
                    &header->pmode_misc_en_high) &&
            !wrmsr_safe(MSR_IA32_MISC_ENABLE,
                        header->pmode_misc_en_low,
                        header->pmode_misc_en_high))
        header->pmode_behavior |=
            (1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
    header->realmode_flags = acpi_realmode_flags;
    header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
    header->pmode_entry = (u32)&wakeup_pmode_return;
    header->pmode_cr3 = (u32)__pa_symbol(initial_page_table);
    saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
    /* Resume on a temporary stack, with this CPU's GDT and per-cpu base. */
    stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
    early_gdt_descr.address =
        (unsigned long)get_cpu_gdt_table(smp_processor_id());
    initial_gs = per_cpu_offset(smp_processor_id());
#endif
    initial_code = (unsigned long)wakeup_long64;
    saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

    do_suspend_lowlevel();
    return 0;
}
Ejemplo n.º 22
0
/**
 * acpi_suspend_lowlevel - save kernel state
 *
 * Create an identity mapped page table and copy the wakeup routine to
 * low memory.
 */
int acpi_suspend_lowlevel(void)
{
	struct wakeup_header *header;
	/* address in low memory of the wakeup routine. */
	char *acpi_realmode;

	acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code);

	header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET);
	if (header->signature != WAKEUP_HEADER_SIGNATURE) {
		printk(KERN_ERR "wakeup header does not match\n");
		return -EINVAL;
	}

	header->video_mode = saved_video_mode;

	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;

	/*
	 * Set up the wakeup GDT.  We set these up as Big Real Mode,
	 * that is, with limits set to 4 GB.  At least the Lenovo
	 * Thinkpad X61 is known to need this for the video BIOS
	 * initialization quirk to work; this is likely to also
	 * be the case for other laptops or integrated video devices.
	 */

	/* GDT[0]: GDT self-pointer */
	header->wakeup_gdt[0] =
		(u64)(sizeof(header->wakeup_gdt) - 1) +
		((u64)__pa(&header->wakeup_gdt) << 16);
	/* GDT[1]: big real mode-like code segment */
	header->wakeup_gdt[1] =
		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
	/* GDT[2]: big real mode-like data segment */
	header->wakeup_gdt[2] =
		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);

#ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
		       &header->pmode_efer_high))
		header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
	header->pmode_cr4 = read_cr4_safe();
	header->pmode_behavior = 0;
	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
			&header->pmode_misc_en_low,
			&header->pmode_misc_en_high))
		header->pmode_behavior |=
			(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
	header->realmode_flags = acpi_realmode_flags;
	header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
	header->pmode_entry = (u32)&wakeup_pmode_return;
	header->pmode_cr3 = (u32)__pa(&initial_page_table);
	saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
	header->trampoline_segment = trampoline_address() >> 4;
#ifdef CONFIG_SMP
	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
	early_gdt_descr.address =
			(unsigned long)get_cpu_gdt_table(smp_processor_id());
	initial_gs = per_cpu_offset(smp_processor_id());
#endif
	initial_code = (unsigned long)wakeup_long64;
       saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

	do_suspend_lowlevel();
	return 0;
}
/*
 * setup_per_cpu_areas - allocate the first percpu chunk and populate it
 *
 * Tries pcpu_embed_first_chunk() unless the page allocator was chosen,
 * falls back to pcpu_page_first_chunk() on failure and panics if that
 * fails too.  Afterwards, for every possible CPU, the per-cpu offset is
 * recorded and the early per-cpu values are copied into the new areas.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned int cpu;
	unsigned long delta;
	int rc;

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * On 32-bit, when the allocator choice was left on auto and
	 * pcpu_need_numa() reports true, switch to the page allocator.
	 */
#ifdef CONFIG_X86_32
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif
	/* Start from an error so the page fallback runs if embed is skipped. */
	rc = -EINVAL;
	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
			PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
		size_t atom_size;

		/* Allocation atom: PMD_SIZE on 64-bit, PAGE_SIZE otherwise. */
#ifdef CONFIG_X86_64
		atom_size = PMD_SIZE;
#else
		atom_size = PAGE_SIZE;
#endif
		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    dyn_size, atom_size,
					    pcpu_cpu_distance,
					    pcpu_fc_alloc, pcpu_fc_free);
		if (rc < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	/* Embed path skipped or failed: use the page-based first chunk. */
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					   pcpu_fc_alloc, pcpu_fc_free,
					   pcpup_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	/* percpu areas are up; delta is pcpu_base_addr relative to __per_cpu_start */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		/* The offset must be stored before any per_cpu() access below. */
		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);
		/* Copy values from the early maps into the per-cpu areas. */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		/* irq_stack_ptr sits IRQ_STACK_SIZE - 64 past the start of irq_stack. */
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/* Set the numa node for every possible CPU, not just CPU 0. */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/* Only CPU 0 switches to the new GDT here. */
		if (!cpu)
			switch_to_new_gdt(cpu);
	}

	/* Clear the early pointers now that the values have been copied. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Setup node to cpumask map */
	setup_node_to_cpumask_map();

	/* Setup the cpu-local masks */
	setup_cpu_local_masks();
}
Ejemplo n.º 24
0
/*
 * setup_per_cpu_areas - allocate the first percpu chunk and populate it
 *
 * Picks a first-chunk allocator, panics if none succeeds, then walks all
 * possible CPUs to record their per-cpu offsets and to move the early-boot
 * per-cpu values into the freshly created areas.
 */
void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;	/* stays negative if the embed path is skipped */

	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

	/*
	 * The embedding allocator is preferred.  On 32bit NUMA setups it
	 * can produce a very sparse unit mapping that the vmalloc area is
	 * too small for, so the page allocator is selected there.
	 */
#ifdef CONFIG_X86_32
	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
		pcpu_chosen_fc = PCPU_FC_PAGE;
#endif

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		const size_t dyn_size = PERCPU_MODULE_RESERVE +
			PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
		size_t atom_size = PAGE_SIZE;

		/* Use PMD-sized atoms when PSE is available. */
		if (cpu_has_pse)
			atom_size = PMD_SIZE;

		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					    dyn_size, atom_size,
					    pcpu_cpu_distance,
					    pcpu_fc_alloc, pcpu_fc_free);
		if (rc < 0)
			pr_warning("%s allocator failed (%d), falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}

	/* Page-based fallback; nothing left to try if it fails as well. */
	if (rc < 0) {
		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
					   pcpu_fc_alloc, pcpu_fc_free,
					   pcpup_populate_pte);
		if (rc < 0)
			panic("cannot initialize percpu area (err=%d)", rc);
	}

	/* The percpu areas exist from here on. */
	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		const unsigned long cpu_off = delta + pcpu_unit_offsets[cpu];

		/* Must be stored first: per_cpu() accesses below rely on it. */
		per_cpu_offset(cpu) = cpu_off;
		per_cpu(this_cpu_off, cpu) = cpu_off;
		per_cpu(cpu_number, cpu) = cpu;
		setup_percpu_segment(cpu);
		setup_stack_canary_segment(cpu);

		/*
		 * Move the data that early init code kept in static arrays
		 * into the per cpu areas.  The arrays are expendable
		 * afterwards; their *_early_ptr's are cleared after the loop
		 * to signal that.
		 */
#ifdef CONFIG_X86_LOCAL_APIC
		per_cpu(x86_cpu_to_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_apicid, cpu);
		per_cpu(x86_bios_cpu_apicid, cpu) =
			early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
#ifdef CONFIG_X86_32
		per_cpu(x86_cpu_to_logical_apicid, cpu) =
			early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
#ifdef CONFIG_X86_64
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =
			early_per_cpu_map(x86_cpu_to_node_map, cpu);
		/*
		 * Set the node for the boot cpu and every AP alike: the
		 * boot cpu may sit on a memoryless node, and with
		 * MEMORY_HOTPLUG cpu_up() calls cpu_to_node() for APs
		 * before per_cpu(numa_node) gets set up by c_init
		 * (intel_init/amd_init).
		 */
		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
		/*
		 * The boot CPU has been running on the .init.data area so
		 * far; reload its changed state.
		 */
		if (cpu == 0)
			switch_to_new_gdt(cpu);
	}

	/* Signal that the early static arrays are about to go away. */
#ifdef CONFIG_X86_LOCAL_APIC
	early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
	early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
#ifdef CONFIG_X86_32
	early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
#endif
#ifdef CONFIG_NUMA
	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif

	/* Node to cpumask map. */
	setup_node_to_cpumask_map();

	/* Cpu initialized, callin and callout masks. */
	setup_cpu_local_masks();
}
Ejemplo n.º 25
0
/*
 * __cpu_suspend
 *
 * arg: argument to pass to the finisher function
 * fn: finisher function pointer
 *
 * Returns whatever __cpu_suspend_enter() returned.  Per-CPU state is
 * restored only when that value is 0.
 */
int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
	struct mm_struct *mm = current->active_mm;
	unsigned long flags;
	int ret;

	/*
	 * Debug exceptions stay disabled from here on so that kernel
	 * debuggers cannot update the mdscr register, which is saved and
	 * restored together with the general purpose registers.
	 */
	local_dbg_save(flags);

	/*
	 * Suspend finishers never return, which leaves the function graph
	 * tracer state inconsistent; keep graph tracing paused while they
	 * run.
	 */
	pause_graph_tracing();

	/*
	 * The mm context is saved on the stack and restored when the cpu
	 * comes out of reset through the identity mapped page tables, so
	 * the thread address space is set up properly on return.
	 */
	ret = __cpu_suspend_enter(arg, fn);
	if (ret != 0)
		goto out;

	/*
	 * Resuming from reset leaves TTBR0_EL1 pointing at the idmap used
	 * to enable the MMU.  Put the active_mm mappings back, except when
	 * active_mm is init_mm: in that case the thread entered with the
	 * reserved TTBR0 page tables and gets those again.
	 */
	if (mm != &init_mm) {
		cpu_switch_mm(mm->pgd, mm);
	} else {
		cpu_set_reserved_ttbr0();
	}

	flush_tlb_all();

	/*
	 * The per-cpu offset has to be back before any kernel subsystem
	 * that relies on it gets a chance to run.
	 */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

	/*
	 * Bring the HW breakpoint registers to sane values before
	 * local_dbg_restore possibly re-enables debug exceptions.
	 */
	if (hw_breakpoint_restore)
		hw_breakpoint_restore(NULL);

out:
	unpause_graph_tracing();

	/*
	 * Restore the pstate flags.  OS lock and mdscr are already back,
	 * so debugging is fully re-enabled from here on if it was enabled
	 * when the core started shutting down.
	 */
	local_dbg_restore(flags);

	return ret;
}
Ejemplo n.º 26
0
/*
 * This is the secondary CPU boot entry.  We're using this CPU's
 * idle thread stack, but a set of temporary page tables.
 *
 * Does not return: the last call hands control to cpu_startup_entry().
 */
asmlinkage void secondary_start_kernel(void)
{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	atomic_inc(&mm->mm_count);
	current->active_mm = mm;

	/* Install the per-cpu offset for the CPU we are running on. */
	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_uninstall_idmap();

	preempt_disable();
	trace_hardirqs_off();

	/*
	 * If the system has established the capabilities, make sure
	 * this CPU ticks all of those. If it doesn't, the CPU will
	 * fail to come online.
	 */
	verify_local_cpu_capabilities();

	/* Optional per-CPU post-boot hook supplied by the cpu_ops entry. */
	if (cpu_ops[cpu]->cpu_postboot)
		cpu_ops[cpu]->cpu_postboot();

	/*
	 * Log the CPU info before it is marked online and might get read.
	 */
	cpuinfo_store_cpu();

	/*
	 * Enable GIC and timers.
	 */
	notify_cpu_starting(cpu);

	store_cpu_topology(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue.  Wait for
	 * the CPU migration code to notice that the CPU is online
	 * before we continue.
	 */
	pr_info("CPU%u: Booted secondary processor [%08x]\n",
					 cpu, read_cpuid_id());
	update_cpu_boot_status(CPU_BOOT_SUCCESS);
	/* Mark the CPU online, then signal the cpu_running completion. */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	local_irq_enable();
	local_async_enable();

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}
Ejemplo n.º 27
0
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct desc_struct *gdt;
	unsigned long gdt_mfn;

	if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	gdt = get_cpu_gdt_table(cpu);

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.ss = __KERNEL_DS;
#ifdef CONFIG_X86_32
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = __KERNEL_STACK_CANARY;
#else
	ctxt->gs_base_kernel = per_cpu_offset(cpu);
#endif
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt & ~PAGE_MASK);

	gdt_mfn = arbitrary_virt_to_mfn(gdt);
	make_lowmem_page_readonly(gdt);
	make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

	ctxt->gdt_frames[0] = gdt_mfn;
	ctxt->gdt_ents      = GDT_ENTRIES;

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.sp0;

#ifdef CONFIG_X86_32
	ctxt->event_callback_cs     = __KERNEL_CS;
	ctxt->failsafe_callback_cs  = __KERNEL_CS;
#endif
	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}
Ejemplo n.º 28
0
/*
 * Install the per-cpu offset of the CPU we are running on, then store
 * the boot CPU info and record the boot CPU run EL.
 */
void __init smp_prepare_boot_cpu(void)
{
	unsigned long offset = per_cpu_offset(smp_processor_id());

	set_my_cpu_offset(offset);
	cpuinfo_store_boot_cpu();
	save_boot_cpu_run_el();
}
Ejemplo n.º 29
0
/* Install the per-cpu offset of the CPU we are running on. */
void __init smp_prepare_boot_cpu(void)
{
	unsigned long offset = per_cpu_offset(smp_processor_id());

	set_my_cpu_offset(offset);
}
Ejemplo n.º 30
0
/*
 * acpi_suspend_lowlevel - fill in the wakeup header and suspend
 *
 * Returns 0 after do_suspend_lowlevel() has returned, or -EINVAL when the
 * wakeup header signature does not match WAKEUP_HEADER_SIGNATURE.
 */
int acpi_suspend_lowlevel(void)
{
	struct wakeup_header *header;
	/* address of the wakeup routine inside the trampoline */
	char *acpi_realmode;

	acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code);

	/* The header sits WAKEUP_HEADER_OFFSET bytes into the wakeup code. */
	header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET);
	if (header->signature != WAKEUP_HEADER_SIGNATURE) {
		printk(KERN_ERR "wakeup header does not match\n");
		return -EINVAL;
	}

	header->video_mode = saved_video_mode;

	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;

	/* Set up the three wakeup GDT entries. */

	/* GDT[0]: self-pointer (size - 1 in the low bits, address shifted by 16) */
	header->wakeup_gdt[0] =
		(u64)(sizeof(header->wakeup_gdt) - 1) +
		((u64)__pa(&header->wakeup_gdt) << 16);
	/* GDT[1]: entry based at acpi_wakeup_address, flags 0x809b */
	header->wakeup_gdt[1] =
		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
	/* GDT[2]: entry based at acpi_wakeup_address, flags 0x8093 */
	header->wakeup_gdt[2] =
		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);

#ifndef CONFIG_64BIT
	store_gdt((struct desc_ptr *)&header->pmode_gdt);

	/* If reading EFER fails, store zero for both halves. */
	if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
		       &header->pmode_efer_high))
		header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */

	header->pmode_cr0 = read_cr0();
	header->pmode_cr4 = read_cr4_safe();
	header->pmode_behavior = 0;
	/* Flag MISC_ENABLE for restore only when reading it succeeded. */
	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
			&header->pmode_misc_en_low,
			&header->pmode_misc_en_high))
		header->pmode_behavior |=
			(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
	header->realmode_flags = acpi_realmode_flags;
	header->real_magic = 0x12345678;

#ifndef CONFIG_64BIT
	header->pmode_entry = (u32)&wakeup_pmode_return;
	header->pmode_cr3 = (u32)__pa(&initial_page_table);
	saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
	header->trampoline_segment = trampoline_address() >> 4;
#ifdef CONFIG_SMP
	/* Stack top, GDT and per-cpu offset for the current CPU. */
	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
	early_gdt_descr.address =
			(unsigned long)get_cpu_gdt_table(smp_processor_id());
	initial_gs = per_cpu_offset(smp_processor_id());
#endif
	initial_code = (unsigned long)wakeup_long64;
       saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */

	do_suspend_lowlevel();
	return 0;
}