int __init check_nmi_watchdog(void)
{
	int *counts;
	int cpu;

	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DISABLED))
		return 0;

	if (!atomic_read(&nmi_active))
		return 0;

	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!counts)
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	/* endflag is a file-scope flag; nmi_cpu_busy() keeps the other
	 * CPUs spinning until it is set below. */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counts[cpu] = cpu_pda(cpu)->__nmi_count;
	local_irq_enable();
	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */

	for_each_online_cpu(cpu) {
		if (!per_cpu(wd_enabled, cpu))
			continue;
		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
			printk(KERN_WARNING "WARNING: CPU#%d: NMI "
			       "appears to be stuck (%d->%d)!\n",
			       cpu, counts[cpu],
			       cpu_pda(cpu)->__nmi_count);
			per_cpu(wd_enabled, cpu) = 0;
			atomic_dec(&nmi_active);
		}
	}

	if (!atomic_read(&nmi_active)) {
		kfree(counts);
		atomic_set(&nmi_active, -1);
		endflag = 1;
		return -1;
	}
	endflag = 1;
	printk("OK.\n");

	/*
	 * Now that we know it works we can reduce NMI frequency to
	 * something more reasonable; makes a difference in some configs.
	 */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = lapic_adjust_nmi_hz(1);

	kfree(counts);
	return 0;
}
/*
 * Great future plan:
 * Declare PDA itself and support (irqstack, tss, pgd) as per cpu data.
 * Always point %gs to its beginning.
 */
void __init setup_per_cpu_areas(void)
{
	int i;
	unsigned long size;

#ifdef CONFIG_HOTPLUG_CPU
	prefill_possible_map();
#endif

	/* Copy section for each CPU (we discard the original) */
	size = PERCPU_ENOUGH_ROOM;
	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
	       size);

	for_each_cpu_mask (i, cpu_possible_map) {
		char *ptr;

		if (!NODE_DATA(cpu_to_node(i))) {
			printk("cpu with no node %d, num_online_nodes %d\n",
			       i, num_online_nodes());
			ptr = alloc_bootmem(size);
		} else {
			ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)),
						 size);
		}
		if (!ptr)
			panic("Cannot allocate cpu data for CPU %d\n", i);

		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}
}
/**
 * This initializes a per-CPU area for each CPU.
 *
 * TODO: The PDA and per-CPU areas are pretty tightly wound.  It should be
 * possible to make the per-CPU area *be* the PDA, or put another way,
 * point %GS at the per-CPU area rather than the PDA.  All of the PDA's
 * current contents would become normal per-CPU variables.
 */
static void __init setup_per_cpu_areas(void)
{
	int i;
	size_t size;

	/*
	 * There is an ELF section containing all per-CPU variables
	 * surrounded by __per_cpu_start and __per_cpu_end symbols.
	 * We create a copy of this ELF section for each CPU.
	 */
	size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);

	for_each_cpu_mask (i, cpu_present_map) {
		char *ptr;

		ptr = alloc_bootmem_aligned(size, PAGE_SIZE);
		if (!ptr)
			panic("Cannot allocate cpu data for CPU %d\n", i);

		/*
		 * Pre-bias data_offset by subtracting its offset from
		 * __per_cpu_start.  Later, per_cpu() will calculate a
		 * per_cpu variable's address with:
		 *
		 *   addr = offset_in_percpu_ELF_section + data_offset
		 *        = (__per_cpu_start + offset) + (ptr - __per_cpu_start)
		 *        = offset + ptr
		 */
		cpu_pda(i)->data_offset = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
	}
}
void __init x86_64_start_kernel(char *real_mode_data)
{
	int i;

	/* clear bss before set_intr_gate with early_idt_handler */
	clear_bss();

	/* Make NULL pointers segfault */
	zap_identity_mappings();

	for (i = 0; i < IDT_ENTRIES; i++)
		set_intr_gate(i, early_idt_handler);
	load_idt((const struct desc_ptr *)&idt_descr);

	early_printk("Kernel alive\n");

	for (i = 0; i < NR_CPUS; i++)
		cpu_pda(i) = &boot_cpu_pda[i];

	pda_init(0);
	copy_bootdata(__va(real_mode_data));
#ifdef CONFIG_SMP
	cpu_set(0, cpu_online_map);
#endif
	start_kernel();
}
void __cpuinit numa_set_node(int cpu, int node)
{
	int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	if (cpu_pda(cpu) && node != NUMA_NO_NODE)
		cpu_pda(cpu)->nodenumber = node;

	if (cpu_to_node_map)
		cpu_to_node_map[cpu] = node;
	else if (per_cpu_offset(cpu))
		per_cpu(x86_cpu_to_node_map, cpu) = node;
	else
		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n",
			cpu);
}
/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 */
static void __init setup_cpu_pda_map(void)
{
	char *pda;
	struct x8664_pda **new_cpu_pda;
	unsigned long size;
	int cpu;

	size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* allocate cpu_pda array and pointer table */
	{
		unsigned long tsize = nr_cpu_ids * sizeof(void *);
		unsigned long asize = size * (nr_cpu_ids - 1);

		tsize = roundup(tsize, cache_line_size());
		new_cpu_pda = alloc_bootmem(tsize + asize);
		pda = (char *)new_cpu_pda + tsize;
	}

	/* initialize pointer table to static pda's */
	for_each_possible_cpu(cpu) {
		if (cpu == 0) {
			/* leave boot cpu pda in place */
			new_cpu_pda[0] = cpu_pda(0);
			continue;
		}
		new_cpu_pda[cpu] = (struct x8664_pda *)pda;
		new_cpu_pda[cpu]->in_bootmem = 1;
		pda += size;
	}

	/* point to new pointer table */
	_cpu_pda = new_cpu_pda;
}
int show_interrupts(struct seq_file *p, void *v)
{
	int i = *(loff_t *) v, j;
	struct irqaction *action;
	unsigned long flags;

	if (i == 0) {
		seq_printf(p, " ");
		for_each_online_cpu(j)
			seq_printf(p, "CPU%-8d", j);
		seq_putc(p, '\n');
	}

	if (i < NR_IRQS) {
		spin_lock_irqsave(&irq_desc[i].lock, flags);
		action = irq_desc[i].action;
		if (!action)
			goto skip;
		seq_printf(p, "%3d: ", i);
#ifndef CONFIG_SMP
		seq_printf(p, "%10u ", kstat_irqs(i));
#else
		for_each_online_cpu(j)
			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
		seq_printf(p, " %8s", irq_desc[i].chip->name);
		seq_printf(p, "-%-8s", irq_desc[i].name);
		seq_printf(p, " %s", action->name);

		for (action = action->next; action; action = action->next)
			seq_printf(p, ", %s", action->name);

		seq_putc(p, '\n');
skip:
		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
	} else if (i == NR_IRQS) {
		seq_printf(p, "NMI: ");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count);
		seq_putc(p, '\n');
		seq_printf(p, "LOC: ");
		for_each_online_cpu(j)
			seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs);
		seq_putc(p, '\n');
		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
	}
	return 0;
}
int __init check_nmi_watchdog(void)
{
	volatile int endflag = 0;
	int *counts;
	int cpu;

	counts = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
	if (!counts)
		return -1;

	printk(KERN_INFO "testing NMI watchdog ... ");

#ifdef CONFIG_SMP
	if (nmi_watchdog == NMI_LOCAL_APIC)
		smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
#endif

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		counts[cpu] = cpu_pda(cpu)->__nmi_count;
	local_irq_enable();
	mdelay((10 * 1000) / nmi_hz); /* wait 10 ticks */

	for_each_online_cpu(cpu) {
		if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) {
			endflag = 1;
			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
			       cpu, counts[cpu],
			       cpu_pda(cpu)->__nmi_count);
			nmi_active = 0;
			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
			nmi_perfctr_msr = 0;
			kfree(counts);
			return -1;
		}
	}
	endflag = 1;
	printk("OK.\n");

	/*
	 * Now that we know it works we can reduce NMI frequency to
	 * something more reasonable; makes a difference in some configs.
	 */
	if (nmi_watchdog == NMI_LOCAL_APIC)
		nmi_hz = 1;

	kfree(counts);
	return 0;
}
static int __cpuinit xen_cpu_up(unsigned int cpu)
{
	struct task_struct *idle = idle_task(cpu);
	int rc;

#ifdef CONFIG_X86_64
	/* Allocate node local memory for AP pdas */
	WARN_ON(cpu == 0);
	if (cpu > 0) {
		rc = get_local_pda(cpu);
		if (rc)
			return rc;
	}
#endif

#ifdef CONFIG_X86_32
	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	irq_ctx_init(cpu);
#else
	cpu_pda(cpu)->pcurrent = idle;
	clear_tsk_thread_flag(idle, TIF_FORK);
#endif
	xen_setup_timer(cpu);
	xen_init_lock_cpu(cpu);

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	while (per_cpu(cpu_state, cpu) != CPU_ONLINE) {
		HYPERVISOR_sched_op(SCHEDOP_yield, 0);
		barrier();
	}

	return 0;
}
static void
cpu_initialize_context(unsigned int cpu)
{
	/* vcpu_guest_context_t is too large to allocate on the stack.
	 * Hence we allocate statically and protect it with a lock */
	vm_page_t m[NPGPTD + 2];
	static vcpu_guest_context_t ctxt;
	vm_offset_t boot_stack;
	vm_offset_t newPTD;
	vm_paddr_t ma[NPGPTD];
	int i;

	/*
	 * Page 0,[0-3]	PTD
	 * Page 1, [4]	boot stack
	 * Page [5]	PDPT
	 */
	for (i = 0; i < NPGPTD + 2; i++) {
		m[i] = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);

		pmap_zero_page(m[i]);
	}
	boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE);
	ma[0] = VM_PAGE_TO_MACH(m[0]) | PG_V;

#ifdef PAE
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
	for (i = 0; i < NPGPTD; i++) {
		((vm_paddr_t *)boot_stack)[i] =
		    ma[i] = VM_PAGE_TO_MACH(m[i]) | PG_V;
	}
#endif

	/*
	 * Copy cpu0 IdlePTD to new IdlePTD - copying only
	 * kernel mappings
	 */
	pmap_qenter(newPTD, m, 4);

	memcpy((uint8_t *)newPTD + KPTDI * sizeof(vm_paddr_t),
	    (uint8_t *)PTOV(IdlePTD) + KPTDI * sizeof(vm_paddr_t),
	    nkpt * sizeof(vm_paddr_t));

	pmap_qremove(newPTD, 4);
	kmem_free(kernel_map, newPTD, 4 * PAGE_SIZE);

	/*
	 * map actual idle stack to boot_stack
	 */
	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));

	xen_pgdpt_pin(VM_PAGE_TO_MACH(m[NPGPTD + 1]));
	rw_wlock(&pvh_global_lock);
	for (i = 0; i < 4; i++) {
		int pdir = (PTDPTDI + i) / NPDEPG;
		int curoffset = (PTDPTDI + i) % NPDEPG;

		xen_queue_pt_update((vm_paddr_t)
		    ((ma[pdir] & ~PG_V) + (curoffset * sizeof(vm_paddr_t))),
		    ma[i]);
	}
	PT_UPDATES_FLUSH();
	rw_wunlock(&pvh_global_lock);

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.flags = VGCF_IN_KERNEL;
	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.user_regs.eip = (unsigned long)init_secondary;
	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */

	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

	smp_trap_init(ctxt.trap_ctxt);

	ctxt.ldt_ents = 0;
	ctxt.gdt_frames[0] =
	    (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
	ctxt.gdt_ents = 512;

#ifdef __i386__
	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;

	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = boot_stack + PAGE_SIZE;

	ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback;
	ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

	ctxt.ctrlreg[3] = VM_PAGE_TO_MACH(m[NPGPTD + 1]);
#else /* __x86_64__ */
	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	ctxt.kernel_sp = idle->thread.rsp0;

	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
	ctxt.syscall_callback_eip = (unsigned long)system_call;

	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

	printf("gdtpfn=%lx pdptpfn=%lx\n",
	    ctxt.gdt_frames[0],
	    ctxt.ctrlreg[3] >> PAGE_SHIFT);

	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
	DELAY(3000);
	PANIC_IF(HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL));
}
void __init x86_64_init_pda(void)
{
	/* point the pda pointer table at the statically allocated array
	 * and set up the boot cpu's pda */
	_cpu_pda = __cpu_pda;
	cpu_pda(0) = &_boot_cpu_pda;
	pda_init(0);
}
void __init x86_64_start_kernel(char *real_mode_data)
{
	int i;

	/*
	 * Build-time sanity checks on the kernel image and module
	 * area mappings. (these are purely build-time and produce no code)
	 */
	BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
	BUILD_BUG_ON(MODULES_VADDR - KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2 * PUD_SIZE);
	BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));

	/* clear bss before set_intr_gate with early_idt_handler */
	clear_bss();

	/* Make NULL pointers segfault */
	zap_identity_mappings();

	/* Cleanup the over mapped high alias */
	cleanup_highmap();

	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
		set_intr_gate(i, &early_idt_handlers[i]);
#else
		set_intr_gate(i, early_idt_handler);
#endif
	}
	load_idt((const struct desc_ptr *)&idt_descr);

	early_printk("Kernel alive\n");

	for (i = 0; i < NR_CPUS; i++)
		cpu_pda(i) = &boot_cpu_pda[i];

	pda_init(0);
	copy_bootdata(__va(real_mode_data));

	reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");

#ifdef CONFIG_BLK_DEV_INITRD
	/* Reserve INITRD */
	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
		unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
		unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
	}
#endif

	reserve_ebda_region();
	reserve_setup_data();

	/*
	 * At this point everything still needed from the boot loader
	 * or BIOS or kernel text should be early reserved or marked not
	 * RAM in e820. All other memory is free game.
	 */

	start_kernel();
}