/* * text_poke_early - Update instructions on a live kernel at boot time * @addr: address to modify * @opcode: source of the copy * @len: length to copy * * When you use this code to patch more than one byte of an instruction * you need to make sure that other CPUs cannot execute this code in parallel. * Also no thread must be currently preempted in the middle of these * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. * * This routine is called with local interrupt disabled. */ static void *__init text_poke_early(void *addr, const void *opcode, size_t len) { memcpy(addr, opcode, len); sync_core(); return addr; }
static force_inline void do_vgettimeofday(struct timeval * tv) { long sequence, t; unsigned long sec, usec; do { sequence = __vxtime_sequence[1]; rmb(); sec = __xtime.tv_sec; usec = __xtime.tv_usec + (__jiffies - __wall_jiffies) * (1000000 / HZ); switch (__vxtime.mode) { case VXTIME_TSC: sync_core(); rdtscll(t); usec += (((t - __vxtime.last_tsc) * __vxtime.tsc_quot) >> 32); break; case VXTIME_HPET: usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) - __vxtime.last) * __vxtime.quot) >> 32; break; } rmb(); } while (sequence != __vxtime_sequence[0]); tv->tv_sec = sec + usec / 1000000; tv->tv_usec = usec % 1000000; }
/* * Mark it noinline so we make sure it is not unrolled. * Wait until value is reached. */ static noinline void tsc_barrier(long wait_cpu, int value) { sync_core(); per_cpu(wait_sync, smp_processor_id())--; do { barrier(); } while (unlikely(per_cpu(wait_sync, wait_cpu) > value)); __get_cpu_var(tsc_count) = get_cycles_sync(); }
int cch_allocate(struct gru_context_configuration_handle *cch) { int ret; cch->opc = CCHOP_ALLOCATE; start_instruction(cch); ret = wait_instruction_complete(cch, cchop_allocate); /* * Stop speculation into the GSEG being mapped by the previous ALLOCATE. * The GSEG memory does not exist until the ALLOCATE completes. */ sync_core(); return ret; }
int cch_deallocate(struct gru_context_configuration_handle *cch) { int ret; cch->opc = CCHOP_DEALLOCATE; start_instruction(cch); ret = wait_instruction_complete(cch, cchop_deallocate); /* * Stop speculation into the GSEG being unmapped by the previous * DEALLOCATE. */ sync_core(); return ret; }
int cch_allocate(struct gru_context_configuration_handle *cch) { int ret; cch->opc = CCHOP_ALLOCATE; start_instruction(cch); ret = wait_instruction_complete(cch, cchop_allocate); /* */ sync_core(); return ret; }
/*----------------------------------------------------------------------*/ int gru_get_cb_exception_detail(void *cb, struct control_block_extended_exc_detail *excdet) { struct gru_control_block_extended *cbe; struct gru_thread_state *kgts = NULL; unsigned long off; int cbrnum, bid; /* * Locate kgts for cb. This algorithm is SLOW but * this function is rarely called (ie., almost never). * Performance does not matter. */ for_each_possible_blade(bid) { if (!gru_base[bid]) break; kgts = gru_base[bid]->bs_kgts; if (!kgts || !kgts->ts_gru) continue; off = cb - kgts->ts_gru->gs_gru_base_vaddr; if (off < GRU_SIZE) break; kgts = NULL; } BUG_ON(!kgts); cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); cbe = get_cbe(GRUBASE(cb), cbrnum); gru_flush_cache(cbe); /* CBE not coherent */ sync_core(); excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; excdet->exceptdet0 = cbe->idef1upd; excdet->exceptdet1 = cbe->idef3upd; gru_flush_cache(cbe); return 0; }
int gru_get_cb_exception_detail(void *cb, struct control_block_extended_exc_detail *excdet) { struct gru_control_block_extended *cbe; struct gru_thread_state *kgts = NULL; unsigned long off; int cbrnum, bid; /* */ for_each_possible_blade(bid) { if (!gru_base[bid]) break; kgts = gru_base[bid]->bs_kgts; if (!kgts || !kgts->ts_gru) continue; off = cb - kgts->ts_gru->gs_gru_base_vaddr; if (off < GRU_SIZE) break; kgts = NULL; } BUG_ON(!kgts); cbrnum = thread_cbr_number(kgts, get_cb_number(cb)); cbe = get_cbe(GRUBASE(cb), cbrnum); gru_flush_cache(cbe); /* */ sync_core(); excdet->opc = cbe->opccpy; excdet->exopc = cbe->exopccpy; excdet->ecause = cbe->ecause; excdet->exceptdet0 = cbe->idef1upd; excdet->exceptdet1 = cbe->idef3upd; gru_flush_cache(cbe); return 0; }
static void __init synchronize_tsc_ap (void) { int i; /* * Not every cpu is online at the time * this gets called, so we first wait for the BP to * finish SMP initialization: */ while (!atomic_read(&tsc_start_flag)) mb(); for (i = 0; i < NR_LOOPS; i++) { atomic_inc(&tsc_count_start); while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb(); sync_core(); rdtscll(tsc_values[smp_processor_id()]); if (i == NR_LOOPS-1) write_tsc(0, 0); atomic_inc(&tsc_count_stop); while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); } }
static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); c->cpuid_level = cpuid_eax(0); get_cpu_cap(c); } } if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { unsigned lower_word; wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* Required by the SDM */ sync_core(); rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * * A race condition between speculative fetches and invalidating * a large page. This is worked around in microcode, but we * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && c->microcode < 0x20e) { printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #else /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; #endif /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) c->x86_phys_bits = 36; /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states. * * It is also reliable across cores and sockets. (but not across * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); if (!check_tsc_unstable()) sched_clock_stable = 1; } /* * There is a known erratum on Pentium III and Core Solo * and Core Duo CPUs. * " Page with PAT set to WC while associated MTRR is UC * may consolidate to UC " * Because of this erratum, it is better to stick with * setting WC in MTRR rather than using PAT on these CPUs. * * Enable PAT WC only on P4, Core 2 or later CPUs. */ if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); #ifdef CONFIG_KMEMCHECK /* * P4s have a "fast strings" feature which causes single- * stepping REP instructions to only generate a #DB on * cache-line boundaries. * * Ingo Molnar reported a Pentium D (model 6) and a Xeon * (model 2) with the same problem. */ if (c->x86 == 15) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); } } #endif /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { printk(KERN_INFO "Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } } }
static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { c->cpuid_level = cpuid_eax(0); get_cpu_cap(c); } } if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) { unsigned lower_word; wrmsr(MSR_IA32_UCODE_REV, 0, 0); /* Required by the SDM */ sync_core(); rdmsr(MSR_IA32_UCODE_REV, lower_word, c->microcode); } /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * * A race condition between speculative fetches and invalidating * a large page. This is worked around in microcode, but we * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #else /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; #endif /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) c->x86_phys_bits = 36; /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states. * * It is also reliable across cores and sockets. (but not across * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); if (!check_tsc_unstable()) set_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ if (c->x86 == 6) { switch (c->x86_model) { case 0x27: /* Penwell */ case 0x35: /* Cloverview */ case 0x4a: /* Merrifield */ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; default: break; } } /* * There is a known erratum on Pentium III and Core Solo * and Core Duo CPUs. * " Page with PAT set to WC while associated MTRR is UC * may consolidate to UC " * Because of this erratum, it is better to stick with * setting WC in MTRR rather than using PAT on these CPUs. * * Enable PAT WC only on P4, Core 2 or later CPUs. */ if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); #ifdef CONFIG_KMEMCHECK /* * P4s have a "fast strings" feature which causes single- * stepping REP instructions to only generate a #DB on * cache-line boundaries. * * Ingo Molnar reported a Pentium D (model 6) and a Xeon * (model 2) with the same problem. */ if (c->x86 == 15) if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) pr_info("kmemcheck: Disabling fast string operations\n"); #endif /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } } /* * Intel Quark Core DevMan_001.pdf section 6.4.11 * "The operating system also is required to invalidate (i.e., flush) * the TLB when any changes are made to any of the page table entries. * The operating system must reload CR3 to cause the TLB to be flushed" * * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE * to be modified. */ if (c->x86 == 5 && c->x86_model == 9) { pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } if (c->cpuid_level >= 0x00000001) { u32 eax, ebx, ecx, edx; cpuid(0x00000001, &eax, &ebx, &ecx, &edx); /* * If HTT (EDX[28]) is set EBX[16:23] contain the number of * apicids which are reserved per package. Store the resulting * shift value for the package management code. */ if (edx & (1U << 28)) c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); }
static void __init synchronize_tsc_bp (void) { int i; unsigned long long t0; unsigned long long sum, avg; long long delta; long one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus()); one_usec = cpu_khz; atomic_set(&tsc_start_flag, 1); wmb(); /* * We loop a few times to get a primed instruction cache, * then the last pass is more or less synchronized and * the BP and APs set their cycle counters to zero all at * once. This reduces the chance of having random offsets * between the processors, and guarantees that the maximum * delay between the cycle counters is never bigger than * the latency of information-passing (cachelines) between * two CPUs. */ for (i = 0; i < NR_LOOPS; i++) { /* * all APs synchronize but they loop on '== num_cpus' */ while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb(); atomic_set(&tsc_count_stop, 0); wmb(); /* * this lets the APs save their current TSC: */ atomic_inc(&tsc_count_start); sync_core(); rdtscll(tsc_values[smp_processor_id()]); /* * We clear the TSC in the last loop: */ if (i == NR_LOOPS-1) write_tsc(0, 0); /* * Wait for all APs to leave the synchronization point: */ while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb(); atomic_set(&tsc_count_start, 0); wmb(); atomic_inc(&tsc_count_stop); } sum = 0; for (i = 0; i < NR_CPUS; i++) { if (cpu_isset(i, cpu_callout_map)) { t0 = tsc_values[i]; sum += t0; } } avg = sum / num_booting_cpus(); sum = 0; for (i = 0; i < NR_CPUS; i++) { if (!cpu_isset(i, cpu_callout_map)) continue; delta = tsc_values[i] - avg; if (delta < 0) delta = -delta; /* * We report bigger than 2 microseconds clock differences. */ if (delta > 2*one_usec) { long realdelta; if (!buggy) { buggy = 1; printk("\n"); } realdelta = delta / one_usec; if (tsc_values[i] < avg) realdelta = -realdelta; printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta); } sum += delta; } if (!buggy) printk("passed.\n"); }