template <>
void Blob<double>::ToProto(BlobProto* proto, bool write_diff) const {
  proto->clear_shape();
  for (int_tp i = 0; i < shape_.size(); ++i) {
    proto->mutable_shape()->add_dim(shape_[i]);
  }
  proto->clear_double_data();
  proto->clear_double_diff();
  const double* data_vec = cpu_data();
  for (int_tp i = 0; i < count_; ++i) {
    proto->add_double_data(data_vec[i]);
  }
  if (write_diff) {
    const double* diff_vec = cpu_diff();
    for (int_tp i = 0; i < count_; ++i) {
      proto->add_double_diff(diff_vec[i]);
    }
  }
}
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
    unsigned cpu;
    unsigned int i;

    if (skip_ioapic_setup) {
        char *m = (max_cpus == 0) ?
            "The nosmp parameter is incompatible with Xen; " \
            "use Xen dom0_max_vcpus=1 parameter" :
            "The noapic parameter is incompatible with Xen";

        xen_raw_printk(m);
        panic(m);
    }
    xen_init_lock_cpu(0);

    smp_store_boot_cpu_info();
    cpu_data(0).x86_max_cores = 1;

    for_each_possible_cpu(i) {
        zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
        zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
        zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
    }
    set_cpu_sibling_map(0);

    if (xen_smp_intr_init(0))
        BUG();

    if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
        panic("could not allocate xen_cpu_initialized_map\n");

    cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

    /* Restrict the possible_map according to max_cpus. */
    while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
        for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
            continue;
        set_cpu_possible(cpu, false);
    }

    for_each_possible_cpu(cpu)
        set_cpu_present(cpu, true);
}
static int __init padlock_init(void)
{
    int ret;
    struct cpuinfo_x86 *c = &cpu_data(0);

    if (!cpu_has_xcrypt) {
        printk(KERN_NOTICE PFX "VIA PadLock not detected.\n");
        return -ENODEV;
    }

    if (!cpu_has_xcrypt_enabled) {
        printk(KERN_NOTICE PFX "VIA PadLock detected, but not enabled. Hmm, strange...\n");
        return -ENODEV;
    }

    if ((ret = crypto_register_alg(&aes_alg)))
        goto aes_err;

    if ((ret = crypto_register_alg(&ecb_aes_alg)))
        goto ecb_aes_err;

    if ((ret = crypto_register_alg(&cbc_aes_alg)))
        goto cbc_aes_err;

    printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");

    if (c->x86 == 6 && c->x86_model == 15 && c->x86_mask == 2) {
        ecb_fetch_blocks = MAX_ECB_FETCH_BLOCKS;
        cbc_fetch_blocks = MAX_CBC_FETCH_BLOCKS;
        printk(KERN_NOTICE PFX "VIA Nano stepping 2 detected: enabling workaround.\n");
    }

out:
    return ret;

cbc_aes_err:
    crypto_unregister_alg(&ecb_aes_alg);
ecb_aes_err:
    crypto_unregister_alg(&aes_alg);
aes_err:
    printk(KERN_ERR PFX "VIA PadLock AES initialization failed.\n");
    goto out;
}
static int via_rng_init(struct hwrng *rng)
{
    struct cpuinfo_x86 *c = &cpu_data(0);
    u32 lo, hi, old_lo;

    /* Control the RNG via MSR. Tread lightly and pay very close
     * attention to values written, as the reserved fields
     * are documented to be "undefined and unpredictable"; but it
     * does not say to write them as zero, so I make a guess that
     * we restore the values we find in the register.
     */
    rdmsr(MSR_VIA_RNG, lo, hi);

    old_lo = lo;
    lo &= ~(0x7f << VIA_STRFILT_CNT_SHIFT);
    lo &= ~VIA_XSTORE_CNT_MASK;
    lo &= ~(VIA_STRFILT_ENABLE | VIA_STRFILT_FAIL | VIA_RAWBITS_ENABLE);
    lo |= VIA_RNG_ENABLE;
    lo |= VIA_NOISESRC1;

    /* Enable secondary noise source on CPUs where it is present. */

    /* Nehemiah stepping 8 and higher */
    if ((c->x86_model == 9) && (c->x86_mask > 7))
        lo |= VIA_NOISESRC2;

    /* Esther */
    if (c->x86_model >= 10)
        lo |= VIA_NOISESRC2;

    if (lo != old_lo)
        wrmsr(MSR_VIA_RNG, lo, hi);

    /* perhaps-unnecessary sanity check; remove after testing if unneeded */
    rdmsr(MSR_VIA_RNG, lo, hi);
    if ((lo & VIA_RNG_ENABLE) == 0) {
        printk(KERN_ERR PFX "cannot enable VIA C3 RNG, aborting\n");
        return -ENODEV;
    }

    return 0;
}
/*
 * Some BIOS implementations switch to C3 in the published C2 state.
 * This seems to be a common problem on AMD boxen, but other vendors
 * are affected too. We pick the most conservative approach: we assume
 * that the local APIC stops in both C2 and C3.
 */
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
                                    struct acpi_processor_cx *cx)
{
    struct acpi_processor_power *pwr = &pr->power;
    u8 type = local_apic_timer_c2_ok ? ACPI_STATE_C3 : ACPI_STATE_C2;

    if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
        return;

    /*
     * Check if one of the previous states already marked the lapic
     * unstable
     */
    if (pwr->timer_broadcast_on_state < state)
        return;

    if (cx->type >= type)
        pr->power.timer_broadcast_on_state = state;
}
template <typename Dtype>
void Blob<Dtype>::ToProto(BlobProto* proto, bool write_diff) const {
  proto->set_num(num_);
  proto->set_channels(channels_);
  proto->set_height(height_);
  proto->set_width(width_);
  proto->set_depth(depth_);
  proto->clear_data();
  proto->clear_diff();
  const Dtype* data_vec = cpu_data();
  for (int i = 0; i < count_; ++i) {
    proto->add_data(data_vec[i]);
  }
  if (write_diff) {
    const Dtype* diff_vec = cpu_diff();
    for (int i = 0; i < count_; ++i) {
      proto->add_diff(diff_vec[i]);
    }
  }
}
static int msr_open(struct inode *inode, struct file *file)
{
    unsigned int cpu;
    struct cpuinfo_x86 *c;

    if (!capable(CAP_SYS_RAWIO))
        return -EPERM;

    cpu = iminor(file->f_path.dentry->d_inode);
    if (cpu >= nr_cpu_ids || !cpu_online(cpu))
        return -ENXIO;    /* No such CPU */

    c = &cpu_data(cpu);
    if (!cpu_has(c, X86_FEATURE_MSR))
        return -EIO;    /* MSR not supported */

    return 0;
}
void __init smp_store_cpu_info(int id)
{
    int cpu_node;

    /* multiplier and counter set by smp_setup_percpu_timer() */
    cpu_data(id).udelay_val = loops_per_jiffy;

    cpu_find_by_mid(id, &cpu_node);
    cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
                                                 "clock-frequency", 0);

    cpu_data(id).pgcache_size = 0;
    cpu_data(id).pte_cache[0] = NULL;
    cpu_data(id).pte_cache[1] = NULL;
    cpu_data(id).pgd_cache = NULL;
    cpu_data(id).idle_volume = 1;
}
static void print_mce(struct mce *m)
{
    int ret = 0;

    pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
             m->extcpu, m->mcgstatus, m->bank, m->status);

    if (m->ip) {
        pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
                 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
                 m->cs, m->ip);

        if (m->cs == __KERNEL_CS)
            print_symbol("{%s}", m->ip);
        pr_cont("\n");
    }

    pr_emerg(HW_ERR "TSC %llx ", m->tsc);
    if (m->addr)
        pr_cont("ADDR %llx ", m->addr);
    if (m->misc)
        pr_cont("MISC %llx ", m->misc);

    pr_cont("\n");
    /*
     * Note this output is parsed by external tools and old fields
     * should not be changed.
     */
    pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
             m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
             cpu_data(m->extcpu).microcode);

    /*
     * Print out human-readable details about the MCE error,
     * (if the CPU has an implementation for that)
     */
    ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
    if (ret == NOTIFY_STOP)
        return;

    pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
static int cpuid_open(struct inode *inode, struct file *file)
{
    unsigned int cpu;
    struct cpuinfo_x86 *c;
    int ret = 0;

    lock_kernel();

    cpu = iminor(file->f_path.dentry->d_inode);
    if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
        ret = -ENXIO;    /* No such CPU */
        goto out;
    }
    c = &cpu_data(cpu);
    if (c->cpuid_level < 0)
        ret = -EIO;    /* CPUID not supported */
out:
    unlock_kernel();
    return ret;
}
int amd_set_subcaches(int cpu, int mask)
{
    static unsigned int reset, ban;
    struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
    unsigned int reg;
    int cuid = 0;

    if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
        return -EINVAL;

    /* if necessary, collect reset state of L3 partitioning and BAN mode */
    if (reset == 0) {
        pci_read_config_dword(nb->link, 0x1d4, &reset);
        pci_read_config_dword(nb->misc, 0x1b8, &ban);
        ban &= 0x180000;
    }

    /* deactivate BAN mode if any subcaches are to be disabled */
    if (mask != 0xf) {
        pci_read_config_dword(nb->misc, 0x1b8, &reg);
        pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
    }

#ifdef CONFIG_SMP
    cuid = cpu_data(cpu).compute_unit_id;
#endif
    mask <<= 4 * cuid;
    mask |= (0xf ^ (1 << cuid)) << 26;

    pci_write_config_dword(nb->link, 0x1d4, mask);

    /* reset BAN mode if L3 partitioning returned to reset state */
    pci_read_config_dword(nb->link, 0x1d4, &reg);
    if (reg == reset) {
        pci_read_config_dword(nb->misc, 0x1b8, &reg);
        reg &= ~0x180000;
        pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
    }

    return 0;
}
/*
 * Assume __initcall executes before all user space. Hopefully kmod
 * doesn't violate that. We'll find out if it does.
 */
static void vsyscall_set_cpu(int cpu)
{
    unsigned long d;
    unsigned long node = 0;
#ifdef CONFIG_NUMA
    node = cpu_to_node(cpu);
#endif
    if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
        write_rdtscp_aux((node << 12) | cpu);

    /*
     * Store cpu number in limit so that it can be loaded quickly
     * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
     */
    d = 0x0f40000000000ULL;
    d |= cpu;
    d |= (node & 0xf) << 12;
    d |= (node >> 4) << 48;

    write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
void __init smp_cpus_done(unsigned int max_cpus)
{
    extern void smp4m_smp_done(void);
    extern void smp4d_smp_done(void);
    unsigned long bogosum = 0;
    int cpu, num = 0;

    for_each_online_cpu(cpu) {
        num++;
        bogosum += cpu_data(cpu).udelay_val;
    }

    printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
           num, bogosum/(500000/HZ),
           (bogosum/(5000/HZ))%100);

    switch(sparc_cpu_model) {
    case sun4m:
        smp4m_smp_done();
        break;
    case sun4d:
        smp4d_smp_done();
        break;
    case sparc_leon:
        leon_smp_done();
        break;
    case sun4e:
        printk("SUN4E\n");
        BUG();
        break;
    case sun4u:
        printk("SUN4U\n");
        BUG();
        break;
    default:
        printk("UNKNOWN!\n");
        BUG();
        break;
    }
}
static int msr_open(struct inode *inode, struct file *file)
{
    unsigned int cpu = iminor(file_inode(file));
    struct cpuinfo_x86 *c;
    struct msr_session_info *myinfo;

    if (cpu >= nr_cpu_ids || !cpu_online(cpu))
        return -ENXIO;    /* No such CPU */

    c = &cpu_data(cpu);
    if (!cpu_has(c, X86_FEATURE_MSR))
        return -EIO;    /* MSR not supported */

    myinfo = kmalloc(sizeof(*myinfo), GFP_KERNEL);
    if (!myinfo)
        return -ENOMEM;

    myinfo->rawio_allowed = capable(CAP_SYS_RAWIO);
    file->private_data = myinfo;

    return 0;
}
template <typename Dtype>
Dtype Blob<Dtype>::sumsq_data() const {
  Dtype sumsq;
  const Dtype* data;
  if (!data_) {
    return 0;
  }
  switch (data_->head()) {
    case SyncedMemory::HEAD_AT_CPU: {
      data = cpu_data();
      sumsq = caffe_cpu_dot(count_, data, data);
      break;
    }
    case SyncedMemory::HEAD_AT_GPU:
    case SyncedMemory::SYNCED: {
#ifndef CPU_ONLY
      data = gpu_data();
      if (device_->backend() == Backend::BACKEND_CUDA) {
#ifdef USE_CUDA
        caffe_gpu_dot(count_, data, data, &sumsq);
#endif
      } else {
#ifdef USE_GREENTEA
        greentea_gpu_dot(device_->id(), count_, (cl_mem) data, 0,
                         (cl_mem) data, 0, &sumsq);
#endif
      }
#else
      NO_GPU;
#endif
      break;
    }
    case SyncedMemory::UNINITIALIZED:
      return 0;
    default:
      LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return sumsq;
}
static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
    unsigned cpu;

    xen_init_lock_cpu(0);

    smp_store_cpu_info(0);
    cpu_data(0).x86_max_cores = 1;
    set_cpu_sibling_map(0);

    if (xen_smp_intr_init(0))
        BUG();

    if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
        panic("could not allocate xen_cpu_initialized_map\n");

    cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

    /* Restrict the possible_map according to max_cpus. */
    while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
        for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
            continue;
        set_cpu_possible(cpu, false);
    }

    for_each_possible_cpu (cpu) {
        struct task_struct *idle;

        if (cpu == 0)
            continue;

        idle = fork_idle(cpu);
        if (IS_ERR(idle))
            panic("failed fork for CPU %d", cpu);

        set_cpu_present(cpu, true);
    }
}
template <typename Dtype>
Dtype Blob<Dtype>::asum_data() const {
  if (!data_) {
    return 0;
  }
  switch (data_->head()) {
    case SyncedMemory::HEAD_AT_CPU:
      return caffe_cpu_asum(count_, cpu_data());
    case SyncedMemory::HEAD_AT_GPU:
    case SyncedMemory::SYNCED:
#ifndef CPU_ONLY
    {
      Dtype asum;
      caffe_gpu_asum(count_, gpu_data(), &asum);
      return asum;
    }
#else
      NO_GPU;
#endif
    case SyncedMemory::UNINITIALIZED:
      return 0;
    default:
      LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head();
  }
  return 0;
}
int mlx4_enable_wc(void)
{
    struct cpuinfo_x86 *c = &cpu_data(0);
    int ret;

    if (wc_enabled)
        return 0;

    if (!cpu_has(c, X86_FEATURE_MSR) ||
        !cpu_has(c, X86_FEATURE_PAT)) {
        printk(KERN_INFO "ib_mlx4: WC not available"
               " on this processor\n");
        return -ENOSYS;
    }

    if (have_wc_errata())
        return -ENOSYS;

    if (!(ret = read_and_modify_pat()))
        wc_enabled = 1;
    else
        printk(KERN_INFO "ib_mlx4: failed to enable WC\n");

    return ret ? -EIO : 0;
}
inline Dtype data_at(const vector<int>& index) const { return cpu_data()[offset(index)]; }
inline Dtype data_at(const int n, const int c, const int h,
                     const int w) const {
  return cpu_data()[offset(n, c, h, w)];
}
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
    unsigned int i;
    unsigned int valid_states = 0;
    unsigned int cpu = policy->cpu;
    struct acpi_cpufreq_data *data;
    unsigned int result = 0;
    struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
    struct acpi_processor_performance *perf;
#ifdef CONFIG_SMP
    static int blacklisted;
#endif

    pr_debug("acpi_cpufreq_cpu_init\n");

#ifdef CONFIG_SMP
    if (blacklisted)
        return blacklisted;
    blacklisted = acpi_cpufreq_blacklist(c);
    if (blacklisted)
        return blacklisted;
#endif

    data = kzalloc(sizeof(*data), GFP_KERNEL);
    if (!data)
        return -ENOMEM;

    if (!zalloc_cpumask_var(&data->freqdomain_cpus, GFP_KERNEL)) {
        result = -ENOMEM;
        goto err_free;
    }

    perf = per_cpu_ptr(acpi_perf_data, cpu);
    data->acpi_perf_cpu = cpu;
    policy->driver_data = data;

    if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
        acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

    result = acpi_processor_register_performance(perf, cpu);
    if (result)
        goto err_free_mask;

    policy->shared_type = perf->shared_type;

    /*
     * Will let policy->cpus know about dependency only when software
     * coordination is required.
     */
    if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
        policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
        cpumask_copy(policy->cpus, perf->shared_cpu_map);
    }
    cpumask_copy(data->freqdomain_cpus, perf->shared_cpu_map);

#ifdef CONFIG_SMP
    dmi_check_system(sw_any_bug_dmi_table);
    if (bios_with_sw_any_bug && !policy_is_shared(policy)) {
        policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
        cpumask_copy(policy->cpus, topology_core_cpumask(cpu));
    }

    if (check_amd_hwpstate_cpu(cpu) && !acpi_pstate_strict) {
        cpumask_clear(policy->cpus);
        cpumask_set_cpu(cpu, policy->cpus);
        cpumask_copy(data->freqdomain_cpus,
                     topology_sibling_cpumask(cpu));
        policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
        pr_info_once(PFX "overriding BIOS provided _PSD data\n");
    }
#endif

    /* capability check */
    if (perf->state_count <= 1) {
        pr_debug("No P-States\n");
        result = -ENODEV;
        goto err_unreg;
    }

    if (perf->control_register.space_id != perf->status_register.space_id) {
        result = -ENODEV;
        goto err_unreg;
    }

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
            boot_cpu_data.x86 == 0xf) {
            pr_debug("AMD K8 systems must use native drivers.\n");
            result = -ENODEV;
            goto err_unreg;
        }
        pr_debug("SYSTEM IO addr space\n");
        data->cpu_feature = SYSTEM_IO_CAPABLE;
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        pr_debug("HARDWARE addr space\n");
        if (check_est_cpu(cpu)) {
            data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
            break;
        }
        if (check_amd_hwpstate_cpu(cpu)) {
            data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
            break;
        }
        result = -ENODEV;
        goto err_unreg;
    default:
        pr_debug("Unknown addr space %d\n",
                 (u32) (perf->control_register.space_id));
        result = -ENODEV;
        goto err_unreg;
    }

    data->freq_table = kzalloc(sizeof(*data->freq_table) *
                               (perf->state_count+1), GFP_KERNEL);
    if (!data->freq_table) {
        result = -ENOMEM;
        goto err_unreg;
    }

    /* detect transition latency */
    policy->cpuinfo.transition_latency = 0;
    for (i = 0; i < perf->state_count; i++) {
        if ((perf->states[i].transition_latency * 1000) >
            policy->cpuinfo.transition_latency)
            policy->cpuinfo.transition_latency =
                perf->states[i].transition_latency * 1000;
    }

    /* Check for high latency (>20uS) from buggy BIOSes, like on T42 */
    if (perf->control_register.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE &&
        policy->cpuinfo.transition_latency > 20 * 1000) {
        policy->cpuinfo.transition_latency = 20 * 1000;
        printk_once(KERN_INFO
                    "P-state transition latency capped at 20 uS\n");
    }

    /* table init */
    for (i = 0; i < perf->state_count; i++) {
        if (i > 0 && perf->states[i].core_frequency >=
            data->freq_table[valid_states-1].frequency / 1000)
            continue;

        data->freq_table[valid_states].driver_data = i;
        data->freq_table[valid_states].frequency =
            perf->states[i].core_frequency * 1000;
        valid_states++;
    }
    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
    perf->state = 0;

    result = cpufreq_table_validate_and_show(policy, data->freq_table);
    if (result)
        goto err_freqfree;

    if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
        printk(KERN_WARNING FW_WARN "P-state 0 is not max freq\n");

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        /*
         * The core will not set policy->cur, because
         * cpufreq_driver->get is NULL, so we need to set it here.
         * However, we have to guess it, because the current speed is
         * unknown and not detectable via IO ports.
         */
        policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
        break;
    default:
        break;
    }

    /* notify BIOS that we exist */
    acpi_processor_notify_smm(THIS_MODULE);

    pr_debug("CPU%u - ACPI performance management activated.\n", cpu);
    for (i = 0; i < perf->state_count; i++)
        pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n",
                 (i == perf->state ? '*' : ' '), i,
                 (u32) perf->states[i].core_frequency,
                 (u32) perf->states[i].power,
                 (u32) perf->states[i].transition_latency);

    /*
     * the first call to ->target() should result in us actually
     * writing something to the appropriate registers.
     */
    data->resume = 1;

    return result;

err_freqfree:
    kfree(data->freq_table);
err_unreg:
    acpi_processor_unregister_performance(cpu);
err_free_mask:
    free_cpumask_var(data->freqdomain_cpus);
err_free:
    kfree(data);
    policy->driver_data = NULL;

    return result;
}
static int check_amd_hwpstate_cpu(unsigned int cpuid)
{
    struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

    return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
}
static int check_est_cpu(unsigned int cpuid)
{
    struct cpuinfo_x86 *cpu = &cpu_data(cpuid);

    return cpu_has(cpu, X86_FEATURE_EST);
}
static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
                                                   unsigned int *high_freq)
{
    u32 msr_lo, msr_hi;
    u32 save_lo, save_hi;
    u32 eax, ebx, ecx, edx;
    u32 try_hi;
    struct cpuinfo_x86 *c = &cpu_data(0);

    if (!low_freq || !high_freq)
        return -EINVAL;

    if (cpu_has(c, X86_FEATURE_LRTI)) {
        /* if the LongRun Table Interface is present, the
         * detection is a bit easier:
         * For minimum frequency, read out the maximum
         * level (msr_hi), write that into "currently
         * selected level", and read out the frequency.
         * For maximum frequency, read out level zero.
         */
        /* minimum */
        rdmsr(MSR_TMTA_LRTI_READOUT, msr_lo, msr_hi);
        wrmsr(MSR_TMTA_LRTI_READOUT, msr_hi, msr_hi);
        rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
        *low_freq = msr_lo * 1000; /* to kHz */

        /* maximum */
        wrmsr(MSR_TMTA_LRTI_READOUT, 0, msr_hi);
        rdmsr(MSR_TMTA_LRTI_VOLT_MHZ, msr_lo, msr_hi);
        *high_freq = msr_lo * 1000; /* to kHz */

        dprintk("longrun table interface told %u - %u kHz\n",
                *low_freq, *high_freq);

        if (*low_freq > *high_freq)
            *low_freq = *high_freq;
        return 0;
    }

    /* set the upper border to the value determined during TSC init */
    *high_freq = (cpu_khz / 1000);
    *high_freq = *high_freq * 1000;
    dprintk("high frequency is %u kHz\n", *high_freq);

    /* get current borders */
    rdmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);
    save_lo = msr_lo & 0x0000007F;
    save_hi = msr_hi & 0x0000007F;

    /* if current perf_pctg is larger than 90%, we need to decrease the
     * upper limit to make the calculation more accurate.
     */
    cpuid(0x80860007, &eax, &ebx, &ecx, &edx);
    /* try decreasing in 10% steps, some processors react only
     * on some barrier values
     */
    for (try_hi = 80; try_hi > 0 && ecx > 90; try_hi -= 10) {
        /* set to 0 to try_hi perf_pctg */
        msr_lo &= 0xFFFFFF80;
        msr_hi &= 0xFFFFFF80;
        msr_hi |= try_hi;
        wrmsr(MSR_TMTA_LONGRUN_CTRL, msr_lo, msr_hi);

        /* read out current core MHz and current perf_pctg */
        cpuid(0x80860007, &eax, &ebx, &ecx, &edx);

        /* restore values */
        wrmsr(MSR_TMTA_LONGRUN_CTRL, save_lo, save_hi);
    }
    dprintk("percentage is %u %%, freq is %u MHz\n", ecx, eax);

    /* performance_pctg = (current_freq - low_freq)/(high_freq - low_freq)
     * equals
     * low_freq * (1 - perf_pctg) = (cur_freq - high_freq * perf_pctg)
     *
     * high_freq * perf_pctg is stored temporarily into "ebx".
     */
    ebx = (((cpu_khz / 1000) * ecx) / 100); /* to MHz */

    if ((ecx > 95) || (ecx == 0) || (eax < ebx))
        return -EIO;

    edx = ((eax - ebx) * 100) / (100 - ecx);
    *low_freq = edx * 1000; /* back to kHz */

    dprintk("low frequency is %u kHz\n", *low_freq);

    if (*low_freq > *high_freq)
        *low_freq = *high_freq;

    return 0;
}
/*
 * Report back to the Boot Processor during boot time or to the caller
 * processor during CPU online.
 */
static void __cpuinit smp_callin(void)
{
    int cpuid, phys_id;
    unsigned long timeout;

    /*
     * If waken up by an INIT in an 82489DX configuration
     * we may get here before an INIT-deassert IPI reaches
     * our local APIC. We have to wait for the IPI or we'll
     * lock up on an APIC access.
     *
     * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
     */
    cpuid = smp_processor_id();
    if (apic->wait_for_init_deassert && cpuid != 0)
        apic->wait_for_init_deassert(&init_deasserted);

    /*
     * (This works even if the APIC is not enabled.)
     */
    phys_id = read_apic_id();
    if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
        panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
              phys_id, cpuid);
    }
    pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

    /*
     * STARTUP IPIs are fragile beasts as they might sometimes
     * trigger some glue motherboard logic. Complete APIC bus
     * silence for 1 second, this overestimates the time the
     * boot CPU is spending to send the up to 2 STARTUP IPIs
     * by a factor of two. This should be enough.
     */

    /*
     * Waiting 2s total for startup (udelay is not yet working)
     */
    timeout = jiffies + 2*HZ;
    while (time_before(jiffies, timeout)) {
        /*
         * Has the boot CPU finished its STARTUP sequence?
         */
        if (cpumask_test_cpu(cpuid, cpu_callout_mask))
            break;
        cpu_relax();
    }

    if (!time_before(jiffies, timeout)) {
        panic("%s: CPU%d started up but did not get a callout!\n",
              __func__, cpuid);
    }

    /*
     * the boot CPU has finished the init stage and is spinning
     * on callin_map until we finish. We are free to set up this
     * CPU, first the APIC. (this is probably redundant on most
     * boards)
     */
    pr_debug("CALLIN, before setup_local_APIC()\n");
    if (apic->smp_callin_clear_local_apic)
        apic->smp_callin_clear_local_apic();
    setup_local_APIC();
    end_local_APIC_setup();

    /*
     * Need to setup vector mappings before we enable interrupts.
     */
    setup_vector_irq(smp_processor_id());

    /*
     * Save our processor parameters. Note: this information
     * is needed for clock calibration.
     */
    smp_store_cpu_info(cpuid);

    /*
     * Get our bogomips.
     * Update loops_per_jiffy in cpu_data. Previous call to
     * smp_store_cpu_info() stored a value that is close but not as
     * accurate as the value just calculated.
     */
    calibrate_delay();
    cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
    pr_debug("Stack at about %p\n", &cpuid);

    /*
     * This must be done before setting cpu_online_mask
     * or calling notify_cpu_starting.
     */
    set_cpu_sibling_map(raw_smp_processor_id());
    wmb();

    notify_cpu_starting(cpuid);

    /*
     * Allow the master to continue.
     */
    cpumask_set_cpu(cpuid, cpu_callin_mask);
}
static void __cpuinit smp_callin(void)
{
    int cpuid, phys_id, itc_master;
    struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo;
    extern void ia64_init_itm(void);
    extern volatile int time_keeper_id;

#ifdef CONFIG_PERFMON
    extern void pfm_init_percpu(void);
#endif

    cpuid = smp_processor_id();
    phys_id = hard_smp_processor_id();
    itc_master = time_keeper_id;

    if (cpu_online(cpuid)) {
        printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
               phys_id, cpuid);
        BUG();
    }

    fix_b0_for_bsp();

    lock_ipi_calllock();
    spin_lock(&vector_lock);
    /* Setup the per cpu irq handling data structures */
    __setup_vector_irq(cpuid);
    cpu_set(cpuid, cpu_online_map);
    unlock_ipi_calllock();
    per_cpu(cpu_state, cpuid) = CPU_ONLINE;
    spin_unlock(&vector_lock);

    smp_setup_percpu_timer();

    ia64_mca_cmc_vector_setup();    /* Setup vector on AP */

#ifdef CONFIG_PERFMON
    pfm_init_percpu();
#endif

    local_irq_enable();

    if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
        /*
         * Synchronize the ITC with the BP. Need to do this after irqs are
         * enabled because ia64_sync_itc() calls smp_call_function_single(),
         * which calls spin_unlock_bh(), which calls local_bh_enable(),
         * which bugs out if irqs are not enabled...
         */
        Dprintk("Going to syncup ITC with ITC Master.\n");
        ia64_sync_itc(itc_master);
    }

    /*
     * Get our bogomips.
     */
    ia64_init_itm();

    /*
     * Delay calibration can be skipped if new processor is identical to the
     * previous processor.
     */
    last_cpuinfo = cpu_data(cpuid - 1);
    this_cpuinfo = local_cpu_data;
    if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq ||
        last_cpuinfo->proc_freq != this_cpuinfo->proc_freq ||
        last_cpuinfo->features != this_cpuinfo->features ||
        last_cpuinfo->revision != this_cpuinfo->revision ||
        last_cpuinfo->family != this_cpuinfo->family ||
        last_cpuinfo->archrev != this_cpuinfo->archrev ||
        last_cpuinfo->model != this_cpuinfo->model)
        calibrate_delay();
    local_cpu_data->loops_per_jiffy = loops_per_jiffy;

#ifdef CONFIG_IA32_SUPPORT
    ia32_gdt_init();
#endif

    /*
     * Allow the master to continue.
     */
    cpu_set(cpuid, cpu_callin_map);
    Dprintk("Stack on CPU %d at about %p\n", cpuid, &cpuid);
}
inline Dtype data_at(const int n, const int c, const int h,
                     const int w) const {
  return *(cpu_data() + offset(n, c, h, w));
}
/*
 * Report back to the Boot Processor during boot time or to the caller
 * processor during CPU online.
 */
static void smp_callin(void)
{
    int cpuid, phys_id;

    /*
     * If waken up by an INIT in an 82489DX configuration
     * we may get here before an INIT-deassert IPI reaches
     * our local APIC. We have to wait for the IPI or we'll
     * lock up on an APIC access.
     *
     * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
     */
    cpuid = smp_processor_id();
    if (apic->wait_for_init_deassert && cpuid)
        while (!atomic_read(&init_deasserted))
            cpu_relax();

    /*
     * (This works even if the APIC is not enabled.)
     */
    phys_id = read_apic_id();

    /*
     * the boot CPU has finished the init stage and is spinning
     * on callin_map until we finish. We are free to set up this
     * CPU, first the APIC. (this is probably redundant on most
     * boards)
     */
    pr_debug("CALLIN, before setup_local_APIC()\n");
    if (apic->smp_callin_clear_local_apic)
        apic->smp_callin_clear_local_apic();
    setup_local_APIC();
    end_local_APIC_setup();

    /*
     * Need to setup vector mappings before we enable interrupts.
     */
    setup_vector_irq(smp_processor_id());

    /*
     * Save our processor parameters. Note: this information
     * is needed for clock calibration.
     */
    smp_store_cpu_info(cpuid);

    /*
     * Get our bogomips.
     * Update loops_per_jiffy in cpu_data. Previous call to
     * smp_store_cpu_info() stored a value that is close but not as
     * accurate as the value just calculated.
     */
    calibrate_delay();
    cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
    pr_debug("Stack at about %p\n", &cpuid);

    /*
     * This must be done before setting cpu_online_mask
     * or calling notify_cpu_starting.
     */
    set_cpu_sibling_map(raw_smp_processor_id());
    wmb();

    notify_cpu_starting(cpuid);

    /*
     * Allow the master to continue.
     */
    cpumask_set_cpu(cpuid, cpu_callin_mask);
}
static int gameport_measure_speed(struct gameport *gameport)
{
#if defined(__i386__)

    unsigned int i, t, t1, t2, t3, tx;
    unsigned long flags;

    if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW))
        return 0;

    tx = 1 << 30;

    for (i = 0; i < 50; i++) {
        local_irq_save(flags);
        GET_TIME(t1);
        for (t = 0; t < 50; t++)
            gameport_read(gameport);
        GET_TIME(t2);
        GET_TIME(t3);
        local_irq_restore(flags);
        udelay(i * 10);
        if ((t = DELTA(t2, t1) - DELTA(t3, t2)) < tx)
            tx = t;
    }

    gameport_close(gameport);
    return 59659 / (tx < 1 ? 1 : tx);

#elif defined(__x86_64__)

    unsigned int i, t;
    unsigned long tx, t1, t2, flags;

    if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW))
        return 0;

    tx = 1 << 30;

    for (i = 0; i < 50; i++) {
        local_irq_save(flags);
        rdtscl(t1);
        for (t = 0; t < 50; t++)
            gameport_read(gameport);
        rdtscl(t2);
        local_irq_restore(flags);
        udelay(i * 10);
        if (t2 - t1 < tx)
            tx = t2 - t1;
    }

    gameport_close(gameport);
    return (cpu_data(raw_smp_processor_id()).loops_per_jiffy *
            (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);

#else

    unsigned int j, t = 0;

    if (gameport_open(gameport, NULL, GAMEPORT_MODE_RAW))
        return 0;

    j = jiffies;
    while (j == jiffies);
    j = jiffies;
    while (j == jiffies) {
        t++;
        gameport_read(gameport);
    }

    gameport_close(gameport);
    return t * HZ / 1000;

#endif
}
static int get_current_node(void)
{
    return cpu_data(current_thread_info()->cpu).phys_proc_id;
}