void *pmqthread(void *param) { int mustgetcpu = 0; struct params *par = param; cpu_set_t mask; int policy = SCHED_FIFO; struct sched_param schedp; struct timespec ts; memset(&schedp, 0, sizeof(schedp)); schedp.sched_priority = par->priority; sched_setscheduler(0, policy, &schedp); if (par->cpu != -1) { CPU_ZERO(&mask); CPU_SET(par->cpu, &mask); if(sched_setaffinity(0, sizeof(mask), &mask) == -1) fprintf(stderr, "WARNING: Could not set CPU affinity " "to CPU #%d\n", par->cpu); } else mustgetcpu = 1; par->tid = gettid(); while (!par->shutdown) { if (par->sender) { /* Optionally force receiver timeout */ if (par->forcetimeout) { struct timespec senddelay; senddelay.tv_sec = par->forcetimeout; senddelay.tv_nsec = 0; clock_nanosleep(CLOCK_MONOTONIC, 0, &senddelay, NULL); } /* Send message: Start of latency measurement ... */ gettimeofday(&par->sent, NULL); if (mq_send(par->testmq, testmsg, strlen(testmsg), 1) != 0) { fprintf(stderr, "could not send test message\n"); par->shutdown = 1; } par->samples++; if(par->max_cycles && par->samples >= par->max_cycles) par->shutdown = 1; if (mustgetcpu) par->cpu = get_cpu(); /* Wait until receiver ready */ if (par->timeout) { clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += par->timeout; if (mq_timedreceive(par->syncmq, par->recvsyncmsg, MSG_SIZE, NULL, &ts) != strlen(syncmsg)) { fprintf(stderr, "could not receive sync message\n"); par->shutdown = 1; } } if (mq_receive(par->syncmq, par->recvsyncmsg, MSG_SIZE, NULL) != strlen(syncmsg)) { perror("could not receive sync message"); par->shutdown = 1; } if (!par->shutdown && strcmp(syncmsg, par->recvsyncmsg)) { fprintf(stderr, "ERROR: Sync message mismatch detected\n"); fprintf(stderr, " %s != %s\n", syncmsg, par->recvsyncmsg); par->shutdown = 1; } } else { /* Receiver */ if (par->timeout) { clock_gettime(CLOCK_REALTIME, &ts); par->timeoutcount = 0; ts.tv_sec += par->timeout; do { if (mq_timedreceive(par->testmq, par->recvtestmsg, MSG_SIZE, NULL, &ts) != strlen(testmsg)) { if (!par->forcetimeout || errno != ETIMEDOUT) { perror("could not receive test message"); par->shutdown = 1; break; } if (errno == ETIMEDOUT) { par->timeoutcount++; clock_gettime(CLOCK_REALTIME, &ts); ts.tv_sec += par->timeout; } } else break; } while (1); } else { if (mq_receive(par->testmq, par->recvtestmsg, MSG_SIZE, NULL) != strlen(testmsg)) { perror("could not receive test message"); par->shutdown = 1; } } /* ... 
Received the message: End of latency measurement */ gettimeofday(&par->received, NULL); if (!par->shutdown && strcmp(testmsg, par->recvtestmsg)) { fprintf(stderr, "ERROR: Test message mismatch detected\n"); fprintf(stderr, " %s != %s\n", testmsg, par->recvtestmsg); par->shutdown = 1; } par->samples++; timersub(&par->received, &par->neighbor->sent, &par->diff); if (par->diff.tv_usec < par->mindiff) par->mindiff = par->diff.tv_usec; if (par->diff.tv_usec > par->maxdiff) par->maxdiff = par->diff.tv_usec; par->sumdiff += (double) par->diff.tv_usec; if (par->tracelimit && par->maxdiff > par->tracelimit) { char tracing_enabled_file[MAX_PATH]; strcpy(tracing_enabled_file, get_debugfileprefix()); strcat(tracing_enabled_file, "tracing_enabled"); int tracing_enabled = open(tracing_enabled_file, O_WRONLY); if (tracing_enabled >= 0) { write(tracing_enabled, "0", 1); close(tracing_enabled); } else snprintf(par->error, sizeof(par->error), "Could not access %s\n", tracing_enabled_file); par->shutdown = 1; par->neighbor->shutdown = 1; } if (par->max_cycles && par->samples >= par->max_cycles) par->shutdown = 1; if (mustgetcpu) par->cpu = get_cpu(); clock_nanosleep(CLOCK_MONOTONIC, 0, &par->delay, NULL); /* Tell receiver that we are ready for the next measurement */ if (mq_send(par->syncmq, syncmsg, strlen(syncmsg), 1) != 0) { fprintf(stderr, "could not send sync message\n"); par->shutdown = 1; } } } par->stopped = 1; return NULL; }
void lru_add_drain(void)
{
	drain_cpu_pagevecs(get_cpu());
	put_cpu();
}
/*
 * this changes the io permissions bitmap in the current task.
 */
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
	struct thread_struct *t = &current->thread;
	struct tss_struct *tss;
	unsigned int i, max_long, bytes, bytes_updated;

	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
		return -EINVAL;

#ifdef CONFIG_GRKERNSEC_IO
	if (turn_on) {
		gr_handle_ioperm();
		return -EPERM;
	}
#endif
	if (turn_on && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/*
	 * If it's the first ioperm() call in this thread's lifetime, set the
	 * IO bitmap up. ioperm() is much less timing critical than clone(),
	 * this is why we delay this operation until now:
	 */
	if (!t->io_bitmap_ptr) {
		unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);

		if (!bitmap)
			return -ENOMEM;

		memset(bitmap, 0xff, IO_BITMAP_BYTES);
		t->io_bitmap_ptr = bitmap;
		set_thread_flag(TIF_IO_BITMAP);
	}

	/*
	 * do it in the per-thread copy and in the TSS ...
	 *
	 * Disable preemption via get_cpu() - we must not switch away
	 * because the ->io_bitmap_max value must match the bitmap
	 * contents:
	 */
	tss = init_tss + get_cpu();

	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);

	/*
	 * Search for a (possibly new) maximum. This is simple and stupid,
	 * to keep it obviously correct:
	 */
	max_long = 0;
	for (i = 0; i < IO_BITMAP_LONGS; i++)
		if (t->io_bitmap_ptr[i] != ~0UL)
			max_long = i;

	bytes = (max_long + 1) * sizeof(unsigned long);
	bytes_updated = max(bytes, t->io_bitmap_max);
	t->io_bitmap_max = bytes;

	/* Update the TSS: */
	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);

	put_cpu();

	return 0;
}
/* collect final CPU utilization raw data */
void measure_cpu_stop()
{
	get_cpu(lib_end_count);
}
void cpu_stop_internal(void)
{
	get_cpu(lib_end_count);
}
void *semathread(void *param) { int mustgetcpu = 0; struct params *par = param; cpu_set_t mask; int policy = SCHED_FIFO; struct sched_param schedp; struct sembuf sb = { 0, 0, 0}; sigset_t sigset; sigemptyset(&sigset); pthread_sigmask(SIG_SETMASK, &sigset, NULL); memset(&schedp, 0, sizeof(schedp)); schedp.sched_priority = par->priority; sched_setscheduler(0, policy, &schedp); if (par->cpu != -1) { CPU_ZERO(&mask); CPU_SET(par->cpu, &mask); if(sched_setaffinity(0, sizeof(mask), &mask) == -1) snprintf(par->error, sizeof(par->error), "WARNING: Could not set CPU affinity " "to CPU #%d\n", par->cpu); } else { int max_cpus = sysconf(_SC_NPROCESSORS_CONF); if (max_cpus > 1) mustgetcpu = 1; else par->cpu = 0; } if (!wasforked) par->tid = gettid(); while (!par->shutdown) { if (par->sender) { sb.sem_num = SEM_WAIT_FOR_SENDER; sb.sem_op = SEM_UNLOCK; /* * Unlocking the semaphore: * Start of latency measurement ... */ gettimeofday(&par->unblocked, NULL); semop(par->semid, &sb, 1); par->samples++; if(par->max_cycles && par->samples >= par->max_cycles) par->shutdown = 1; if (mustgetcpu) par->cpu = get_cpu(); sb.sem_num = SEM_WAIT_FOR_RECEIVER; sb.sem_op = SEM_LOCK; semop(par->semid, &sb, 1); sb.sem_num = SEM_WAIT_FOR_SENDER; sb.sem_op = SEM_LOCK; semop(par->semid, &sb, 1); } else { /* Receiver */ struct params *neighbor; if (wasforked) neighbor = par + par->num_threads; else neighbor = par->neighbor; sb.sem_num = SEM_WAIT_FOR_SENDER; sb.sem_op = SEM_LOCK; semop(par->semid, &sb, 1); /* * ... We got the lock: * End of latency measurement */ gettimeofday(&par->received, NULL); par->samples++; if (par->max_cycles && par->samples >= par->max_cycles) par->shutdown = 1; if (mustgetcpu) par->cpu = get_cpu(); timersub(&par->received, &neighbor->unblocked, &par->diff); if (par->diff.tv_usec < par->mindiff) par->mindiff = par->diff.tv_usec; if (par->diff.tv_usec > par->maxdiff) par->maxdiff = par->diff.tv_usec; par->sumdiff += (double) par->diff.tv_usec; if (par->tracelimit && par->maxdiff > par->tracelimit) { char tracing_enabled_file[MAX_PATH]; strcpy(tracing_enabled_file, get_debugfileprefix()); strcat(tracing_enabled_file, "tracing_enabled"); int tracing_enabled = open(tracing_enabled_file, O_WRONLY); if (tracing_enabled >= 0) { write(tracing_enabled, "0", 1); close(tracing_enabled); } else snprintf(par->error, sizeof(par->error), "Could not access %s\n", tracing_enabled_file); par->shutdown = 1; neighbor->shutdown = 1; } sb.sem_num = SEM_WAIT_FOR_RECEIVER; sb.sem_op = SEM_UNLOCK; semop(par->semid, &sb, 1); nanosleep(&par->delay, NULL); sb.sem_num = SEM_WAIT_FOR_SENDER; sb.sem_op = SEM_UNLOCK; semop(par->semid, &sb, 1); } } if (par->sender) { sb.sem_num = SEM_WAIT_FOR_SENDER; sb.sem_op = SEM_UNLOCK; semop(par->semid, &sb, 1); sb.sem_num = SEM_WAIT_FOR_RECEIVER; sb.sem_op = SEM_UNLOCK; semop(par->semid, &sb, 1); } par->stopped = 1; return NULL; }
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL) ? F(GBPAGES) : 0; unsigned f_lm = F(LM); #else unsigned f_gbpages = 0; unsigned f_lm = 0; #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0; unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0; unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0; unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0; unsigned f_la57 = 0; /* cpuid 1.edx */ const u32 kvm_cpuid_1_edx_x86_features = F(FPU) | F(VME) | F(DE) | F(PSE) | F(TSC) | F(MSR) | F(PAE) | F(MCE) | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | 0 /* Reserved, DS, ACPI */ | F(MMX) | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | 0 /* HTT, TM, Reserved, PBE */; /* cpuid 0x80000001.edx */ const u32 kvm_cpuid_8000_0001_edx_x86_features = F(FPU) | F(VME) | F(DE) | F(PSE) | F(TSC) | F(MSR) | F(PAE) | F(MCE) | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* Reserved */ | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); /* cpuid 1.ecx */ const u32 kvm_cpuid_1_ecx_x86_features = /* NOTE: MONITOR (and MWAIT) are emulated as NOP, * but *not* advertised to guests via CPUID ! */ F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | F(F16C) | F(RDRAND); /* cpuid 0x80000001.ecx */ const u32 kvm_cpuid_8000_0001_ecx_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | F(TOPOEXT) | F(PERFCTR_CORE); /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) | F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | F(PMM) | F(PMM_EN); /* cpuid 7.0.ebx */ const u32 kvm_cpuid_7_0_ebx_x86_features = F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt; /* cpuid 0xD.1.eax */ const u32 kvm_cpuid_D_1_eax_x86_features = F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves; /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B); /* cpuid 7.0.edx*/ const 
u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); r = -E2BIG; if (*nent >= maxnent) goto out; do_cpuid_1_ent(entry, function, index); ++*nent; switch (function) { case 0: entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd)); break; case 1: entry->edx &= kvm_cpuid_1_edx_x86_features; cpuid_mask(&entry->edx, CPUID_1_EDX); entry->ecx &= kvm_cpuid_1_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_1_ECX); /* we support x2apic emulation even if host does not support * it since we emulate x2apic in software */ entry->ecx |= F(X2APIC); break; /* function 2 entries are STATEFUL. That is, repeated cpuid commands * may return different values. This forces us to get_cpu() before * issuing the first command, and also to emulate this annoying behavior * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */ case 2: { int t, times = entry->eax & 0xff; entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; for (t = 1; t < times; ++t) { if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[t], function, 0); entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; ++*nent; } break; } /* function 4 has additional index. */ case 4: { int i, cache_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until cache_type is zero */ for (i = 1; ; ++i) { if (*nent >= maxnent) goto out; cache_type = entry[i - 1].eax & 0x1f; if (!cache_type) break; do_cpuid_1_ent(&entry[i], function, i); entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; } break; } case 6: /* Thermal management */ entry->eax = 0x4; /* allow ARAT */ entry->ebx = 0; entry->ecx = 0; entry->edx = 0; break; case 7: { entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* Mask ebx against host capability word 9 */ if (index == 0) { entry->ebx &= kvm_cpuid_7_0_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_7_0_EBX); // TSC_ADJUST is emulated entry->ebx |= F(TSC_ADJUST); entry->ecx &= kvm_cpuid_7_0_ecx_x86_features; f_la57 = entry->ecx & F(LA57); cpuid_mask(&entry->ecx, CPUID_7_ECX); /* Set LA57 based on hardware capability. */ entry->ecx |= f_la57; entry->ecx |= f_umip; /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); /* * We emulate ARCH_CAPABILITIES in software even * if the host doesn't support it. */ entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; entry->edx = 0; } entry->eax = 0; break; } case 9: break; case 0xa: { /* Architectural Performance Monitoring */ struct x86_pmu_capability cap; union cpuid10_eax eax; union cpuid10_edx edx; perf_get_x86_pmu_capability(&cap); /* * Only support guest architectural pmu on a host * with architectural pmu. */ if (!cap.version) memset(&cap, 0, sizeof(cap)); eax.split.version_id = min(cap.version, 2); eax.split.num_counters = cap.num_counters_gp; eax.split.bit_width = cap.bit_width_gp; eax.split.mask_length = cap.events_mask_len; edx.split.num_counters_fixed = cap.num_counters_fixed; edx.split.bit_width_fixed = cap.bit_width_fixed; edx.split.reserved = 0; entry->eax = eax.full; entry->ebx = cap.events_mask; entry->ecx = 0; entry->edx = edx.full; break; } /* function 0xb has additional index. 
*/ case 0xb: { int i, level_type; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; /* read more entries until level_type is zero */ for (i = 1; ; ++i) { if (*nent >= maxnent) goto out; level_type = entry[i - 1].ecx & 0xff00; if (!level_type) break; do_cpuid_1_ent(&entry[i], function, i); entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; } break; } case 0xd: { int idx, i; u64 supported = kvm_supported_xcr0(); entry->eax &= supported; entry->ebx = xstate_required_size(supported, false); entry->ecx = entry->ebx; entry->edx &= supported >> 32; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; if (!supported) break; for (idx = 1, i = 1; idx < 64; ++idx) { u64 mask = ((u64)1 << idx); if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[i], function, idx); if (idx == 1) { entry[i].eax &= kvm_cpuid_D_1_eax_x86_features; cpuid_mask(&entry[i].eax, CPUID_D_1_EAX); entry[i].ebx = 0; if (entry[i].eax & (F(XSAVES)|F(XSAVEC))) entry[i].ebx = xstate_required_size(supported, true); } else { if (entry[i].eax == 0 || !(supported & mask)) continue; if (WARN_ON_ONCE(entry[i].ecx & 1)) continue; } entry[i].ecx = 0; entry[i].edx = 0; entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; ++i; } break; } /* Intel PT */ case 0x14: { int t, times = entry->eax; if (!f_intel_pt) break; entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; for (t = 1; t <= times; ++t) { if (*nent >= maxnent) goto out; do_cpuid_1_ent(&entry[t], function, t); entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; ++*nent; } break; } case KVM_CPUID_SIGNATURE: { static const char signature[12] = "KVMKVMKVM\0\0"; const u32 *sigptr = (const u32 *)signature; entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; entry->edx = sigptr[2]; break; } case KVM_CPUID_FEATURES: entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) | (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); entry->ebx = 0; entry->ecx = 0; entry->edx = 0; break; case 0x80000000: entry->eax = min(entry->eax, 0x8000001f); break; case 0x80000001: entry->edx &= kvm_cpuid_8000_0001_edx_x86_features; cpuid_mask(&entry->edx, CPUID_8000_0001_EDX); entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features; cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX); break; case 0x80000007: /* Advanced power management */ /* invariant TSC is CPUID.80000007H:EDX[8] */ entry->edx &= (1 << 8); /* mask against host */ entry->edx &= boot_cpu_data.x86_power; entry->eax = entry->ebx = entry->ecx = 0; break; case 0x80000008: { unsigned g_phys_as = (entry->eax >> 16) & 0xff; unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U); unsigned phys_as = entry->eax & 0xff; if (!g_phys_as) g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; /* * IBRS, IBPB and VIRT_SSBD aren't necessarily present in * hardware cpuid */ if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) entry->ebx |= F(AMD_IBPB); if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) entry->ebx |= F(AMD_IBRS); if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); /* * The preference is to use SPEC CTRL MSR instead of the * VIRT_SPEC MSR. 
*/ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) && !boot_cpu_has(X86_FEATURE_AMD_SSBD)) entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: entry->ecx = entry->edx = 0; break; case 0x8000001a: break; case 0x8000001d: break; /*Add support for Centaur's CPUID instruction*/ case 0xC0000000: /*Just support up to 0xC0000004 now*/ entry->eax = min(entry->eax, 0xC0000004); break; case 0xC0000001: entry->edx &= kvm_cpuid_C000_0001_edx_x86_features; cpuid_mask(&entry->edx, CPUID_C000_0001_EDX); break; case 3: /* Processor serial number */ case 5: /* MONITOR/MWAIT */ case 0xC0000002: case 0xC0000003: case 0xC0000004: default: entry->eax = entry->ebx = entry->ecx = entry->edx = 0; break; } kvm_x86_ops->set_supported_cpuid(function, entry); r = 0; out: put_cpu(); return r; }
void PlatformDisableSchedulerInterrupts(void)
{
	get_cpu();
}
static unsigned long fill_arg(struct syscallrecord *rec, unsigned int argnum)
{
	struct syscallentry *entry;
	unsigned int call;
	enum argtype argtype;

	call = rec->nr;
	entry = syscalls[call].entry;

	if (argnum > entry->num_args)
		return 0;

	argtype = get_argtype(entry, argnum);

	switch (argtype) {
	case ARG_UNDEFINED:
		if (RAND_BOOL())
			return (unsigned long) rand64();
		return (unsigned long) get_writable_address(page_size);
	case ARG_FD:
		if (RAND_BOOL()) {
			unsigned int i;

			/* If this is the 2nd or more ARG_FD, make it unique */
			for (i = 0; i < argnum; i++) {
				enum argtype arg;

				arg = get_argtype(entry, i);
				if (arg == ARG_FD)
					return get_new_random_fd();
			}
		}
		return get_random_fd();
	case ARG_LEN:
		return (unsigned long) get_len();
	case ARG_ADDRESS:
		return handle_arg_address(rec, argnum);
	case ARG_NON_NULL_ADDRESS:
		return (unsigned long) get_non_null_address();
	case ARG_MMAP:
		return (unsigned long) get_map();
	case ARG_PID:
		return (unsigned long) get_pid();
	case ARG_RANGE:
		return handle_arg_range(entry, argnum);
	case ARG_OP:
		/* Like ARG_LIST, but just a single value. */
		return handle_arg_op(entry, argnum);
	case ARG_LIST:
		return handle_arg_list(entry, argnum);
	case ARG_CPU:
		return (unsigned long) get_cpu();
	case ARG_PATHNAME:
		return (unsigned long) generate_pathname();
	case ARG_IOVEC:
		return handle_arg_iovec(entry, rec, argnum);
	case ARG_IOVECLEN:
	case ARG_SOCKADDRLEN:
		/* We already set the len in the ARG_IOVEC/ARG_SOCKADDR case
		 * So here we just return what we had set there. */
		return get_argval(rec, argnum);
	case ARG_SOCKADDR:
		return handle_arg_sockaddr(entry, rec, argnum);
	case ARG_MODE_T:
		return handle_arg_mode_t();
	case ARG_SOCKETINFO:
		return (unsigned long) get_rand_socketinfo();
	}

	BUG("unreachable!\n");
}
/* * Handle debug exception notifications. * * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. * * NOTIFY_DONE returned if one of the following conditions is true. * i) When the causative address is from user-space and the exception * is a valid one, i.e. not triggered as a result of lazy debug register * switching * ii) When there are more bits than trap<n> set in DR6 register (such * as BD, BS or BT) indicating that more than one debug condition is * met and requires some more action in do_debug(). * * NOTIFY_STOP returned for all other cases * */ static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; /* The DR6 value is pointed by args->err */ dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; /* If it's a single step, TRAP bits are random */ if (dr6 & DR_STEP) return NOTIFY_DONE; /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); /* * Assert that local interrupts are disabled * Reset the DRn bits in the virtualized register value. * The ptrace trigger routine will add in whatever is needed. */ current->thread.debugreg6 &= ~DR_TRAP_BITS; cpu = get_cpu(); /* Handle all the breakpoints that were triggered */ for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; /* * The counter may be concurrently released but that can only * occur from a call_rcu() path. We can then safely fetch * the breakpoint, use its callback, touch its counter * while we are in an rcu_read_lock() path. */ rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling */ (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching * or due to concurrent perf counter removing. */ if (!bp) { rcu_read_unlock(); break; } perf_bp_event(bp, args->regs); /* * Set up resume flag to avoid breakpoint recursion when * returning back to origin. */ if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE) args->regs->flags |= X86_EFLAGS_RF; rcu_read_unlock(); } /* * Further processing in do_debug() is needed for a) user-space * breakpoints (to generate signals) and b) when the system has * taken exception due to multiple causes */ if ((current->thread.debugreg6 & DR_TRAP_BITS) || (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); return rc; }
static int __kprobes hw_breakpoint_handler(struct die_args *args) { int cpu, i, rc = NOTIFY_STOP; struct perf_event *bp; unsigned int cmf, resume_mask; /* * Do an early return if none of the channels triggered. */ cmf = sh_ubc->triggered_mask(); if (unlikely(!cmf)) return NOTIFY_DONE; /* * By default, resume all of the active channels. */ resume_mask = sh_ubc->active_mask(); /* * Disable breakpoints during exception handling. */ sh_ubc->disable_all(); cpu = get_cpu(); for (i = 0; i < sh_ubc->num_events; i++) { unsigned long event_mask = (1 << i); if (likely(!(cmf & event_mask))) continue; /* * The counter may be concurrently released but that can only * occur from a call_rcu() path. We can then safely fetch * the breakpoint, use its callback, touch its counter * while we are in an rcu_read_lock() path. */ rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); if (bp) rc = NOTIFY_DONE; /* * Reset the condition match flag to denote completion of * exception handling. */ sh_ubc->clear_triggered_mask(event_mask); /* * bp can be NULL due to concurrent perf counter * removing. */ if (!bp) { rcu_read_unlock(); break; } /* * Don't restore the channel if the breakpoint is from * ptrace, as it always operates in one-shot mode. */ if (bp->overflow_handler == ptrace_triggered) resume_mask &= ~(1 << i); perf_bp_event(bp, args->regs); /* Deliver the signal to userspace */ if (arch_check_va_in_userspace(bp->attr.bp_addr, bp->attr.bp_len)) { siginfo_t info; info.si_signo = args->signr; info.si_errno = notifier_to_errno(rc); info.si_code = TRAP_HWBKPT; force_sig_info(args->signr, &info, current); } rcu_read_unlock(); } if (cmf == 0) rc = NOTIFY_DONE; sh_ubc->enable_all(resume_mask); put_cpu(); return rc; }
/** * information about the cache: level, associativity... */ int generic_cache_info(int cpu, int id, char* output, size_t len) { char tmp[_HW_DETECT_MAX_OUTPUT], tmp2[_HW_DETECT_MAX_OUTPUT]; char tmppath[_HW_DETECT_MAX_OUTPUT]; struct stat statbuf; if (cpu == -1) cpu = get_cpu(); if (cpu == -1) return -1; snprintf(path,sizeof(path), "/sys/devices/system/cpu/cpu%i/cache/index%i/", cpu, id); memset(output, 0, len); if(stat(path, &statbuf)) //path doesn't exist return -1; strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT); strncat(tmppath, "level", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { snprintf(tmp2,_HW_DETECT_MAX_OUTPUT-1, "Level %s", tmp); strncat(output, tmp2, (len-strlen(output))-1); } strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT); strncat(tmppath, "type", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { if(!strcmp(tmp, "Unified")) { strncpy(tmp2, output,_HW_DETECT_MAX_OUTPUT-1); snprintf(output, len, "%s ", tmp); strncat(output, tmp2, (len-strlen(output))-1); } else { strncat(output, " ", (len-strlen(output))-1); strncat(output, tmp, (len-strlen(output))-1); } } strncat(output, " Cache,", (len-strlen(output))-1); strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT); strncat(tmppath, "size", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { strncat(output, " ", (len-strlen(output))-1); strncat(output, tmp, (len-strlen(output))-1); } strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT); strncat(tmppath, "ways_of_associativity", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { strncat(output, ", ", (len-strlen(output))-1); strncat(output, tmp, (len-strlen(output))-1); strncat(output, "-way set associative", (len-strlen(output))-1); } strncpy(tmppath, path,_HW_DETECT_MAX_OUTPUT); strncat(tmppath, "coherency_line_size", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { strncat(output, ", ", (len-strlen(output))-1); strncat(output, tmp, (len-strlen(output))-1); strncat(output, " Byte cachelines", (len-strlen(output))-1); } strncpy(tmppath, path,_HW_DETECT_MAX_OUTPUT); strncat(tmppath, "shared_cpu_map",(_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1); if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) { cpu_map_to_list(tmp, tmp2, _HW_DETECT_MAX_OUTPUT); snprintf(tmppath,_HW_DETECT_MAX_OUTPUT, "cpu%i ", cpu); if(!strcmp(tmp2, tmppath)) { strncat(output, ", exclusive for ", (len-strlen(output))-1); strncat(output, tmppath, (len-strlen(output))-1); } else { strncat(output, ", shared among ", (len-strlen(output))-1); strncat(output, tmp2, (len-strlen(output))-1); } } return 0; }
/* * Handle debug exception notifications. * * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. * * NOTIFY_DONE returned if one of the following conditions is true. * i) When the causative address is from user-space and the exception * is a valid one, i.e. not triggered as a result of lazy debug register * switching * ii) When there are more bits than trap<n> set in DR6 register (such * as BD, BS or BT) indicating that more than one debug condition is * met and requires some more action in do_debug(). * * NOTIFY_STOP returned for all other cases * */ static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; /* The DR6 value is pointed by args->err */ dr6_p = (unsigned long *)ERR_PTR(args->err); dr6 = *dr6_p; /* If it's a single step, TRAP bits are random */ if (dr6 & DR_STEP) return NOTIFY_DONE; /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); /* * Assert that local interrupts are disabled * Reset the DRn bits in the virtualized register value. * The ptrace trigger routine will add in whatever is needed. */ current->thread.debugreg6 &= ~DR_TRAP_BITS; cpu = get_cpu(); /* Handle all the breakpoints that were triggered */ for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; /* * The counter may be concurrently released but that can only * occur from a call_rcu() path. We can then safely fetch * the breakpoint, use its callback, touch its counter * while we are in an rcu_read_lock() path. */ rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); if (bp) rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling */ (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching * or due to concurrent perf counter removing. */ if (!bp) { rcu_read_unlock(); break; } perf_bp_event(bp, args->regs); rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); return rc; }
int vmadump_restore_cpu(cr_rstrt_proc_req_t *ctx, struct file *file, struct pt_regs *regs) { struct vmadump_restore_tmps *x86tmps; struct thread_struct *threadtmp; struct pt_regs *regtmp; int r; int idx, i, cpu; uint16_t fsindex, gsindex; #if HAVE_STRUCT_N_DESC_STRUCT struct n_desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #else struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; #endif /* XXX: Note allocation assumes i387tmp and threadtmp are never active at the same time */ x86tmps = kmalloc(sizeof(*x86tmps), GFP_KERNEL); if (!x86tmps) return -ENOMEM; regtmp = VMAD_REGTMP(x86tmps); threadtmp = VMAD_THREADTMP(x86tmps); r = read_kern(ctx, file, regtmp, sizeof(*regtmp)); if (r != sizeof(*regtmp)) goto bad_read; /* Don't let the user pick funky segments */ if ((regtmp->cs != __USER_CS && regtmp->cs != __USER32_CS) && (regtmp->ss != __USER_DS && regtmp->ss != __USER32_DS)) { r = -EINVAL; goto bad_read; } /* Set our process type */ if (regtmp->cs == __USER32_CS) set_thread_flag(TIF_IA32); else clear_thread_flag(TIF_IA32); /* Only restore bottom 9 bits of eflags. Restoring anything else * is bad bad mojo for security. (0x200 = interrupt enable) */ #if HAVE_PT_REGS_EFLAGS regtmp->eflags = 0x200 | (regtmp->eflags & 0x000000FF); #elif HAVE_PT_REGS_FLAGS regtmp->flags = 0x200 | (regtmp->flags & 0x000000FF); #else #error #endif memcpy(regs, regtmp, sizeof(*regtmp)); /* Restore FPU info (and later general "extended state") */ r = vmadump_restore_i387(ctx, file, VMAD_I387TMP(x86tmps)); if (r < 0) goto bad_read; /* XXX FIX ME: RESTORE DEBUG INFORMATION ?? */ /* Here we read it but ignore it. */ r = vmadump_restore_debugreg(ctx, file); if (r < 0) goto bad_read; /* user(r)sp, since we don't use the ptrace entry path in BLCR */ #if HAVE_THREAD_USERSP r = read_kern(ctx, file, &threadtmp->usersp, sizeof(threadtmp->usersp)); if (r != sizeof(threadtmp->usersp)) goto bad_read; current->thread.usersp = threadtmp->usersp; vmad_write_oldrsp(threadtmp->usersp); #elif HAVE_THREAD_USERRSP r = read_kern(ctx, file, &threadtmp->userrsp, sizeof(threadtmp->userrsp)); if (r != sizeof(threadtmp->userrsp)) goto bad_read; current->thread.userrsp = threadtmp->userrsp; vmad_write_oldrsp(threadtmp->userrsp); #else #error #endif /*-- restore segmentation related stuff */ /* Restore FS_BASE MSR */ r = read_kern(ctx, file, &threadtmp->fs, sizeof(threadtmp->fs)); if (r != sizeof(threadtmp->fs)) goto bad_read; if (threadtmp->fs >= TASK_SIZE) { r = -EINVAL; goto bad_read; } current->thread.fs = threadtmp->fs; if ((r = checking_wrmsrl(MSR_FS_BASE, threadtmp->fs))) goto bad_read; /* Restore GS_KERNEL_BASE MSR */ r = read_kern(ctx, file, &threadtmp->gs, sizeof(threadtmp->gs)); if (r != sizeof(threadtmp->gs)) goto bad_read; if (threadtmp->gs >= TASK_SIZE) { r = -EINVAL; goto bad_read; } current->thread.gs = threadtmp->gs; if ((r = checking_wrmsrl(MSR_KERNEL_GS_BASE, threadtmp->gs))) goto bad_read; /* Restore 32 bit segment stuff */ r = read_kern(ctx, file, &fsindex, sizeof(fsindex)); if (r != sizeof(fsindex)) goto bad_read; r = read_kern(ctx, file, &gsindex, sizeof(gsindex)); if (r != sizeof(gsindex)) goto bad_read; r = read_kern(ctx, file, tls_array, sizeof(tls_array)); if (r != sizeof(tls_array)) goto bad_read; /* Sanitize fs, gs. These segment descriptors should load one * of the TLS entries and have DPL = 3. If somebody is doing * some other LDT monkey business, I'm currently not * supporting that here. Also, I'm presuming that the offsets * to the GDT_ENTRY_TLS_MIN is the same in both kernels. 
*/ idx = fsindex >> 3; if (idx<GDT_ENTRY_TLS_MIN || idx>GDT_ENTRY_TLS_MAX || (fsindex&7) != 3) fsindex = 0; idx = gsindex >> 3; if (idx<GDT_ENTRY_TLS_MIN || idx>GDT_ENTRY_TLS_MAX || (gsindex&7) != 3) gsindex = 0; /* Sanitize the TLS entries... * Make sure the following bits are set/not set: * bit 12 : S = 1 (code/data - not system) * bit 13-14: DPL = 11 (priv level = 3 (user)) * bit 21 : = 0 (reserved) * * If the entry isn't valid, zero the whole descriptor. */ for (i=0; i < GDT_ENTRY_TLS_ENTRIES; i++) { if (tls_array[i].b != 0 && (tls_array[i].b & 0x00207000) != 0x00007000) { r = -EINVAL; goto bad_read; } } /* Ok load this crap */ cpu = get_cpu(); /* load_TLS can't get pre-empted. */ memcpy(current->thread.tls_array, tls_array, sizeof(current->thread.tls_array)); current->thread.fsindex = fsindex; current->thread.gsindex = gsindex; load_TLS(&current->thread, cpu); loadsegment(fs, current->thread.fsindex); load_gs_index(current->thread.gsindex); put_cpu(); /* In case cr_restart and child don't have same ABI */ if (regtmp->cs == __USER32_CS) { loadsegment(ds, __USER32_DS); loadsegment(es, __USER32_DS); } else { loadsegment(ds, __USER_DS); loadsegment(es, __USER_DS); } #if HAVE_THREAD_INFO_SYSENTER_RETURN { void *sysenter_return; r = read_kern(ctx, file, &sysenter_return, sizeof(sysenter_return)); if (r != sizeof(sysenter_return)) goto bad_read; current_thread_info()->sysenter_return = sysenter_return; } #endif kfree(x86tmps); return 0; bad_read: kfree(x86tmps); if (r >= 0) r = -EIO; return r; }
void open_random_event(int mmap_enabled, int overflow_enabled) { int fd; int i,trinity_type; i=find_empty_event(); /* return if no free events */ if (i<0) return; event_data[i].overflows=0; event_data[i].throttles=0; /* repeat until we create a valid event */ while(1) { /* call trinity random perf_event_open() code */ //generic_sanitise(0); trinity_type=syscall_perf_event_open.sanitise(&shm->syscall[0]); memcpy(&event_data[i].attr, (struct perf_event_attr *)shm->syscall[0].a1, sizeof(struct perf_event_attr)); event_data[i].pid=shm->syscall[0].a2; event_data[i].cpu=get_cpu(); event_data[i].group_fd=shm->syscall[0].a4; event_data[i].flags=shm->syscall[0].a5; post_perf_event_open(&shm->syscall[0]); /* Randomly make part of a group 1/4 of the time */ if (rand()%4==2) { int j; j=find_random_active_event(); /* is it a master? */ /* can we set a group leader that isn't itself */ /* a leader? */ // if (event_data[j].group_fd==-1) { event_data[i].group_fd=event_data[j].fd; // } } /* Randomly try to use a kprobe */ if (event_data[i].attr.type==PERF_TYPE_TRACEPOINT) { if (rand()%10==5) { event_data[i].attr.config=kprobe_id; } } if (ignore_but_dont_skip.open) return; /* Debugging code */ /* We don't usually log failed opens as there are so many */ if (logging&TYPE_OPEN) { #if LOG_FAILURES if (trigger_failure_logging) { /* uncomment if failing opens causing crashes */ // static int quit_next=0; // if (event_data[i].attr.type==PERF_TYPE_TRACEPOINT) { sprintf(log_buffer,"# O -1 %d %d %d %lx ", event_data[i].pid, event_data[i].cpu, event_data[i].group_fd, event_data[i].flags); write(log_fd,log_buffer,strlen(log_buffer)); perf_log_attr(&event_data[i].attr); // fsync(log_fd); // } // if (quit_next==1) exit(1); // if (quit_next) quit_next--; // if ((event_data[i].attr.read_format==0x2d2d2d)) // quit_next=2; } #endif } /* Actually try to open the event */ fd=perf_event_open( &event_data[i].attr, event_data[i].pid, event_data[i].cpu, event_data[i].group_fd, event_data[i].flags); stats.open_attempts++; stats.total_syscalls++; int which_type=event_data[i].attr.type; if ((which_type<0) || (which_type>MAX_OPEN_TYPE-1)) { which_type=MAX_OPEN_TYPE-1; } /* If we succede, break out of the infinite loop */ if (fd>0) { stats.open_type_success[which_type]++; stats.open_trinity_type_success[trinity_type]++; break; } #if 0 /* Track source of UNKNOWN errnos */ if (errno==16) { printf("Event t=%d c=%llx pid=%d cpu=%d %s\n", event_data[i].attr.type, event_data[i].attr.config, event_data[i].pid, event_data[i].cpu, strerror(errno)); } #endif /* Otherwise, track the errors */ if (errno<MAX_ERRNOS) { stats.open_errno_count[errno]++; stats.open_type_fail[which_type]++; stats.open_trinity_type_fail[trinity_type]++; } /* no more file descriptors, so give up */ if (errno==EMFILE) return; } /* We successfully opened an event! 
*/ stats.open_successful++; stats.current_open++; if (logging&TYPE_OPEN) { sprintf(log_buffer,"O %d %d %d %d %lx ", fd, event_data[i].pid, event_data[i].cpu, event_data[i].group_fd, event_data[i].flags); write(log_fd,log_buffer,strlen(log_buffer)); perf_log_attr(&event_data[i].attr); #if FSYNC_EVERY fsync(log_fd); #endif } event_data[i].fd=fd; event_data[i].active=1; active_events++; /* if we are member of a group, update size of group */ /* this is needed for calcuating "proper" read size */ /* Also I don't think we adjust this on close */ if (event_data[i].group_fd!=-1) { int j=lookup_event(event_data[i].group_fd); event_data[j].number_in_group++; event_data[j].read_size=update_read_size(j); } /* Setup mmap buffer */ if (mmap_enabled) { setup_mmap(i); } /* Setup overflow 50% of the time */ if ((overflow_enabled) && (rand()%2)) { if (!ignore_but_dont_skip.overflow) { int fcntl_result; if (logging&TYPE_OVERFLOW) { sprintf(log_buffer,"o %d\n",event_data[i].fd); write(log_fd,log_buffer,strlen(log_buffer)); } memset(&event_data[i].sa, 0, sizeof(struct sigaction)); event_data[i].sa.sa_sigaction = our_handler; event_data[i].sa.sa_flags = SA_SIGINFO; if (sigaction( SIGRTMIN+2, &event_data[i].sa, NULL) < 0) { printf("Error setting up signal handler\n"); } fcntl_result=fcntl(event_data[i].fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); if (fcntl_result<0) fprintf(stderr,"F1 error!\n"); fcntl_result=fcntl(event_data[i].fd, F_SETSIG, SIGRTMIN+2); if (fcntl_result<0) fprintf(stderr,"F1 error!\n"); fcntl_result=fcntl(event_data[i].fd, F_SETOWN,getpid()); if (fcntl_result<0) fprintf(stderr,"F1 error!\n"); } } event_data[i].number_in_group=1; event_data[i].read_size=update_read_size(i); }
TraceEventInfoList *qmp_trace_event_get_state(const char *name,
                                              bool has_vcpu, int64_t vcpu,
                                              Error **errp)
{
    Error *err = NULL;
    TraceEventInfoList *events = NULL;
    TraceEventIter iter;
    TraceEvent *ev;
    bool is_pattern = trace_event_is_pattern(name);
    CPUState *cpu;

    /* Check provided vcpu */
    cpu = get_cpu(has_vcpu, vcpu, &err);
    if (err) {
        error_propagate(errp, err);
        return NULL;
    }

    /* Check events */
    if (!check_events(has_vcpu, true, is_pattern, name, errp)) {
        return NULL;
    }

    /* Get states (all errors checked above) */
    trace_event_iter_init(&iter, name);
    while ((ev = trace_event_iter_next(&iter)) != NULL) {
        TraceEventInfoList *elem;
        bool is_vcpu = trace_event_is_vcpu(ev);

        if (has_vcpu && !is_vcpu) {
            continue;
        }

        elem = g_new(TraceEventInfoList, 1);
        elem->value = g_new(TraceEventInfo, 1);
        elem->value->vcpu = is_vcpu;
        elem->value->name = g_strdup(trace_event_get_name(ev));

        if (!trace_event_get_state_static(ev)) {
            elem->value->state = TRACE_EVENT_STATE_UNAVAILABLE;
        } else {
            if (has_vcpu) {
                if (is_vcpu) {
                    if (trace_event_get_vcpu_state_dynamic(cpu, ev)) {
                        elem->value->state = TRACE_EVENT_STATE_ENABLED;
                    } else {
                        elem->value->state = TRACE_EVENT_STATE_DISABLED;
                    }
                }
                /* else: already skipped above */
            } else {
                if (trace_event_get_state_dynamic(ev)) {
                    elem->value->state = TRACE_EVENT_STATE_ENABLED;
                } else {
                    elem->value->state = TRACE_EVENT_STATE_DISABLED;
                }
            }
        }
        elem->next = events;
        events = elem;
    }
    return events;
}
/** * ixgbe_fcoe_ddp_setup - called to set up ddp context * @netdev: the corresponding net_device * @xid: the exchange id requesting ddp * @sgl: the scatter-gather list for this request * @sgc: the number of scatter-gather items * * Returns : 1 for success and 0 for no ddp */ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid, struct scatterlist *sgl, unsigned int sgc, int target_mode) { struct ixgbe_adapter *adapter; struct ixgbe_hw *hw; struct ixgbe_fcoe *fcoe; struct ixgbe_fcoe_ddp *ddp; struct scatterlist *sg; unsigned int i, j, dmacount; unsigned int len; static const unsigned int bufflen = IXGBE_FCBUFF_MIN; unsigned int firstoff = 0; unsigned int lastsize; unsigned int thisoff = 0; unsigned int thislen = 0; u32 fcbuff, fcdmarw, fcfltrw, fcrxctl; dma_addr_t addr = 0; struct pci_pool *pool; unsigned int cpu; if (!netdev || !sgl) return 0; adapter = netdev_priv(netdev); if (xid >= IXGBE_FCOE_DDP_MAX) { e_warn(drv, "xid=0x%x out-of-range\n", xid); return 0; } /* no DDP if we are already down or resetting */ if (test_bit(__IXGBE_DOWN, &adapter->state) || test_bit(__IXGBE_RESETTING, &adapter->state)) return 0; fcoe = &adapter->fcoe; if (!fcoe->pool) { e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid); return 0; } ddp = &fcoe->ddp[xid]; if (ddp->sgl) { e_err(drv, "xid 0x%x w/ non-null sgl=%p nents=%d\n", xid, ddp->sgl, ddp->sgc); return 0; } ixgbe_fcoe_clear_ddp(ddp); /* setup dma from scsi command sgl */ dmacount = pci_map_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE); if (dmacount == 0) { e_err(drv, "xid 0x%x DMA map error\n", xid); return 0; } /* alloc the udl from per cpu ddp pool */ cpu = get_cpu(); pool = *per_cpu_ptr(fcoe->pool, cpu); ddp->udl = pci_pool_alloc(pool, GFP_ATOMIC, &ddp->udp); if (!ddp->udl) { e_err(drv, "failed allocated ddp context\n"); goto out_noddp_unmap; } ddp->pool = pool; ddp->sgl = sgl; ddp->sgc = sgc; j = 0; for_each_sg(sgl, sg, dmacount, i) { addr = sg_dma_address(sg); len = sg_dma_len(sg); while (len) { /* max number of buffers allowed in one DDP context */ if (j >= IXGBE_BUFFCNT_MAX) { *per_cpu_ptr(fcoe->pcpu_noddp, cpu) += 1; goto out_noddp_free; } /* get the offset of length of current buffer */ thisoff = addr & ((dma_addr_t)bufflen - 1); thislen = min((bufflen - thisoff), len); /* * all but the 1st buffer (j == 0) * must be aligned on bufflen */ if ((j != 0) && (thisoff)) goto out_noddp_free; /* * all but the last buffer * ((i == (dmacount - 1)) && (thislen == len)) * must end at bufflen */ if (((i != (dmacount - 1)) || (thislen != len)) && ((thislen + thisoff) != bufflen)) goto out_noddp_free; ddp->udl[j] = (u64)(addr - thisoff); /* only the first buffer may have none-zero offset */ if (j == 0) firstoff = thisoff; len -= thislen; addr += thislen; j++; } }
static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipcomp_data *ipcd = x->data;
	const int plen = skb->len;
	int dlen = IPCOMP_SCRATCH_SIZE;
	const u8 *start = skb->data;
	const int cpu = get_cpu();
	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
	int len;

	if (err)
		goto out;

	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
		err = -EINVAL;
		goto out;
	}

	len = dlen - plen;
	if (len > skb_tailroom(skb))
		len = skb_tailroom(skb);

	__skb_put(skb, len);

	len += plen;
	skb_copy_to_linear_data(skb, scratch, len);

	while ((scratch += len, dlen -= len) > 0) {
		skb_frag_t *frag;
		struct page *page;

		err = -EMSGSIZE;
		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
			goto out;

		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
		page = alloc_page(GFP_ATOMIC);

		err = -ENOMEM;
		if (!page)
			goto out;

		__skb_frag_set_page(frag, page);

		len = PAGE_SIZE;
		if (dlen < len)
			len = dlen;

		frag->page_offset = 0;
		skb_frag_size_set(frag, len);
		memcpy(skb_frag_address(frag), scratch, len);

		skb->truesize += len;
		skb->data_len += len;
		skb->len += len;

		skb_shinfo(skb)->nr_frags++;
	}

	err = 0;

out:
	put_cpu();
	return err;
}
dotraplinkage void __kprobes do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; conditional_sti(regs); #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) goto gp_in_vm86; #endif tsk = current; if (!user_mode(regs)) goto gp_in_kernel; #ifdef CONFIG_X86_32 { int cpu; int ok; cpu = get_cpu(); ok = check_lazy_exec_limit(cpu, regs, error_code); put_cpu(); if (ok) return; if (print_fatal_signals) { printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n", error_code, error_code/8, regs->ip, smp_processor_id()); printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n", current->mm->context.exec_limit, current->mm->context.user_cs.a, current->mm->context.user_cs.b); } } #endif /*CONFIG_X86_32*/ tsk->thread.error_code = error_code; tsk->thread.trap_no = 13; if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk(KERN_INFO "%s[%d] general protection ip:%lx sp:%lx error:%lx", tsk->comm, task_pid_nr(tsk), regs->ip, regs->sp, error_code); print_vma_addr(" in ", regs->ip); printk("\n"); } force_sig(SIGSEGV, tsk); return; #ifdef CONFIG_X86_32 gp_in_vm86: local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); return; #endif gp_in_kernel: if (fixup_exception(regs)) return; tsk->thread.error_code = error_code; tsk->thread.trap_no = 13; if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; die("general protection fault", regs, error_code); }
/* * Ok, this is the main fork-routine. * * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ long do_fork(unsigned long clone_flags, unsigned long stack_start, struct pt_regs *regs, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { struct task_struct *p; int trace = 0; long pid; if (unlikely(current->ptrace)) { trace = fork_traceflag (clone_flags); if (trace) clone_flags |= CLONE_PTRACE; } p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. */ pid = IS_ERR(p) ? PTR_ERR(p) : p->pid; if (!IS_ERR(p)) { struct completion vfork; if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); } if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) { /* * We'll start up with an immediate SIGSTOP. */ sigaddset(&p->pending.signal, SIGSTOP); set_tsk_thread_flag(p, TIF_SIGPENDING); } if (!(clone_flags & CLONE_STOPPED)) { /* * Do the wakeup last. On SMP we treat fork() and * CLONE_VM separately, because fork() has already * created cache footprint on this CPU (due to * copying the pagetables), hence migration would * probably be costy. Threads on the other hand * have less traction to the current CPU, and if * there's an imbalance then the scheduler can * migrate this fresh thread now, before it * accumulates a larger cache footprint: */ if (clone_flags & CLONE_VM) wake_up_forked_thread(p); else wake_up_forked_process(p); } else { int cpu = get_cpu(); p->state = TASK_STOPPED; if (cpu_is_offline(task_cpu(p))) set_task_cpu(p, cpu); put_cpu(); } ++total_forks; if (unlikely (trace)) { current->ptrace_message = pid; ptrace_notify ((trace << 8) | SIGTRAP); } if (clone_flags & CLONE_VFORK) { wait_for_completion(&vfork); if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP); } else /* * Let the child process run first, to avoid most of the * COW overhead when the child exec()s afterwards. */ set_need_resched(); } return pid; }
void measure_cpu_start()
{
	cpu_method = PROC_STAT;
	get_cpu(lib_start_count);
}
static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3, u16 arg4, u16 arg5, u16 arg6, u16 arg7, void *ts1_base, u32 ts1_size, void *ts2_base, u32 ts2_size) { unsigned long flags; u16 status; struct desc_struct save_desc_40; int cpu; /* * PnP BIOSes are generally not terribly re-entrant. * Also, don't rely on them to save everything correctly. */ if (pnp_bios_is_utter_crap) return PNP_FUNCTION_NOT_SUPPORTED; cpu = get_cpu(); save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8]; get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc; /* On some boxes IRQ's during PnP BIOS calls are deadly. */ spin_lock_irqsave(&pnp_bios_lock, flags); /* The lock prevents us bouncing CPU here */ if (ts1_size) Q2_SET_SEL(smp_processor_id(), PNP_TS1, ts1_base, ts1_size); if (ts2_size) Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size); __asm__ __volatile__("pushl %%ebp\n\t" "pushl %%edi\n\t" "pushl %%esi\n\t" "pushl %%ds\n\t" "pushl %%es\n\t" "pushl %%fs\n\t" "pushl %%gs\n\t" "pushfl\n\t" "movl %%esp, pnp_bios_fault_esp\n\t" "movl $1f, pnp_bios_fault_eip\n\t" "lcall %5,%6\n\t" "1:popfl\n\t" "popl %%gs\n\t" "popl %%fs\n\t" "popl %%es\n\t" "popl %%ds\n\t" "popl %%esi\n\t" "popl %%edi\n\t" "popl %%ebp\n\t":"=a"(status) :"0"((func) | (((u32) arg1) << 16)), "b"((arg2) | (((u32) arg3) << 16)), "c"((arg4) | (((u32) arg5) << 16)), "d"((arg6) | (((u32) arg7) << 16)), "i"(PNP_CS32), "i"(0) :"memory"); spin_unlock_irqrestore(&pnp_bios_lock, flags); get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40; put_cpu(); /* If we get here and this is set then the PnP BIOS faulted on us. */ if (pnp_bios_is_utter_crap) { printk(KERN_ERR "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n"); printk(KERN_ERR "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n"); printk(KERN_ERR "PnPBIOS: Check with your vendor for an updated BIOS\n"); } return status; }
void cpu_start_internal(void)
{
	get_cpu(lib_start_count);
	return;
}
static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			u32 index, int *nent, int maxnent)
{
	int r;
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;

	/* cpuid 1.edx */
	const u32 kvm_supported_word0_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_supported_word1_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_supported_word4_x86_features =
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ |
		F(AVX) | F(F16C) | F(RDRAND);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_supported_word6_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_supported_word5_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* cpuid 7.0.ebx */
	const u32 kvm_supported_word9_x86_features =
		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | f_invpcid | F(RTM);

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	r = -E2BIG;

	if (*nent >= maxnent)
		goto out;

	do_cpuid_1_ent(entry, function, index);
	++*nent;

	switch (function) {
	case 0:
		entry->eax = min(entry->eax, (u32)0xd);
		break;
	case 1:
		entry->edx &= kvm_supported_word0_x86_features;
		cpuid_mask(&entry->edx, 0);
		entry->ecx &= kvm_supported_word4_x86_features;
		cpuid_mask(&entry->ecx, 4);
		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		entry->ecx |= F(X2APIC);
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2: {
		int t, times = entry->eax & 0xff;

		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
		for (t = 1; t < times; ++t) {
			if (*nent >= maxnent)
				goto out;

			do_cpuid_1_ent(&entry[t], function, 0);
			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
			++*nent;
		}
		break;
	}
	/* function 4 has additional index. */
	case 4: {
		int i, cache_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until cache_type is zero */
		for (i = 1; ; ++i) {
			if (*nent >= maxnent)
				goto out;

			cache_type = entry[i - 1].eax & 0x1f;
			if (!cache_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 7: {
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* Mask ebx against host capability word 9 */
		if (index == 0) {
			entry->ebx &= kvm_supported_word9_x86_features;
			cpuid_mask(&entry->ebx, 9);
			/* TSC_ADJUST is emulated */
			entry->ebx |= F(TSC_ADJUST);
		} else
			entry->ebx = 0;
		entry->eax = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	}
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		union cpuid10_eax eax;
		union cpuid10_edx edx;

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = cap.num_counters_fixed;
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		edx.split.reserved = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/* function 0xb has additional index. */
	case 0xb: {
		int i, level_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until level_type is zero */
		for (i = 1; ; ++i) {
			if (*nent >= maxnent)
				goto out;

			level_type = entry[i - 1].ecx & 0xff00;
			if (!level_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 0xd: {
		int idx, i;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		for (idx = 1, i = 1; idx < 64; ++idx) {
			if (*nent >= maxnent)
				goto out;

			do_cpuid_1_ent(&entry[i], function, idx);
			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
				continue;
			entry[i].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
			++i;
		}
		break;
	}
	case KVM_CPUID_SIGNATURE: {
		static const char signature[12] = "KVMKVMKVM\0\0";
		const u32 *sigptr = (const u32 *)signature;
		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);

		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001a);
		break;
	case 0x80000001:
		entry->edx &= kvm_supported_word1_x86_features;
		cpuid_mask(&entry->edx, 1);
		entry->ecx &= kvm_supported_word6_x86_features;
		cpuid_mask(&entry->ecx, 6);
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->ebx = entry->edx = 0;
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
		break;
	case 0x8000001d:
		break;
	/* Add support for Centaur's CPUID instruction */
	case 0xC0000000:
		/* Just support up to 0xC0000004 now */
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_supported_word5_x86_features;
		cpuid_mask(&entry->edx, 5);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 6: /* Thermal management */
	case 0x80000007: /* Advanced power management */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	kvm_x86_ops->set_supported_cpuid(function, entry);

	r = 0;

out:
	put_cpu();

	return r;
}
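The comment above the get_cpu() in do_cpuid_ent() notes that all cpuid_count() calls should run on the same CPU, which is why the whole switch is bracketed by get_cpu()/put_cpu(). Purely as a userspace analogue (not KVM code), one can pin a thread to one CPU and walk an index-dependent leaf such as leaf 4 with GCC's __get_cpuid_count(); the leaf choice, loop bound, and CPU 0 pin below are illustrative assumptions.

/* Illustrative userspace sketch, not from the kernel: pin to CPU 0, then
 * enumerate CPUID leaf 4 sub-leaves until the cache-type field is zero. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <cpuid.h>

int main(void)
{
	cpu_set_t set;
	unsigned int eax, ebx, ecx, edx, idx;

	CPU_ZERO(&set);
	CPU_SET(0, &set);			/* stay on one CPU, as the comment asks */
	if (sched_setaffinity(0, sizeof(set), &set) == -1)
		perror("sched_setaffinity");

	for (idx = 0; ; idx++) {
		if (!__get_cpuid_count(4, idx, &eax, &ebx, &ecx, &edx))
			break;
		if (!(eax & 0x1f))		/* cache type 0: no further entries */
			break;
		printf("leaf 4, index %u: eax=%08x ebx=%08x\n", idx, eax, ebx);
	}
	return 0;
}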
static unsigned long fill_arg(struct syscallrecord *rec, unsigned int argnum)
{
	struct syscallentry *entry;
	unsigned int call;
	enum argtype argtype;

	call = rec->nr;
	entry = syscalls[call].entry;

	if (argnum > entry->num_args)
		return 0;

	argtype = get_argtype(entry, argnum);

	switch (argtype) {
	case ARG_UNDEFINED:
		return (unsigned long) rand64();

	case ARG_FD:
		return get_random_fd();

	case ARG_LEN:
		return (unsigned long) get_len();

	case ARG_ADDRESS:
		return handle_arg_address(rec, argnum);

	case ARG_NON_NULL_ADDRESS:
		return (unsigned long) get_non_null_address();

	case ARG_MMAP:
		return (unsigned long) get_map();

	case ARG_PID:
		return (unsigned long) get_pid();

	case ARG_RANGE:
		return handle_arg_range(entry, argnum);

	case ARG_OP:	/* Like ARG_LIST, but just a single value. */
		return handle_arg_op(entry, argnum);

	case ARG_LIST:
		return handle_arg_list(entry, argnum);

	case ARG_RANDPAGE:
		return handle_arg_randpage();

	case ARG_CPU:
		return (unsigned long) get_cpu();

	case ARG_PATHNAME:
		return (unsigned long) generate_pathname();

	case ARG_IOVEC:
		return handle_arg_iovec(entry, rec, argnum);

	case ARG_IOVECLEN:
	case ARG_SOCKADDRLEN:
		/* We already set the len in the ARG_IOVEC/ARG_SOCKADDR case
		 * So here we just return what we had set there. */
		return get_argval(rec, argnum);

	case ARG_SOCKADDR:
		return handle_arg_sockaddr(entry, rec, argnum);

	case ARG_MODE_T:
		return handle_arg_mode_t();
	}

	BUG("unreachable!\n");
}
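fill_arg() is trinity's per-argument dispatcher: it looks up the argument's declared type and hands off to a type-specific generator. A stripped-down sketch of the same switch-on-argtype pattern follows; the enum values and return choices are invented for illustration and are not trinity's real API.

/* Hypothetical miniature of the argtype-dispatch pattern; not trinity code. */
#include <stdlib.h>

enum mini_argtype { MINI_ARG_UNDEFINED, MINI_ARG_FD, MINI_ARG_LEN };

static unsigned long mini_fill_arg(enum mini_argtype type)
{
	switch (type) {
	case MINI_ARG_UNDEFINED:
		return (unsigned long) rand();		/* anything goes */
	case MINI_ARG_FD:
		return 0;				/* stdin: a trivially valid fd */
	case MINI_ARG_LEN:
		return (unsigned long) (rand() % 4096);
	}
	return 0;					/* unreachable for valid types */
}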
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	const unsigned cpu = get_cpu();
	struct thread_info *tinfo;
	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
	unsigned long dummy;
	unsigned used = 0;
	int graph = 0;
	int done = 0;

	if (!task)
		task = current;

	if (!stack) {
		if (regs)
			stack = (unsigned long *)regs->sp;
		else if (task != current)
			stack = (unsigned long *)task->thread.sp;
		else
			stack = &dummy;
	}

	if (!bp)
		bp = stack_frame(task, regs);

	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions
	 */
	tinfo = task_thread_info(task);
	while (!done) {
		unsigned long *stack_end;
		enum stack_type stype;
		char *id;

		stype = analyze_stack(cpu, task, stack, &stack_end,
				      irq_stack, &used, &id);

		/* Default finish unless specified to continue */
		done = 1;

		switch (stype) {

		/* Break out early if we are on the thread stack */
		case STACK_IS_NORMAL:
			break;

		case STACK_IS_EXCEPTION:

			if (ops->stack(data, id) < 0)
				break;

			bp = ops->walk_stack(tinfo, stack, bp, ops,
					     data, stack_end, &graph);
			ops->stack(data, "<EOE>");
			/*
			 * We link to the next stack via the
			 * second-to-last pointer (index -2 to end) in the
			 * exception stack:
			 */
			stack = (unsigned long *) stack_end[-2];
			done = 0;
			break;

		case STACK_IS_IRQ:

			if (ops->stack(data, "IRQ") < 0)
				break;
			bp = ops->walk_stack(tinfo, stack, bp,
					     ops, data, stack_end, &graph);
			/*
			 * We link to the next stack (which would be
			 * the process stack normally) the last
			 * pointer (index -1 to end) in the IRQ stack:
			 */
			stack = (unsigned long *) (stack_end[-1]);
			irq_stack = NULL;
			ops->stack(data, "EOI");
			done = 0;
			break;

		case STACK_IS_UNKNOWN:
			ops->stack(data, "UNK");
			break;
		}
	}

	/*
	 * This handles the process stack:
	 */
	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}
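dump_trace() above walks the exception, IRQ, and process stacks through the stacktrace_ops callbacks, all under get_cpu()/put_cpu() so the per-CPU IRQ stack pointer stays valid. Purely as a userspace point of comparison (not the kernel mechanism), glibc's backtrace(3) can dump the current call chain:

/* Userspace-only sketch: print the caller's stack with glibc backtrace(3).
 * This is an analogue of "dump the call chain", unrelated to dump_trace(). */
#include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

static void show_stack_here(void)
{
	void *frames[32];
	char **syms;
	int i, n;

	n = backtrace(frames, 32);
	syms = backtrace_symbols(frames, n);
	for (i = 0; i < n; i++)
		printf("  %s\n", syms ? syms[i] : "?");
	free(syms);
}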
void lru_add_drain(void)
{
	lru_add_drain_cpu(get_cpu());
	put_cpu();
}
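lru_add_drain() is the most compact example of the pattern in this list: get_cpu() disables preemption and returns the current CPU id, the per-CPU work runs, and put_cpu() re-enables preemption. The hypothetical module below sketches the same bracket around invented per-CPU data; it only illustrates the API and is not taken from any of the files above.

/* Hypothetical sketch of the get_cpu()/put_cpu() bracket; names invented. */
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static int __init demo_init(void)
{
	int cpu = get_cpu();		/* disables preemption, returns this CPU */

	per_cpu(demo_counter, cpu)++;	/* safe: we cannot migrate here */
	pr_info("demo: counter on CPU %d is now %lu\n",
		cpu, per_cpu(demo_counter, cpu));
	put_cpu();			/* re-enables preemption */
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");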
/*
 * this changes the io permissions bitmap in the current task.
 */
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
	unsigned long i, max_long, bytes, bytes_updated;
	struct thread_struct *t = &current->thread;
	struct tss_struct *tss;
	unsigned long *bitmap;

	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
		return -EINVAL;
	if (turn_on && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/*
	 * If it's the first ioperm() call in this thread's lifetime, set the
	 * IO bitmap up. ioperm() is much less timing critical than clone(),
	 * this is why we delay this operation until now:
	 */
	if (!t->io_bitmap_ptr) {
		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!bitmap)
			return -ENOMEM;

		memset(bitmap, 0xff, IO_BITMAP_BYTES);
		t->io_bitmap_ptr = bitmap;
	}

	/*
	 * do it in the per-thread copy and in the TSS ...
	 *
	 * Disable preemption via get_cpu() - we must not switch away
	 * because the ->io_bitmap_max value must match the bitmap
	 * contents:
	 */
	tss = &per_cpu(init_tss, get_cpu());

	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);

	/*
	 * Search for a (possibly new) maximum. This is simple and stupid,
	 * to keep it obviously correct:
	 */
	max_long = 0;
	for (i = 0; i < IO_BITMAP_LONGS; i++)
		if (t->io_bitmap_ptr[i] != ~0UL)
			max_long = i;

	bytes = (max_long + 1) * sizeof(long);
	bytes_updated = max(bytes, t->io_bitmap_max);
	t->io_bitmap_max = bytes;

	/*
	 * Sets the lazy trigger so that the next I/O operation will
	 * reload the correct bitmap.
	 * Reset the owner so that a process switch will not set
	 * tss->io_bitmap_base to IO_BITMAP_OFFSET.
	 */
	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
	tss->io_bitmap_owner = NULL;

	put_cpu();

	return 0;
}
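sys_ioperm() is the kernel side of the ioperm(2) system call. For context, the matching userspace call looks like the sketch below; it needs CAP_SYS_RAWIO, and the parallel-port base address 0x378 is only a conventional example.

/* Userspace counterpart of sys_ioperm(): request three ports, poke one.
 * Must run as root (or with CAP_SYS_RAWIO) on x86. */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (ioperm(0x378, 3, 1) == -1) {	/* turn_on = 1 */
		perror("ioperm");
		return 1;
	}
	outb(0x00, 0x378);			/* write the data port */
	ioperm(0x378, 3, 0);			/* give the permission back */
	return 0;
}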
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	const unsigned cpu = get_cpu();
	unsigned long *irq_stack_end =
		(unsigned long *)per_cpu(irq_stack_ptr, cpu);
	unsigned used = 0;
	struct thread_info *tinfo;
	int graph = 0;

	if (!task)
		task = current;

	if (!stack) {
		unsigned long dummy;
		stack = &dummy;
		if (task && task != current)
			stack = (unsigned long *)task->thread.sp;
	}

#ifdef CONFIG_FRAME_POINTER
	if (!bp) {
		if (task == current) {
			/* Grab bp right from our regs */
			get_bp(bp);
		} else {
			/* bp is the last reg pushed by switch_to */
			bp = *(unsigned long *) task->thread.sp;
		}
	}
#endif

	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions
	 */
	tinfo = task_thread_info(task);
	for (;;) {
		char *id;
		unsigned long *estack_end;
		estack_end = in_exception_stack(cpu, (unsigned long)stack,
						&used, &id);

		if (estack_end) {
			if (ops->stack(data, id) < 0)
				break;

			bp = ops->walk_stack(tinfo, stack, bp, ops,
					     data, estack_end, &graph);
			ops->stack(data, "<EOE>");
			/*
			 * We link to the next stack via the
			 * second-to-last pointer (index -2 to end) in the
			 * exception stack:
			 */
			stack = (unsigned long *) estack_end[-2];
			continue;
		}
		if (irq_stack_end) {
			unsigned long *irq_stack;
			irq_stack = irq_stack_end -
				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);

			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
				if (ops->stack(data, "IRQ") < 0)
					break;
				bp = ops->walk_stack(tinfo, stack, bp,
					ops, data, irq_stack_end, &graph);
				/*
				 * We link to the next stack (which would be
				 * the process stack normally) the last
				 * pointer (index -1 to end) in the IRQ stack:
				 */
				stack = (unsigned long *) (irq_stack_end[-1]);
				bp = fixup_bp_irq_link(bp, stack, irq_stack,
						       irq_stack_end);
				irq_stack_end = NULL;
				ops->stack(data, "EOI");
				continue;
			}
		}
		break;
	}

	/*
	 * This handles the process stack:
	 */
	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}
/*
 * Queue up a socket with data pending. If there are idle nfsd
 * processes, wake 'em up.
 *
 */
static void svc_sock_enqueue(struct svc_sock *svsk)
{
	struct svc_serv *serv = svsk->sk_server;
	struct svc_pool *pool;
	struct svc_rqst *rqstp;
	int cpu;

	if (!(svsk->sk_flags &
	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
		return;
	if (test_bit(SK_DEAD, &svsk->sk_flags))
		return;

	cpu = get_cpu();
	pool = svc_pool_for_cpu(svsk->sk_server, cpu);
	put_cpu();

	spin_lock_bh(&pool->sp_lock);

	if (!list_empty(&pool->sp_threads) &&
	    !list_empty(&pool->sp_sockets))
		printk(KERN_ERR
			"svc_sock_enqueue: threads and sockets both waiting??\n");

	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
		/* Don't enqueue dead sockets */
		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}

	/* Mark socket as busy. It will remain in this state until the
	 * server has processed all pending data and put the socket back
	 * on the idle list. We update SK_BUSY atomically because
	 * it also guards against trying to enqueue the svc_sock twice.
	 */
	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
		/* Don't enqueue socket while already enqueued */
		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}
	BUG_ON(svsk->sk_pool != NULL);
	svsk->sk_pool = pool;

	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
	     > svc_sock_wspace(svsk))
	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
		/* Don't enqueue while not enough space for reply */
		dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n",
			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
			svc_sock_wspace(svsk));
		svsk->sk_pool = NULL;
		clear_bit(SK_BUSY, &svsk->sk_flags);
		goto out_unlock;
	}
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);

	if (!list_empty(&pool->sp_threads)) {
		rqstp = list_entry(pool->sp_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: socket %p served by daemon %p\n",
			svsk->sk_sk, rqstp);
		svc_thread_dequeue(pool, rqstp);
		if (rqstp->rq_sock)
			printk(KERN_ERR
				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
				rqstp, rqstp->rq_sock);
		rqstp->rq_sock = svsk;
		atomic_inc(&svsk->sk_inuse);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
		BUG_ON(svsk->sk_pool != pool);
		wake_up(&rqstp->rq_wait);
	} else {