Example #1
void *pmqthread(void *param)
{
	int mustgetcpu = 0;
	struct params *par = param;
	cpu_set_t mask;
	int policy = SCHED_FIFO;
	struct sched_param schedp;
	struct timespec ts;

	memset(&schedp, 0, sizeof(schedp));
	schedp.sched_priority = par->priority;
	sched_setscheduler(0, policy, &schedp);

	if (par->cpu != -1) {
		CPU_ZERO(&mask);
		CPU_SET(par->cpu, &mask);
		if(sched_setaffinity(0, sizeof(mask), &mask) == -1)
			fprintf(stderr,	"WARNING: Could not set CPU affinity "
				"to CPU #%d\n", par->cpu);
	} else
		mustgetcpu = 1;

	par->tid = gettid();

	while (!par->shutdown) {
		if (par->sender) {

			/* Optionally force receiver timeout */
			if (par->forcetimeout) {
				struct timespec senddelay;
				
				senddelay.tv_sec = par->forcetimeout;
				senddelay.tv_nsec = 0;
				clock_nanosleep(CLOCK_MONOTONIC, 0, &senddelay, NULL);
			}

			/* Send message: Start of latency measurement ... */
			gettimeofday(&par->sent, NULL);
			if (mq_send(par->testmq, testmsg, strlen(testmsg), 1) != 0) {
				fprintf(stderr, "could not send test message\n");
				par->shutdown = 1;
			}
			par->samples++;
			if(par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;
			if (mustgetcpu)
				par->cpu = get_cpu();
			/* Wait until receiver ready */
			if (par->timeout) {
				clock_gettime(CLOCK_REALTIME, &ts);
				ts.tv_sec += par->timeout;
	
				if (mq_timedreceive(par->syncmq, par->recvsyncmsg, MSG_SIZE, NULL, &ts)
				    != strlen(syncmsg)) {
					fprintf(stderr, "could not receive sync message\n");
					par->shutdown = 1;				
				}
			} else if (mq_receive(par->syncmq, par->recvsyncmsg, MSG_SIZE, NULL) !=
			    strlen(syncmsg)) {
				perror("could not receive sync message");
				par->shutdown = 1;				
			}
			if (!par->shutdown && strcmp(syncmsg, par->recvsyncmsg)) {
				fprintf(stderr, "ERROR: Sync message mismatch detected\n");
				fprintf(stderr, "  %s != %s\n", syncmsg, par->recvsyncmsg);
				par->shutdown = 1;
			}
		} else {
			/* Receiver */
			if (par->timeout) {
				clock_gettime(CLOCK_REALTIME, &ts);
				par->timeoutcount = 0;
				ts.tv_sec += par->timeout;
				do {
					if (mq_timedreceive(par->testmq, par->recvtestmsg,
					    MSG_SIZE, NULL, &ts) != strlen(testmsg)) {
						if (!par->forcetimeout || errno != ETIMEDOUT) {
							perror("could not receive test message");
							par->shutdown = 1;
							break;
						}
						if (errno == ETIMEDOUT) {
							par->timeoutcount++;
							clock_gettime(CLOCK_REALTIME, &ts);
							ts.tv_sec += par->timeout;
						}
					} else
						break;
				}
				while (1);
			} else {
				if (mq_receive(par->testmq, par->recvtestmsg, MSG_SIZE, NULL) !=
				    strlen(testmsg)) {
					perror("could not receive test message");
					par->shutdown = 1;
				}
			}
			/* ... Received the message: End of latency measurement */
			gettimeofday(&par->received, NULL);

			if (!par->shutdown && strcmp(testmsg, par->recvtestmsg)) {
				fprintf(stderr, "ERROR: Test message mismatch detected\n");
				fprintf(stderr, "  %s != %s\n", testmsg, par->recvtestmsg);
				par->shutdown = 1;
			}
			par->samples++;
			timersub(&par->received, &par->neighbor->sent,
			    &par->diff);

			if (par->diff.tv_usec < par->mindiff)
				par->mindiff = par->diff.tv_usec;
			if (par->diff.tv_usec > par->maxdiff)
				par->maxdiff = par->diff.tv_usec;
			par->sumdiff += (double) par->diff.tv_usec;
			if (par->tracelimit && par->maxdiff > par->tracelimit) {
				char tracing_enabled_file[MAX_PATH];

				strcpy(tracing_enabled_file, get_debugfileprefix());
				strcat(tracing_enabled_file, "tracing_enabled");
				int tracing_enabled =
				    open(tracing_enabled_file, O_WRONLY);
				if (tracing_enabled >= 0) {
					write(tracing_enabled, "0", 1);
					close(tracing_enabled);
				} else
					snprintf(par->error, sizeof(par->error),
					    "Could not access %s\n",
					    tracing_enabled_file);
				par->shutdown = 1;
				par->neighbor->shutdown = 1;
			}

			if (par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;
			if (mustgetcpu)
				par->cpu = get_cpu();
			clock_nanosleep(CLOCK_MONOTONIC, 0, &par->delay, NULL);

			/* Tell receiver that we are ready for the next measurement */
			if (mq_send(par->syncmq, syncmsg, strlen(syncmsg), 1) != 0) {
				fprintf(stderr, "could not send sync message\n");
				par->shutdown = 1;
			}
		}
	}
	par->stopped = 1;
	return NULL;
}
Example #2
void lru_add_drain(void)
{
	drain_cpu_pagevecs(get_cpu());
	put_cpu();
}
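In the kernel examples here (such as #2 above and #3 below), get_cpu() disables preemption and returns the current processor id, and put_cpu() re-enables preemption; the pair brackets any access to per-CPU data that must not migrate. A minimal sketch of that pattern, assuming a kernel-module context; my_counter is a hypothetical per-CPU variable used only for illustration:

#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, my_counter);

static void bump_local_counter(void)
{
	int cpu = get_cpu();		/* disables preemption, returns current CPU id */

	per_cpu(my_counter, cpu)++;	/* safe: the task cannot migrate here */

	put_cpu();			/* re-enables preemption */
}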
Example #3
/*
 * this changes the io permissions bitmap in the current task.
 */
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
	struct thread_struct *t = &current->thread;
	struct tss_struct *tss;
	unsigned int i, max_long, bytes, bytes_updated;

	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
		return -EINVAL;
#ifdef CONFIG_GRKERNSEC_IO
	if (turn_on) {
		gr_handle_ioperm();
		return -EPERM;
	}
#endif
	if (turn_on && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/*
	 * If it's the first ioperm() call in this thread's lifetime, set the
	 * IO bitmap up. ioperm() is much less timing critical than clone(),
	 * this is why we delay this operation until now:
	 */
	if (!t->io_bitmap_ptr) {
		unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);

		if (!bitmap)
			return -ENOMEM;

		memset(bitmap, 0xff, IO_BITMAP_BYTES);
		t->io_bitmap_ptr = bitmap;
		set_thread_flag(TIF_IO_BITMAP);
	}

	/*
	 * do it in the per-thread copy and in the TSS ...
	 *
	 * Disable preemption via get_cpu() - we must not switch away
	 * because the ->io_bitmap_max value must match the bitmap
	 * contents:
	 */
	tss = init_tss + get_cpu();

	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);

	/*
	 * Search for a (possibly new) maximum. This is simple and stupid,
	 * to keep it obviously correct:
	 */
	max_long = 0;
	for (i = 0; i < IO_BITMAP_LONGS; i++)
		if (t->io_bitmap_ptr[i] != ~0UL)
			max_long = i;

	bytes = (max_long + 1) * sizeof(unsigned long);
	bytes_updated = max(bytes, t->io_bitmap_max);

	t->io_bitmap_max = bytes;

	/* Update the TSS: */
	memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);

	put_cpu();

	return 0;
}
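The set_bitmap() helper called above is not part of this excerpt. A rough sketch of what such a helper does, setting or clearing num consecutive bits starting at from (a set bit denies access to the corresponding I/O port); this is an illustration, not necessarily the exact kernel implementation:

#include <linux/bitops.h>

static void set_bitmap(unsigned long *bitmap, unsigned int base,
		       unsigned int extent, int new_value)
{
	unsigned int i;

	for (i = base; i < base + extent; i++) {
		if (new_value)
			__set_bit(i, bitmap);	/* deny access to port i */
		else
			__clear_bit(i, bitmap);	/* allow access to port i */
	}
}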
Example #4
/* collect final CPU utilization raw data */
void
measure_cpu_stop()
{
  get_cpu(lib_end_count);
}
Example #5
void
cpu_stop_internal(void)
{
  get_cpu(lib_end_count);
}
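Note that in Examples #4, #5, #21 and #23 get_cpu() is not the kernel primitive at all: it is a library routine, in the style of netperf's CPU-utilization code, that snapshots per-CPU usage counters into the supplied array. A user-space sketch of what such a snapshot helper might do by reading /proc/stat; every name below is illustrative, not taken from the original library:

#include <stdio.h>
#include <string.h>

#define MAX_CPUS 64

static void snapshot_cpu_counters(unsigned long long counters[MAX_CPUS])
{
	FILE *f = fopen("/proc/stat", "r");
	char line[256];
	int cpu = 0;

	if (!f)
		return;

	while (fgets(line, sizeof(line), f) && cpu < MAX_CPUS) {
		unsigned long long user, nice, sys, idle;

		/* only per-CPU lines ("cpu0 ...", "cpu1 ..."), not the aggregate "cpu" line */
		if (strncmp(line, "cpu", 3) != 0 || line[3] < '0' || line[3] > '9')
			continue;
		if (sscanf(line + 3, "%*d %llu %llu %llu %llu",
			   &user, &nice, &sys, &idle) == 4)
			counters[cpu++] = user + nice + sys + idle;
	}
	fclose(f);
}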
Example #6
void *semathread(void *param)
{
	int mustgetcpu = 0;
	struct params *par = param;
	cpu_set_t mask;
	int policy = SCHED_FIFO;
	struct sched_param schedp;
	struct sembuf sb = { 0, 0, 0};
	sigset_t sigset;

	sigemptyset(&sigset);
	pthread_sigmask(SIG_SETMASK, &sigset, NULL);

	memset(&schedp, 0, sizeof(schedp));
	schedp.sched_priority = par->priority;
	sched_setscheduler(0, policy, &schedp);

	if (par->cpu != -1) {
		CPU_ZERO(&mask);
		CPU_SET(par->cpu, &mask);
		if(sched_setaffinity(0, sizeof(mask), &mask) == -1)
			snprintf(par->error, sizeof(par->error),
			    "WARNING: Could not set CPU affinity "
			    "to CPU #%d\n", par->cpu);
	} else {
		int max_cpus = sysconf(_SC_NPROCESSORS_CONF);

		if (max_cpus > 1)
			mustgetcpu = 1;
		else
			par->cpu = 0;
	}

	if (!wasforked)
		par->tid = gettid();

	while (!par->shutdown) {
		if (par->sender) {
 			sb.sem_num = SEM_WAIT_FOR_SENDER;
 			sb.sem_op = SEM_UNLOCK;
			/*
			 * Unlocking the semaphore:
			 *   Start of latency measurement ...
			 */
			gettimeofday(&par->unblocked, NULL);
			semop(par->semid, &sb, 1);
			par->samples++;
			if(par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;

			if (mustgetcpu)
				par->cpu = get_cpu();

 			sb.sem_num = SEM_WAIT_FOR_RECEIVER;
 			sb.sem_op = SEM_LOCK;
			semop(par->semid, &sb, 1);

 			sb.sem_num = SEM_WAIT_FOR_SENDER;
			sb.sem_op = SEM_LOCK;
			semop(par->semid, &sb, 1);
		} else {
			/* Receiver */
			struct params *neighbor;

 			if (wasforked)
				neighbor = par + par->num_threads;
			else
				neighbor = par->neighbor;

 			sb.sem_num = SEM_WAIT_FOR_SENDER;
			sb.sem_op = SEM_LOCK;
			semop(par->semid, &sb, 1);

			/*
			 * ... We got the lock:
			 * End of latency measurement
			 */
			gettimeofday(&par->received, NULL);
			par->samples++;
			if (par->max_cycles && par->samples >= par->max_cycles)
				par->shutdown = 1;

			if (mustgetcpu)
				par->cpu = get_cpu();

			timersub(&par->received, &neighbor->unblocked,
			    &par->diff);

			if (par->diff.tv_usec < par->mindiff)
				par->mindiff = par->diff.tv_usec;
			if (par->diff.tv_usec > par->maxdiff)
				par->maxdiff = par->diff.tv_usec;
			par->sumdiff += (double) par->diff.tv_usec;
			if (par->tracelimit && par->maxdiff > par->tracelimit) {
				char tracing_enabled_file[MAX_PATH];

				strcpy(tracing_enabled_file, get_debugfileprefix());
				strcat(tracing_enabled_file, "tracing_enabled");
				int tracing_enabled =
				    open(tracing_enabled_file, O_WRONLY);
				if (tracing_enabled >= 0) {
					write(tracing_enabled, "0", 1);
					close(tracing_enabled);
				} else
					snprintf(par->error, sizeof(par->error),
					    "Could not access %s\n",
					    tracing_enabled_file);
				par->shutdown = 1;
				neighbor->shutdown = 1;
			}

 			sb.sem_num = SEM_WAIT_FOR_RECEIVER;
			sb.sem_op = SEM_UNLOCK;
			semop(par->semid, &sb, 1);

			nanosleep(&par->delay, NULL);

 			sb.sem_num = SEM_WAIT_FOR_SENDER;
			sb.sem_op = SEM_UNLOCK;
			semop(par->semid, &sb, 1);
		}
	}
	if (par->sender) {
		sb.sem_num = SEM_WAIT_FOR_SENDER;
		sb.sem_op = SEM_UNLOCK;
		semop(par->semid, &sb, 1);

		sb.sem_num = SEM_WAIT_FOR_RECEIVER;
		sb.sem_op = SEM_UNLOCK;
		semop(par->semid, &sb, 1);
	}
	par->stopped = 1;
	return NULL;
}
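The SEM_* constants used above live in the original test source and are not shown here. Plausible definitions, given how semop() interprets sem_op (negative waits, positive releases); the exact values are an assumption:

enum { SEM_WAIT_FOR_RECEIVER, SEM_WAIT_FOR_SENDER };	/* semaphore indices */

#define SEM_LOCK	-1	/* semop() decrement: wait on the semaphore */
#define SEM_UNLOCK	 1	/* semop() increment: release the semaphore */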
Example #7
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
				 u32 index, int *nent, int maxnent)
{
	int r;
	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
	unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
				? F(GBPAGES) : 0;
	unsigned f_lm = F(LM);
#else
	unsigned f_gbpages = 0;
	unsigned f_lm = 0;
#endif
	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
	unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
	unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
	unsigned f_la57 = 0;

	/* cpuid 1.edx */
	const u32 kvm_cpuid_1_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
		0 /* Reserved, DS, ACPI */ | F(MMX) |
		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
		0 /* HTT, TM, Reserved, PBE */;
	/* cpuid 0x80000001.edx */
	const u32 kvm_cpuid_8000_0001_edx_x86_features =
		F(FPU) | F(VME) | F(DE) | F(PSE) |
		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
		F(PAT) | F(PSE36) | 0 /* Reserved */ |
		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
		F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
	/* cpuid 1.ecx */
	const u32 kvm_cpuid_1_ecx_x86_features =
		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
		 * but *not* advertised to guests via CPUID ! */
		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
		0 /* DS-CPL, VMX, SMX, EST */ |
		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
		F(F16C) | F(RDRAND);
	/* cpuid 0x80000001.ecx */
	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
		F(TOPOEXT) | F(PERFCTR_CORE);

	/* cpuid 0x80000008.ebx */
	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
		F(AMD_SSB_NO) | F(AMD_STIBP);

	/* cpuid 0xC0000001.edx */
	const u32 kvm_cpuid_C000_0001_edx_x86_features =
		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
		F(PMM) | F(PMM_EN);

	/* cpuid 7.0.ebx */
	const u32 kvm_cpuid_7_0_ebx_x86_features =
		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;

	/* cpuid 0xD.1.eax */
	const u32 kvm_cpuid_D_1_eax_x86_features =
		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

	/* cpuid 7.0.ecx*/
	const u32 kvm_cpuid_7_0_ecx_x86_features =
		F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);

	/* cpuid 7.0.edx*/
	const u32 kvm_cpuid_7_0_edx_x86_features =
		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP);

	/* all calls to cpuid_count() should be made on the same cpu */
	get_cpu();

	r = -E2BIG;

	if (*nent >= maxnent)
		goto out;

	do_cpuid_1_ent(entry, function, index);
	++*nent;

	switch (function) {
	case 0:
		entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
		break;
	case 1:
		entry->edx &= kvm_cpuid_1_edx_x86_features;
		cpuid_mask(&entry->edx, CPUID_1_EDX);
		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
		cpuid_mask(&entry->ecx, CPUID_1_ECX);
		/* we support x2apic emulation even if host does not support
		 * it since we emulate x2apic in software */
		entry->ecx |= F(X2APIC);
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2: {
		int t, times = entry->eax & 0xff;

		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
		for (t = 1; t < times; ++t) {
			if (*nent >= maxnent)
				goto out;

			do_cpuid_1_ent(&entry[t], function, 0);
			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
			++*nent;
		}
		break;
	}
	/* function 4 has additional index. */
	case 4: {
		int i, cache_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until cache_type is zero */
		for (i = 1; ; ++i) {
			if (*nent >= maxnent)
				goto out;

			cache_type = entry[i - 1].eax & 0x1f;
			if (!cache_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 6: /* Thermal management */
		entry->eax = 0x4; /* allow ARAT */
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 7: {
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* Mask ebx against host capability word 9 */
		if (index == 0) {
			entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
			cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
			// TSC_ADJUST is emulated
			entry->ebx |= F(TSC_ADJUST);
			entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
			f_la57 = entry->ecx & F(LA57);
			cpuid_mask(&entry->ecx, CPUID_7_ECX);
			/* Set LA57 based on hardware capability. */
			entry->ecx |= f_la57;
			entry->ecx |= f_umip;
			/* PKU is not yet implemented for shadow paging. */
			if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
				entry->ecx &= ~F(PKU);
			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
			cpuid_mask(&entry->edx, CPUID_7_EDX);
			/*
			 * We emulate ARCH_CAPABILITIES in software even
			 * if the host doesn't support it.
			 */
			entry->edx |= F(ARCH_CAPABILITIES);
		} else {
			entry->ebx = 0;
			entry->ecx = 0;
			entry->edx = 0;
		}
		entry->eax = 0;
		break;
	}
	case 9:
		break;
	case 0xa: { /* Architectural Performance Monitoring */
		struct x86_pmu_capability cap;
		union cpuid10_eax eax;
		union cpuid10_edx edx;

		perf_get_x86_pmu_capability(&cap);

		/*
		 * Only support guest architectural pmu on a host
		 * with architectural pmu.
		 */
		if (!cap.version)
			memset(&cap, 0, sizeof(cap));

		eax.split.version_id = min(cap.version, 2);
		eax.split.num_counters = cap.num_counters_gp;
		eax.split.bit_width = cap.bit_width_gp;
		eax.split.mask_length = cap.events_mask_len;

		edx.split.num_counters_fixed = cap.num_counters_fixed;
		edx.split.bit_width_fixed = cap.bit_width_fixed;
		edx.split.reserved = 0;

		entry->eax = eax.full;
		entry->ebx = cap.events_mask;
		entry->ecx = 0;
		entry->edx = edx.full;
		break;
	}
	/* function 0xb has additional index. */
	case 0xb: {
		int i, level_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until level_type is zero */
		for (i = 1; ; ++i) {
			if (*nent >= maxnent)
				goto out;

			level_type = entry[i - 1].ecx & 0xff00;
			if (!level_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 0xd: {
		int idx, i;
		u64 supported = kvm_supported_xcr0();

		entry->eax &= supported;
		entry->ebx = xstate_required_size(supported, false);
		entry->ecx = entry->ebx;
		entry->edx &= supported >> 32;
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		if (!supported)
			break;

		for (idx = 1, i = 1; idx < 64; ++idx) {
			u64 mask = ((u64)1 << idx);
			if (*nent >= maxnent)
				goto out;

			do_cpuid_1_ent(&entry[i], function, idx);
			if (idx == 1) {
				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
				entry[i].ebx = 0;
				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
					entry[i].ebx =
						xstate_required_size(supported,
								     true);
			} else {
				if (entry[i].eax == 0 || !(supported & mask))
					continue;
				if (WARN_ON_ONCE(entry[i].ecx & 1))
					continue;
			}
			entry[i].ecx = 0;
			entry[i].edx = 0;
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
			++i;
		}
		break;
	}
	/* Intel PT */
	case 0x14: {
		int t, times = entry->eax;

		if (!f_intel_pt)
			break;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		for (t = 1; t <= times; ++t) {
			if (*nent >= maxnent)
				goto out;
			do_cpuid_1_ent(&entry[t], function, t);
			entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case KVM_CPUID_SIGNATURE: {
		static const char signature[12] = "KVMKVMKVM\0\0";
		const u32 *sigptr = (const u32 *)signature;
		entry->eax = KVM_CPUID_FEATURES;
		entry->ebx = sigptr[0];
		entry->ecx = sigptr[1];
		entry->edx = sigptr[2];
		break;
	}
	case KVM_CPUID_FEATURES:
		entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
			     (1 << KVM_FEATURE_ASYNC_PF) |
			     (1 << KVM_FEATURE_PV_EOI) |
			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
			     (1 << KVM_FEATURE_PV_UNHALT) |
			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
			     (1 << KVM_FEATURE_PV_SEND_IPI);

		if (sched_info_on())
			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		break;
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001f);
		break;
	case 0x80000001:
		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
		cpuid_mask(&entry->edx, CPUID_8000_0001_EDX);
		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
		cpuid_mask(&entry->ecx, CPUID_8000_0001_ECX);
		break;
	case 0x80000007: /* Advanced power management */
		/* invariant TSC is CPUID.80000007H:EDX[8] */
		entry->edx &= (1 << 8);
		/* mask against host */
		entry->edx &= boot_cpu_data.x86_power;
		entry->eax = entry->ebx = entry->ecx = 0;
		break;
	case 0x80000008: {
		unsigned g_phys_as = (entry->eax >> 16) & 0xff;
		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
		unsigned phys_as = entry->eax & 0xff;

		if (!g_phys_as)
			g_phys_as = phys_as;
		entry->eax = g_phys_as | (virt_as << 8);
		entry->edx = 0;
		/*
		 * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
		 * hardware cpuid
		 */
		if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
			entry->ebx |= F(AMD_IBPB);
		if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
			entry->ebx |= F(AMD_IBRS);
		if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
			entry->ebx |= F(VIRT_SSBD);
		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
		cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
		/*
		 * The preference is to use SPEC CTRL MSR instead of the
		 * VIRT_SPEC MSR.
		 */
		if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
		    !boot_cpu_has(X86_FEATURE_AMD_SSBD))
			entry->ebx |= F(VIRT_SSBD);
		break;
	}
	case 0x80000019:
		entry->ecx = entry->edx = 0;
		break;
	case 0x8000001a:
		break;
	case 0x8000001d:
		break;
	/*Add support for Centaur's CPUID instruction*/
	case 0xC0000000:
		/*Just support up to 0xC0000004 now*/
		entry->eax = min(entry->eax, 0xC0000004);
		break;
	case 0xC0000001:
		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
		cpuid_mask(&entry->edx, CPUID_C000_0001_EDX);
		break;
	case 3: /* Processor serial number */
	case 5: /* MONITOR/MWAIT */
	case 0xC0000002:
	case 0xC0000003:
	case 0xC0000004:
	default:
		entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
		break;
	}

	kvm_x86_ops->set_supported_cpuid(function, entry);

	r = 0;

out:
	put_cpu();

	return r;
}
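The F() macro used throughout this function (and in Example #24 below) is defined elsewhere in the KVM source; conceptually it maps a feature name to its bit inside the relevant 32-bit CPUID word. A sketch of the idea, not the verbatim definition:

static inline u32 bit(int bitno)
{
	return 1u << (bitno & 31);	/* position within the 32-bit CPUID word */
}

#define F(x) bit(X86_FEATURE_##x)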
Example #8
void PlatformDisableSchedulerInterrupts(void)
{
	get_cpu();
}
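This routine uses get_cpu() purely for its preemption-disabling side effect and ignores the returned CPU number. The matching enable routine would presumably just drop the preemption count again; a sketch (the function name mirrors the one above and is an assumption):

void PlatformEnableSchedulerInterrupts(void)
{
	put_cpu();	/* re-enable preemption taken by get_cpu() above */
}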
Example #9
static unsigned long fill_arg(struct syscallrecord *rec, unsigned int argnum)
{
	struct syscallentry *entry;
	unsigned int call;
	enum argtype argtype;

	call = rec->nr;
	entry = syscalls[call].entry;

	if (argnum > entry->num_args)
		return 0;

	argtype = get_argtype(entry, argnum);

	switch (argtype) {
	case ARG_UNDEFINED:
		if (RAND_BOOL())
			return (unsigned long) rand64();
		return (unsigned long) get_writable_address(page_size);

	case ARG_FD:
		if (RAND_BOOL()) {
			unsigned int i;
			/* If this is the 2nd or more ARG_FD, make it unique */
			for (i = 0; i < argnum; i++) {
				enum argtype arg;
				arg = get_argtype(entry, i);
				if (arg == ARG_FD)
					return get_new_random_fd();
			}
		}
		return get_random_fd();

	case ARG_LEN:
		return (unsigned long) get_len();

	case ARG_ADDRESS:
		return handle_arg_address(rec, argnum);

	case ARG_NON_NULL_ADDRESS:
		return (unsigned long) get_non_null_address();

	case ARG_MMAP:
		return (unsigned long) get_map();

	case ARG_PID:
		return (unsigned long) get_pid();

	case ARG_RANGE:
		return handle_arg_range(entry, argnum);

	case ARG_OP:	/* Like ARG_LIST, but just a single value. */
		return handle_arg_op(entry, argnum);

	case ARG_LIST:
		return handle_arg_list(entry, argnum);

	case ARG_CPU:
		return (unsigned long) get_cpu();

	case ARG_PATHNAME:
		return (unsigned long) generate_pathname();

	case ARG_IOVEC:
		return handle_arg_iovec(entry, rec, argnum);

	case ARG_IOVECLEN:
	case ARG_SOCKADDRLEN:
		/* We already set the len in the ARG_IOVEC/ARG_SOCKADDR case
		 * So here we just return what we had set there. */
		return get_argval(rec, argnum);

	case ARG_SOCKADDR:
		return handle_arg_sockaddr(entry, rec, argnum);

	case ARG_MODE_T:
		return handle_arg_mode_t();

	case ARG_SOCKETINFO:
		return (unsigned long) get_rand_socketinfo();
	}

	BUG("unreachable!\n");
}
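In this fuzzer, get_cpu() is a user-space helper that generates a CPU-number argument for the syscall under test; it has nothing to do with the kernel primitive of the same name. A minimal illustration of such a helper, which assumes nothing about the real implementation:

#include <stdlib.h>
#include <unistd.h>

/* Illustrative only: pick a plausible cpu argument, occasionally returning
 * bogus values on purpose to exercise kernel error paths.
 */
static long pick_cpu_arg(void)
{
	long ncpus = sysconf(_SC_NPROCESSORS_ONLN);

	switch (rand() % 4) {
	case 0:  return -1;			/* deliberately invalid */
	case 1:  return rand();			/* wildly out of range */
	default: return rand() % (ncpus > 0 ? ncpus : 1);
	}
}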
Example #10
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE returned if one of the following conditions is true.
 * i) When the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr7, dr6;
	unsigned long *dr6_p;

	/* The DR6 value is pointed by args->err */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	get_debugreg(dr7, 7);
	/* Disable breakpoints during exception handling */
	set_debugreg(0UL, 7);
	/*
	 * Assert that local interrupts are disabled
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		/*
		 * Set up resume flag to avoid breakpoint recursion when
		 * returning back to origin.
		 */
		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
			args->regs->flags |= X86_EFLAGS_RF;

		rcu_read_unlock();
	}
	/*
	 * Further processing in do_debug() is needed for a) user-space
	 * breakpoints (to generate signals) and b) when the system has
	 * taken exception due to multiple causes
	 */
	if ((current->thread.debugreg6 & DR_TRAP_BITS) ||
	    (dr6 & (~DR_TRAP_BITS)))
		rc = NOTIFY_DONE;

	set_debugreg(dr7, 7);
	put_cpu();

	return rc;
}
Example #11
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
	int cpu, i, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned int cmf, resume_mask;

	/*
	 * Do an early return if none of the channels triggered.
	 */
	cmf = sh_ubc->triggered_mask();
	if (unlikely(!cmf))
		return NOTIFY_DONE;

	/*
	 * By default, resume all of the active channels.
	 */
	resume_mask = sh_ubc->active_mask();

	/*
	 * Disable breakpoints during exception handling.
	 */
	sh_ubc->disable_all();

	cpu = get_cpu();
	for (i = 0; i < sh_ubc->num_events; i++) {
		unsigned long event_mask = (1 << i);

		if (likely(!(cmf & event_mask)))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		if (bp)
			rc = NOTIFY_DONE;

		/*
		 * Reset the condition match flag to denote completion of
		 * exception handling.
		 */
		sh_ubc->clear_triggered_mask(event_mask);

		/*
		 * bp can be NULL due to concurrent perf counter
		 * removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		/*
		 * Don't restore the channel if the breakpoint is from
		 * ptrace, as it always operates in one-shot mode.
		 */
		if (bp->overflow_handler == ptrace_triggered)
			resume_mask &= ~(1 << i);

		perf_bp_event(bp, args->regs);

		/* Deliver the signal to userspace */
		if (arch_check_va_in_userspace(bp->attr.bp_addr,
					       bp->attr.bp_len)) {
			siginfo_t info;

			info.si_signo = args->signr;
			info.si_errno = notifier_to_errno(rc);
			info.si_code = TRAP_HWBKPT;

			force_sig_info(args->signr, &info, current);
		}

		rcu_read_unlock();
	}

	if (cmf == 0)
		rc = NOTIFY_DONE;

	sh_ubc->enable_all(resume_mask);

	put_cpu();

	return rc;
}
Example #12
/**
 * information about the cache: level, associativity...
 */
int generic_cache_info(int cpu, int id, char* output, size_t len)
{
    char tmp[_HW_DETECT_MAX_OUTPUT], tmp2[_HW_DETECT_MAX_OUTPUT];
    char tmppath[_HW_DETECT_MAX_OUTPUT];
    struct stat statbuf;

    if (cpu == -1) cpu = get_cpu();
    if (cpu == -1) return -1;

    snprintf(path,sizeof(path), "/sys/devices/system/cpu/cpu%i/cache/index%i/", cpu, id);
    memset(output, 0, len);
    if(stat(path, &statbuf)) //path doesn't exist
        return -1;

    strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "level", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);

    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        snprintf(tmp2,_HW_DETECT_MAX_OUTPUT-1, "Level %s", tmp);
        strncat(output, tmp2, (len-strlen(output))-1);
    }

    strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "type", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);
    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        if(!strcmp(tmp, "Unified")) {
            strncpy(tmp2, output,_HW_DETECT_MAX_OUTPUT-1);
            snprintf(output, len, "%s ", tmp);
            strncat(output, tmp2, (len-strlen(output))-1);
        }
        else {
            strncat(output, " ", (len-strlen(output))-1);
            strncat(output, tmp, (len-strlen(output))-1);
        }
    }
    strncat(output, " Cache,", (len-strlen(output))-1);

    strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "size", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);
    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        strncat(output, " ", (len-strlen(output))-1);
        strncat(output, tmp, (len-strlen(output))-1);
    }

    strncpy(tmppath, path, _HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "ways_of_associativity", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);
    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        strncat(output, ", ", (len-strlen(output))-1);
        strncat(output, tmp, (len-strlen(output))-1);
        strncat(output, "-way set associative", (len-strlen(output))-1);
    }

    strncpy(tmppath, path,_HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "coherency_line_size", (_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);
    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        strncat(output, ", ", (len-strlen(output))-1);
        strncat(output, tmp, (len-strlen(output))-1);
        strncat(output, " Byte cachelines", (len-strlen(output))-1);
    }

    strncpy(tmppath, path,_HW_DETECT_MAX_OUTPUT);
    strncat(tmppath, "shared_cpu_map",(_HW_DETECT_MAX_OUTPUT-strlen(tmppath))-1);
    if(read_file(tmppath, tmp, _HW_DETECT_MAX_OUTPUT)) {
        cpu_map_to_list(tmp, tmp2, _HW_DETECT_MAX_OUTPUT);
        snprintf(tmppath,_HW_DETECT_MAX_OUTPUT, "cpu%i ", cpu);
        if(!strcmp(tmp2, tmppath))
        {
            strncat(output, ", exclusive for ", (len-strlen(output))-1);
            strncat(output, tmppath, (len-strlen(output))-1);
        }
        else
        {
            strncat(output, ", shared among ", (len-strlen(output))-1);
            strncat(output, tmp2, (len-strlen(output))-1);
        }
    }
    return 0;
}
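A hypothetical caller of the routine above, asking for a description of sysfs cache index 0 on the current CPU; the buffer size is arbitrary and the program assumes it is linked with the file that defines generic_cache_info():

#include <stdio.h>

int generic_cache_info(int cpu, int id, char *output, size_t len);	/* shown above */

int main(void)
{
	char info[512];

	/* cpu == -1 means "detect the current CPU"; id 0 is usually the L1 data cache */
	if (generic_cache_info(-1, 0, info, sizeof(info)) == 0)
		printf("%s\n", info);
	return 0;
}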
Example #13
/*
 * Handle debug exception notifications.
 *
 * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
 *
 * NOTIFY_DONE returned if one of the following conditions is true.
 * i) When the causative address is from user-space and the exception
 * is a valid one, i.e. not triggered as a result of lazy debug register
 * switching
 * ii) When there are more bits than trap<n> set in DR6 register (such
 * as BD, BS or BT) indicating that more than one debug condition is
 * met and requires some more action in do_debug().
 *
 * NOTIFY_STOP returned for all other cases
 *
 */
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
	int i, cpu, rc = NOTIFY_STOP;
	struct perf_event *bp;
	unsigned long dr7, dr6;
	unsigned long *dr6_p;

	/* The DR6 value is pointed by args->err */
	dr6_p = (unsigned long *)ERR_PTR(args->err);
	dr6 = *dr6_p;

	/* If it's a single step, TRAP bits are random */
	if (dr6 & DR_STEP)
		return NOTIFY_DONE;

	/* Do an early return if no trap bits are set in DR6 */
	if ((dr6 & DR_TRAP_BITS) == 0)
		return NOTIFY_DONE;

	get_debugreg(dr7, 7);
	/* Disable breakpoints during exception handling */
	set_debugreg(0UL, 7);
	/*
	 * Assert that local interrupts are disabled
	 * Reset the DRn bits in the virtualized register value.
	 * The ptrace trigger routine will add in whatever is needed.
	 */
	current->thread.debugreg6 &= ~DR_TRAP_BITS;
	cpu = get_cpu();

	/* Handle all the breakpoints that were triggered */
	for (i = 0; i < HBP_NUM; ++i) {
		if (likely(!(dr6 & (DR_TRAP0 << i))))
			continue;

		/*
		 * The counter may be concurrently released but that can only
		 * occur from a call_rcu() path. We can then safely fetch
		 * the breakpoint, use its callback, touch its counter
		 * while we are in an rcu_read_lock() path.
		 */
		rcu_read_lock();

		bp = per_cpu(bp_per_reg[i], cpu);
		if (bp)
			rc = NOTIFY_DONE;
		/*
		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
		 * exception handling
		 */
		(*dr6_p) &= ~(DR_TRAP0 << i);
		/*
		 * bp can be NULL due to lazy debug register switching
		 * or due to concurrent perf counter removing.
		 */
		if (!bp) {
			rcu_read_unlock();
			break;
		}

		perf_bp_event(bp, args->regs);

		rcu_read_unlock();
	}
	if (dr6 & (~DR_TRAP_BITS))
		rc = NOTIFY_DONE;

	set_debugreg(dr7, 7);
	put_cpu();

	return rc;
}
Example #14
int vmadump_restore_cpu(cr_rstrt_proc_req_t *ctx, struct file *file,
			struct pt_regs *regs) {
    struct vmadump_restore_tmps *x86tmps;
    struct thread_struct *threadtmp;
    struct pt_regs *regtmp;
    int r;
    int idx, i, cpu;
    uint16_t fsindex, gsindex;
#if HAVE_STRUCT_N_DESC_STRUCT
    struct n_desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
#else
    struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
#endif

    /* XXX: Note allocation assumes i387tmp and threadtmp are never active at the same time */
    x86tmps = kmalloc(sizeof(*x86tmps), GFP_KERNEL);
    if (!x86tmps) return -ENOMEM;
    regtmp = VMAD_REGTMP(x86tmps);
    threadtmp = VMAD_THREADTMP(x86tmps);

    r = read_kern(ctx, file, regtmp, sizeof(*regtmp));
    if (r != sizeof(*regtmp)) goto bad_read;

    /* Don't let the user pick funky segments */
    if ((regtmp->cs != __USER_CS && regtmp->cs != __USER32_CS) &&
	(regtmp->ss != __USER_DS && regtmp->ss != __USER32_DS)) {
	r = -EINVAL;
	goto bad_read;
    }

    /* Set our process type */
    if (regtmp->cs == __USER32_CS)
	set_thread_flag(TIF_IA32);
    else
	clear_thread_flag(TIF_IA32);	

    /* Only restore bottom 9 bits of eflags.  Restoring anything else
     * is bad bad mojo for security. (0x200 = interrupt enable) */
#if HAVE_PT_REGS_EFLAGS
    regtmp->eflags = 0x200 | (regtmp->eflags & 0x000000FF);
#elif HAVE_PT_REGS_FLAGS
    regtmp->flags = 0x200 | (regtmp->flags & 0x000000FF);
#else
    #error
#endif
    memcpy(regs, regtmp, sizeof(*regtmp));

    /* Restore FPU info (and later general "extended state") */
    r = vmadump_restore_i387(ctx, file, VMAD_I387TMP(x86tmps));
    if (r < 0) goto bad_read;
	
    /* XXX FIX ME: RESTORE DEBUG INFORMATION ?? */
    /* Here we read it but ignore it. */
    r = vmadump_restore_debugreg(ctx, file);
    if (r < 0) goto bad_read;

    /* user(r)sp, since we don't use the ptrace entry path in BLCR */
#if HAVE_THREAD_USERSP
    r = read_kern(ctx, file, &threadtmp->usersp, sizeof(threadtmp->usersp));
    if (r != sizeof(threadtmp->usersp)) goto bad_read;
    current->thread.usersp = threadtmp->usersp;
    vmad_write_oldrsp(threadtmp->usersp);
#elif HAVE_THREAD_USERRSP
    r = read_kern(ctx, file, &threadtmp->userrsp, sizeof(threadtmp->userrsp));
    if (r != sizeof(threadtmp->userrsp)) goto bad_read;
    current->thread.userrsp = threadtmp->userrsp;
    vmad_write_oldrsp(threadtmp->userrsp);
#else
    #error
#endif

    /*-- restore segmentation related stuff */

    /* Restore FS_BASE MSR */
    r = read_kern(ctx, file, &threadtmp->fs, sizeof(threadtmp->fs));
    if (r != sizeof(threadtmp->fs)) goto bad_read;
    if (threadtmp->fs >= TASK_SIZE) {
	r = -EINVAL;
	goto bad_read;
    }
    current->thread.fs = threadtmp->fs;
    if ((r = checking_wrmsrl(MSR_FS_BASE, threadtmp->fs)))
	goto bad_read;
	
    /* Restore GS_KERNEL_BASE MSR */
    r = read_kern(ctx, file, &threadtmp->gs, sizeof(threadtmp->gs));
    if (r != sizeof(threadtmp->gs)) goto bad_read;
    if (threadtmp->gs >= TASK_SIZE) {
	r = -EINVAL;
	goto bad_read;
    }
    current->thread.gs = threadtmp->gs;
    if ((r = checking_wrmsrl(MSR_KERNEL_GS_BASE, threadtmp->gs)))
	goto bad_read;

    /* Restore 32 bit segment stuff */
    r = read_kern(ctx, file, &fsindex, sizeof(fsindex));
    if (r != sizeof(fsindex)) goto bad_read;

    r = read_kern(ctx, file, &gsindex, sizeof(gsindex));
    if (r != sizeof(gsindex)) goto bad_read;

    r = read_kern(ctx, file, tls_array, sizeof(tls_array));
    if (r != sizeof(tls_array)) goto bad_read;

    /* Sanitize fs, gs.  These segment descriptors should load one
     * of the TLS entries and have DPL = 3.  If somebody is doing
     * some other LDT monkey business, I'm currently not
     * supporting that here.  Also, I'm presuming that the offset
     * to GDT_ENTRY_TLS_MIN is the same in both kernels. */
    idx = fsindex >> 3;
    if (idx<GDT_ENTRY_TLS_MIN || idx>GDT_ENTRY_TLS_MAX || (fsindex&7) != 3)
	fsindex = 0;
    idx = gsindex >> 3;
    if (idx<GDT_ENTRY_TLS_MIN || idx>GDT_ENTRY_TLS_MAX || (gsindex&7) != 3)
	gsindex = 0;

    /* Sanitize the TLS entries...
     * Make sure the following bits are set/not set:
     *  bit 12   : S    =  1    (code/data - not system)
     *  bit 13-14: DPL  = 11    (priv level = 3 (user))
     *  bit 21   :      =  0    (reserved)
     *
     * If the entry isn't valid, zero the whole descriptor.
     */
    for (i=0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
	if (tls_array[i].b != 0 && 
	    (tls_array[i].b & 0x00207000) != 0x00007000) {
	    r = -EINVAL;
	    goto bad_read;
	}
    }

    /* Ok load this crap */
    cpu = get_cpu();	/* load_TLS can't get pre-empted. */
    memcpy(current->thread.tls_array, tls_array,
	   sizeof(current->thread.tls_array));
    current->thread.fsindex = fsindex;
    current->thread.gsindex = gsindex;
    load_TLS(&current->thread, cpu);

    loadsegment(fs, current->thread.fsindex);
    load_gs_index(current->thread.gsindex);
    put_cpu();

    /* In case cr_restart and child don't have same ABI */
    if (regtmp->cs == __USER32_CS) {
	loadsegment(ds, __USER32_DS);
	loadsegment(es, __USER32_DS);
    } else {
	loadsegment(ds, __USER_DS);
	loadsegment(es, __USER_DS);
    }

#if HAVE_THREAD_INFO_SYSENTER_RETURN
    {
	void *sysenter_return;
	r = read_kern(ctx, file, &sysenter_return, sizeof(sysenter_return));
	if (r != sizeof(sysenter_return)) goto bad_read;
	current_thread_info()->sysenter_return = sysenter_return;
    }
#endif
    
    kfree(x86tmps);
    return 0;

 bad_read:
    kfree(x86tmps);
    if (r >= 0) r = -EIO;
    return r;
}
Example #15
void open_random_event(int mmap_enabled, int overflow_enabled) {

	int fd;

	int i,trinity_type;

	i=find_empty_event();

	/* return if no free events */
	if (i<0) return;

	event_data[i].overflows=0;
	event_data[i].throttles=0;


	/* repeat until we create a valid event */
	while(1) {
		/* call trinity random perf_event_open() code */
		//generic_sanitise(0);
		trinity_type=syscall_perf_event_open.sanitise(&shm->syscall[0]);

		memcpy(&event_data[i].attr,
			(struct perf_event_attr *)shm->syscall[0].a1,
			sizeof(struct perf_event_attr));
		event_data[i].pid=shm->syscall[0].a2;
		event_data[i].cpu=get_cpu();
		event_data[i].group_fd=shm->syscall[0].a4;
		event_data[i].flags=shm->syscall[0].a5;

		post_perf_event_open(&shm->syscall[0]);

		/* Randomly make part of a group 1/4 of the time */
		if (rand()%4==2) {
			int j;
			j=find_random_active_event();

			/* is it a master? */
			/* can we set a group leader that isn't itself */
			/* a leader? */
//			if (event_data[j].group_fd==-1) {
			event_data[i].group_fd=event_data[j].fd;
//			}

		}

		/* Randomly try to use a kprobe */
		if (event_data[i].attr.type==PERF_TYPE_TRACEPOINT) {
			if (rand()%10==5) {
				event_data[i].attr.config=kprobe_id;
			}
		}


		if (ignore_but_dont_skip.open) return;

		/* Debugging code */
		/* We don't usually log failed opens as there are so many */

		if (logging&TYPE_OPEN) {
#if LOG_FAILURES
			if (trigger_failure_logging) {
				/* uncomment if failing opens causing crashes */
//				static int quit_next=0;
//				if (event_data[i].attr.type==PERF_TYPE_TRACEPOINT) {
				sprintf(log_buffer,"# O -1 %d %d %d %lx ",
					event_data[i].pid,
					event_data[i].cpu,
					event_data[i].group_fd,
					event_data[i].flags);
				write(log_fd,log_buffer,strlen(log_buffer));
	                	perf_log_attr(&event_data[i].attr);
//				fsync(log_fd);
//				}
//				if (quit_next==1) exit(1);

//				if (quit_next) quit_next--;

//				if ((event_data[i].attr.read_format==0x2d2d2d))
//				quit_next=2;

			}
#endif
	        }

		/* Actually try to open the event */
		fd=perf_event_open(
			&event_data[i].attr,
			event_data[i].pid,
			event_data[i].cpu,
			event_data[i].group_fd,
			event_data[i].flags);
		stats.open_attempts++;
		stats.total_syscalls++;

		int which_type=event_data[i].attr.type;

		if ((which_type<0) || (which_type>MAX_OPEN_TYPE-1)) {
			which_type=MAX_OPEN_TYPE-1;
		}

		/* If we succeed, break out of the infinite loop */
		if (fd>0) {
			stats.open_type_success[which_type]++;
			stats.open_trinity_type_success[trinity_type]++;
			break;
		}
#if 0
		/* Track source of UNKNOWN errnos */
		if (errno==16) {
			printf("Event t=%d c=%llx pid=%d cpu=%d %s\n",
				event_data[i].attr.type,
				event_data[i].attr.config,
				event_data[i].pid,
				event_data[i].cpu,
				strerror(errno));
		}
#endif

		/* Otherwise, track the errors */
		if (errno<MAX_ERRNOS) {
			stats.open_errno_count[errno]++;
			stats.open_type_fail[which_type]++;
			stats.open_trinity_type_fail[trinity_type]++;
		}

		/* no more file descriptors, so give up */
		if (errno==EMFILE) return;

	}

	/* We successfully opened an event! */

	stats.open_successful++;
	stats.current_open++;

	if (logging&TYPE_OPEN) {
		sprintf(log_buffer,"O %d %d %d %d %lx ",
				fd,
				event_data[i].pid,
				event_data[i].cpu,
				event_data[i].group_fd,
				event_data[i].flags);
		write(log_fd,log_buffer,strlen(log_buffer));
		perf_log_attr(&event_data[i].attr);
#if FSYNC_EVERY
		fsync(log_fd);
#endif
	}

	event_data[i].fd=fd;
	event_data[i].active=1;
	active_events++;

	/* if we are a member of a group, update the size of the group */
	/* this is needed for calculating the "proper" read size */
	/* Also I don't think we adjust this on close */
	if (event_data[i].group_fd!=-1) {
		int j=lookup_event(event_data[i].group_fd);

		event_data[j].number_in_group++;
		event_data[j].read_size=update_read_size(j);
	}

	/* Setup mmap buffer */

	if (mmap_enabled) {
		setup_mmap(i);
	}

	/* Setup overflow 50% of the time */
	if ((overflow_enabled) && (rand()%2)) {

	if (!ignore_but_dont_skip.overflow) {

		int fcntl_result;

		if (logging&TYPE_OVERFLOW) {
			sprintf(log_buffer,"o %d\n",event_data[i].fd);
			write(log_fd,log_buffer,strlen(log_buffer));
		}

		memset(&event_data[i].sa, 0, sizeof(struct sigaction));
		event_data[i].sa.sa_sigaction = our_handler;
		event_data[i].sa.sa_flags = SA_SIGINFO;

		if (sigaction( SIGRTMIN+2, &event_data[i].sa, NULL) < 0) {
			printf("Error setting up signal handler\n");
     		}

		fcntl_result=fcntl(event_data[i].fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
		if (fcntl_result<0) fprintf(stderr,"F1 error!\n");
		fcntl_result=fcntl(event_data[i].fd, F_SETSIG, SIGRTMIN+2);
		if (fcntl_result<0) fprintf(stderr,"F1 error!\n");
		fcntl_result=fcntl(event_data[i].fd, F_SETOWN,getpid());
		if (fcntl_result<0) fprintf(stderr,"F1 error!\n");
	}

	}

	event_data[i].number_in_group=1;

	event_data[i].read_size=update_read_size(i);

}
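perf_event_open() has no glibc wrapper, so harnesses like this one normally provide their own thin syscall wrapper; a typical sketch, matching the signature documented in the perf_event_open(2) man page:

#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}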
Example #16
File: qmp.c Project: 8tab/qemu
TraceEventInfoList *qmp_trace_event_get_state(const char *name,
                                              bool has_vcpu, int64_t vcpu,
                                              Error **errp)
{
    Error *err = NULL;
    TraceEventInfoList *events = NULL;
    TraceEventIter iter;
    TraceEvent *ev;
    bool is_pattern = trace_event_is_pattern(name);
    CPUState *cpu;

    /* Check provided vcpu */
    cpu = get_cpu(has_vcpu, vcpu, &err);
    if (err) {
        error_propagate(errp, err);
        return NULL;
    }

    /* Check events */
    if (!check_events(has_vcpu, true, is_pattern, name, errp)) {
        return NULL;
    }

    /* Get states (all errors checked above) */
    trace_event_iter_init(&iter, name);
    while ((ev = trace_event_iter_next(&iter)) != NULL) {
        TraceEventInfoList *elem;
        bool is_vcpu = trace_event_is_vcpu(ev);
        if (has_vcpu && !is_vcpu) {
            continue;
        }

        elem = g_new(TraceEventInfoList, 1);
        elem->value = g_new(TraceEventInfo, 1);
        elem->value->vcpu = is_vcpu;
        elem->value->name = g_strdup(trace_event_get_name(ev));

        if (!trace_event_get_state_static(ev)) {
            elem->value->state = TRACE_EVENT_STATE_UNAVAILABLE;
        } else {
            if (has_vcpu) {
                if (is_vcpu) {
                    if (trace_event_get_vcpu_state_dynamic(cpu, ev)) {
                        elem->value->state = TRACE_EVENT_STATE_ENABLED;
                    } else {
                        elem->value->state = TRACE_EVENT_STATE_DISABLED;
                    }
                }
                /* else: already skipped above */
            } else {
                if (trace_event_get_state_dynamic(ev)) {
                    elem->value->state = TRACE_EVENT_STATE_ENABLED;
                } else {
                    elem->value->state = TRACE_EVENT_STATE_DISABLED;
                }
            }
        }
        elem->next = events;
        events = elem;
    }

    return events;
}
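Here get_cpu() is a static helper local to this QEMU file, not the kernel API: it validates the optional vcpu index and returns the matching CPUState. An approximation of that helper, assuming QEMU's qemu_get_cpu() and error_setg(); this is a sketch, not the exact upstream code:

static CPUState *get_cpu(bool has_vcpu, int64_t vcpu, Error **errp)
{
    if (has_vcpu) {
        CPUState *cpu = qemu_get_cpu(vcpu);   /* look up the vCPU by index */
        if (!cpu) {
            error_setg(errp, "invalid vCPU index %" PRId64, vcpu);
        }
        return cpu;
    }
    return NULL;   /* no vCPU specified: the caller queries global state */
}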
Example #17
/**
 * ixgbe_fcoe_ddp_setup - called to set up ddp context
 * @netdev: the corresponding net_device
 * @xid: the exchange id requesting ddp
 * @sgl: the scatter-gather list for this request
 * @sgc: the number of scatter-gather items
 * @target_mode: 1 to set up DDP in target mode, 0 otherwise
 *
 * Returns : 1 for success and 0 for no ddp
 */
static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
				struct scatterlist *sgl, unsigned int sgc,
				int target_mode)
{
	struct ixgbe_adapter *adapter;
	struct ixgbe_hw *hw;
	struct ixgbe_fcoe *fcoe;
	struct ixgbe_fcoe_ddp *ddp;
	struct scatterlist *sg;
	unsigned int i, j, dmacount;
	unsigned int len;
	static const unsigned int bufflen = IXGBE_FCBUFF_MIN;
	unsigned int firstoff = 0;
	unsigned int lastsize;
	unsigned int thisoff = 0;
	unsigned int thislen = 0;
	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
	dma_addr_t addr = 0;
	struct pci_pool *pool;
	unsigned int cpu;

	if (!netdev || !sgl)
		return 0;

	adapter = netdev_priv(netdev);
	if (xid >= IXGBE_FCOE_DDP_MAX) {
		e_warn(drv, "xid=0x%x out-of-range\n", xid);
		return 0;
	}

	/* no DDP if we are already down or resetting */
	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
	    test_bit(__IXGBE_RESETTING, &adapter->state))
		return 0;

	fcoe = &adapter->fcoe;
	if (!fcoe->pool) {
		e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid);
		return 0;
	}

	ddp = &fcoe->ddp[xid];
	if (ddp->sgl) {
		e_err(drv, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
		      xid, ddp->sgl, ddp->sgc);
		return 0;
	}
	ixgbe_fcoe_clear_ddp(ddp);

	/* setup dma from scsi command sgl */
	dmacount = pci_map_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE);
	if (dmacount == 0) {
		e_err(drv, "xid 0x%x DMA map error\n", xid);
		return 0;
	}

	/* alloc the udl from per cpu ddp pool */
	cpu = get_cpu();
	pool = *per_cpu_ptr(fcoe->pool, cpu);
	ddp->udl = pci_pool_alloc(pool, GFP_ATOMIC, &ddp->udp);
	if (!ddp->udl) {
		e_err(drv, "failed allocated ddp context\n");
		goto out_noddp_unmap;
	}
	ddp->pool = pool;
	ddp->sgl = sgl;
	ddp->sgc = sgc;

	j = 0;
	for_each_sg(sgl, sg, dmacount, i) {
		addr = sg_dma_address(sg);
		len = sg_dma_len(sg);
		while (len) {
			/* max number of buffers allowed in one DDP context */
			if (j >= IXGBE_BUFFCNT_MAX) {
				*per_cpu_ptr(fcoe->pcpu_noddp, cpu) += 1;
				goto out_noddp_free;
			}

			/* get the offset of length of current buffer */
			thisoff = addr & ((dma_addr_t)bufflen - 1);
			thislen = min((bufflen - thisoff), len);
			/*
			 * all but the 1st buffer (j == 0)
			 * must be aligned on bufflen
			 */
			if ((j != 0) && (thisoff))
				goto out_noddp_free;
			/*
			 * all but the last buffer
			 * ((i == (dmacount - 1)) && (thislen == len))
			 * must end at bufflen
			 */
			if (((i != (dmacount - 1)) || (thislen != len))
			    && ((thislen + thisoff) != bufflen))
				goto out_noddp_free;

			ddp->udl[j] = (u64)(addr - thisoff);
			/* only the first buffer may have a non-zero offset */
			if (j == 0)
				firstoff = thisoff;
			len -= thislen;
			addr += thislen;
			j++;
		}
	}
Example #18
static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
{
	struct ipcomp_data *ipcd = x->data;
	const int plen = skb->len;
	int dlen = IPCOMP_SCRATCH_SIZE;
	const u8 *start = skb->data;
	const int cpu = get_cpu();
	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
	int len;

	if (err)
		goto out;

	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
		err = -EINVAL;
		goto out;
	}

	len = dlen - plen;
	if (len > skb_tailroom(skb))
		len = skb_tailroom(skb);

	__skb_put(skb, len);

	len += plen;
	skb_copy_to_linear_data(skb, scratch, len);

	while ((scratch += len, dlen -= len) > 0) {
		skb_frag_t *frag;
		struct page *page;

		err = -EMSGSIZE;
		if (WARN_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS))
			goto out;

		frag = skb_shinfo(skb)->frags + skb_shinfo(skb)->nr_frags;
		page = alloc_page(GFP_ATOMIC);

		err = -ENOMEM;
		if (!page)
			goto out;

		__skb_frag_set_page(frag, page);

		len = PAGE_SIZE;
		if (dlen < len)
			len = dlen;

		frag->page_offset = 0;
		skb_frag_size_set(frag, len);
		memcpy(skb_frag_address(frag), scratch, len);

		skb->truesize += len;
		skb->data_len += len;
		skb->len += len;

		skb_shinfo(skb)->nr_frags++;
	}

	err = 0;

out:
	put_cpu();
	return err;
}
Example #19
dotraplinkage void __kprobes
do_general_protection(struct pt_regs *regs, long error_code)
{
	struct task_struct *tsk;

	conditional_sti(regs);

#ifdef CONFIG_X86_32
	if (regs->flags & X86_VM_MASK)
		goto gp_in_vm86;
#endif

	tsk = current;
	if (!user_mode(regs))
		goto gp_in_kernel;

#ifdef CONFIG_X86_32
{
	int cpu;
	int ok;

	cpu = get_cpu();
	ok = check_lazy_exec_limit(cpu, regs, error_code);
	put_cpu();

	if (ok)
		return;

	if (print_fatal_signals) {
		printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
			error_code, error_code/8, regs->ip, smp_processor_id());
		printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
			current->mm->context.exec_limit,
			current->mm->context.user_cs.a,
			current->mm->context.user_cs.b);
	}
}
#endif /*CONFIG_X86_32*/

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;

	if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
			printk_ratelimit()) {
		printk(KERN_INFO
			"%s[%d] general protection ip:%lx sp:%lx error:%lx",
			tsk->comm, task_pid_nr(tsk),
			regs->ip, regs->sp, error_code);
		print_vma_addr(" in ", regs->ip);
		printk("\n");
	}

	force_sig(SIGSEGV, tsk);
	return;

#ifdef CONFIG_X86_32
gp_in_vm86:
	local_irq_enable();
	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
	return;
#endif

gp_in_kernel:
	if (fixup_exception(regs))
		return;

	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 13;
	if (notify_die(DIE_GPF, "general protection fault", regs,
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
		return;
	die("general protection fault", regs, error_code);
}
Example #20
/*
 *  Ok, this is the main fork-routine.
 *
 * It copies the process, and if successful kick-starts
 * it and waits for it to finish using the VM if required.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	struct task_struct *p;
	int trace = 0;
	long pid;

	if (unlikely(current->ptrace)) {
		trace = fork_traceflag (clone_flags);
		if (trace)
			clone_flags |= CLONE_PTRACE;
	}

	p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	pid = IS_ERR(p) ? PTR_ERR(p) : p->pid;

	if (!IS_ERR(p)) {
		struct completion vfork;

		if (clone_flags & CLONE_VFORK) {
			p->vfork_done = &vfork;
			init_completion(&vfork);
		}

		if ((p->ptrace & PT_PTRACED) || (clone_flags & CLONE_STOPPED)) {
			/*
			 * We'll start up with an immediate SIGSTOP.
			 */
			sigaddset(&p->pending.signal, SIGSTOP);
			set_tsk_thread_flag(p, TIF_SIGPENDING);
		}

		if (!(clone_flags & CLONE_STOPPED)) {
			/*
			 * Do the wakeup last. On SMP we treat fork() and
			 * CLONE_VM separately, because fork() has already
			 * created cache footprint on this CPU (due to
			 * copying the pagetables), hence migration would
			 * probably be costly. Threads on the other hand
			 * have less traction to the current CPU, and if
			 * there's an imbalance then the scheduler can
			 * migrate this fresh thread now, before it
			 * accumulates a larger cache footprint:
			 */
			if (clone_flags & CLONE_VM)
				wake_up_forked_thread(p);
			else
				wake_up_forked_process(p);
		} else {
			int cpu = get_cpu();

			p->state = TASK_STOPPED;
			if (cpu_is_offline(task_cpu(p)))
				set_task_cpu(p, cpu);

			put_cpu();
		}
		++total_forks;

		if (unlikely (trace)) {
			current->ptrace_message = pid;
			ptrace_notify ((trace << 8) | SIGTRAP);
		}

		if (clone_flags & CLONE_VFORK) {
			wait_for_completion(&vfork);
			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
		} else
			/*
			 * Let the child process run first, to avoid most of the
			 * COW overhead when the child exec()s afterwards.
			 */
			set_need_resched();
	}
	return pid;
}
Example #21
void
measure_cpu_start()
{
  cpu_method = PROC_STAT;
  get_cpu(lib_start_count);
}
Example #22
static inline u16 call_pnp_bios(u16 func, u16 arg1, u16 arg2, u16 arg3,
				u16 arg4, u16 arg5, u16 arg6, u16 arg7,
				void *ts1_base, u32 ts1_size,
				void *ts2_base, u32 ts2_size)
{
	unsigned long flags;
	u16 status;
	struct desc_struct save_desc_40;
	int cpu;

	/*
	 * PnP BIOSes are generally not terribly re-entrant.
	 * Also, don't rely on them to save everything correctly.
	 */
	if (pnp_bios_is_utter_crap)
		return PNP_FUNCTION_NOT_SUPPORTED;

	cpu = get_cpu();
	save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;

	/* On some boxes IRQ's during PnP BIOS calls are deadly.  */
	spin_lock_irqsave(&pnp_bios_lock, flags);

	/* The lock prevents us bouncing CPU here */
	if (ts1_size)
		Q2_SET_SEL(smp_processor_id(), PNP_TS1, ts1_base, ts1_size);
	if (ts2_size)
		Q2_SET_SEL(smp_processor_id(), PNP_TS2, ts2_base, ts2_size);

	__asm__ __volatile__("pushl %%ebp\n\t"
			     "pushl %%edi\n\t"
			     "pushl %%esi\n\t"
			     "pushl %%ds\n\t"
			     "pushl %%es\n\t"
			     "pushl %%fs\n\t"
			     "pushl %%gs\n\t"
			     "pushfl\n\t"
			     "movl %%esp, pnp_bios_fault_esp\n\t"
			     "movl $1f, pnp_bios_fault_eip\n\t"
			     "lcall %5,%6\n\t"
			     "1:popfl\n\t"
			     "popl %%gs\n\t"
			     "popl %%fs\n\t"
			     "popl %%es\n\t"
			     "popl %%ds\n\t"
			     "popl %%esi\n\t"
			     "popl %%edi\n\t"
			     "popl %%ebp\n\t":"=a"(status)
			     :"0"((func) | (((u32) arg1) << 16)),
			     "b"((arg2) | (((u32) arg3) << 16)),
			     "c"((arg4) | (((u32) arg5) << 16)),
			     "d"((arg6) | (((u32) arg7) << 16)),
			     "i"(PNP_CS32), "i"(0)
			     :"memory");
	spin_unlock_irqrestore(&pnp_bios_lock, flags);

	get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
	put_cpu();

	/* If we get here and this is set then the PnP BIOS faulted on us. */
	if (pnp_bios_is_utter_crap) {
		printk(KERN_ERR
		       "PnPBIOS: Warning! Your PnP BIOS caused a fatal error. Attempting to continue\n");
		printk(KERN_ERR
		       "PnPBIOS: You may need to reboot with the \"pnpbios=off\" option to operate stably\n");
		printk(KERN_ERR
		       "PnPBIOS: Check with your vendor for an updated BIOS\n");
	}

	return status;
}
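The get_cpu()/put_cpu() pair is what makes the GDT patching above safe: slot 0x40 of this CPU's GDT is overwritten with bad_bios_desc and must be restored on the same CPU, so the thread must not migrate in between. A condensed sketch of that save/patch/restore pattern (the wrapper name is hypothetical):

/* Sketch of the per-CPU GDT save/patch/restore pattern used above;
 * with_patched_bios_desc() is a hypothetical name. */
static void with_patched_bios_desc(void (*call_bios)(void))
{
	struct desc_struct saved;
	int cpu = get_cpu();	/* no migration while the slot is patched */

	saved = get_cpu_gdt_table(cpu)[0x40 / 8];
	get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
	call_bios();
	get_cpu_gdt_table(cpu)[0x40 / 8] = saved;
	put_cpu();
}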
Example #23
void
cpu_start_internal(void)
{
  get_cpu(lib_start_count);
  return;
}
Example #24
static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                        u32 index, int *nent, int maxnent)
{
    int r;
    unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
    unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
                         ? F(GBPAGES) : 0;
    unsigned f_lm = F(LM);
#else
    unsigned f_gbpages = 0;
    unsigned f_lm = 0;
#endif
    unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
    unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;

    /* cpuid 1.edx */
    const u32 kvm_supported_word0_x86_features =
        F(FPU) | F(VME) | F(DE) | F(PSE) |
        F(TSC) | F(MSR) | F(PAE) | F(MCE) |
        F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
        F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
        F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
        0 /* Reserved, DS, ACPI */ | F(MMX) |
        F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
        0 /* HTT, TM, Reserved, PBE */;
    /* cpuid 0x80000001.edx */
    const u32 kvm_supported_word1_x86_features =
        F(FPU) | F(VME) | F(DE) | F(PSE) |
        F(TSC) | F(MSR) | F(PAE) | F(MCE) |
        F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
        F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
        F(PAT) | F(PSE36) | 0 /* Reserved */ |
        f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
        F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
        0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
    /* cpuid 1.ecx */
    const u32 kvm_supported_word4_x86_features =
        F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
        0 /* DS-CPL, VMX, SMX, EST */ |
        0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
        F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
        F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
        F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
        0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
        F(F16C) | F(RDRAND);
    /* cpuid 0x80000001.ecx */
    const u32 kvm_supported_word6_x86_features =
        F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
        F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
        F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
        0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);

    /* cpuid 0xC0000001.edx */
    const u32 kvm_supported_word5_x86_features =
        F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
        F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
        F(PMM) | F(PMM_EN);

    /* cpuid 7.0.ebx */
    const u32 kvm_supported_word9_x86_features =
        F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
        F(BMI2) | F(ERMS) | f_invpcid | F(RTM);

    /* all calls to cpuid_count() should be made on the same cpu */
    get_cpu();

    r = -E2BIG;

    if (*nent >= maxnent)
        goto out;

    do_cpuid_1_ent(entry, function, index);
    ++*nent;

    switch (function) {
    case 0:
        entry->eax = min(entry->eax, (u32)0xd);
        break;
    case 1:
        entry->edx &= kvm_supported_word0_x86_features;
        cpuid_mask(&entry->edx, 0);
        entry->ecx &= kvm_supported_word4_x86_features;
        cpuid_mask(&entry->ecx, 4);
        /* we support x2apic emulation even if host does not support
         * it since we emulate x2apic in software */
        entry->ecx |= F(X2APIC);
        break;
    /* function 2 entries are STATEFUL. That is, repeated cpuid commands
     * may return different values. This forces us to get_cpu() before
     * issuing the first command, and also to emulate this annoying behavior
     * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
    case 2: {
        int t, times = entry->eax & 0xff;

        entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
        entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
        for (t = 1; t < times; ++t) {
            if (*nent >= maxnent)
                goto out;

            do_cpuid_1_ent(&entry[t], function, 0);
            entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
            ++*nent;
        }
        break;
    }
    /* function 4 has additional index. */
    case 4: {
        int i, cache_type;

        entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
        /* read more entries until cache_type is zero */
        for (i = 1; ; ++i) {
            if (*nent >= maxnent)
                goto out;

            cache_type = entry[i - 1].eax & 0x1f;
            if (!cache_type)
                break;
            do_cpuid_1_ent(&entry[i], function, i);
            entry[i].flags |=
                KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
            ++*nent;
        }
        break;
    }
    case 7: {
        entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
        /* Mask ebx against host capability word 9 */
        if (index == 0) {
            entry->ebx &= kvm_supported_word9_x86_features;
            cpuid_mask(&entry->ebx, 9);
            /* TSC_ADJUST is emulated */
            entry->ebx |= F(TSC_ADJUST);
        } else
            entry->ebx = 0;
        entry->eax = 0;
        entry->ecx = 0;
        entry->edx = 0;
        break;
    }
    case 9:
        break;
    case 0xa: { /* Architectural Performance Monitoring */
        struct x86_pmu_capability cap;
        union cpuid10_eax eax;
        union cpuid10_edx edx;

        perf_get_x86_pmu_capability(&cap);

        /*
         * Only support guest architectural pmu on a host
         * with architectural pmu.
         */
        if (!cap.version)
            memset(&cap, 0, sizeof(cap));

        eax.split.version_id = min(cap.version, 2);
        eax.split.num_counters = cap.num_counters_gp;
        eax.split.bit_width = cap.bit_width_gp;
        eax.split.mask_length = cap.events_mask_len;

        edx.split.num_counters_fixed = cap.num_counters_fixed;
        edx.split.bit_width_fixed = cap.bit_width_fixed;
        edx.split.reserved = 0;

        entry->eax = eax.full;
        entry->ebx = cap.events_mask;
        entry->ecx = 0;
        entry->edx = edx.full;
        break;
    }
    /* function 0xb has additional index. */
    case 0xb: {
        int i, level_type;

        entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
        /* read more entries until level_type is zero */
        for (i = 1; ; ++i) {
            if (*nent >= maxnent)
                goto out;

            level_type = entry[i - 1].ecx & 0xff00;
            if (!level_type)
                break;
            do_cpuid_1_ent(&entry[i], function, i);
            entry[i].flags |=
                KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
            ++*nent;
        }
        break;
    }
    case 0xd: {
        int idx, i;

        entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
        for (idx = 1, i = 1; idx < 64; ++idx) {
            if (*nent >= maxnent)
                goto out;

            do_cpuid_1_ent(&entry[i], function, idx);
            if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
                continue;
            entry[i].flags |=
                KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
            ++*nent;
            ++i;
        }
        break;
    }
    case KVM_CPUID_SIGNATURE: {
        static const char signature[12] = "KVMKVMKVM\0\0";
        const u32 *sigptr = (const u32 *)signature;
        entry->eax = KVM_CPUID_FEATURES;
        entry->ebx = sigptr[0];
        entry->ecx = sigptr[1];
        entry->edx = sigptr[2];
        break;
    }
    case KVM_CPUID_FEATURES:
        entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
                     (1 << KVM_FEATURE_NOP_IO_DELAY) |
                     (1 << KVM_FEATURE_CLOCKSOURCE2) |
                     (1 << KVM_FEATURE_ASYNC_PF) |
                     (1 << KVM_FEATURE_PV_EOI) |
                     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);

        if (sched_info_on())
            entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

        entry->ebx = 0;
        entry->ecx = 0;
        entry->edx = 0;
        break;
    case 0x80000000:
        entry->eax = min(entry->eax, 0x8000001a);
        break;
    case 0x80000001:
        entry->edx &= kvm_supported_word1_x86_features;
        cpuid_mask(&entry->edx, 1);
        entry->ecx &= kvm_supported_word6_x86_features;
        cpuid_mask(&entry->ecx, 6);
        break;
    case 0x80000008: {
        unsigned g_phys_as = (entry->eax >> 16) & 0xff;
        unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
        unsigned phys_as = entry->eax & 0xff;

        if (!g_phys_as)
            g_phys_as = phys_as;
        entry->eax = g_phys_as | (virt_as << 8);
        entry->ebx = entry->edx = 0;
        break;
    }
    case 0x80000019:
        entry->ecx = entry->edx = 0;
        break;
    case 0x8000001a:
        break;
    case 0x8000001d:
        break;
    /* Add support for Centaur's CPUID instruction */
    case 0xC0000000:
        /* Just support up to 0xC0000004 now */
        entry->eax = min(entry->eax, 0xC0000004);
        break;
    case 0xC0000001:
        entry->edx &= kvm_supported_word5_x86_features;
        cpuid_mask(&entry->edx, 5);
        break;
    case 3: /* Processor serial number */
    case 5: /* MONITOR/MWAIT */
    case 6: /* Thermal management */
    case 0x80000007: /* Advanced power management */
    case 0xC0000002:
    case 0xC0000003:
    case 0xC0000004:
    default:
        entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
        break;
    }

    kvm_x86_ops->set_supported_cpuid(function, entry);

    r = 0;

out:
    put_cpu();

    return r;
}
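Throughout this example, F() is the feature-bit shorthand that KVM's cpuid code defines near the top of the file; the definition is not shown on this page, so the following is an assumption included for readability:

/* Assumed definition of the F() shorthand used above (KVM convention):
 * expand a feature name to its bit in the corresponding feature word. */
#define F(x) bit(X86_FEATURE_##x)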
Example #25
static unsigned long fill_arg(struct syscallrecord *rec, unsigned int argnum)
{
	struct syscallentry *entry;
	unsigned int call;
	enum argtype argtype;

	call = rec->nr;
	entry = syscalls[call].entry;

	if (argnum > entry->num_args)
		return 0;

	argtype = get_argtype(entry, argnum);

	switch (argtype) {
	case ARG_UNDEFINED:
		return (unsigned long) rand64();

	case ARG_FD:
		return get_random_fd();

	case ARG_LEN:
		return (unsigned long) get_len();

	case ARG_ADDRESS:
		return handle_arg_address(rec, argnum);

	case ARG_NON_NULL_ADDRESS:
		return (unsigned long) get_non_null_address();

	case ARG_MMAP:
		return (unsigned long) get_map();

	case ARG_PID:
		return (unsigned long) get_pid();

	case ARG_RANGE:
		return handle_arg_range(entry, argnum);

	case ARG_OP:	/* Like ARG_LIST, but just a single value. */
		return handle_arg_op(entry, argnum);

	case ARG_LIST:
		return handle_arg_list(entry, argnum);

	case ARG_RANDPAGE:
		return handle_arg_randpage();

	case ARG_CPU:
		return (unsigned long) get_cpu();

	case ARG_PATHNAME:
		return (unsigned long) generate_pathname();

	case ARG_IOVEC:
		return handle_arg_iovec(entry, rec, argnum);

	case ARG_IOVECLEN:
	case ARG_SOCKADDRLEN:
		/* We already set the len in the ARG_IOVEC/ARG_SOCKADDR case,
		 * so here we just return what we had set there. */
		return get_argval(rec, argnum);

	case ARG_SOCKADDR:
		return handle_arg_sockaddr(entry, rec, argnum);

	case ARG_MODE_T:
		return handle_arg_mode_t();
	}

	BUG("unreachable!\n");
}
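Here get_cpu() is again a userspace helper, this time in a syscall fuzzer: the ARG_CPU case needs a CPU number to hand to syscalls such as sched_setaffinity(), and a fuzzer typically mixes valid indices with deliberately bogus ones. A hypothetical sketch of such a helper (not the fuzzer's actual implementation):

/* Hypothetical fuzzing helper in the spirit of the ARG_CPU case:
 * usually a plausible CPU index, occasionally garbage on purpose. */
#include <stdlib.h>
#include <unistd.h>

static int get_cpu_fuzzed(void)
{
	switch (rand() % 8) {
	case 0:
		return -1;				/* invalid on purpose */
	case 1:
		return rand();				/* wild value */
	default:
		return rand() % sysconf(_SC_NPROCESSORS_ONLN);
	}
}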
Example #26
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	const unsigned cpu = get_cpu();
	struct thread_info *tinfo;
	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
	unsigned long dummy;
	unsigned used = 0;
	int graph = 0;
	int done = 0;

	if (!task)
		task = current;

	if (!stack) {
		if (regs)
			stack = (unsigned long *)regs->sp;
		else if (task != current)
			stack = (unsigned long *)task->thread.sp;
		else
			stack = &dummy;
	}

	if (!bp)
		bp = stack_frame(task, regs);
	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions, each one is walked in turn.
	 */
	tinfo = task_thread_info(task);
	while (!done) {
		unsigned long *stack_end;
		enum stack_type stype;
		char *id;

		stype = analyze_stack(cpu, task, stack, &stack_end,
				      irq_stack, &used, &id);

		/* Finish by default unless a handler below continues the walk */
		done = 1;

		switch (stype) {

		/* Break out early if we are on the thread stack */
		case STACK_IS_NORMAL:
			break;

		case STACK_IS_EXCEPTION:

			if (ops->stack(data, id) < 0)
				break;

			bp = ops->walk_stack(tinfo, stack, bp, ops,
					     data, stack_end, &graph);
			ops->stack(data, "<EOE>");
			/*
			 * We link to the next stack via the
			 * second-to-last pointer (index -2 to end) in the
			 * exception stack:
			 */
			stack = (unsigned long *) stack_end[-2];
			done = 0;
			break;

		case STACK_IS_IRQ:

			if (ops->stack(data, "IRQ") < 0)
				break;
			bp = ops->walk_stack(tinfo, stack, bp,
				     ops, data, stack_end, &graph);
			/*
			 * We link to the next stack (which would be
			 * the process stack normally) the last
			 * pointer (index -1 to end) in the IRQ stack:
			 */
			stack = (unsigned long *) (stack_end[-1]);
			irq_stack = NULL;
			ops->stack(data, "EOI");
			done = 0;
			break;

		case STACK_IS_UNKNOWN:
			ops->stack(data, "UNK");
			break;
		}
	}

	/*
	 * This handles the process stack:
	 */
	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}
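The ops parameter decouples the stack walk from how entries are reported. A minimal consumer might look like the sketch below; the callback and variable names are illustrative assumptions, with print_context_stack standing in as the usual frame walker:

/* Hypothetical stacktrace_ops consumer: label each stack as the
 * walker crosses stack boundaries. */
static int print_stack_name(void *data, char *name)
{
	printk("%s: <%s>\n", (char *)data, name);
	return 0;	/* non-negative return: keep walking */
}

static const struct stacktrace_ops print_ops = {
	.stack		= print_stack_name,
	.walk_stack	= print_context_stack,	/* common default walker */
};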
Example #27
File: swap.c Project: 7799/linux
void lru_add_drain(void)
{
	lru_add_drain_cpu(get_cpu());
	put_cpu();
}
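This is the pattern at its most compact: get_cpu() is essentially preempt_disable() followed by smp_processor_id(), and put_cpu() is preempt_enable(). The same function spelled out without the shorthand (a sketch, not a proposed change):

/* lru_add_drain() with the get_cpu()/put_cpu() shorthand expanded: */
void lru_add_drain_spelled_out(void)
{
	preempt_disable();			/* what get_cpu() does first */
	lru_add_drain_cpu(smp_processor_id());
	preempt_enable();			/* what put_cpu() expands to */
}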
Example #28
/*
 * this changes the io permissions bitmap in the current task.
 */
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
	unsigned long i, max_long, bytes, bytes_updated;
	struct thread_struct * t = &current->thread;
	struct tss_struct * tss;
	unsigned long *bitmap;

	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
		return -EINVAL;
	if (turn_on && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	/*
	 * If it's the first ioperm() call in this thread's lifetime, set the
	 * IO bitmap up. ioperm() is much less timing critical than clone(),
	 * this is why we delay this operation until now:
	 */
	if (!t->io_bitmap_ptr) {
		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!bitmap)
			return -ENOMEM;

		memset(bitmap, 0xff, IO_BITMAP_BYTES);
		t->io_bitmap_ptr = bitmap;
	}

	/*
	 * do it in the per-thread copy and in the TSS ...
	 *
	 * Disable preemption via get_cpu() - we must not switch away
	 * because the ->io_bitmap_max value must match the bitmap
	 * contents:
	 */
	tss = &per_cpu(init_tss, get_cpu());

	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);

	/*
	 * Search for a (possibly new) maximum. This is simple and stupid,
	 * to keep it obviously correct:
	 */
	max_long = 0;
	for (i = 0; i < IO_BITMAP_LONGS; i++)
		if (t->io_bitmap_ptr[i] != ~0UL)
			max_long = i;

	bytes = (max_long + 1) * sizeof(long);
	bytes_updated = max(bytes, t->io_bitmap_max);

	t->io_bitmap_max = bytes;

	/*
	 * Sets the lazy trigger so that the next I/O operation will
	 * reload the correct bitmap.
	 * Reset the owner so that a process switch will not set
	 * tss->io_bitmap_base to IO_BITMAP_OFFSET.
	 */
	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
	tss->io_bitmap_owner = NULL;

	put_cpu();

	return 0;
}
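From userspace this handler is reached through the ioperm(2) wrapper. A minimal privileged usage sketch (the port range is only an example):

/* Userspace sketch: request access to three legacy parallel-port
 * I/O ports. Needs CAP_SYS_RAWIO, mirroring the capable() check in
 * the handler above. */
#include <stdio.h>
#include <sys/io.h>

int main(void)
{
	if (ioperm(0x378, 3, 1) != 0) {	/* from, num, turn_on */
		perror("ioperm");
		return 1;
	}
	/* inb()/outb() on ports 0x378..0x37a are now permitted */
	return 0;
}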
Example #29
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	const unsigned cpu = get_cpu();
	unsigned long *irq_stack_end =
		(unsigned long *)per_cpu(irq_stack_ptr, cpu);
	unsigned used = 0;
	struct thread_info *tinfo;
	int graph = 0;

	if (!task)
		task = current;

	if (!stack) {
		unsigned long dummy;
		stack = &dummy;
		if (task && task != current)
			stack = (unsigned long *)task->thread.sp;
	}

#ifdef CONFIG_FRAME_POINTER
	if (!bp) {
		if (task == current) {
			/* Grab bp right from our regs */
			get_bp(bp);
		} else {
			/* bp is the last reg pushed by switch_to */
			bp = *(unsigned long *) task->thread.sp;
		}
	}
#endif

	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions, each one is walked in turn.
	 */
	tinfo = task_thread_info(task);
	for (;;) {
		char *id;
		unsigned long *estack_end;
		estack_end = in_exception_stack(cpu, (unsigned long)stack,
						&used, &id);

		if (estack_end) {
			if (ops->stack(data, id) < 0)
				break;

			bp = ops->walk_stack(tinfo, stack, bp, ops,
					     data, estack_end, &graph);
			ops->stack(data, "<EOE>");
			/*
			 * We link to the next stack via the
			 * second-to-last pointer (index -2 to end) in the
			 * exception stack:
			 */
			stack = (unsigned long *) estack_end[-2];
			continue;
		}
		if (irq_stack_end) {
			unsigned long *irq_stack;
			irq_stack = irq_stack_end -
				(IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);

			if (in_irq_stack(stack, irq_stack, irq_stack_end)) {
				if (ops->stack(data, "IRQ") < 0)
					break;
				bp = ops->walk_stack(tinfo, stack, bp,
					ops, data, irq_stack_end, &graph);
				/*
				 * We link to the next stack (which would be
				 * the process stack normally) the last
				 * pointer (index -1 to end) in the IRQ stack:
				 */
				stack = (unsigned long *) (irq_stack_end[-1]);
				bp = fixup_bp_irq_link(bp, stack, irq_stack,
						       irq_stack_end);
				irq_stack_end = NULL;
				ops->stack(data, "EOI");
				continue;
			}
		}
		break;
	}

	/*
	 * This handles the process stack:
	 */
	bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}
Example #30
/*
 * Queue up a socket with data pending. If there are idle nfsd
 * processes, wake 'em up.
 */
static void
svc_sock_enqueue(struct svc_sock *svsk)
{
	struct svc_serv	*serv = svsk->sk_server;
	struct svc_pool *pool;
	struct svc_rqst	*rqstp;
	int cpu;

	if (!(svsk->sk_flags &
	      ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
		return;
	if (test_bit(SK_DEAD, &svsk->sk_flags))
		return;

	cpu = get_cpu();
	pool = svc_pool_for_cpu(svsk->sk_server, cpu);
	put_cpu();

	spin_lock_bh(&pool->sp_lock);

	if (!list_empty(&pool->sp_threads) &&
	    !list_empty(&pool->sp_sockets))
		printk(KERN_ERR
			"svc_sock_enqueue: threads and sockets both waiting??\n");

	if (test_bit(SK_DEAD, &svsk->sk_flags)) {
		/* Don't enqueue dead sockets */
		dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}

	/* Mark socket as busy. It will remain in this state until the
	 * server has processed all pending data and put the socket back
	 * on the idle list.  We update SK_BUSY atomically because
	 * it also guards against trying to enqueue the svc_sock twice.
	 */
	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
		/* Don't enqueue socket while already enqueued */
		dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
		goto out_unlock;
	}
	BUG_ON(svsk->sk_pool != NULL);
	svsk->sk_pool = pool;

	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
	if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2
	     > svc_sock_wspace(svsk))
	    && !test_bit(SK_CLOSE, &svsk->sk_flags)
	    && !test_bit(SK_CONN, &svsk->sk_flags)) {
		/* Don't enqueue while not enough space for reply */
		dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
			svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg,
			svc_sock_wspace(svsk));
		svsk->sk_pool = NULL;
		clear_bit(SK_BUSY, &svsk->sk_flags);
		goto out_unlock;
	}
	clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);


	if (!list_empty(&pool->sp_threads)) {
		rqstp = list_entry(pool->sp_threads.next,
				   struct svc_rqst,
				   rq_list);
		dprintk("svc: socket %p served by daemon %p\n",
			svsk->sk_sk, rqstp);
		svc_thread_dequeue(pool, rqstp);
		if (rqstp->rq_sock)
			printk(KERN_ERR 
				"svc_sock_enqueue: server %p, rq_sock=%p!\n",
				rqstp, rqstp->rq_sock);
		rqstp->rq_sock = svsk;
		atomic_inc(&svsk->sk_inuse);
		rqstp->rq_reserved = serv->sv_max_mesg;
		atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
		BUG_ON(svsk->sk_pool != pool);
		wake_up(&rqstp->rq_wait);
	} else {