Example #1
void multi_thread_test(bool insert_only) {
    TestSharedContext shared(insert_only);

    pthread_attr_t join_attr;
    void *join_status;
    ::pthread_attr_init(&join_attr);
    ::pthread_attr_setdetachstate(&join_attr, PTHREAD_CREATE_JOINABLE);

    TestThreadContext contexts[THREAD_COUNT];
    pthread_t *threads = new pthread_t[THREAD_COUNT];
    for (int i = 0; i < THREAD_COUNT; ++i) {
        contexts[i].id = i;
        contexts[i].shared = &shared;
        int rc = ::pthread_create(threads + i, &join_attr, test_work, contexts + i);
        EXPECT_EQ(0, rc) << "pthread_create failed";
    }

    for (int i = 0; i < THREAD_COUNT; ++i) {
        int rc = ::pthread_join(threads[i], &join_status);
        EXPECT_EQ(0, rc) << "pthread_join failed";
    }

    mfence();

    ::pthread_attr_destroy(&join_attr);
    delete[] threads;

    std::cout << "done all! checking results..." << std::endl;
    EXPECT_TRUE(shared.the_queue.unsafe_consistent());

    mfence();
    uint32_t total_inserted = 0;
    uint32_t total_deleted = 0;
    for (int i = 0; i < THREAD_COUNT; ++i) {
        total_inserted += contexts[i].inserted_count;
        total_deleted += contexts[i].deleted_count;
    }

    EXPECT_EQ(total_inserted - total_deleted, shared.the_queue.unsafe_size());

    uint32_t dequeued = 0;
    while (true) {
        DummyEntry* deq = shared.the_queue.dequeue();
        if (deq == NULL) {
            break;
        }
        delete deq;
        ++dequeued;
    }
    EXPECT_EQ(total_inserted - total_deleted, dequeued);
}
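
The harness above never defines TestSharedContext or TestThreadContext. A plausible minimal sketch, inferred from how this test and test_work (Example #3) use the fields; DummyQueue is a hypothetical stand-in for the lock-free queue type under test, and everything beyond the referenced members is a guess:

struct TestSharedContext {
    explicit TestSharedContext(bool only) : insert_only(only) {}
    bool       insert_only;  // when true, workers never take the dequeue branch
    DummyQueue the_queue;    // the lock-free queue under test (type name assumed)
};

struct TestThreadContext {
    int                id;              // worker index, set before pthread_create
    TestSharedContext* shared;          // state common to all workers
    uint32_t           inserted_count;  // per-thread enqueue tally
    uint32_t           deleted_count;   // per-thread dequeue tally
};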
Example #2
void
acsyscall(void)
{
	panic("acsyscall");
#if 0
	Proc *p;

	/*
	 * If we saved the Ureg into m->proc->dbgregs,
	 * there's nothing else we have to do.
	 * Otherwise, we should set m->proc->dbgregs = u.
	 */
	DBG("acsyscall: cpu%d\n", machp()->machno);

	_pmcupdate(m);
	p = m->proc;
	p->actime1 = fastticks(nil);
	m->syscall++;	/* would also count it in the TS core */
	m->icc->rc = ICCSYSCALL;
	m->cr2 = cr2get();
	fpuprocsave(p);
	_pmcupdate(m);
	mfence();
	m->icc->fn = nil;
	ready(p);
	/*
	 * The next call is probably going to make us jmp
	 * into user code, forgetting all our state in this
	 * stack, upon the next syscall.
	 * We don't nest calls in the current stack for too long.
	 */
	acsched();
#endif
}
Example #3
void *test_work(void *t) {
    TestThreadContext &context = *reinterpret_cast<TestThreadContext*>(t);
    TestSharedContext &shared = *context.shared;
    tlr_t rand (context.id);

    mfence();
    std::cout << "Worker-" << context.id << " started" << std::endl;
    context.inserted_count = 0;
    context.deleted_count = 0;
    // start!
    for (int i = 0; i < REP_COUNT; ++i) {
        uint32_t key = rand.nextInt32() % (REP_COUNT * 3);
        bool del = (rand.nextInt32() % 5) == 0;
        // dequeue only when this thread has inserted more than it has deleted
        if (!shared.insert_only && del && context.deleted_count < context.inserted_count) {
            ++context.deleted_count;
            delete shared.the_queue.dequeue();
        } else {
            ++context.inserted_count;
            shared.the_queue.enqueue(new DummyEntry(key));
        }
    }

    std::cout << "Worker-" << context.id << " inserted " << context.inserted_count
        << " entries. deleted=" << context.deleted_count << std::endl;
    ::pthread_exit(NULL);
    return NULL;
}
Example #4
File: main.c Project: 99years/plan9
/*
 * Rendezvous with other cores. Set roles for those that came
 * up online, and wait until they are initialized.
 * Sync TSC with them.
 * We assume other processors that could boot had time to
 * set online to 1 by now.
 */
static void
nixsquids(void)
{
	Mach *mp;
	int i;
	uvlong now, start;

	for(i = 1; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online != 0){
			/*
			 * Inter-core calls. Ensure *mp->iccall and mp->icargs
			 * go into different cache lines.
			 */
			mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0);
			mp->icc->fn = nil;
			if(i < initialTCs){
				conf.nmach++;
				mp->nixtype = NIXTC;
			}
			ainc(&active.nbooting);
		}
	sys->epoch = rdtsc();
	mfence();
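	/* MSR 0x10 is IA32_TIME_STAMP_COUNTER: this write resets the core's TSC to the shared epoch */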
	wrmsr(0x10, sys->epoch);
	m->rdtsc = rdtsc();
	active.thunderbirdsarego = 1;
	start = fastticks2us(fastticks(nil));
	do{
		now = fastticks2us(fastticks(nil));
	}while(active.nbooting > 0 && now - start < 1000000);
	if(active.nbooting > 0)
		print("cpu0: %d cores couldn't start\n", active.nbooting);
	active.nbooting = 0;
}
Example #5
void
addwaitstat(uintptr_t pc, uint64_t t0, int type)
{
	uint i;
	uint64_t w;

	if(waitstats.on == 0)
		return;

	cycles(&w);
	w -= t0;
	mfence();
	for(i = 0; i < NWstats; i++)
		if(waitstats.pcs[i] == pc){
			ainc(&waitstats.ns[i]);
			if(w > waitstats.wait[i])
				waitstats.wait[i] = w;	/* race but ok */
			waitstats.total[i] += w;		/* race but ok */
			return;
		}
	if(!canlock(&waitstatslk))
		return;

	for(i = 0; i < NWstats; i++)
		if(waitstats.pcs[i] == pc){
			ainc(&waitstats.ns[i]);
			if(w > waitstats.wait[i])
				waitstats.wait[i] = w;	/* race but ok */
			waitstats.total[i] += w;
			unlock(&waitstatslk);
			return;
		}

	for(i = 0; i < NWstats; i++)
		if(waitstats.pcs[i] == 0){
			waitstats.ns[i] = 1;
			waitstats.type[i] = type;
			waitstats.wait[i] = w;
			waitstats.total[i] = w;
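			/* initialize the whole slot before the fence; lock-free readers key off pcs[i] */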
			mfence();
			waitstats.pcs[i] = pc;
			waitstats.npcs++;
			break;
		}

	unlock(&waitstatslk);
}
Example #6
static inline tweed_task_func_t steal_task(struct generic_task_desc * task,
                                           struct worker_desc * thief) {
#ifdef TWEED_USE_CAS
    tweed_task_func_t func = task->f.func;
    int success = cmpxchg128((uint64_t *)&(task->f.func),
                             (uint64_t)func, TWEED_TASK_NEW, 
                             (uint64_t)thief, TWEED_TASK_STOLEN);
    return success ? func : NULL;
#else 
    task->balarm = TWEED_TASK_STOLEN;
    mfence();      
    tweed_task_func_t func = task->f.func; 
    task->thief = thief;
    mfence();      
    return func;
#endif
} 
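
In the non-CAS path above, the first mfence forces the balarm store to become globally visible before task->f.func is loaded — store-load reordering being the one reordering x86 would otherwise perform — so the victim, which checks balarm, and the thief cannot both claim the task. A minimal stand-alone sketch of that store-fence-load handoff, assuming x86-64 with GCC-style inline assembly; the mfence macro, balarm flag, and task_slot below are illustrative, not Tweed's:

#define mfence() __asm__ __volatile__("mfence" ::: "memory")

typedef void (*task_fn)(void);

static volatile int balarm;         /* victim polls this before running the task */
static task_fn volatile task_slot;  /* the function pointer being stolen */

static task_fn steal(void)
{
	balarm = 1;        /* announce the steal... */
	mfence();          /* ...and make it globally visible before the load below */
	return task_slot;  /* only then sample the slot */
}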
Example #7
void
startwaitstats(int on)
{
	newwaitstats();
	mfence();
	waitstats.on = on;
	print("lockstats %s\n", on?"on":"off");
}
Example #8
static void
lapic_write32(enum LAPIC_REGISTERS reg, uint32_t val)
{

	if (x2apic_mode) {
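		/* WRMSR to an x2APIC register is not serializing; make earlier stores globally visible first */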
		mfence();
		wrmsr(MSR_APIC_000 + reg, val);
	} else {
		*(volatile uint32_t *)(lapic_map + reg * LAPIC_MEM_MUL) = val;
	}
}
Example #9
void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
  StubCodeMark mark(this, "ICache", "flush_icache_stub");

  address start = __ pc();
#ifdef AMD64

  const Register addr  = c_rarg0;
  const Register lines = c_rarg1;
  const Register magic = c_rarg2;

  Label flush_line, done;

  __ testl(lines, lines);
  __ jcc(Assembler::zero, done);

  // Force ordering wrt cflush.
  // Other fence and sync instructions won't do the job.
  __ mfence();

  __ bind(flush_line);
  __ clflush(Address(addr, 0));
  __ addptr(addr, ICache::line_size);
  __ decrementl(lines);
  __ jcc(Assembler::notZero, flush_line);

  __ mfence();

  __ bind(done);

#else
  const Address magic(rsp, 3*wordSize);
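  // Pre-SSE2 CPUs lack mfence; a locked add to the stack top is a full memory barrier.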
  __ lock(); __ addl(Address(rsp, 0), 0);
#endif // AMD64
  __ movptr(rax, magic); // Handshake with caller to make sure it happened!
  __ ret(0);

  // Must be set here so StubCodeMark destructor can call the flush stub.
  *flush_icache_stub = (ICache::flush_icache_stub_t)start;
}
Example #10
static void
lapic_write_icr(uint32_t vhi, uint32_t vlo)
{
	uint64_t v;

	if (x2apic_mode) {
		v = ((uint64_t)vhi << 32) | vlo;
		mfence();
		wrmsr(MSR_APIC_000 + LAPIC_ICR_LO, v);
	} else {
		lapic_write32(LAPIC_ICR_HI, vhi);
		lapic_write32(LAPIC_ICR_LO, vlo);
	}
}
Example #11
static void HOT OPTIMIZE3 stress_memthrash_mfence(const args_t *args, size_t mem_size)
{
	uint32_t i;
	const uint32_t max = mwc16();

	(void)args;

	for (i = 0; !thread_terminate && (i < max); i++) {
		size_t offset = mwc32() % mem_size;
		volatile uint8_t *ptr = mem + offset;

		*ptr = i & 0xff;
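		/* drain the store buffer after every write; that round trip is the stress */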
		mfence();
	}
}
Example #12
/*
 * Main scheduling loop done by the application core.
 * Some of the functions run will not return.
 * The system call handler will reset the stack and
 * call acsched again.
 * We loop because some functions may return and we should
 * wait for another call.
 */
void
acsched(void)
{
	acmmuswitch();
	for(;;){
		acstackok();
		mwait(&machp()->icc->fn);
		if(machp()->icc->flushtlb)
			acmmuswitch();
		DBG("acsched: cpu%d: fn %#p\n", machp()->machno, machp()->icc->fn);
		machp()->icc->fn();
		DBG("acsched: cpu%d: idle\n", machp()->machno);
		mfence();
		machp()->icc->fn = nil;
	}
}
Example #13
/*
 * Run an arbitrary function with arbitrary args on an AP core.
 * The first argument is always the pml4 for the process.
 * TODO: make a field and a struct for the args cache line.
 *
 * Returns the return-code for the ICC or -1 if the process was
 * interrupted while issuing the ICC.
 */
int
runac(Mach *mp, APfunc func, int flushtlb, void *a, int32_t n)
{
	Proc *up = externup();
	uint8_t *dpg, *spg;

	if (n > sizeof(mp->NIX.icc->data))
		panic("runac: args too long");

	if(mp->online == 0)
		panic("Bad core");
	if(mp->proc != nil && mp->proc != up)
		panic("runapfunc: mach is busy with another proc?");

	memmove(mp->NIX.icc->data, a, n);
	if(flushtlb){
		DBG("runac flushtlb: cppml4 %#p %#p\n", mp->MMU.pml4->pa, machp()->MMU.pml4->pa);
		dpg = UINT2PTR(mp->MMU.pml4->va);
		spg = UINT2PTR(machp()->MMU.pml4->va);
		/* We should copy less:
		 *	memmove(dgp, spg, machp()->MMU.pml4->daddr * sizeof(PTE));
		 */
		memmove(dpg, spg, PTSZ);
		if(0){
			print("runac: upac pml4 %#p\n", up->ac->MMU.pml4->pa);
			dumpptepg(4, up->ac->MMU.pml4->pa);
		}
	}
	mp->NIX.icc->flushtlb = flushtlb;
	mp->NIX.icc->rc = ICCOK;

	DBG("runac: exotic proc on cpu%d\n", mp->machno);
	if(waserror()){
		qunlock(&up->debug);
		nexterror();
	}
	qlock(&up->debug);
	up->nicc++;
	up->state = Exotic;
	up->psstate = 0;
	qunlock(&up->debug);
	poperror();
	mfence();
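	/* the fn store publishes the request: the AC is spinning in mwait(&icc->fn), see acsched */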
	mp->NIX.icc->fn = func;
	sched();
	return mp->NIX.icc->rc;
}
Example #14
void
testicc(int i)
{
	Mach *mp;

	if((mp = sys->machptr[i]) != nil && mp->online != 0){
		if(mp->nixtype != NIXAC){
			print("testicc: core %d is not an AC\n", i);
			return;
		}
		print("calling core %d... ", i);
		mp->icc->flushtlb = 0;
		snprint((char *)mp->icc->data, ICCLNSZ, "<%d>", i);
		mfence();
		mp->icc->fn = testiccfn;
		mwait(&mp->icc->fn);
	}
}
Example #15
/*
 * Rendezvous with other cores. Set roles for those that came
 * up online, and wait until they are initialized.
 * Sync TSC with them.
 * We assume other processors that could boot had time to
 * set online to 1 by now.
 */
static void
nixsquids(void)
{
	Mach *m = machp();
	Mach *mp;
	int i;
	uint64_t now, start;

	/* Not AC for now :-) */
	for(i = 1; i < MACHMAX; i++)
		if((mp = sys->machptr[i]) != nil && mp->online){
			/*
			 * Inter-core calls. Ensure *mp->iccall and mp->icargs
			 * go into different cache lines.
			 */
			mp->icc = mallocalign(sizeof *m->icc, ICCLNSZ, 0, 0);
			mp->icc->fn = nil;
			if(i < numtcs){
				sys->nmach++;
				mp->nixtype = NIXTC;
				sys->nc[NIXTC]++;
			}//else
				//sys->nc[NIXAC]++;
			ainc(&active.nbooting);
		}
	sys->epoch = rdtsc();
	mfence();
	wrmsr(0x10, sys->epoch);
	m->rdtsc = rdtsc();
	active.thunderbirdsarego = 1;
	start = fastticks2us(fastticks(nil));
	do{
		now = fastticks2us(fastticks(nil));
	}while(active.nbooting > 0 && now - start < 1000000);
	if(active.nbooting > 0)
		print("cpu0: %d cores couldn't start\n", active.nbooting);
	active.nbooting = 0;
}
Example #16
void
putac(Mach *m)
{
	mfence();
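	/* complete this proc's stores before the AC is marked free for reuse */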
	m->proc = nil;
}
Example #17
static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}
Example #18
static inline void barrier_wait(atomic_t *b)
{
	while (atomic_read(b) == 0)
		asm("pause");
	mfence();
}
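
Examples 17 and 18 are two halves of a release/acquire handshake: the fence in release_barrier keeps the producer's payload stores ahead of the flag store, and the fence in barrier_wait keeps the consumer's payload loads behind the flag load. A hedged usage sketch reusing those two helpers and the same Linux-kernel-style atomic_t API they are built on; producer, consumer, and shared_data are illustrative:

static int shared_data;                  /* plain, non-atomic payload */
static atomic_t ready = ATOMIC_INIT(0);  /* the handshake flag */

static void producer(void)
{
	shared_data = 42;         /* payload store... */
	release_barrier(&ready);  /* ...fenced before the flag is raised */
}

static void consumer(void)
{
	barrier_wait(&ready);     /* spin until the flag is set, then fence */
	/* shared_data is guaranteed to read as 42 here */
}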
Example #19
void
kforkexecac(Proc *p, int core, char *ufile, char **argv)
{
	Mach *m = machp();
	Khdr hdr;
	Tos *tos;
	Chan *chan;
	int argc, i, n, sno;
	char *a, *elem, *file, *args;
	int32_t hdrsz, magic, textsz, datasz, bsssz;
	uintptr_t textlim, datalim, bsslim, entry, tbase, tsize, dbase, dsize, bbase, bsize, sbase, ssize, stack;
	Mach *mp;
	//	static Pgrp *kpgrp;

	panic("kexec not done\n");
	// XXX: since this is kernel code we can't do attachimage,
	// we should be reading the file into kernel memory.
	// this only matters if we are using ufile.
	// YYY: look at dev reboot for help.

	file = nil;
	elem = nil;
	chan = nil;
	mp = nil;

	USED(chan);

	if(waserror()){
		DBG("kforkexecac: failing: %s\n", m->externup->errstr);
		if(file)
			free(file);
		if(elem)
			free(elem);
		if(chan)
			cclose(chan);
		if(core > 0 && mp != nil)
			mp->proc = nil;
		if(core != 0)
			p->ac = nil;
		nexterror();
	}

	if(core != 0)
		p->ac = getac(p, core);

	argc = 0;
	if(ufile != nil){
		panic("ufile not implemented yet");
		file = validnamedup(ufile, 1);
		DBG("kforkexecac: up %#p file %s\n", m->externup, file);
		chan = namec(file, Aopen, OEXEC, 0);
		kstrdup(&elem, m->externup->genbuf);

		hdrsz = chan->dev->read(chan, &hdr, sizeof(Khdr), 0);
		DBG("wrote ufile\n");

		if(hdrsz < 2)
			error(Ebadexec);
	}else{
		/* somebody already wrote in our text segment */
		for(sno = 0; sno < NSEG; sno++)
			if(p->seg[sno] != nil)
				if((p->seg[sno]->type & SG_EXEC) != 0)
					break;
		if(sno == NSEG)
			error("kforkexecac: no text segment!");
		hdr = *(Khdr*)p->seg[sno]->base;
		hdrsz = sizeof(Khdr);
	}

//	p = (char*)&hdr;
	magic = l2be(hdr.magic);
	DBG("badexec3\n");

	if(hdrsz != sizeof(Khdr) || magic != AOUT_MAGIC)
		error(Ebadexec);
	if(magic & HDR_MAGIC){
		entry = vl2be(hdr.hdr[0]);
		hdrsz = sizeof(Khdr);
	}
	else{
		entry = l2be(hdr.entry);
		hdrsz = sizeof(Exec);
	}

	textsz = l2be(hdr.text);
	datasz = l2be(hdr.data);
	bsssz = l2be(hdr.bss);

	panic("aki broke it before it even got working.");
/* TODO(aki): figure out what to do with this.
	tbase = p->seg[TSEG]->base;
	tsize = tbase - p->seg[TSEG]->top;
	dbase = p->seg[DSEG]->base;
	dsize = dbase - p->seg[DSEG]->top;
	bbase = p->seg[BSEG]->base;
	bsize = bbase - p->seg[BSEG]->top;
	sbase = p->seg[SSEG]->base;
	ssize = sbase - p->seg[SSEG]->top;
*/

	// XXX: we are no longer contiguous.
	textlim = ROUNDUP(hdrsz+textsz, BIGPGSZ);
	// XXX: we are going to be at least two pages here.
	datalim = BIGPGROUND(datasz);
	bsslim = BIGPGROUND(datalim+bsssz);

	// XXX: this is pretty fragile
	memmove((void*)dbase, (void*)(entry+textsz), datasz);
	DBG("writing data dbase %#p tbase %#p textsz %ld datasz %ld\n", dbase, tbase, textsz, datasz);
//	memmove((void*)dbase, (void*)"testing data", 13);
	/*
	 * Check the binary header for consistency,
	 * e.g. the entry point is within the text segment and
	 * the segments don't overlap each other.
	 */
	// XXX: the max instruction size on amd64 is 15 bytes; provide a consistency check.
	DBG("kexec: entry %#p tbase %#p hdrsz %ld  textsz %ld\n", entry, tbase, hdrsz, textsz);
	if(entry < tbase+hdrsz || entry >= tbase+hdrsz+textsz)
		error(Ebadexec);
	// XXX: what about the kernel stack we are making here?
	DBG("kexec: testing if sizes overflow limits\n");
	if(textsz >= textlim || datasz > datalim || bsssz > bsslim)
		error(Ebadexec);
	DBG("kexec: do the top of the segments overflow limits?\n");
	if(textlim >= tbase+tsize || datalim >= dbase+dsize || bsslim >= bbase+bsize)
		error(Ebadexec);

	DBG("kexec: is bss below data?\n");
	if(bsslim < datalim)
		error(Ebadexec);
	/*
	Interesting thought, the previously allocated segments for
	data and text are shared and constant.  The BSS and the stack
	are not.  What you really want is the ability to make an
	executable text and data and then create child executables on
	top of that.  This will lower external fragmentation and allow
	a bunch of communicating shared-memory processes (i.e., Go) in
	kernel space.

	Fundamentally this means that the allocation of the text and
	the data should be separate from the bss and the stack.  This
	will require that you change the linkers as well to allow the
	separation of data and bss sections.
	*/

	/*
	 * Stack is a pointer into the temporary stack
	 * segment, and will move as items are pushed.
	 */

	 // need to work something out here with the stack.
	stack = sbase+ssize-sizeof(Tos);


	 /*
	  * XXX: When we are linking this, how do we set the tos? We will need to change trap, right?
	  */
	tos = (Tos*)stack;
	tos->cyclefreq = m->cyclefreq;
	cycles((uint64_t*)&tos->pcycles);
	tos->pcycles = -tos->pcycles;
	tos->kcycles = tos->pcycles;
	tos->clock = 0;

	DBG("kexec: argument processing\n");
	if(0)
	for(i = 0;; i++, argv++){
		a = *(char**)validaddr(argv, sizeof(char**), 0);
		if(a == nil)
			break;
		a = validaddr(a, 1, 0);
		n = ((char*)vmemchr(a, 0, 0x7fffffff) - a) + 1;

		if(argc > 0 && i == 0)
			continue;

		stack -= n;
		if(stack < sbase+ssize-4096)
			error(Enovmem);
		args = UINT2PTR(stack);
		memmove(args, a, n);
		args[n-1] = 0;
		argc++;
	}
	// DBG("kexec: ensuring we have argc\n");
	if(0)
	if(argc < 1)
		error(Ebadexec);

	a = args = UINT2PTR(stack);
	stack = sysexecstack(stack, argc);
	// XXX: look through math on this. look at ../../9/port/ exec.c
	// YYY: this looks like a Jimism for 9k.
	// DBG("kexec: ensuring the stack \n");
	if(0)
	if(stack-(argc+1)*sizeof(char**)-BIGPGSZ < sbase+ssize-4096)
		error(Ebadexec);

	argv = (char**)stack;
	*--argv = nil;
	// XXX: replace USTKTOP with a new variable representing the top of stack.
	if(0)
	for(i = 0; i < argc; i++){
		*--argv = args + (USTKTOP-sbase+ssize);
		args += strlen(args) + 1;
	}

	DBG("argsing\n");
	n = args - a;
	if(0)
	if(n <= 0)
		error(Egreg);
	if(n > 128)
		n = 128;
	DBG("kexec: allocating args\n");
	// XXX: hangs in smalloc, not sure why.
//	args = smalloc(n);
//	if(waserror()){
//		DBG("erroring\n");
//		free(args);
//		nexterror();
//	}
//	DBG("kexec: moving args\n");
//	memmove(args, a, n);
//	if(0)
//	while(n > 0 && (args[n-1] & 0xc0) == 0x80)
//		n--;
//	args[n-1] = '\0';

	kstrdup(&p->text, "kexecproc");
	p->args = nil;
	//elem;
//	elem = nil;
//	p->args = args;
//	p->nargs = n;
	poperror();				/* p (m->externup->args) */





/*
	qlock(&p->debug);

	sysprocsetup(p);
	qunlock(&p->debug);
*/

	// why is this sched and not ureg?
	p->sched.pc = entry;
	// the real question here is how do you set up the stack?
	p->sched.sp = PTR2UINT(stack-BY2SE);
	p->sched.sp = STACKALIGN(p->sched.sp);


	// XXX: what does it imply if you have a kproc that runs on an ac?
	if(core > 0){
		DBG("kexec: coring %d\n", core);
		mp = p->ac;
		mp->icc->flushtlb = 1;
		mp->icc->rc = ICCOK;

		DBG("kexec: exotic proc on cpu%d\n", mp->machno);
		qlock(&p->debug);
		if(waserror()){
			DBG("kexec: had error");
			qunlock(&p->debug);
			nexterror();
		}
		p->nicc++;
		p->state = Exotic;
		p->psstate = 0;
		DBG("kexec: unlocking");
		qunlock(&p->debug);
		poperror();
		mfence();
		mp->icc->fn = (void*)entry;
		sched();
	}else{
		DBG("kexec: readying\n");
		ready(p);
		p->newtlb = 1;
		mmuflush();
	}
	DBG("kforkexecac up %#p done\n"
		"textsz %lx datasz %lx bsssz %lx hdrsz %lx\n"
		"textlim %ullx datalim %ullx bsslim %ullx\n", m->externup,
		textsz, datasz, bsssz, hdrsz, textlim, datalim, bsslim);
}
Example #20
/*
 * Entered in AP core context, upon traps (system calls go through acsyscall)
 * using up->dbgreg means cores MUST be homogeneous.
 *
 * BUG: We should set up some trapenable() mechanism for the AC,
 * so that code like fpu.c could arrange for handlers specific to
 * the AC, instead of doing that by hand here.
 *
 * All interrupts are masked while in the "kernel"
 */
void
actrap(Ureg *u)
{
	panic("actrap");
#if 0
	char *n;
	ACVctl *v;

	n = nil;

	_pmcupdate(m);
	if(m->proc != nil){
		m->proc->nactrap++;
		m->proc->actime1 = fastticks(nil);
	}
	if(u->type < nelem(acvctl)){
		v = acvctl[u->type];
		if(v != nil){
			DBG("actrap: cpu%d: %ulld\n", machp()->machno, u->type);
			n = v->f(u, v->a);
			if(n != nil)
				goto Post;
			return;
		}
	}
	switch(u->type){
	case IdtDF:
		print("AC: double fault\n");
		dumpregs(u);
		ndnr();
	case IdtIPI:
		m->intr++;
		DBG("actrap: cpu%d: IPI\n", machp()->machno);
		apiceoi(IdtIPI);
		break;
	case IdtTIMER:
		apiceoi(IdtTIMER);
		panic("timer interrupt in an AC");
		break;
	case IdtPF:
		/* this case is here for debug only */
		m->pfault++;
		DBG("actrap: cpu%d: PF cr2 %#ullx\n", machp()->machno, cr2get());
		break;
	default:
		print("actrap: cpu%d: %ulld\n", machp()->machno, u->type);
	}
Post:
	m->icc->rc = ICCTRAP;
	m->cr2 = cr2get();
	memmove(m->proc->dbgreg, u, sizeof *u);
	m->icc->note = n;
	fpuprocsave(m->proc);
	_pmcupdate(m);
	mfence();
	m->icc->fn = nil;
	ready(m->proc);

	mwait(&m->icc->fn);

	if(m->icc->flushtlb)
		acmmuswitch();
	if(m->icc->fn != actrapret)
		acsched();
	DBG("actrap: ret\n");
	memmove(u, m->proc->dbgreg, sizeof *u);
	if(m->proc)
		m->proc->actime += fastticks2us(fastticks(nil) - m->proc->actime1);
#endif
}
Example #21
/*
 * bus_dmamap_sync routine for intagp.
 *
 * This is tailored to the usage that drm with the GEM memory manager
 * will be using, since intagp is for intel IGD, and thus shouldn't be
 * used for anything other than gpu-based work. Essentially for the intel GEM
 * driver we use bus_dma as an abstraction to convert our memory into a gtt
 * address and deal with any cache incoherencies that we create.
 *
 * We use the clflush instruction to deal with clearing the caches; since our
 * cache is physically indexed, we can even map then clear the page and it'll
 * work. On i386 we need to check for the presence of clflush in cpuid;
 * however, all cpus that have a new enough intel GMCH should be suitable.
 */
void	
intagp_dma_sync(bus_dma_tag_t tag, bus_dmamap_t dmam,
    bus_addr_t offset, bus_size_t size, int ops)
{
	bus_dma_segment_t	*segp;
	struct sg_page_map	*spm;
	void			*addr;
	paddr_t	 		 pa;
	bus_addr_t		 poff, endoff, soff;

#ifdef DIAGNOSTIC
	if ((ops & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0 &&
	    (ops & (BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE)) != 0)
		panic("agp_dmamap_sync: mix PRE and POST");
	if (offset >= dmam->dm_mapsize)
		panic("_intagp_dma_sync: bad offset %lu (size = %lu)",
		    offset, dmam->dm_mapsize);
	if (size == 0 || (offset + size) > dmam->dm_mapsize)
		panic("intagp_dma_sync: bad length");
#endif /* DIAGNOSTIC */
	
	/* Coherent mappings need no sync. */
	if (dmam->_dm_flags & BUS_DMA_COHERENT)
		return;

	/*
	 * We need to clflush the object cache in all cases but postwrite.
	 *
	 * - Due to gpu incoherency, postread we need to flush speculative
	 * reads (which are not written back on intel cpus).
	 *
	 * - preread we need to flush data which will very soon be stale from
	 * the caches
	 *
	 * - prewrite we need to make sure our data hits the memory before the 
	 * gpu hoovers it up.
	 *
	 * The chipset may also need flushing, but that fits badly into
	 * bus_dma and is done in the driver.
	 */
	soff = trunc_page(offset);
	endoff = round_page(offset + size);
	if (ops & BUS_DMASYNC_POSTREAD || ops & BUS_DMASYNC_PREREAD ||
	    ops & BUS_DMASYNC_PREWRITE) {
		if (curcpu()->ci_cflushsz == 0) {
			/* save some wbinvd()s. we're MD anyway so it's ok */
			wbinvd();
			return;
		}

		mfence();
		spm = dmam->_dm_cookie;
		switch (spm->spm_buftype) {
		case BUS_BUFTYPE_LINEAR:
			addr = spm->spm_origbuf + soff;
			while (soff < endoff) {
				pmap_flush_cache((vaddr_t)addr, PAGE_SIZE);
				soff += PAGE_SIZE;
				addr += PAGE_SIZE;
			}
			break;
		case BUS_BUFTYPE_RAW:
			segp = (bus_dma_segment_t *)spm->spm_origbuf;
			poff = 0;

			while (poff < soff) {
				if (poff + segp->ds_len > soff)
					break;
				poff += segp->ds_len;
				segp++;
			}
			/* first time round may not start at seg beginning */
			pa = segp->ds_addr + (soff - poff);
			while (poff < endoff) {
				for (; pa < segp->ds_addr + segp->ds_len &&
				    poff < endoff; pa += PAGE_SIZE) {
					pmap_flush_page(pa);
					poff += PAGE_SIZE;
				}
				segp++;
				if (poff < endoff)
					pa = segp->ds_addr;
			}
			break;
		/* You do not want to load mbufs or uios onto a graphics card */
		case BUS_BUFTYPE_MBUF:
			/* FALLTHROUGH */
		case BUS_BUFTYPE_UIO:
			/* FALLTHROUGH */
		default:
			panic("intagp_dmamap_sync: bad buftype %d",
			    spm->spm_buftype);
			
		}
		mfence();
	}
}