Exemple #1
0
/*
 * Run the N-body update on the Epiphany coprocessor.
 *
 * Uploads n*s Particle records and n*s ParticleV records to device memory,
 * launches the "nbody_thread" symbol on `cores` threads with
 * coprthr_mpiexec(), then copies the resulting particles back into
 * `particles`. All device buffers are released before the device is closed.
 *
 * particles  in/out: n*s Particle records, overwritten with the device result
 * state      in: n*s ParticleV records; uploaded only, not read back here
 * n, s       forwarded to the thread function; n*s is the element count used
 *            for every buffer; s==1 selects ./mpi_tfunc.cbin.3.e32, any other
 *            value selects ./mpi_tfunc2.cbin.3.e32
 * dt, es     forwarded unchanged to the thread function
 * iter       forwarded to the thread function as `cnt`; its parity also
 *            selects which device buffer is read back when s>1
 * cores      thread count passed to coprthr_mpiexec()
 *
 * `fix` and `var` are defined outside this function; their fields are copied
 * into args.fbinfo (presumably framebuffer screen info -- confirm).
 */
void update_particles_epiphany(Particle* particles, ParticleV* state, int n, int s, float dt, float es, int iter, int cores)
{
	int N = n*s;

	// open device for threads
	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	printf("dd=%d\n",dd);
	if (dd<0) ERROR("device open failed\n");

	// pick the thread binary: the s!=1 build is the special off-chip thread function
	coprthr_program_t prg;
	if (s==1) prg = coprthr_cc_read_bin("./mpi_tfunc.cbin.3.e32", 0);
	else prg = coprthr_cc_read_bin("./mpi_tfunc2.cbin.3.e32", 0);
	coprthr_sym_t thr = coprthr_getsym(prg,"nbody_thread");
	printf("prg=%p thr=%p\n",prg,thr);

	// write data to shared DRAM
	coprthr_mem_t p_mem = coprthr_dmalloc(dd,N*sizeof(Particle), 0);
	coprthr_dwrite(dd,p_mem,0,particles,N*sizeof(Particle),COPRTHR_E_WAIT);

	// second particle buffer (special off-chip memory); allocated but not initialised from the host
	coprthr_mem_t pn_mem = coprthr_dmalloc(dd,N*sizeof(Particle), 0);

	coprthr_mem_t v_mem = coprthr_dmalloc(dd,N*sizeof(ParticleV), 0);
	coprthr_dwrite(dd,v_mem,0,state,N*sizeof(ParticleV),COPRTHR_E_WAIT);

	// argument block handed to every thread
	my_args_t args;
	args.n = n;
	args.s = s;
	args.cnt = iter;
	args.dt = dt;
	args.es = es;
	args.p = coprthr_memptr(p_mem,0);
	args.pn = coprthr_memptr(pn_mem,0);
	args.v = coprthr_memptr(v_mem,0);
	args.fbinfo.smem_start = fix.smem_start;
	args.fbinfo.smem_len = fix.smem_len;
	args.fbinfo.line_length = fix.line_length;
	args.fbinfo.xres = var.xres;
	args.fbinfo.yres = var.yres;
	args.fbinfo.xres_virtual = var.xres_virtual;
	args.fbinfo.yres_virtual = var.yres_virtual;
	args.fbinfo.xoffset = var.xoffset;
	args.fbinfo.yoffset = var.yoffset;
	args.fbinfo.bits_per_pixel = var.bits_per_pixel;

	system("clear");
	coprthr_mpiexec(dd, cores, thr, &args, sizeof(args),0);

	// read back data from memory on device: with s>1 and an odd iteration
	// count the result is taken from pn_mem, otherwise from p_mem.
	// (iter % 2 != 0 has the same truth value as the former (iter+2)%2
	// without the risk of overflowing iter+2.)
	coprthr_mem_t p_mem_switch = (s>1 && iter%2 != 0) ? pn_mem : p_mem;
	coprthr_dread(dd,p_mem_switch,0,particles,N*sizeof(Particle),COPRTHR_E_WAIT);

	// release device buffers before closing the device
	coprthr_dfree(dd,p_mem);
	coprthr_dfree(dd,pn_mem);
	coprthr_dfree(dd,v_mem);

	coprthr_dclose(dd);
}
/*
 * Host driver for the "my_thread" function in ./memory_device.e32.
 *
 * Allocates three int arrays of SIZE elements plus an argument struct in
 * memory obtained from coprthr_dmalloc(), fills the arrays through the host
 * pointers returned by coprthr_memptr(), launches 16 threads, waits for them,
 * and prints one line per element.
 *
 * Returns 0 on completion, 1 if the device could not be opened.
 */
int main(int argc, char* argv[])
{
	int i;
	int n = SIZE;

	/* open device for threads */
	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	if (dd < 0) {
		/* nothing below can work without a device */
		printf("device open failed (dd=%d)\n",dd);
		return 1;
	}

	/* load the pre-built thread function */
	coprthr_program_t prg = coprthr_cc_read_bin("./memory_device.e32",0);
	coprthr_sym_t thr = coprthr_getsym(prg,"my_thread");

	printf("dd=%d prg=%p krn=%p\n",dd,prg,thr);

	/* allocate memory shared with coprocessor device */
	coprthr_mem_t aa_mem = coprthr_dmalloc(dd,n*sizeof(int),0);
	coprthr_mem_t bb_mem = coprthr_dmalloc(dd,n*sizeof(int),0);
	coprthr_mem_t cc_mem = coprthr_dmalloc(dd,n*sizeof(int),0);

	/* host-side pointers to the three buffers */
	int* aa = (int*)coprthr_memptr(aa_mem,0);
	int* bb = (int*)coprthr_memptr(bb_mem,0);
	int* cc = (int*)coprthr_memptr(cc_mem,0);

	/* set args to pass to thread on coprocessor device */
	coprthr_mem_t args_mem = coprthr_dmalloc(dd,sizeof(struct my_args),0);
	struct my_args* pargs = (struct my_args*)coprthr_memptr(args_mem,0);
	pargs->n = n;
	pargs->aa = aa;
	pargs->bb = bb;
	pargs->cc = cc;

	/* initialize A, B, and C arrays */
	for (i=0; i<n; i++) {
		aa[i] = i;
		bb[i] = 2*i;
		cc[i] = 3;
	}

	/* execute the thread function on 16 threads and wait for completion */
	coprthr_dexec(dd,16,thr,(void*)&args_mem, 0 );
	coprthr_dwait(dd);

	for (i=0; i<n; i++) {
		printf("%d: %d + %d = %d\n",i,aa[i],bb[i],cc[i]);
	}

	/* clean up */
	coprthr_dfree(dd,args_mem);
	coprthr_dfree(dd,aa_mem);
	coprthr_dfree(dd,bb_mem);
	coprthr_dfree(dd,cc_mem);

	coprthr_dclose(dd);

	return 0;
}
Exemple #3
0
/*
 * Run the stencil update on the Epiphany coprocessor.
 *
 * Uploads the ni*nj float grids A and B to device memory, launches the
 * "stencil_thread" symbol from ./mpi_tfunc.cbin.3.e32 on di*dj threads with
 * coprthr_mpiexec(), reads the B buffer back into B and prints it with
 * print_stencil(). Device buffers are released and the device is closed
 * before returning.
 *
 * A, B       host grids of ni*nj floats; B is overwritten with the device result
 * ni, nj     grid dimensions, forwarded to the thread function
 * di, dj     forwarded to the thread function; di*dj is the thread count
 * niter      forwarded to the thread function
 * w0..w4     forwarded unchanged to the thread function
 */
void update_stencil_epiphany(float* A, float* B, int ni, int nj, int di, int dj, int niter, float w0, float w1, float w2, float w3, float w4)
{

	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	printf("dd=%d\n",dd);
	if (dd<0) ERROR("device open failed\n");

	coprthr_mem_t A_mem = coprthr_dmalloc(dd,ni*nj*sizeof(float),0);
	coprthr_mem_t B_mem = coprthr_dmalloc(dd,ni*nj*sizeof(float),0);

	coprthr_program_t prg = coprthr_cc_read_bin("./mpi_tfunc.cbin.3.e32", 0);
	coprthr_sym_t thr = coprthr_getsym(prg,"stencil_thread");
	printf("prg=%p thr=%p\n",prg,thr);

	coprthr_dwrite(dd,A_mem,0,A,ni*nj*sizeof(float),COPRTHR_E_WAIT);
	coprthr_dwrite(dd,B_mem,0,B,ni*nj*sizeof(float),COPRTHR_E_WAIT); // should really copy this on device

	// argument block handed to every thread
	my_args_t args = {
		.ni = ni, .nj = nj,
		.di = di, .dj = dj,
		.niter = niter,
		.A = coprthr_memptr(A_mem,0),
		.B = coprthr_memptr(B_mem,0),
		.w0 = w0, .w1 = w1, .w2 = w2, .w3 = w3, .w4 = w4
	};

	coprthr_mpiexec(dd, di*dj, thr, &args, sizeof(args),0);

	coprthr_dread(dd,B_mem,0,B,ni*nj*sizeof(float),COPRTHR_E_WAIT);

	print_stencil(B, ni, nj);

	// release device resources; previously both buffers and the device
	// handle were left open on every call
	coprthr_dfree(dd,A_mem);
	coprthr_dfree(dd,B_mem);
	coprthr_dclose(dd);

}

/*
 * CPU implementation of the weighted 5-point stencil.
 *
 * Performs niter sweeps over the interior of an ni-by-nj grid stored row by
 * row (element (i,j) lives at index j*ni+i). Each sweep computes
 *
 *   out[x] = w0*in[x-1] + w1*in[x] + w2*in[x+1] + w3*in[x-ni] + w4*in[x+ni]
 *
 * and the two buffers swap roles after every sweep. On return the newest
 * interior values are always in the caller's B.
 *
 * A      input grid; also used as the second working buffer, so its interior
 *        is overwritten whenever niter >= 2
 * B      output grid
 * niter  number of sweeps; zero or negative performs no sweep and simply
 *        copies the interior of A into B
 *
 * This does not handle edges: border cells are never written, so they must
 * be initialized in both A and B by the caller.
 */
void update_stencil_cpu(float* A, float* B, int ni, int nj, int niter, float w0, float w1, float w2, float w3, float w4)
{
	float* src = A;
	float* dst = B;
	int i, j, iter;

	// A negative count used to make the countdown loop run until the
	// counter overflowed; treat it as "no sweeps" instead.
	if (niter < 0) niter = 0;

	for (iter = 0; iter < niter; iter++) {
		for (j=1; j<nj-1; j++) {
			for (i=1; i<ni-1; i++) {
				int x = j*ni+i;
				dst[x] = w0*src[x-1] + w1*src[x] + w2*src[x+1] + w3*src[x-ni] + w4*src[x+ni];
			}
		}
		// the buffer just written becomes the input of the next sweep
		float* tmp = dst;
		dst = src;
		src = tmp;
	}

	// After an even number of sweeps the newest values sit in the caller's A
	// (src == A, dst == B here); copy the interior across so B holds the result.
	if (niter%2 == 0) {
		for (j=1; j<nj-1; j++) {
			for (i=1; i<ni-1; i++) {
				dst[j*ni+i] = src[j*ni+i];
			}
		}
	}
}
/*
 * Stream-mode test driver: compiles `src` for the device, looks up the
 * "my_kern" kernel, and queues a sequence of non-blocking operations
 * (three writes, a kernel launch, a device-side copy between memc and memb,
 * a batched launch through coprthr_dnexec, and a read of memc). It then waits
 * once with coprthr_dwait() before printing the host arrays.
 *
 * Returns 0 on completion, 1 if the device cannot be opened or a host buffer
 * cannot be allocated.
 */
int main()
{
	int i;

	int dd = coprthr_dopen(TEST_COPRTHR_DEVICE,COPRTHR_O_STREAM);

	printf("dd=%d\n",dd);
	if (dd < 0) {
		printf("device open failed\n");
		return 1;
	}

	coprthr_program_t prg = coprthr_dcompile(dd,src,sizeof(src),"",0);
	coprthr_kernel_t krn = coprthr_getsym(prg,"my_kern");

	printf("prg=%p krn=%p\n",prg,krn);

	/* host buffers */
	float* a = malloc(SIZE*sizeof *a);
	float* b = malloc(SIZE*sizeof *b);
	float* c = malloc(SIZE*sizeof *c);
	if (!a || !b || !c) {
		/* free(NULL) is a no-op, so the partial-failure case needs no special handling */
		printf("host allocation failed\n");
		free(a);
		free(b);
		free(c);
		coprthr_dclose(dd);
		return 1;
	}

	for (i=0; i<SIZE; i++) {
		a[i] = 1.0f * i;
		b[i] = 2.0f * i;
		c[i] = 0.0f;
	}

	/* device buffers */
	coprthr_mem_t mema = coprthr_dmalloc(dd,SIZE*sizeof(float),0);
	coprthr_mem_t memb = coprthr_dmalloc(dd,SIZE*sizeof(float),0);
	coprthr_mem_t memc = coprthr_dmalloc(dd,SIZE*sizeof(float),0);

	coprthr_dwrite(dd,mema,0,a,SIZE*sizeof(float),COPRTHR_E_NOWAIT);
	coprthr_dwrite(dd,memb,0,b,SIZE*sizeof(float),COPRTHR_E_NOWAIT);
	coprthr_dwrite(dd,memc,0,c,SIZE*sizeof(float),COPRTHR_E_NOWAIT);

	/* kernel arguments are passed as an array of pointers to the memory handles */
	unsigned int nargs = 3;
	void* args[] = { &mema, &memb, &memc };
	unsigned int nthr = SIZE;

	coprthr_dexec(dd,krn,nargs,args,nthr,0,COPRTHR_E_NOWAIT);

	coprthr_dcopy(dd,memc,0,memb,0,SIZE*sizeof(float),COPRTHR_E_NOWAIT);

	/* per-kernel parameter vectors for the batched launch */
	coprthr_kernel_t v_krn[] = { krn, krn };
	unsigned int v_nargs[] = { nargs, nargs };
	void** v_args[] = { args, args };
	unsigned int v_nthr[] = { nthr, nthr };

	/* NOTE(review): the vectors hold two entries but the second argument is 1 -- confirm this is intended */
	coprthr_dnexec(dd,1,v_krn,v_nargs,v_args,v_nthr,0,COPRTHR_E_NOWAIT);

	coprthr_dread(dd,memc,0,c,SIZE*sizeof(float),COPRTHR_E_NOWAIT);

	/* everything above was queued without waiting; wait once before touching c */
	coprthr_dwait(dd);

	for (i=0; i<SIZE; i++) {
		printf("%f + %f = %f\n",a[i],b[i],c[i]);
	}

	coprthr_dfree(dd,mema);
	coprthr_dfree(dd,memb);
	coprthr_dfree(dd,memc);

	free(a);
	free(b);
	free(c);

	coprthr_dclose(dd);

	return 0;
}
/*
 * Stream-mode test driver using explicit events: compiles `src`, looks up
 * "my_kern", uploads four int arrays, waits on the four write events, then
 * queues a kernel launch and a read of memc. While those are pending the host
 * refills d and issues one more write of memd with COPRTHR_E_NOW, after which
 * it waits on the launch and read events and prints a, b and c.
 *
 * Returns 0 on completion, 1 if the device cannot be opened or a host buffer
 * cannot be allocated.
 */
int main()
{
	int i;

	int dd = coprthr_dopen(COPRTHR_DEVICE_X86_64,COPRTHR_O_STREAM);

	printf("dd=%d\n",dd);
	if (dd < 0) {
		printf("device open failed\n");
		return 1;
	}

	coprthr_program_t prg = coprthr_dcompile(dd,src,sizeof(src),"",0);
	coprthr_kernel_t krn = coprthr_getsym(prg,"my_kern");

	printf("prg=%p krn=%p\n",prg,krn);

	/* host buffers */
	int* a = malloc(SIZE*sizeof *a);
	int* b = malloc(SIZE*sizeof *b);
	int* c = malloc(SIZE*sizeof *c);
	int* d = malloc(SIZE*sizeof *d);
	if (!a || !b || !c || !d) {
		/* free(NULL) is a no-op, so the partial-failure case needs no special handling */
		printf("host allocation failed\n");
		free(a);
		free(b);
		free(c);
		free(d);
		coprthr_dclose(dd);
		return 1;
	}

	for (i=0; i<SIZE; i++) {
		a[i] = 1 * i;
		b[i] = 2 * i;
		c[i] = 0;
		d[i] = 0;
	}

	/* device buffers */
	coprthr_mem_t mema = coprthr_dmalloc(dd,SIZE*sizeof(int),0);
	coprthr_mem_t memb = coprthr_dmalloc(dd,SIZE*sizeof(int),0);
	coprthr_mem_t memc = coprthr_dmalloc(dd,SIZE*sizeof(int),0);
	coprthr_mem_t memd = coprthr_dmalloc(dd,SIZE*sizeof(int),0);

	/* ev[0..3]: uploads, ev[4]: kernel launch, ev[5]: read-back */
	coprthr_event_t ev[6];

	/* transfer sizes use sizeof(int) to match the buffers' element type
	   (they were previously written as sizeof(float)) */
	ev[0] = coprthr_dwrite(dd,mema,0,a,SIZE*sizeof(int),COPRTHR_E_NOWAIT);
	ev[1] = coprthr_dwrite(dd,memb,0,b,SIZE*sizeof(int),COPRTHR_E_NOWAIT);
	ev[2] = coprthr_dwrite(dd,memc,0,c,SIZE*sizeof(int),COPRTHR_E_NOWAIT);
	ev[3] = coprthr_dwrite(dd,memd,0,d,SIZE*sizeof(int),COPRTHR_E_NOWAIT);

	for (i=0; i<4; i++) {
		coprthr_dwaitev(dd,ev[i]);
	}

	/* kernel arguments are passed as an array of pointers to the memory handles */
	unsigned int nargs = 4;
	void* args[] = { &mema, &memb, &memc, &memd };
	unsigned int nthr = SIZE;

	ev[4] = coprthr_dexec(dd,krn,nargs,args,nthr,0,COPRTHR_E_NOWAIT);

	ev[5] = coprthr_dread(dd,memc,0,c,SIZE*sizeof(int),COPRTHR_E_NOWAIT);

	/* NOTE(review): this write uses COPRTHR_E_NOW while the launch and read
	   above are still pending -- presumably the point of the test; confirm */
	for (i=0; i<SIZE; i++) d[i] = 1;
	coprthr_dwrite(dd,memd,0,d,SIZE*sizeof(int),COPRTHR_E_NOW);

	for (i=4; i<6; i++) {
		coprthr_dwaitev(dd,ev[i]);
	}

	for (i=0; i<SIZE; i++) {
		printf("%d + %d = %d\n",a[i],b[i],c[i]);
	}

	/* release every device and host buffer (memd and d were previously leaked) */
	coprthr_dfree(dd,mema);
	coprthr_dfree(dd,memb);
	coprthr_dfree(dd,memc);
	coprthr_dfree(dd,memd);

	free(a);
	free(b);
	free(c);
	free(d);

	coprthr_dclose(dd);

	return 0;
}