コード例 #1
0
ファイル: main.c プロジェクト: 9578577/parallella-examples
/*
 * Advance the particle system on the Epiphany coprocessor.
 *
 * Uploads `particles` and `state` (N = n*s records each) to device memory,
 * launches the "nbody_thread" thread function through coprthr_mpiexec, and
 * copies the resulting particle records back into `particles`.
 *
 * particles  in/out: N particle records; overwritten with the device result
 * state      in: N per-particle state records; uploaded but never read back here
 * n, s       forwarded to the thread function; s also selects which binary is
 *            loaded and which device buffer is read back
 * dt, es     forwarded unchanged to the thread function
 * iter       forwarded as args.cnt
 * cores      passed as the second argument of coprthr_mpiexec
 *            (presumably the number of threads to launch -- confirm)
 *
 * ERROR is a project macro defined elsewhere; presumably it reports the
 * message and terminates -- confirm.
 *
 * NOTE(review): the three device buffers are never released in this function;
 * verify whether coprthr_dclose reclaims them or whether explicit frees are
 * required.
 */
void update_particles_epiphany(Particle* particles, ParticleV* state, int n, int s, float dt, float es, int iter, int cores)
{
	const int N = n*s;
	const size_t particle_bytes = N*sizeof(Particle);
	const size_t state_bytes = N*sizeof(ParticleV);

	// open device for threads
	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	printf("dd=%d\n",dd);
	if (dd<0) ERROR("device open failed\n");

	// s==1 uses the regular binary, anything else the special off-chip thread function
	coprthr_program_t prg;
	if (s==1) prg = coprthr_cc_read_bin("./mpi_tfunc.cbin.3.e32", 0);
	else prg = coprthr_cc_read_bin("./mpi_tfunc2.cbin.3.e32", 0);
	if (!prg) ERROR("reading thread function binary failed\n");

	coprthr_sym_t thr = coprthr_getsym(prg,"nbody_thread");
	printf("prg=%p thr=%p\n",prg,thr);
	if (!thr) ERROR("symbol nbody_thread not found\n");

	// write data to shared DRAM
	coprthr_mem_t p_mem = coprthr_dmalloc(dd,particle_bytes, 0);
	coprthr_dwrite(dd,p_mem,0,particles,particle_bytes,COPRTHR_E_WAIT);

	// second particle buffer (special off-chip memory); allocated but not initialized from the host
	coprthr_mem_t pn_mem = coprthr_dmalloc(dd,particle_bytes, 0);

	coprthr_mem_t v_mem = coprthr_dmalloc(dd,state_bytes, 0);
	coprthr_dwrite(dd,v_mem,0,state,state_bytes,COPRTHR_E_WAIT);

	// argument block handed to every thread
	my_args_t args;
	args.n = n;
	args.s = s;
	args.cnt = iter;
	args.dt = dt;
	args.es = es;
	args.p = coprthr_memptr(p_mem,0);
	args.pn = coprthr_memptr(pn_mem,0);
	args.v = coprthr_memptr(v_mem,0);

	// display geometry copied from the file-scope `fix` and `var` objects
	// (presumably Linux framebuffer screen info -- confirm)
	args.fbinfo.smem_start = fix.smem_start;
	args.fbinfo.smem_len = fix.smem_len;
	args.fbinfo.line_length = fix.line_length;
	args.fbinfo.xres = var.xres;
	args.fbinfo.yres = var.yres;
	args.fbinfo.xres_virtual = var.xres_virtual;
	args.fbinfo.yres_virtual = var.yres_virtual;
	args.fbinfo.xoffset = var.xoffset;
	args.fbinfo.yoffset = var.yoffset;
	args.fbinfo.bits_per_pixel = var.bits_per_pixel;

	// clear the terminal before launching
	system("clear");
	coprthr_mpiexec(dd, cores, thr, &args, sizeof(args),0);

	// read back data from memory on device: with s>1 and an odd iteration
	// count the result is taken from the second buffer, otherwise from the
	// first (presumably the off-chip variant alternates buffers -- confirm)
	coprthr_mem_t result_mem = (s>1 && iter%2 != 0) ? pn_mem : p_mem;
	coprthr_dread(dd,result_mem,0,particles,particle_bytes,COPRTHR_E_WAIT);

	coprthr_dclose(dd);
}
コード例 #2
0
/*
 * Host driver for the "my_thread" coprocessor example.
 *
 * Allocates three int arrays of SIZE elements plus an argument struct in
 * memory shared with the coprocessor, fills the arrays, runs the thread
 * function on 16 threads, prints every element of the three arrays and
 * releases all resources.
 *
 * Returns 0 on success, 1 if the device, the program binary or the thread
 * symbol could not be obtained.
 */
int main(int argc, char* argv[])
{
	(void)argc;
	(void)argv;

	const int n = SIZE;

	/* open device for threads */
	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	if (dd < 0) {
		fprintf(stderr,"device open failed (dd=%d)\n",dd);
		return 1;
	}

	/* load the precompiled thread function */
	coprthr_program_t prg = coprthr_cc_read_bin("./memory_device.e32",0);
	if (!prg) {
		fprintf(stderr,"reading ./memory_device.e32 failed\n");
		coprthr_dclose(dd);
		return 1;
	}

	coprthr_sym_t thr = coprthr_getsym(prg,"my_thread");

	printf("dd=%d prg=%p krn=%p\n",dd,prg,thr);

	if (!thr) {
		fprintf(stderr,"symbol my_thread not found\n");
		coprthr_dclose(dd);
		return 1;
	}

	/* allocate memory shared with coprocessor device */
	coprthr_mem_t aa_mem = coprthr_dmalloc(dd,n*sizeof(int),0);
	coprthr_mem_t bb_mem = coprthr_dmalloc(dd,n*sizeof(int),0);
	coprthr_mem_t cc_mem = coprthr_dmalloc(dd,n*sizeof(int),0);

	/* host-usable pointers to the shared buffers */
	int* aa = (int*)coprthr_memptr(aa_mem,0);
	int* bb = (int*)coprthr_memptr(bb_mem,0);
	int* cc = (int*)coprthr_memptr(cc_mem,0);

	/* set args to pass to thread on coprocessor device */
	coprthr_mem_t args_mem = coprthr_dmalloc(dd,sizeof(struct my_args),0);
	struct my_args* pargs = (struct my_args*)coprthr_memptr(args_mem,0);
	pargs->n = n;
	pargs->aa = aa;
	pargs->bb = bb;
	pargs->cc = cc;

	/* initialize A, B, and C arrays */
	for (int i = 0; i < n; i++) {
		aa[i] = i;
		bb[i] = 2*i;
		cc[i] = 3;
	}

	/* execute the thread function on 16 threads and wait for completion;
	 * the address of the args memory handle is what gets passed */
	coprthr_dexec(dd,16,thr,(void*)&args_mem, 0 );
	coprthr_dwait(dd);

	for (int i = 0; i < n; i++)
		printf("%d: %d + %d = %d\n",i,aa[i],bb[i],cc[i]);

	/* clean up */
	coprthr_dfree(dd,args_mem);
	coprthr_dfree(dd,aa_mem);
	coprthr_dfree(dd,bb_mem);
	coprthr_dfree(dd,cc_mem);

	coprthr_dclose(dd);

	return 0;
}
コード例 #3
0
ファイル: main.c プロジェクト: censix/mpi-epiphany
/*
 * Run the stencil update on the Epiphany coprocessor.
 *
 * Uploads the ni*nj float grids A and B to device memory, launches the
 * "stencil_thread" thread function through coprthr_mpiexec with di*dj as its
 * second argument, copies the device copy of B back into `B` and prints it
 * with print_stencil.
 *
 * A, B        host grids of ni*nj floats; B is overwritten with the result
 * ni, nj      grid dimensions
 * di, dj      forwarded to the thread function; their product is the
 *             second argument of coprthr_mpiexec (presumably the thread
 *             count -- confirm)
 * niter       forwarded to the thread function
 * w0..w4      stencil weights, forwarded to the thread function
 *
 * ERROR is a project macro defined elsewhere; presumably it reports the
 * message and terminates -- confirm.
 *
 * NOTE(review): this function neither closes the device handle nor releases
 * the two device buffers; verify whether that is intentional.
 */
void update_stencil_epiphany(float* A, float* B, int ni, int nj, int di, int dj, int niter, float w0, float w1, float w2, float w3, float w4)
{
	/* multiply in size_t so a large grid cannot overflow int */
	const size_t grid_bytes = (size_t)ni * (size_t)nj * sizeof(float);

	int dd = coprthr_dopen(COPRTHR_DEVICE_E32,COPRTHR_O_THREAD);
	printf("dd=%d\n",dd);
	if (dd<0) ERROR("device open failed\n");

	coprthr_mem_t A_mem = coprthr_dmalloc(dd,grid_bytes,0);
	coprthr_mem_t B_mem = coprthr_dmalloc(dd,grid_bytes,0);

	coprthr_program_t prg = coprthr_cc_read_bin("./mpi_tfunc.cbin.3.e32", 0);
	if (!prg) ERROR("reading thread function binary failed\n");

	coprthr_sym_t thr = coprthr_getsym(prg,"stencil_thread");
	printf("prg=%p thr=%p\n",prg,thr);
	if (!thr) ERROR("symbol stencil_thread not found\n");

	coprthr_dwrite(dd,A_mem,0,A,grid_bytes,COPRTHR_E_WAIT);
	coprthr_dwrite(dd,B_mem,0,B,grid_bytes,COPRTHR_E_WAIT); // should really copy this on device

	/* argument block handed to every thread */
	my_args_t args = {
		.ni = ni, .nj = nj,
		.di = di, .dj = dj,
		.niter = niter,
		.A = coprthr_memptr(A_mem,0),
		.B = coprthr_memptr(B_mem,0),
		.w0 = w0, .w1 = w1, .w2 = w2, .w3 = w3, .w4 = w4
	};

	coprthr_mpiexec(dd, di*dj, thr, &args, sizeof(args),0);

	/* only B is copied back; the host copy of A is left as it was */
	coprthr_dread(dd,B_mem,0,B,grid_bytes,COPRTHR_E_WAIT);

	print_stencil(B, ni, nj);

}

/*
 * Reference CPU implementation of the five-point weighted stencil.
 *
 * Performs `niter` sweeps over the interior of an ni-by-nj grid stored with
 * row stride ni. Each sweep computes
 *     dst[x] = w0*src[x-1] + w1*src[x] + w2*src[x+1] + w3*src[x-ni] + w4*src[x+ni]
 * and then the two buffers trade roles.
 *
 * A      in/out: initial grid; also used as scratch, so its interior is
 *        modified whenever niter >= 2
 * B      in/out: on return its interior holds the result of the last sweep
 * ni,nj  grid dimensions; border cells are never written and must already be
 *        initialized in both A and B
 * niter  number of sweeps; zero or a negative value performs no sweep and
 *        copies the interior of A into B
 */
void update_stencil_cpu(float* A, float* B, int ni, int nj, int niter, float w0, float w1, float w2, float w3, float w4)
{
	/* a negative count would otherwise never terminate cleanly */
	const int sweeps = niter > 0 ? niter : 0;

	float* src = A;
	float* dst = B;

	for (int sweep = 0; sweep < sweeps; sweep++) {
		for (int j = 1; j < nj-1; j++) {
			for (int i = 1; i < ni-1; i++) {
				const int x = j*ni + i;
				dst[x] = w0*src[x-1] + w1*src[x] + w2*src[x+1] + w3*src[x-ni] + w4*src[x+ni];
			}
		}

		/* the buffer just written becomes the input of the next sweep */
		float* tmp = dst;
		dst = src;
		src = tmp;
	}

	/* after an even number of sweeps the latest values live in A, so the
	 * interior is copied over to B to keep the result in B in all cases */
	if (sweeps % 2 == 0) {
		for (int j = 1; j < nj-1; j++) {
			for (int i = 1; i < ni-1; i++) {
				B[j*ni + i] = A[j*ni + i];
			}
		}
	}
}