Ejemplo n.º 1
0
/* Code running on SPU */
int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused)))
{
	deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id);
	uint32_t ea_mfc, mbox;
	// send ready message
	spu_write_out_mbox(SPU_READY);

	while (1) {
		/* Check mailbox */
		mbox = spu_read_in_mbox();
		deprintf("[SPU] Message is %u\n", mbox);
		switch (mbox) {
			case SPU_EXIT:
				deprintf("[SPU] fb_writer goes down...\n");
				return 0;
			case SPU_START:
				break;
			default:
				deprintf("[SPU] Cannot handle message\n");
				continue;
		}

		/* Tag Manager setup */
		unsigned int tags;
		tags = mfc_multi_tag_reserve(5);
		if (tags == MFC_TAG_INVALID) {
			deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n");
			return 0;
		}

		/* Framebuffer parms */
		ea_mfc = spu_read_in_mbox();
		deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc);
		spu_mfcdma32(&parms, (unsigned int)ea_mfc,
				sizeof(struct fb_writer_parms_t), tags,
				MFC_GET_CMD);
		deprintf("[SPU] argp = %u\n", (unsigned int)argp);
		DMA_WAIT_TAG(tags);

		/* Copy parms->data to framebuffer */
		deprintf("[SPU] Copying to framebuffer started\n");
		cpy_to_fb(tags);
		deprintf("[SPU] Copying to framebuffer done!\n");

		mfc_multi_tag_release(tags, 5);
		deprintf("[SPU] fb_writer_spu... done!\n");
		/* Send FIN msg */
		spu_write_out_mbox(SPU_FIN);
	}

	return 0;
}
Ejemplo n.º 2
0
/*
 * SPU entry point: mailbox-driven dispatcher.
 *
 * Posts SPE_BIRTHDAY_INITIALIZED, then loops on the inbound mailbox:
 *   SPE_BIRTHDAY_START    -> main2(),    then posts SPE_BIRTHDAY_FINISHED
 *   SPE_BIRTHDAY_STARTMOD -> main2mod(), then posts SPE_BIRTHDAY_FINISHED
 *   SPE_BIRTHDAY_QUIT     -> returns 0
 * Any other message is silently ignored.
 *
 * spe_id, program_data_ea and env are forwarded unchanged to the workers.
 */
int main(unsigned long long spe_id, unsigned long long program_data_ea, unsigned long long env) 
{
	spu_write_out_mbox(SPE_BIRTHDAY_INITIALIZED);

	for (;;) {
		const unsigned int request = spu_read_in_mbox();

		if (request == SPE_BIRTHDAY_QUIT) {
			return 0;
		}

		if (request == SPE_BIRTHDAY_START) {
			main2(spe_id, program_data_ea, env);
		} else if (request == SPE_BIRTHDAY_STARTMOD) {
			main2mod(spe_id, program_data_ea, env);
		} else {
			/* Unknown message: nothing ran, so nothing is reported. */
			continue;
		}

		/* Both work requests acknowledge completion the same way. */
		spu_write_out_mbox(SPE_BIRTHDAY_FINISHED);
	}
}
Ejemplo n.º 3
0
int 
main(unsigned long long id) {
  vector unsigned int x = get_vector_param_3();
  vector unsigned int count  = (vector unsigned int){0,0,0,0};
  vector unsigned int result = (vector unsigned int){0,0,0,0};

  spu_ready();

  count  = popc(x);
  result = reduce_word(count);
  
  spu_write_out_mbox(spu_extract(result, 0));

  return SPU_SUCCESS;
}
Ejemplo n.º 4
0
/*
 * SPU entry point: command dispatcher for the copy/scale/add/triad kernels.
 *
 * First DMAs sizeof(args) bytes from effective address `argp` into the
 * file-level `args` and waits for that transfer.  It then serves mailbox
 * commands until SPU2_MSG_PPU_TO_SPU_EXIT is received.  Every other
 * command -- including an unrecognised one, which is only reported on
 * stderr -- is answered with SPU2_MSG_SPU_TO_PPU_DONE.
 *
 * `id` and `envp` are not used.  Always returns 0.
 */
int main(ull id, ull argp, ull envp)
{
	/* Fetch the argument block and block until it has arrived. */
	mfc_get(&args, argp, sizeof(args), TAG, 0, 0);
	mfc_write_tag_mask(1 << TAG);
	mfc_read_tag_status_all();

	for (;;) {
		const unsigned int command = spu_read_in_mbox();

		/* Exit is the rare case and is not acknowledged. */
		if (unlikely(SPU2_MSG_PPU_TO_SPU_EXIT == command)) {
			return 0;
		}

		if (command == SPU2_MSG_PPU_TO_SPU_DO_COPY) {
			copy();
		} else if (command == SPU2_MSG_PPU_TO_SPU_DO_SCALE) {
			scale();
		} else if (command == SPU2_MSG_PPU_TO_SPU_DO_ADD) {
			add();
		} else if (command == SPU2_MSG_PPU_TO_SPU_DO_TRIAD) {
			triad();
		} else {
			fprintf(stderr, " [SPU]: Invalid command received in mailbox\n");
		}

		/* Acknowledge, whether or not the command was recognised. */
		spu_write_out_mbox(SPU2_MSG_SPU_TO_PPU_DONE);
	}
}
Ejemplo n.º 5
0
/*
 * SPU entry point of the YUV420 -> ARGB scaler / colour-space converter.
 *
 * Protocol as implemented below:
 *   1. Block on the inbound mailbox for the first message.
 *   2. DMA the struct img_args from effective address `argp` (`envp` is
 *      passed to dmaGetnWait() as the transfer size).
 *   3. Until a STOP message is seen: (re)build the filter tables when
 *      needed, convert one image two output lines at a time with
 *      double-buffered DMA, report RDY through the mailbox selected by
 *      iargs->MessageForm, then wait for RUN (flip buffers and go on),
 *      STOP (leave) or UPDATE (reload img_args and rebuild the filters).
 *
 * `speid` is not used.  Always returns 0.
 *
 * NOTE(review): none of the memalign() results are checked -- confirm
 * that the local-store budget makes failure impossible.
 */
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) 
{
	/*
	 * DMA tag ids.  Each stream (Y/U/V source line 0 and 1, output line
	 * 0 and 1) owns two tags, one per half of its double buffer.
	 */
	int tgiy0[2];
	int tgiy1[2];
	int tgiu0[2];
	int tgiu1[2];
	int tgiv0[2];
	int tgiv1[2];
	int tgo0[2];
	int tgo1[2];

	tgiu1[0]=1;
	tgiu1[1]=2;
	tgo0[0]=3;
	tgo0[1]=4;
	tgiy0[0]=5;
	tgiy0[1]=6;
	tgiy1[0]=7;
	tgiy1[1]=8;
	tgiu0[0]=9;
	tgiu0[1]=10;
	tgiv0[0]=11;
	tgiv0[1]=12;
	tgiv1[0]=13;	/* every tag slot must be set: [0] is used for the first V fetch */
	tgiv1[1]=14;
	tgo1[0]=15;
	tgo1[1]=16;
	
	int selOut = 0;		/* which iargs->Output[] buffer is written */
	int selIn = 0;		/* which iargs->{Y,U,V}start[] buffer is read */
	int tag = 31;		/* tag used for the img_args transfer */
	int LineSelOut=0;	/* half of the output double buffer in use */
		
	int selY0In = 0;	/* half of the Y double buffers in use */
	int selY1In = 0;
	int selCrIn = 0;	/* half of the U/V double buffers in use */
	struct img_args *iargs;
	
	iargs =(struct img_args*)memalign(128,sizeof(*iargs));

	int first=1;		/* filters must be (re)computed before the next image */
	int waiting=0;
	unsigned long long Op;
	unsigned int msg;
	unsigned long long YIp,UIp,VIp;

	int crblock0;
	int crblock1;
	int srcsmallcroma=0;
	int noscale=1;

 	static	int crblockdst1;
	static	int crblockdst0;
	scaler_settings_t sc;
	
	/* Spin until the first mailbox message is available, then read it. */
	while (spu_stat_in_mbox() == 0);
	msg=spu_read_in_mbox();
	if (msg==RUN){	
		fprintf(stderr,"spu_yuv2argb_scaler: Starting Up\n");
	}

	dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting necessary data to process image
	printf("spu_yuv2argb_scaler: SRC width %d,DST width %d\n",iargs->srcW,iargs->dstW);
	printf("spu_yuv2argb_scaler: SRC height %d,DST height %d\n",iargs->srcH,iargs->dstH);
	
	printf("spu_yuv2argb_scaler: DST offset %d\n",iargs->offset);

	/* Horizontal shuffle patterns and weights for luma and chroma. */
	vector unsigned char *widthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*4+16);
	vector unsigned char *widthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*4+16);

	vector unsigned char *crwidthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*2+16);
	vector unsigned char *crwidthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*2+16);	

	vector float * weightWfilter0=(vector float*)memalign(128,MAXWIDTH*4+16);
	vector float * weightWfilter1=(vector float*)memalign(128,MAXWIDTH*4+16);

	float weightHfilter[MAXHEIGHT+1];

	/* Per-source-line offsets added to the Y and U/V base addresses. */
	unsigned long long dmapos[MAXHEIGHT+2];
	unsigned long long dmacromapos[MAXHEIGHT+2];

	/* Intermediate float lines handed to the colour-space converter. */
	vector float * Ytemp0=(vector float *)memalign(128,MAXWIDTH*4+16);
	vector float * Ytemp1=(vector float *)memalign(128,MAXWIDTH*4+16);
	vector float * Utemp=(vector float *)memalign(128,MAXWIDTH*2+16);
	vector float * Vtemp=(vector float *)memalign(128,MAXWIDTH*2+16);

	int wfilterpos[MAXWIDTH+2];
	int hfilterpos0[MAXHEIGHT+2];
	int hfilterpos1[MAXHEIGHT+2];
	int crwfilterpos[MAXWIDTH/2+2];

	/* Double-buffered source lines: XXX0 and XXX1 are the two lines blended vertically. */
	vector unsigned char *InputY0[2];
	InputY0[0]=(vector unsigned char*)memalign(128,MAXWIDTH); 
	InputY0[1]=(vector unsigned char*)memalign(128,MAXWIDTH); 

	vector unsigned char *InputU0[2];
	InputU0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 
	InputU0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 
	
	vector unsigned char *InputV0[2];
	InputV0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 
	InputV0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16);

	vector unsigned char *InputY1[2];
	InputY1[0]=(vector unsigned char*)memalign(128,MAXWIDTH); 
	InputY1[1]=(vector unsigned char*)memalign(128,MAXWIDTH); 

	vector unsigned char *InputU1[2];
	InputU1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 
	InputU1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16);

	vector unsigned char *InputV1[2];
	InputV1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 
	InputV1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); 	

	/* Double-buffered ARGB output lines (4 bytes per pixel). */
	vector unsigned char* Output0[2];
	Output0[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4);	// 1line output
	Output0[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4);	// 1line output

	vector unsigned char* Output1[2];
	Output1[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4);	// 1line output
	Output1[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4);	// 1line output
	
	while (msg!=STOP) 
	{
		int h=0;
		
		if (first)
		{
			crblock0=(iargs->srcW>>1)&~15; // rounded down
			crblock1=((iargs->srcW>>1) + 15)&~15; //rounded up
			crblockdst1=((iargs->dstW>>1) + 15)&~15;//destination size rounded up.
			crblockdst0=((iargs->dstW>>1) + 7)&~7;//destination size rounded up.

			initHFilter(iargs->srcW,iargs->srcH,iargs->dstH,hfilterpos0,hfilterpos1,weightHfilter,dmapos,dmacromapos);
			
			if ((iargs->srcW==iargs->dstW)&&(iargs->srcH==iargs->dstH))
			{
				printf("spu_yuv2argb_scaler: No scaling proceeding with direct csc\n");
				noscale=1;
				/* Reset first so a previous image's setting cannot survive an UPDATE. */
				srcsmallcroma=0;
				sc.smallcroma=0;
				if ((iargs->srcW%32) != 0)
				{
					srcsmallcroma=1;
					sc.smallcroma=1;
				}
			} else {
				noscale=0;
				printf("spu_yuv2argb_scaler: Scaling, computing shuffle filters\n");
				initWFilter(iargs->srcW,iargs->dstW,1,wfilterpos,widthfilter0,widthfilter1,weightWfilter0,weightWfilter1);

				srcsmallcroma=0;
				sc.smallcroma=0;
				if ((iargs->srcW%32) != 0)
				{
					sc.smallcroma=1;
					srcsmallcroma=1;	
					initWcrFilter(iargs->srcW/2,iargs->dstW/2,1,crwfilterpos,crwidthfilter0,crwidthfilter1);	
					printf("spu_yuv2argb_scaler: Computing Crshuffle filter\n");
				}
				
				sc.wWfilter0=weightWfilter0;
				sc.wWfilter1=weightWfilter1;
				sc.wfilterpos=wfilterpos;
				sc.sWfilter0=widthfilter0; 
				sc.sWfilter1=widthfilter1;
				sc.crsWfilter0=crwidthfilter0;
				sc.crsWfilter1=crwidthfilter1;
				sc.crfilterpos=crwfilterpos;

				sc.smallcromaline0=0;
				sc.smallcromaline1=0;
			}
			first=0;
			printf("spu_yuv2argb_scaler: Initiation completed\n");
		}

		/* Base addresses of the planes for the currently selected buffers. */
		YIp = iargs->Ystart[selIn];
		UIp = iargs->Ustart[selIn];
		VIp = iargs->Vstart[selIn];
		Op = iargs->Output[selOut] + iargs->offset*4;

		/* Prefetch the source lines needed for the first two output line pairs. */
		dmaGet(InputY0[0],YIp+dmapos[hfilterpos0[0]],iargs->srcW,tgiy0[0]); 
		dmaGet(InputY1[0],YIp+dmapos[hfilterpos1[0]],iargs->srcW,tgiy1[0]); 
		dmaGet(InputY0[1],YIp+dmapos[hfilterpos0[1]],iargs->srcW,tgiy0[1]); 
		dmaGet(InputY1[1],YIp+dmapos[hfilterpos1[1]],iargs->srcW,tgiy1[1]); 

		dmaGet(InputU0[0],UIp+dmacromapos[hfilterpos0[0]],crblock1,tgiu0[0]);
		dmaGet(InputU0[1],UIp+dmacromapos[hfilterpos0[1]],crblock1,tgiu0[1]);
		dmaGet(InputU1[0],UIp+dmacromapos[hfilterpos1[0]],crblock1,tgiu1[0]);	
		dmaGet(InputU1[1],UIp+dmacromapos[hfilterpos1[1]],crblock1,tgiu1[1]); 

		dmaGet(InputV0[0],VIp+dmacromapos[hfilterpos0[0]],crblock1,tgiv0[0]);
		dmaGet(InputV0[1],VIp+dmacromapos[hfilterpos0[1]],crblock1,tgiv0[1]);
		dmaGet(InputV1[0],VIp+dmacromapos[hfilterpos1[0]],crblock1,tgiv1[0]);	
		dmaGet(InputV1[1],VIp+dmacromapos[hfilterpos1[1]],crblock1,tgiv1[1]);

		LineSelOut=0;
		selY0In=0; 
		selY1In=0;
		selCrIn=0;

		for (h=0; h < iargs->dstH>>1; h++) //we assume that output is always h/2
		{
			sc.width=iargs->dstW;
			sc.smallcroma=0;
			sc.smallcromaline0=0;
			sc.smallcromaline1=0;

			/* ---- first luma line of the pair ---- */
			sc.wHfilter=weightHfilter[2*h];
			dmaWaitTag(tgiy0[selY0In]);
			dmaWaitTag(tgiy1[selY1In]);
			sc.source00=InputY0[selY0In];
			sc.source01=InputY1[selY1In];
			sc.Output=Ytemp0;
			
			if (noscale) {
				unpack(&sc);
			} else {
				scale(&sc);	
			}

			/* Refill the buffers just consumed with the lines needed two steps ahead. */
			dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+2]],iargs->srcW,tgiy0[selY0In]); 
			if (!noscale) { //if we are scaling we also need the second line
				dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+2]],iargs->srcW,tgiy1[selY1In]); 
			}
			selY0In=selY0In^1;
			selY1In=selY1In^1;

			/* ---- second luma line of the pair ---- */
			sc.wHfilter=weightHfilter[2*h+1];
			dmaWaitTag(tgiy0[selY0In]);
			dmaWaitTag(tgiy1[selY1In]);	/* selY1In always equals selY0In here */
			sc.source00=InputY0[selY0In];
			sc.source01=InputY1[selY1In];
			sc.Output=Ytemp1;
			if (noscale) {
				unpack(&sc);
			} else {
				scale(&sc);	
			}
			dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+3]],iargs->srcW,tgiy0[selY0In]); 
			if(!noscale) { //if we are scaling we also need the second line
				dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+3]],iargs->srcW,tgiy1[selY1In]); 
			}
			selY0In=selY0In^1;
			selY1In=selY1In^1;

			/* ---- chroma: one U and one V line per pair of luma lines ---- */
			if (srcsmallcroma) //these settings apply for both U and V
			{	
				sc.smallcroma=1;
				if ((hfilterpos0[h]&1)==1) {
					sc.smallcromaline0=1;	
				} else {
					sc.smallcromaline0=0;
				}
				if ((hfilterpos1[h]&1)==1){
					sc.smallcromaline1=1;
				} else {
					sc.smallcromaline1=0;
				} 	
				if (((hfilterpos0[h]&1)==0)&&((hfilterpos1[h]&1)==0))
				{
					sc.smallcroma=0; //both lines are 128 bit aligned; only when doing extreme downscaling can this happen
				}
			}
			sc.width=iargs->dstW>>1;
			sc.wHfilter=weightHfilter[h];
			
			dmaWaitTag(tgiu0[selCrIn]);
			dmaWaitTag(tgiu1[selCrIn]);
			sc.Output=Utemp;
			sc.source00=InputU0[selCrIn];
			sc.source01=InputU1[selCrIn];
		
			if (noscale) {
				unpack(&sc);
			} else {
				scale(&sc);	
			}

			dmaWaitTag(tgiv0[selCrIn]);
			dmaWaitTag(tgiv1[selCrIn]);
			sc.Output=Vtemp;
			sc.source00=InputV0[selCrIn];
			sc.source01=InputV1[selCrIn];
			
			if (noscale) {
				unpack(&sc);
			} else {
				scale(&sc);	
			}

			/*
			 * Refill the chroma buffers.  Every buffer is fetched on its own
			 * tag so that the dmaWaitTag() calls above really cover the data
			 * that is read right after them.
			 */
			dmaGet(InputV0[selCrIn],VIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiv0[selCrIn]);
			dmaGet(InputU0[selCrIn],UIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiu0[selCrIn]); 

			if(!noscale) {	//if we are scaling we also need the second line
				dmaGet(InputV1[selCrIn],VIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiv1[selCrIn]);
				dmaGet(InputU1[selCrIn],UIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiu1[selCrIn]); 
			} 

			selCrIn=selCrIn^1;

			/* The output buffers may still be in flight from two iterations ago. */
			dmaWaitTag(tgo0[LineSelOut]);
			dmaWaitTag(tgo1[LineSelOut]);
							
			yuv420toARGBfloat(Ytemp0,Ytemp1,Utemp,Vtemp,Output0[LineSelOut],Output1[LineSelOut],iargs->dstW,iargs->maxwidth); //colorspace convert results
			
			dmaPut(Output0[LineSelOut],Op,iargs->dstW*4,tgo0[LineSelOut]);
			Op=Op+iargs->maxwidth*4;
			
			dmaPut(Output1[LineSelOut],Op,iargs->dstW*4,tgo1[LineSelOut]);
			Op=Op+iargs->maxwidth*4;
			
			LineSelOut=LineSelOut^1;
		} 
		dmaWaitTag(tgo0[LineSelOut^1]); //wait for last write.
		dmaWaitTag(tgo1[LineSelOut^1]); //wait for last write.

		/* Report the finished image on the mailbox type the caller asked for. */
		if (iargs->MessageForm==INTR)
		{
			while (spu_stat_out_intr_mbox() == 0);
			msg=RDY;
			spu_writech(SPU_WrOutIntrMbox, msg);
			waiting=1;
		}

		if (iargs->MessageForm==HARD)
		{
			while (spu_stat_out_mbox() == 0);
			msg=RDY;
			spu_write_out_mbox(msg);
			waiting=1;
		}

		/* Wait for instructions; UPDATE keeps waiting until RUN or STOP follows. */
		while (waiting){
			
			while (spu_stat_in_mbox() == 0);
			msg=spu_read_in_mbox();
			
			if (msg == RUN){
				selOut = selOut ^ 1; // flips the output buffer pointers
				selIn = selIn ^ 1; // flips the input buffer pointers	
				waiting=0;
			}
			else if (msg == STOP)
			{
				waiting=0;
			}
			else if (msg == UPDATE)
			{
				dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting necessary data to process the new image	
				first=1; // update filters to reflect the new image!
			}
		}
	}
	
	return 0;
}
Ejemplo n.º 6
0
/*
 * Mailbox rendezvous: post a 0 on the outbound mailbox, then block until
 * a word arrives on the inbound mailbox.  The received value is discarded.
 */
static void barrier(void)
{
  spu_write_out_mbox(0);

  /* Only the arrival matters, not the payload. */
  (void)spu_read_in_mbox();
}
Ejemplo n.º 7
0
/*
 * SPU entry point: partial dot product of two vectors held in main memory.
 *
 * ppu_vector_a / ppu_vector_b are the effective addresses of the two
 * operand vectors.  The SPU reads its index from the inbound mailbox and
 * works on the sub-vector that starts spu_num * SUBVEC_SZ_BYTES bytes in.
 * That sub-vector is streamed in SUBVEC_SZ_CHUNKS chunks of CHUNK_SZ_BYTES
 * through NBUFFERS local buffers; each buffer holds one chunk of A and one
 * of B and owns one DMA tag per operand.
 *
 * The four lanes of the accumulator are summed and the bit pattern of the
 * resulting float is posted on the outbound mailbox.
 *
 * `spe_id` is not used.  Always returns 0.
 *
 * NOTE(review): mfc_tag_reserve() failures are not checked -- confirm that
 * 2 * NBUFFERS tags are always available.
 */
int
main(
    unsigned long long spe_id,
    unsigned long long ppu_vector_a,
    unsigned long long ppu_vector_b)
{
    int i, iter, buf_idx, vec_idx;
    unsigned long long ppu_vector_bases[2] _ALIG(128);
    vector float * pchunk_a, * pchunk_b;
    vector float g_vec = {0,0,0,0};

    ppu_vector_bases[0] = ppu_vector_a;
    ppu_vector_bases[1] = ppu_vector_b;

    const unsigned int spu_num = spu_read_in_mbox();
    /* Byte offset of the next chunk that has not been requested yet. */
    unsigned long long get_edge_bytes = spu_num * SUBVEC_SZ_BYTES;

    float buffers[NBUFFERS * BUF_SZ_FLOATS] _ALIG(128);
    int buffer_tags[NBUFFERS][2] _ALIG(128);

    for (iter = 0; iter < NBUFFERS; ++iter) {
        buffer_tags[iter][0] = mfc_tag_reserve();
        buffer_tags[iter][1] = mfc_tag_reserve();
    }

    const int total_chunks = SUBVEC_SZ_CHUNKS;

    /*
     * Prime the pipeline: buffer k receives chunk k.  The offset advances
     * per buffer so that no two buffers are loaded with the same chunk,
     * and no more chunks are requested than the sub-vector contains.
     */
    for (buf_idx = 0; buf_idx < NBUFFERS && buf_idx < total_chunks; ++buf_idx) {
        for (vec_idx = 0; vec_idx < 2; ++vec_idx) {
            mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx),
                    ppu_vector_bases[vec_idx] + get_edge_bytes,
                    CHUNK_SZ_BYTES,
                    buffer_tags[buf_idx][vec_idx],
                    0, 0);
        }
        get_edge_bytes += CHUNK_SZ_BYTES;
    }

    int chunksleft = total_chunks;
    while(chunksleft!=0) {
        for (buf_idx = 0; chunksleft !=0 && buf_idx < NBUFFERS; ++buf_idx) {
            const int tag_mask = (1 << buffer_tags[buf_idx][0])
                                 | (1 << buffer_tags[buf_idx][1]);

            /* Wait until both operand chunks of this buffer have arrived. */
            mfc_write_tag_mask(tag_mask);
            mfc_read_tag_status_all();

            pchunk_a = buf_ptr_vecfloat(buffers, buf_idx, 0);
            pchunk_b = buf_ptr_vecfloat(buffers, buf_idx, 1);

            for (i = 0; i < CHUNK_SZ_FLOATVECS; ++i) {
                g_vec = spu_madd(pchunk_a[i], pchunk_b[i], g_vec);
            }

            /*
             * Refill this buffer only while chunks remain unrequested.
             * Besides the chunk just consumed, NBUFFERS - 1 chunks are
             * already resident or in flight in the other buffers, so a
             * new request is needed exactly when chunksleft > NBUFFERS.
             */
            if (likely(chunksleft > NBUFFERS)) {
                for (vec_idx = 0; vec_idx < 2; ++vec_idx) {
                    mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx),
                            ppu_vector_bases[vec_idx] + get_edge_bytes,
                            CHUNK_SZ_BYTES,
                            buffer_tags[buf_idx][vec_idx],
                            0, 0);
                }
                get_edge_bytes += CHUNK_SZ_BYTES;
            }
            --chunksleft;
        }
    }

    for (iter = 0; iter < NBUFFERS; ++iter) {
        mfc_tag_release(buffer_tags[iter][0]);
        mfc_tag_release(buffer_tags[iter][1]);
    }

    /* Horizontal sum of the accumulator; the mailbox carries the raw bits. */
    float_uint_t retval;
    retval.f =
        spu_extract(g_vec, 0) +
        spu_extract(g_vec, 1) +
        spu_extract(g_vec, 2) +
        spu_extract(g_vec, 3);

    spu_write_out_mbox(retval.i);

    return 0;
}
Ejemplo n.º 8
0
void setup_spu(unsigned int spu_ctrlblock_addr){
  ctrl_dma_tag = mfc_tag_reserve();

  // Get SPU control block
  mfc_get(&spu_ctrlblock,
	  spu_ctrlblock_addr,
	  sizeof(spu_ctrlblock),
	  ctrl_dma_tag,
	  0,0);

  mfc_write_tag_mask(1<<ctrl_dma_tag);
  mfc_read_tag_status_all();

  mcb = (merger_ctrlblock_t*)memalign(128,spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t) );
  md = (merger_data_t*)malloc(spu_ctrlblock.num_mergers * sizeof(merger_data_t));

  // Set addresses
  int i;
  for(i = 0; i < spu_ctrlblock.num_mergers; i++){
    // Set head/tail vector addresses
    mcb[i].idx_addr[LEFT] = (unsigned int) &md[i].idx[LEFT][HEAD];
    mcb[i].idx_addr[RIGHT] = (unsigned int) &md[i].idx[RIGHT][HEAD];
    mcb[i].idx_addr[OUT] = (unsigned int) &md[i].idx[PARENT][TAIL];
  }

  // Send merger control blocks
  mfc_put(mcb,
	  spu_ctrlblock.ctrlblocks_addr,
	  spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t),
	  ctrl_dma_tag,
	  0,0);

  mfc_read_tag_status_all();

  // Mail PPU telling it we've set the addresses
  spu_write_out_mbox(1);

  // Wait for go-ahead mail
  spu_read_in_mbox();

  // Get merger blocks
  mfc_get(mcb,
	  spu_ctrlblock.ctrlblocks_addr,
	  spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t),
	  ctrl_dma_tag,
	  0,0);

  mfc_read_tag_status_all();

  int buffer_idx = 0;
  for(i = 0; i < spu_ctrlblock.num_mergers; i++){
    // Add start address of buffer array to all block addresses
    if(mcb[i].id != 0)
      mcb[i].block_addr[OUT] += (unsigned int) &buffer[0];

    if(!mcb[i].leaf_node){
      mcb[i].block_addr[LEFT] += (unsigned int) &buffer[0];
      mcb[i].block_addr[RIGHT] += (unsigned int) &buffer[0];
    }

    // Setup merger data
    md[i].held_tag[LEFT] = 32;
    md[i].held_tag[RIGHT] = 32;
    md[i].held_tag[OUT] = 32;
    md[i].num_pulled[LEFT] = 0;
    md[i].num_pulled[RIGHT] = 0;
    md[i].mm_depleted[LEFT] = 0;
    md[i].mm_depleted[RIGHT] = 0;
    md[i].depleted[LEFT] = 0;
    md[i].depleted[RIGHT] = 0;
    md[i].done = 0;
    md[i].consumed[LEFT] = 0;
    md[i].consumed[RIGHT] = 0;

    md[i].idx[LEFT][HEAD] = spu_splats(0);
    md[i].idx[LEFT][TAIL] = spu_splats(0);
    md[i].idx[RIGHT][HEAD] = spu_splats(0);
    md[i].idx[RIGHT][TAIL] = spu_splats(0);
    md[i].idx[OUT][HEAD] = spu_splats(0);
    md[i].idx[OUT][TAIL] = spu_splats(0);
    md[i].idx[PARENT][HEAD] = spu_splats(0);
    md[i].idx[PARENT][TAIL] = spu_splats(0);

    md[i].buffer[LEFT] = &buffer[buffer_idx];
    buffer_idx += mcb[i].buffer_size[LEFT];
    md[i].buffer[RIGHT] = &buffer[buffer_idx];
    buffer_idx += mcb[i].buffer_size[RIGHT];
    md[i].buffer[OUT] = &buffer[buffer_idx];
    buffer_idx += mcb[i].buffer_size[OUT];
  }

  // Setup internal nodes
  for(i = 0; i < spu_ctrlblock.num_mergers; i++){
    if(mcb[i].local[OUT] < 255){
      int parent_idx = mcb[i].local[OUT];
      int side = (mcb[i].id+1)&1;
      md[i].buffer[OUT] = md[parent_idx].buffer[side];
      mcb[i].buffer_size[OUT] = mcb[parent_idx].buffer_size[side];
    }
  }
}