/*
 * SPU entry point: DMA one block into local store, then copy it repeatedly.
 *
 * Reads three words from the inbound mailbox, in order: the repeat count,
 * the transfer size in bytes, and a third word that is consumed but not
 * used. It then fetches `data_size` bytes from effective address
 * `program_data_ea` into `array` and copies that buffer byte by byte into
 * `arr`, `rep` times.
 *
 * spe_id and env are not used. Always returns 0.
 */
int main(unsigned long long spe_id, unsigned long long program_data_ea,unsigned long long env)
{
	char array[MAX] __attribute__((aligned(128)));	/* DMA destination */
	char arr[MAX];					/* copy destination */
	const unsigned int tag = 1;
	const unsigned long int array_size = 32768;	/* wrap point of the destination index */
	unsigned long int rep;
	unsigned long int data_size;
	unsigned int chunk_size;
	unsigned int count;
	unsigned int count1;

	/* Reset the decrementer; nothing in this function reads it back. */
	spu_write_decrementer(0);

	rep = spu_read_in_mbox();
	data_size = spu_read_in_mbox();
	/* The third word is still read so the sender's message sequence is consumed. */
	(void)spu_read_in_mbox();

	chunk_size = (unsigned int)data_size;

	/*
	 * NOTE(review): chunk_size is not checked against MAX (the size of
	 * `array`), and the destination index below wraps at 32768 rather than
	 * at MAX - confirm the sender never exceeds either limit.
	 */
	mfc_get(array, (unsigned int)program_data_ea, chunk_size, tag, 0, 0);
	mfc_write_tag_mask(1 << tag);
	mfc_read_tag_status_any();

	for (count = 0; count < rep; count++) {
		for (count1 = 0; count1 < chunk_size; count1++) {
			arr[count1 % array_size] = array[count1];
		}
	}

	return 0;
}
int main() { while (1){ int res; ppu_addr_t program_data_ea = spu_read_in_mbox(); program_data_ea += ((ppu_addr_t)spu_read_in_mbox())<<32; res = handleCommand( program_data_ea ); // spu_write_out_mbox( res ); spu_write_out_intr_mbox( res ); } return 0; }
/* Code running on SPU */ int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused))) { deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id); uint32_t ea_mfc, mbox; // send ready message spu_write_out_mbox(SPU_READY); while (1) { /* Check mailbox */ mbox = spu_read_in_mbox(); deprintf("[SPU] Message is %u\n", mbox); switch (mbox) { case SPU_EXIT: deprintf("[SPU] fb_writer goes down...\n"); return 0; case SPU_START: break; default: deprintf("[SPU] Cannot handle message\n"); continue; } /* Tag Manager setup */ unsigned int tags; tags = mfc_multi_tag_reserve(5); if (tags == MFC_TAG_INVALID) { deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n"); return 0; } /* Framebuffer parms */ ea_mfc = spu_read_in_mbox(); deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc); spu_mfcdma32(&parms, (unsigned int)ea_mfc, sizeof(struct fb_writer_parms_t), tags, MFC_GET_CMD); deprintf("[SPU] argp = %u\n", (unsigned int)argp); DMA_WAIT_TAG(tags); /* Copy parms->data to framebuffer */ deprintf("[SPU] Copying to framebuffer started\n"); cpy_to_fb(tags); deprintf("[SPU] Copying to framebuffer done!\n"); mfc_multi_tag_release(tags, 5); deprintf("[SPU] fb_writer_spu... done!\n"); /* Send FIN msg */ spu_write_out_mbox(SPU_FIN); } return 0; }
/*
 * Waits for the message from the PPU that starts work on the current frame.
 *
 * Polls the inbound mailbox until a word is available, stores every word
 * read into *mbox_message, and returns once that word equals BEGIN.
 * Words other than BEGIN are discarded by the next read.
 */
static void wait_for_begin(uint32_t *mbox_message)
{
	for (;;) {
		/* Busy-wait until the mailbox reports at least one entry. */
		while (spu_stat_in_mbox() <= 0) {
		}

		*mbox_message = spu_read_in_mbox();
		if (*mbox_message == BEGIN) {
			return;
		}
	}
}
/*
 * SPU "hello world".
 *
 * Prints its id and argp, then reads one inbound mailbox word before
 * exiting. Per the original note, the wait exists so the PPU has enough
 * time to inspect the running context first. Any word other than 1 is
 * reported as invalid; the program returns 0 either way. envp is unused.
 */
int main(ull id, ull argp, ull envp)
{
	unsigned int command;

	printf(" [SPU]: Hello World! from %llu, argp = %llu\n", id, argp);

	/* Hold here until the PPU releases us. */
	command = spu_read_in_mbox();

	if (unlikely(command != 1)) {
		printf(" [SPU]: Invalid command received in mailbox\n");
	}

	return 0;
}
int main(uint64_t speid, uint64_t argp, uint64_t envp){ unsigned int data[NUM_STREAMS]; unsigned int num_spus = (unsigned int)argp, i, num_images; struct image my_image __attribute__ ((aligned(16))); int mode = (int)envp; speid = speid; //get rid of warning while(1){ num_images = 0; for (i = 0; i < NUM_STREAMS / num_spus; i++){ //assume NUM_STREAMS is a multiple of num_spus while(spu_stat_in_mbox() == 0); data[i] = spu_read_in_mbox(); if (!data[i]) return 0; num_images++; } for (i = 0; i < num_images; i++){ mfc_get(&my_image, data[i], sizeof(struct image), MY_TAG, 0, 0); mfc_write_tag_mask(1 << MY_TAG); mfc_read_tag_status_all(); switch(mode){ default: case MODE_SIMPLE: process_image_simple(&my_image); break; case MODE_2LINES: process_image_2lines(&my_image); break; case MODE_DOUBLE: process_image_double(&my_image); break; case MODE_DMALIST: process_image_dmalist(&my_image); break; } } data[0] = DONE; spu_write_out_intr_mbox(data[0]); } return 0; }
/*
 * SPU side of a fork hand-shake followed by a watchpoint/breakpoint trigger.
 *
 * Writes 0 to the interrupting outbound mailbox (the signal that the PPU
 * side should fork), waits for one inbound mailbox word (fork completed),
 * then stores 1 to `var` and calls func(). Returns 0.
 */
int
main (unsigned long long speid, unsigned long long argp,
      unsigned long long envp)
{
  /* Tell the PPU side to fork now.  */
  spu_write_out_intr_mbox (0);

  /* Block until the fork has completed; the word itself is not used.  */
  (void) spu_read_in_mbox ();

  /* This store is what a watchpoint on `var` observes.  */
  var = 1;

  /* This call is what a breakpoint on `func` observes.  */
  func ();

  return 0;
}
/*
 * SPU dispatcher for the "birthday" kernels.
 *
 * Posts SPE_BIRTHDAY_INITIALIZED, then serves inbound mailbox requests:
 *   SPE_BIRTHDAY_START    - run main2() and answer SPE_BIRTHDAY_FINISHED;
 *   SPE_BIRTHDAY_STARTMOD - run main2mod() and answer SPE_BIRTHDAY_FINISHED;
 *   SPE_BIRTHDAY_QUIT     - return 0.
 * Any other word is ignored. Both kernels receive this function's own
 * three arguments unchanged.
 */
int main(unsigned long long spe_id, unsigned long long program_data_ea, unsigned long long env)
{
	spu_write_out_mbox(SPE_BIRTHDAY_INITIALIZED);

	for (;;) {
		const unsigned int request = spu_read_in_mbox();

		if (request == SPE_BIRTHDAY_QUIT) {
			return 0;
		}

		if (request == SPE_BIRTHDAY_START) {
			main2(spe_id, program_data_ea, env);
			spu_write_out_mbox(SPE_BIRTHDAY_FINISHED);
		} else if (request == SPE_BIRTHDAY_STARTMOD) {
			main2mod(spe_id, program_data_ea, env);
			spu_write_out_mbox(SPE_BIRTHDAY_FINISHED);
		}
	}
}
/*
 * SPU entry point for the copy / scale / add / triad kernels.
 *
 * DMAs the argument block at effective address argp into `args`, then
 * serves inbound mailbox commands until SPU2_MSG_PPU_TO_SPU_EXIT arrives.
 * Every other command - including unrecognised ones, which are reported on
 * stderr - is acknowledged with SPU2_MSG_SPU_TO_PPU_DONE.
 * id and envp are unused. Returns 0.
 */
int main(ull id, ull argp, ull envp)
{
	unsigned int request;

	/* Fetch the arguments and wait for the transfer to finish. */
	mfc_get(&args, argp, sizeof(args), TAG, 0, 0);
	mfc_write_tag_mask(1 << TAG);
	mfc_read_tag_status_all();

	for (;;) {
		request = spu_read_in_mbox();

		if (unlikely(request == SPU2_MSG_PPU_TO_SPU_EXIT)) {
			break;
		}

		if (request == SPU2_MSG_PPU_TO_SPU_DO_COPY) {
			copy();
		} else if (request == SPU2_MSG_PPU_TO_SPU_DO_SCALE) {
			scale();
		} else if (request == SPU2_MSG_PPU_TO_SPU_DO_ADD) {
			add();
		} else if (request == SPU2_MSG_PPU_TO_SPU_DO_TRIAD) {
			triad();
		} else {
			fprintf(stderr, " [SPU]: Invalid command received in mailbox\n");
		}

		spu_write_out_mbox(SPU2_MSG_SPU_TO_PPU_DONE);
	}

	return 0;
}
/*
 * Hand-shake with the other side: writes 0 to the interrupting outbound
 * mailbox, then reads one word from the inbound mailbox and discards it.
 */
void
terminal_func ()
{
  spu_write_out_intr_mbox (0);

  /* Only the arrival of the reply matters, not its value.  */
  (void) spu_read_in_mbox ();
}
/* SPU ids 0..5 have work; SPU n owns the files suffixed with 'a' + n. */
enum { TTS_WORKER_COUNT = 6 };

/*
 * Rewrites `text` in place: every decimal digit is moved one position to
 * the left, overwriting the character that preceded it, and the digit's
 * old position becomes a space. A digit in the very first position has no
 * predecessor, so it is simply replaced by a space (the original code wrote
 * one byte before the buffer in that case).
 */
static void tts_shift_digits(char *text)
{
	size_t len = strlen(text);
	size_t i;

	for (i = 0; i < len; i++) {
		/* <ctype.h> functions require an unsigned char value. */
		if (isdigit((unsigned char)text[i])) {
			if (i > 0) {
				text[i - 1] = text[i];
			}
			text[i] = ' ';
		}
	}
}

/*
 * Returns the first line of `path` (including its newline, if any) in a
 * malloc'ed, NUL-terminated buffer sized from the file length, or NULL on
 * any I/O or allocation failure. An empty file yields an empty string.
 * The caller frees the result.
 */
static char *tts_read_first_line(const char *path)
{
	FILE *fp = fopen(path, "r");
	char *line = NULL;
	long size;

	if (fp == NULL) {
		return NULL;
	}
	if (fseek(fp, 0L, SEEK_END) != 0) {
		goto out;
	}
	size = ftell(fp);
	if (size < 0 || fseek(fp, 0L, SEEK_SET) != 0) {
		goto out;
	}

	/* +1: fgets() stores at most count-1 characters plus the terminator. */
	line = malloc((size_t)size + 1);
	if (line == NULL) {
		goto out;
	}
	if (fgets(line, (int)(size + 1), fp) == NULL) {
		line[0] = '\0';
	}
out:
	fclose(fp);
	return line;
}

/*
 * Performs the work item identified by `letter`:
 *   1. read the first line of "output<letter>.txt" and apply
 *      tts_shift_digits() to it;
 *   2. write the result to "spe<letter>.txt";
 *   3. read that file back as whitespace-separated tokens and write
 *      "DB/spe<letter>.sh" containing a single sndfile-concat command that
 *      lists "/root/Desktop/tts/DB/<token>.wav" for every token and ends
 *      with "output<letter>.wav;".
 * Returns 0 on success, -1 if a file cannot be opened or memory runs out.
 */
static int tts_build_concat_script(char letter)
{
	char input_name[16];
	char words_name[16];
	char script_name[16];
	char token[64];		/* matches the %63s conversion below */
	char *line;
	FILE *words;
	FILE *script;

	snprintf(input_name, sizeof input_name, "output%c.txt", letter);
	snprintf(words_name, sizeof words_name, "spe%c.txt", letter);
	snprintf(script_name, sizeof script_name, "DB/spe%c.sh", letter);

	line = tts_read_first_line(input_name);
	if (line == NULL) {
		return -1;
	}
	tts_shift_digits(line);

	words = fopen(words_name, "w");
	if (words == NULL) {
		free(line);
		return -1;
	}
	fputs(line, words);
	fclose(words);
	free(line);

	words = fopen(words_name, "r");
	if (words == NULL) {
		return -1;
	}
	script = fopen(script_name, "w");
	if (script == NULL) {
		fclose(words);
		return -1;
	}

	fprintf(script, "%s", "sndfile-concat ");
	/*
	 * Loop on the conversion result rather than feof(): a last token that
	 * ends exactly at end-of-file is still emitted, and the field width
	 * keeps an over-long token from overrunning `token`.
	 */
	while (fscanf(words, "%63s", token) == 1) {
		fprintf(script, "/root/Desktop/tts/DB/%s.wav ", token);
	}
	fprintf(script, "output%c.wav;", letter);

	fclose(words);
	fclose(script);
	return 0;
}

/*
 * SPU entry point: reads this SPU's index from the inbound mailbox, prints
 * it, and - for indices 0..5 - generates the sndfile-concat script for the
 * matching letter ('a' for 0 ... 'f' for 5). Other indices do nothing.
 * Returns 0 on success and 1 if the script could not be generated.
 */
int main(unsigned long long speid)
{
	/* The SPU index is delivered through the mailbox. */
	unsigned int spu_id = spu_read_in_mbox();

	printf("\n SPU %llx %u\n", speid, spu_id);

	if (spu_id < TTS_WORKER_COUNT) {
		char letter = (char)('a' + spu_id);

		if (tts_build_concat_script(letter) != 0) {
			fprintf(stderr, "SPU %u: could not build DB/spe%c.sh\n",
				spu_id, letter);
			return 1;
		}
	}

	return 0;
}
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) { int tgiy0[2]; int tgiy1[2]; int tgiu0[2]; int tgiu1[2]; int tgiv0[2]; int tgiv1[2]; int tgo0[2]; int tgo1[2]; tgiu1[0]=1; tgiu1[1]=2; tgo0[0]=3; tgo0[1]=4; tgiy0[0]=5; tgiy0[1]=6; tgiy1[0]=7; tgiy1[1]=8; tgiu0[0]=9; tgiu0[1]=10; tgiv0[0]=11; tgiv0[1]=12; tgiv1[1]=13; tgiv1[1]=14; tgo1[0]=15; tgo1[1]=16; int selOut = 0; int selIn = 0; int tag = 31; int LineSelIn=0; int LineSelOut=0; int selY0In = 0; int selY1In = 0; int selCrIn = 0; struct img_args *iargs; iargs =(struct img_args*)memalign(128,sizeof(*iargs)); unsigned long long Cp; int first=1; int waiting=0; unsigned long long Op; unsigned int msg; unsigned long long YIp,UIp,VIp,YOp; int crblock0; int crblock1; int srcsmallcroma=0; ; int noscale=1; static int crblockdst1; static int crblockdst0; scaler_settings_t sc; while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg==RUN){ fprintf(stderr,"spu_yuv2argb_scaler: Starting Up\n"); } dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process image printf("spu_yuv2argb_scaler: SRC width %d,DST width %d\n",iargs->srcW,iargs->dstW); printf("spu_yuv2argb_scaler: SRC height %d,DST height %d\n",iargs->srcH,iargs->dstH); printf("spu_yuv2argb_scaler: DST offset %d\n",iargs->offset); // bad fix for centering image on 1080p) //iargs->offset=(iargs->maxwidth-iargs->dstW)/2 + iargs->maxwidth*(1080-iargs->dstH)/2; vector unsigned char *widthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*4+16); vector unsigned char *widthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*4+16); vector unsigned char *crwidthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*2+16); vector unsigned char *crwidthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*2+16); vector float * weightWfilter0=(vector float*)memalign(128,MAXWIDTH*4+16); vector float * weightWfilter1=(vector float*)memalign(128,MAXWIDTH*4+16); float weightHfilter[MAXHEIGHT+1]; unsigned long 
long dmapos[MAXHEIGHT+2]; unsigned long long dmacromapos[MAXHEIGHT+2]; vector float * Ytemp0=(vector float *)memalign(128,MAXWIDTH*4+16); vector float * Ytemp1=(vector float *)memalign(128,MAXWIDTH*4+16); vector float * Utemp=(vector float *)memalign(128,MAXWIDTH*2+16); vector float * Vtemp=(vector float *)memalign(128,MAXWIDTH*2+16); int wfilterpos[MAXWIDTH+2]; int hfilterpos0[MAXHEIGHT+2]; int hfilterpos1[MAXHEIGHT+2]; int crwfilterpos[MAXWIDTH/2+2]; vector unsigned char *InputY0[2]; InputY0[0]=(vector unsigned char*)memalign(128,MAXWIDTH); InputY0[1]=(vector unsigned char*)memalign(128,MAXWIDTH); vector unsigned char *InputU0[2]; InputU0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputU0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputV0[2]; InputV0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputV0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputY1[2]; InputY1[0]=(vector unsigned char*)memalign(128,MAXWIDTH); InputY1[1]=(vector unsigned char*)memalign(128,MAXWIDTH); vector unsigned char *InputU1[2]; InputU1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputU1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputV1[2]; InputV1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputV1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char* Output0[2]; Output0[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output Output0[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output vector unsigned char* Output1[2]; Output1[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output Output1[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output while (msg!=STOP) { int h=0; int i; if (first) { crblock0=(iargs->srcW>>1)&~15; // rounded down crblock1=((iargs->srcW>>1) + 15)&~15; //rounded up crblockdst1=((iargs->dstW>>1) + 15)&~15;//destination size rounded up. 
crblockdst0=((iargs->dstW>>1) + 7)&~7;//destination size rounded up. initHFilter(iargs->srcW,iargs->srcH,iargs->dstH,hfilterpos0,hfilterpos1,weightHfilter,dmapos,dmacromapos); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[1]]/16.0,dmacromapos[hfilterpos1[0]]/16.0); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[1]]/16.0,dmacromapos[hfilterpos1[1]]/16.0); // // for (i=0;i < iargs->dstH>>1;i++) // { // // printf("Hfilterpos0 dst: %d, src:%d, weight:%f\n",i,hfilterpos0[i],weightHfilter[i]); // // printf("Hfilterpos1 dst: %d, src:%d, weight:%f\n",i,hfilterpos1[i],1.0-weightHfilter[i]); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[2*i+2]]/16.0,dmacromapos[hfilterpos1[2*i+2]]/16.0); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[2*i+3]]/16.0,dmacromapos[hfilterpos1[2*i+3]]/16.0); // } if ((iargs->srcW==iargs->dstW)&&(iargs->srcH==iargs->dstH)) { printf("spu_yuv2argb_scaler: No scaling proceeding with direct csc\n"); noscale=1; if ((iargs->srcW%32) != 0) { srcsmallcroma=1; sc.smallcroma=1; } } else { noscale=0; printf("spu_yuv2argb_scaler: Scaling, computing shuffle filters\n"); initWFilter(iargs->srcW,iargs->dstW,1,wfilterpos,widthfilter0,widthfilter1,weightWfilter0,weightWfilter1); /* for (i=0;i < iargs->dstW/4;i++) { printf("filterpos dst: %d, src:%d\n",i,wfilterpos[i]); printcharvec("widthfilter0",widthfilter0[i]); printcharvec("widthfilter1",widthfilter1[i]); printfvec("weightWfilter0",weightWfilter0[i]); printfvec("weightWfilter1",weightWfilter1[i]); }*/ srcsmallcroma=0; sc.smallcroma=0; if ((iargs->srcW%32) != 0) { sc.smallcroma=1; srcsmallcroma=1; initWcrFilter(iargs->srcW/2,iargs->dstW/2,1,crwfilterpos,crwidthfilter0,crwidthfilter1); printf("spu_yuv2argb_scaler: Computing Crshuffle filter\n"); // for (i=0;i < (iargs->dstW>>1)/4;i++) // { // printf("crwfilterpos dst: %d, src:%d, weight:%f\n",i,crwfilterpos[i]); // 
printcharvec("crwidthfilter0",crwidthfilter0[i]); // printcharvec("crwidthfilter1",crwidthfilter1[i]); // printfvec("weightWfilter0",weightWfilter0[i]); // printfvec("weightWfilter1",weightWfilter1[i]); // // } } sc.wWfilter0=weightWfilter0; sc.wWfilter1=weightWfilter1; sc.wfilterpos=wfilterpos; sc.sWfilter0=widthfilter0; sc.sWfilter1=widthfilter1; sc.crsWfilter0=crwidthfilter0; sc.crsWfilter1=crwidthfilter1; sc.crfilterpos=crwfilterpos; sc.smallcromaline0=0; sc.smallcromaline1=0; } first=0; printf("spu_yuv2argb_scaler: Initiation completed\n"); } YIp = iargs->Ystart[selIn]; UIp = iargs->Ustart[selIn]; VIp = iargs->Vstart[selIn]; Op = iargs->Output[selOut] + iargs->offset*4; LineSelOut=0; selY0In=0; selY1In=0; selCrIn=0; dmaGet(InputY0[0],YIp+dmapos[hfilterpos0[0]],iargs->srcW,tgiy0[0]); dmaGet(InputY1[0],YIp+dmapos[hfilterpos1[0]],iargs->srcW,tgiy1[0]); dmaGet(InputY0[1],YIp+dmapos[hfilterpos0[1]],iargs->srcW,tgiy0[1]); dmaGet(InputY1[1],YIp+dmapos[hfilterpos1[1]],iargs->srcW,tgiy1[1]); dmaGet(InputU0[0],UIp+dmacromapos[hfilterpos0[0]],crblock1,tgiu0[0]); dmaGet(InputU0[1],UIp+dmacromapos[hfilterpos0[1]],crblock1,tgiu0[1]); dmaGet(InputU1[0],UIp+dmacromapos[hfilterpos1[0]],crblock1,tgiu1[0]); dmaGet(InputU1[1],UIp+dmacromapos[hfilterpos1[1]],crblock1,tgiu1[1]); // dmaGet(InputV0[0],VIp+dmacromapos[hfilterpos0[0]],crblock1,tgiv0[0]); dmaGet(InputV0[1],VIp+dmacromapos[hfilterpos0[1]],crblock1,tgiv0[1]); dmaGet(InputV1[0],VIp+dmacromapos[hfilterpos1[0]],crblock1,tgiv1[0]); dmaGet(InputV1[1],VIp+dmacromapos[hfilterpos1[1]],crblock1,tgiv1[1]); LineSelOut=0; selY0In=0; selY1In=0; selCrIn=0; // printf("New image\n"); for (h=0; h < iargs->dstH>>1; h++) //we asume that output is allways h/2 { sc.width=iargs->dstW; sc.smallcroma=0; sc.smallcromaline0=0; sc.smallcromaline1=0; sc.wHfilter=weightHfilter[2*h]; dmaWaitTag(tgiy0[selY0In]); // printf("dma: %d\n",2*h+2); dmaWaitTag(tgiy1[selY1In]); // printf("dma: %d\n",2*h+2); sc.source00=InputY0[selY0In]; 
sc.source01=InputY1[selY1In]; sc.Output=Ytemp0; if (noscale) { unpack(&sc); } else { scale(&sc); } //first Y line scaled dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+2]],iargs->srcW,tgiy0[selY0In]); // printf("dma: %d\n",2*h+2); if (!noscale) { //if we are scaling we also need the second line dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+2]],iargs->srcW,tgiy1[selY1In]); } // printf("dma: %d\n",2*h+2); selY0In=selY0In^1; selY1In=selY1In^1; sc.wHfilter=weightHfilter[2*h+1]; dmaWaitTag(tgiy0[selY0In]); dmaWaitTag(tgiy1[selY0In]); sc.source00=InputY0[selY0In]; sc.source01=InputY1[selY0In]; sc.Output=Ytemp1; if (noscale) { unpack(&sc); } else { scale(&sc); } //second Y line scaled dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+3]],iargs->srcW,tgiy0[selY0In]); if(!noscale) { //if we are scaling we also need the second line dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+3]],iargs->srcW,tgiy1[selY1In]); } selY0In=selY0In^1; selY1In=selY1In^1; // printf("dma: %d\n",2*h+3); if (srcsmallcroma) //these settings applly for both U and V { sc.smallcroma=1; if ((hfilterpos0[h]&1)==1) { sc.smallcromaline0=1; } else { sc.smallcromaline0=0; } if ((hfilterpos1[h]&1)==1){ sc.smallcromaline1=1; } else { sc.smallcromaline1=0; } if (((hfilterpos0[h]&1)==0)&&((hfilterpos1[h]&1)==0)) { sc.smallcroma=0; //both lines are 128 bit alligned only when doing extreme downscaling can this happen } } // if (noscale) { // sc.width=crblockdst0;//crblockdst1; // } else { // sc.width=crblockdst0; // } sc.width=iargs->dstW>>1; sc.wHfilter=weightHfilter[h]; dmaWaitTag(tgiu0[selCrIn]); dmaWaitTag(tgiu1[selCrIn]); sc.Output=Utemp; sc.source00=InputU0[selCrIn]; sc.source01=InputU1[selCrIn]; if (noscale) { unpack(&sc); } else { scale(&sc); } dmaWaitTag(tgiv0[selCrIn]); dmaWaitTag(tgiv1[selCrIn]); sc.Output=Vtemp; sc.source00=InputV0[selCrIn]; sc.source01=InputV1[selCrIn]; if (noscale) { unpack(&sc); } else { scale(&sc); } 
dmaGet(InputV0[selCrIn],VIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiu0[selCrIn]); //this is allways pos 0 dmaGet(InputU0[selCrIn],UIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiv0[selCrIn]); if(!noscale) { //if we are scaling we also need the second line dmaGet(InputV1[selCrIn],VIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiu1[selCrIn]); dmaGet(InputU1[selCrIn],UIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiv1[selCrIn]); } selCrIn=selCrIn^1; dmaWaitTag(tgo0[LineSelOut]); dmaWaitTag(tgo1[LineSelOut]); yuv420toARGBfloat(Ytemp0,Ytemp1,Utemp,Vtemp,Output0[LineSelOut],Output1[LineSelOut],iargs->dstW,iargs->maxwidth); //colorspace convert results dmaPut(Output0[LineSelOut],Op,iargs->dstW*4,tgo0[LineSelOut]); Op=Op+iargs->maxwidth*4; dmaPut(Output1[LineSelOut],Op,iargs->dstW*4,tgo1[LineSelOut]); Op=Op+iargs->maxwidth*4; LineSelOut=LineSelOut^1; } dmaWaitTag(tgo0[LineSelOut^1]); //wait for last write. dmaWaitTag(tgo1[LineSelOut^1]); //wait for last write. // printf("Image done\n"); if (iargs->MessageForm==INTR) { while (spu_stat_out_intr_mbox() == 0); msg=RDY; spu_writech(SPU_WrOutIntrMbox, msg); waiting=1; } if (iargs->MessageForm==HARD) { while (spu_stat_out_mbox() == 0); msg=RDY; spu_write_out_mbox(msg); waiting=1; } // fprintf(stderr,"spu_yuvscaler: Waiting\n"); while (waiting){ while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg == RUN){ selOut = selOut ^ 1; // flips the output buffer pointers selIn = selIn ^ 1; // flips the input buffer pointers waiting=0; } else if (msg == STOP) { // fprintf(stderr,"spu_yuvscaler: Stopping\n"); waiting=0; } else if (msg == UPDATE) { // fprintf(stderr,"spu_yuvscaler: Update\n"); dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process the new image first=1; // update filters to reflect the new image! // selOut=0; // no need to change these. that can be done by the run. // selIn=0; } } } return 0; }
/*
 * Rendezvous with the other side: posts 0 to the outbound mailbox, then
 * reads one word from the inbound mailbox and discards it.
 */
static void barrier(void)
{
	/* announce arrival */
	spu_write_out_mbox(0);

	/* wait for the release word; its value is irrelevant */
	(void)spu_read_in_mbox();
}
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) { int tgi0[2]; int tgo0[2]; int tgio0[2]; tgi0[0]=1; tgi0[1]=2; tgio0[0]=11; tgio0[1]=12; tgo0[0]=13; tgo0[1]=14; /* tgo1[0]=15; tgo1[1]=16;*/ int selOut = 0; int selIn = 0; int msg=RUN; int waiting=0; int tag = 31; struct img_args *iargs; iargs =(struct img_args*)memalign(128,sizeof(*iargs)); dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); printf("spu_blit_yuv422_to_argb: SRC width %d,DST width %d\n",iargs->src_w,iargs->drw_w); printf("spu_blit_yuv422_to_argb: SRC height %d,DST height %d\n",iargs->src_h,iargs->drw_h); while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); // first=0; vector unsigned char *InOutbuffer[2]; vector unsigned char *Inbuffer[2]; vector unsigned char *Outbuffer[2]; int Outwidth=(4*iargs->drw_w+3)&~3; int Inwidth=(2*iargs->src_w+7)&~7; Inbuffer[0]=(vector unsigned char*)memalign(128,Inwidth); Inbuffer[1]=(vector unsigned char*)memalign(128,Inwidth); if (iargs->BLEND) { InOutbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); InOutbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); } Outbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); Outbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); unsigned long long Inp,Outp,InOutp; int i=0; // int update=1; while (msg!=STOP) { selOut = 0; selIn = 0; Inp=iargs->Inp0[0]; InOutp=iargs->Outp0[0]; Outp=iargs->Outp0[0]; dmaGet(Inbuffer[0],Inp,Inwidth,tgi0[0]); Inp=Inp+iargs->Istride[0]*2; dmaGet(Inbuffer[1],Inp,Inwidth,tgi0[1]); Inp=Inp+iargs->Istride[0]*2; // if (iargs->BLEND) // { // dmaGet(InOutbuffer[0],InOutp,Outwidth,tgio0[0]); // InOutp=InOutp+iargs->Ostride[0]*4; // dmaGet(InOutbuffer[1],InOutp,Outwidth,tgio0[1]); // InOutp=InOutp+iargs->Ostride[0]*4; // } selIn=0; selOut=0; for (i=0;i < iargs->drw_h ;i++) { dmaWaitTag(tgi0[selIn]); // if (iargs->BLEND) // dmaWaitTag(tgio0[selIn]); dmaWaitTag(tgo0[selOut]); if (iargs->SourceFormat==YUY2||iargs->SourceFormat==YUYV422) { 
yuv422_to_argb(Inbuffer[selIn],Outbuffer[selOut],iargs->drw_w); // printf("spe_blitter: YUV422->ARGB\n"); } //yuv420_to_yuv2(Yinbuffer[selIn],Uinbuffer[selIn],Vinbuffer[selIn],Outbuffer[selOut],iargs->Istride[0]); // if (iargs->BLEND) // blend(InOutbuffer[selIn],OutBuffer[selOut],iargs->ALPHA,iargs->SourceFormat); dmaPut(Outbuffer[selOut],Outp,Outwidth,tgo0[selOut]); // if (iargs->BLEND){ // dmaGet(InOutbuffer[selIn],InOutp,Outwidth,tgio0[selIn]); // InOutp=InOutp+iargs->Ostride[0]; // // } dmaGet(Inbuffer[selIn],Inp,Inwidth,tgi0[selIn]); Inp=Inp+iargs->Istride[0]*2; Outp=Outp+iargs->Ostride[0]*4; selIn=selIn^1; selOut=selOut^1; } while (spu_stat_out_intr_mbox() == 0); msg=RDY; spu_writech(SPU_WrOutIntrMbox, msg); waiting=1; while (waiting){ while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg == RUN){ waiting=0; } else if (msg == STOP) { waiting=0; } else if (msg == UPDATE) { tag=30; dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process the new image // // update=1; // update filters to reflect the new image! // Outwidth=(iargs->drw_w+3)&~3; // Inwidth=(iargs->src_w+7)&~7; // free(Inbuffer[0]); // free(Inbuffer[1]); // // free(Outbuffer[0]); // free(Outbuffer[1]); // // Inbuffer[0]=(vector unsigned char*)memalign(128,Inwidth); // Inbuffer[1]=(vector unsigned char*)memalign(128,Inwidth); // // if (iargs->BLEND) // { // free(InOutbuffer[0]); // free(InOutbuffer[1]); // InOutbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); // InOutbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); // } // // Outbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); // Outbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); } } } return 0; }
int main( unsigned long long spe_id, unsigned long long ppu_vector_a, unsigned long long ppu_vector_b) { int i, iter, buf_idx, vec_idx; unsigned long long ppu_vector_bases[2] _ALIG(128); vector float * pchunk_a, * pchunk_b; vector float g_vec = {0,0,0,0}; ppu_vector_bases[0] = ppu_vector_a; ppu_vector_bases[1] = ppu_vector_b; const unsigned int spu_num = spu_read_in_mbox(); unsigned long long get_edge_bytes = spu_num * SUBVEC_SZ_BYTES; float buffers[NBUFFERS * BUF_SZ_FLOATS] _ALIG(128); int buffer_tags[NBUFFERS][2] _ALIG(128); //int buffer_tags[NBUFFERS]; for (iter = 0; iter < NBUFFERS; ++iter) { buffer_tags[iter][0] = mfc_tag_reserve(); buffer_tags[iter][1] = mfc_tag_reserve(); } // first mfc_get for all for (buf_idx = 0; buf_idx < NBUFFERS; ++buf_idx) { for (vec_idx = 0; vec_idx < 2; ++vec_idx) { mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx), ppu_vector_bases[vec_idx] + get_edge_bytes, CHUNK_SZ_BYTES, buffer_tags[buf_idx][vec_idx], 0, 0); } } get_edge_bytes += CHUNK_SZ_BYTES; //printf("subvec_sz-chunks: %d\n", SUBVEC_SZ_CHUNKS); //printf("%d==%d\n", MAXITER*NBUFFERS*CHUNK_SZ_FLOATS, SUBVEC_SZ_FLOATS); int chunksleft = SUBVEC_SZ_CHUNKS; while(chunksleft!=0) { for (buf_idx = 0; chunksleft !=0 && buf_idx < NBUFFERS; ++buf_idx) { const int tag_mask = (1 << buffer_tags[buf_idx][0]) | (1 << buffer_tags[buf_idx][1]); mfc_write_tag_mask(tag_mask); mfc_read_tag_status_all(); pchunk_a = buf_ptr_vecfloat(buffers, buf_idx, 0); pchunk_b = buf_ptr_vecfloat(buffers, buf_idx, 1); for (i = 0; i < CHUNK_SZ_FLOATVECS; ++i) { g_vec = spu_madd(pchunk_a[i], pchunk_b[i], g_vec); } // move this mfc_get to end of loop, check get_edge_bytes variable dynamics if (likely(iter != MAXITER - 1)) { for (vec_idx = 0; vec_idx < 2; ++vec_idx) { mfc_get(buf_ptr_float(buffers, buf_idx, vec_idx), ppu_vector_bases[vec_idx] + get_edge_bytes, CHUNK_SZ_BYTES, buffer_tags[buf_idx][vec_idx], 0, 0); } } get_edge_bytes += CHUNK_SZ_BYTES; --chunksleft; } } for (iter = 0; iter < NBUFFERS; ++iter) { 
mfc_tag_release(buffer_tags[iter][0]); mfc_tag_release(buffer_tags[iter][1]); } float_uint_t retval; retval.f = spu_extract(g_vec, 0) + spu_extract(g_vec, 1) + spu_extract(g_vec, 2) + spu_extract(g_vec, 3); //printf("retval: %f\n", retval.f); spu_write_out_mbox(retval.i); return 0; }
// -- fetch the next word from my inbound mailbox -----------------------------
// Returns the value delivered by spu_read_in_mbox(), converted to int.
int as_mbx_read ()
{
    const int mail = spu_read_in_mbox ();

    return mail;
}
/*
 * SPU worker loop: processes bitset slices selected by mailbox messages.
 *
 * param - run parameters; the fields read here are proc (used in log output),
 *         bitset_size (byte count of every buffer and DMA transfer),
 *         bitset_subsets (multiplied by the mailbox value to form the offset)
 *         and the base addresses bs_in_addr, bs_out_addr, bs_use_addr and
 *         bs_def_addr.
 *
 * Setup: four buffers (in, out, use, def) of param.bitset_size bytes are
 * allocated with malloc_align(); if any allocation fails the program prints a
 * message and calls exit(1).  Four MFC tags are then reserved; a failed
 * reservation is only reported, execution continues.
 *
 * Each iteration:
 *   1. read a word from the inbound mailbox; UINT_MAX prints a message and
 *      returns from the function;
 *   2. offset = param.bitset_subsets * inbox;
 *   3. mfc_get the four bitsets from their base address + offset, one tag
 *      each, and wait for all four tags;
 *   4. D(...) blocks print the addresses and the set bits among the first 100
 *      positions (presumably compiled in for debug builds only - confirm
 *      against the definition of D);
 *   5. bitset_megaop(param, in, out, use, def);
 *   6. mfc_put the `in` buffer back to bs_in_addr + offset and wait on tag_1;
 *   7. write the received mailbox value to the outbound interrupt mailbox.
 *
 * tag_id is declared but never used.  On the return path the buffers are not
 * freed and the tags are not released.
 *
 * NOTE(review): in this excerpt the last closing brace ends the while loop;
 * the closing brace of the function itself is not present in the visible
 * text.
 */
void work(param_t param) { printf("SPU[%u] work()\n", param.proc); unsigned int inbox, offset; unsigned int *in = malloc_align(param.bitset_size, ALIGN_EXP); unsigned int *out = malloc_align(param.bitset_size, ALIGN_EXP); unsigned int *use = malloc_align(param.bitset_size, ALIGN_EXP); unsigned int *def = malloc_align(param.bitset_size, ALIGN_EXP); if(in == NULL || out == NULL || use == NULL || def == NULL) { printf("malloc_align() failed\n"); exit(1); } unsigned tag_1, tag_2, tag_3, tag_4; unsigned int tag_id; /* Reserve a tag for application usage */ if ((tag_1 = mfc_tag_reserve()) == MFC_TAG_INVALID) { printf("ERROR: unable to reserve a tag_1\n"); } if ((tag_2 = mfc_tag_reserve()) == MFC_TAG_INVALID) { printf("ERROR: unable to reserve a tag_2\n"); } if ((tag_3 = mfc_tag_reserve()) == MFC_TAG_INVALID) { printf("ERROR: unable to reserve a tag_3\n"); } if ((tag_4 = mfc_tag_reserve()) == MFC_TAG_INVALID) { printf("ERROR: unable to reserve a tag_4\n"); } while(1) { inbox = spu_read_in_mbox(); if(inbox == UINT_MAX) { printf("SPU[%u] received exit signal.. 
exiting.\n", param.proc); return; } offset = param.bitset_subsets*inbox; mfc_get(in, (unsigned int) (param.bs_in_addr + offset), param.bitset_size, tag_1, 0, 0); mfc_get(out, (unsigned int) (param.bs_out_addr + offset), param.bitset_size, tag_2, 0, 0); mfc_get(use, (unsigned int) (param.bs_use_addr + offset), param.bitset_size, tag_3, 0, 0); mfc_get(def, (unsigned int) (param.bs_def_addr + offset), param.bitset_size, tag_4, 0, 0); mfc_write_tag_mask(1 << tag_1 | 1 << tag_2 | 1 << tag_3 | 1 << tag_4); mfc_read_tag_status_all(); D(printf("SPU[%d] index: %u bitset_subsets: %u offset: %u\n", param.proc, inbox, param.bitset_subsets, offset); printf("SPU[%d]\t&use: %p\n\t&def: %p\n\t&out: %p\n\t&in: %p\n", param.proc, (void*)param.bs_use_addr, (void*)param.bs_def_addr, (void*)param.bs_out_addr, (void*)param.bs_in_addr); void *tmp_ptr = (void*) (param.bs_use_addr + offset); printf("SPU[%d] read\t\t&%p = use(%p)={", param.proc, (void*)use, tmp_ptr); for (int i = 0; i < 100; ++i){ if ( bitset_get_bit(use, i) ) { printf("%d ", i); } } printf("}\n"); tmp_ptr = (void*) (param.bs_def_addr + offset); printf("SPU[%d] read\t\t&%p = def(%p)={", param.proc, (void*)def, tmp_ptr); for (int i = 0; i < 100; ++i){ if ( bitset_get_bit(def, i) ) { printf("%d ", i); } } printf("}\n"); tmp_ptr = (void*) (param.bs_out_addr + offset); printf("SPU[%d] read\t\t&%p = out(%p)={", param.proc, (void*)out, tmp_ptr); for (int i = 0; i < 100; ++i){ if ( bitset_get_bit(out, i) ) { printf("%d ", i); } } printf("}\n"); tmp_ptr = (void*) (param.bs_in_addr + offset); printf("SPU[%d] read\t\t&%p = in (%p)={", param.proc, (void*)in, tmp_ptr); for (int i = 0; i < 100; ++i){ if ( bitset_get_bit(in, i) ) { printf("%d ", i); } } printf("}\n")); bitset_megaop(param, in, out, use, def); D(printf("SPU[%d] calculated\tin={", param.proc); for (int i = 0; i < 100; ++i){ if ( bitset_get_bit(in, i) ) { printf("%d ", i); } } printf("}\n");) mfc_put(in, (unsigned int) (param.bs_in_addr + offset), param.bitset_size, tag_1, 
0, 0); mfc_write_tag_mask(1 << tag_1); mfc_read_tag_status_all(); spu_write_out_intr_mbox(inbox); }
void setup_spu(unsigned int spu_ctrlblock_addr){ ctrl_dma_tag = mfc_tag_reserve(); // Get SPU control block mfc_get(&spu_ctrlblock, spu_ctrlblock_addr, sizeof(spu_ctrlblock), ctrl_dma_tag, 0,0); mfc_write_tag_mask(1<<ctrl_dma_tag); mfc_read_tag_status_all(); mcb = (merger_ctrlblock_t*)memalign(128,spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t) ); md = (merger_data_t*)malloc(spu_ctrlblock.num_mergers * sizeof(merger_data_t)); // Set addresses int i; for(i = 0; i < spu_ctrlblock.num_mergers; i++){ // Set head/tail vector addresses mcb[i].idx_addr[LEFT] = (unsigned int) &md[i].idx[LEFT][HEAD]; mcb[i].idx_addr[RIGHT] = (unsigned int) &md[i].idx[RIGHT][HEAD]; mcb[i].idx_addr[OUT] = (unsigned int) &md[i].idx[PARENT][TAIL]; } // Send merger control blocks mfc_put(mcb, spu_ctrlblock.ctrlblocks_addr, spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t), ctrl_dma_tag, 0,0); mfc_read_tag_status_all(); // Mail PPU telling it we've set the addresses spu_write_out_mbox(1); // Wait for go-ahead mail spu_read_in_mbox(); // Get merger blocks mfc_get(mcb, spu_ctrlblock.ctrlblocks_addr, spu_ctrlblock.num_mergers * sizeof(merger_ctrlblock_t), ctrl_dma_tag, 0,0); mfc_read_tag_status_all(); int buffer_idx = 0; for(i = 0; i < spu_ctrlblock.num_mergers; i++){ // Add start address of buffer array to all block addresses if(mcb[i].id != 0) mcb[i].block_addr[OUT] += (unsigned int) &buffer[0]; if(!mcb[i].leaf_node){ mcb[i].block_addr[LEFT] += (unsigned int) &buffer[0]; mcb[i].block_addr[RIGHT] += (unsigned int) &buffer[0]; } // Setup merger data md[i].held_tag[LEFT] = 32; md[i].held_tag[RIGHT] = 32; md[i].held_tag[OUT] = 32; md[i].num_pulled[LEFT] = 0; md[i].num_pulled[RIGHT] = 0; md[i].mm_depleted[LEFT] = 0; md[i].mm_depleted[RIGHT] = 0; md[i].depleted[LEFT] = 0; md[i].depleted[RIGHT] = 0; md[i].done = 0; md[i].consumed[LEFT] = 0; md[i].consumed[RIGHT] = 0; md[i].idx[LEFT][HEAD] = spu_splats(0); md[i].idx[LEFT][TAIL] = spu_splats(0); md[i].idx[RIGHT][HEAD] = spu_splats(0); 
md[i].idx[RIGHT][TAIL] = spu_splats(0); md[i].idx[OUT][HEAD] = spu_splats(0); md[i].idx[OUT][TAIL] = spu_splats(0); md[i].idx[PARENT][HEAD] = spu_splats(0); md[i].idx[PARENT][TAIL] = spu_splats(0); md[i].buffer[LEFT] = &buffer[buffer_idx]; buffer_idx += mcb[i].buffer_size[LEFT]; md[i].buffer[RIGHT] = &buffer[buffer_idx]; buffer_idx += mcb[i].buffer_size[RIGHT]; md[i].buffer[OUT] = &buffer[buffer_idx]; buffer_idx += mcb[i].buffer_size[OUT]; } // Setup internal nodes for(i = 0; i < spu_ctrlblock.num_mergers; i++){ if(mcb[i].local[OUT] < 255){ int parent_idx = mcb[i].local[OUT]; int side = (mcb[i].id+1)&1; md[i].buffer[OUT] = md[parent_idx].buffer[side]; mcb[i].buffer_size[OUT] = mcb[parent_idx].buffer_size[side]; } } }