int main(unsigned long long spe, unsigned long long argp, unsigned long long envp) { int i; int tag = 1; /* DMA Transfer 1 : GET input/output parameters */ spu_mfcdma64(&abs_params, mfc_ea2h(argp), mfc_ea2l(argp), sizeof(abs_params_t), tag, MFC_GET_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); /* DMA Transfer 2 : GET input data */ spu_mfcdma64(in_spe, mfc_ea2h(abs_params.ea_in), mfc_ea2l(abs_params.ea_in), abs_params.size * sizeof(float), tag, MFC_GET_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); /* Calculate absolute values */ for (i = 0; i < abs_params.size; i++) { if (in_spe[i] > 0) { out_spe[i] = in_spe[i]; } else { out_spe[i] = in_spe[i] * -1; } } /* DMA Transfer 3 : PUT output data */ spu_mfcdma64(out_spe, mfc_ea2h(abs_params.ea_out), mfc_ea2l(abs_params.ea_out), abs_params.size * sizeof(float), tag, MFC_PUT_CMD); spu_writech(MFC_WrTagMask, 1 << tag); spu_mfcstat(MFC_TAG_UPDATE_ALL); return 0; }
/*
 * Save, Step 14:
 *    Enqueue an MFC_SYNC command (tag 0).
 *
 * The command is issued by writing the tag id and then the command opcode
 * to the MFC channels. lscsa_ea is not used by this step.
 */
static inline void enqueue_sync(addr64 lscsa_ea)
{
    const unsigned int sync_tag = 0;
    const unsigned int sync_cmd = 0xCC;

    spu_writech(MFC_TagID, sync_tag);
    spu_writech(MFC_Cmd, sync_cmd);
}
/*
 * Send an event from this SPU thread through event port `spup`.
 *
 * data1 is written to the outbound mailbox, then the port number combined
 * with the low bits of data0 (selected by EVENT_DATA0_MASK) is written to
 * the outbound interrupt mailbox. The reply word read from the inbound
 * mailbox is returned.
 *
 * Returns 0x80010002 if spup is greater than EVENT_PORT_MAX_NUM, and
 * 0x8001000A if the inbound mailbox already holds data (the reply could
 * not be told apart from it). In both cases nothing is written.
 */
int spu_thread_send_event(uint8_t spup, uint32_t data0, uint32_t data1)
{
    uint32_t val;

    if (spup > EVENT_PORT_MAX_NUM)
        return 0x80010002;
    if (spu_readchcnt(SPU_RdInMbox) > 0)
        return 0x8001000A;

    /*
     * Build the message only after spup has been validated, and shift in
     * uint32_t: a uint8_t operand is promoted to signed int, so shifting
     * an out-of-range port could overflow a signed value.
     */
    val = ((uint32_t)spup << EVENT_PORT_SHIFT) | (data0 & EVENT_DATA0_MASK);

    spu_writech(SPU_WrOutMbox, data1);
    spu_writech(SPU_WrOutIntrMbox, val);

    return (int)spu_readch(SPU_RdInMbox);
}
/*
 * Loads the program info - blocks until done.
 *
 * ea   : effective address of the spe_program_info_t to fetch
 * info : destination in local store
 *
 * After the transfer completes, info->speId is also copied to the speid
 * variable so it is available for debugging output.
 */
void load_program_info(unsigned long long ea, spe_program_info_t *info)
{
    const int tag_mask = 1 << SPUDMA_PROGRAMINFO;

    /* kick off the GET: (ls_addr, ea_h, ea_l, size, tag_id, cmd) */
    spu_mfcdma64(info,
                 mfc_ea2h(ea),
                 mfc_ea2l(ea),
                 sizeof(spe_program_info_t),
                 SPUDMA_PROGRAMINFO,
                 MFC_GET_CMD);

    /* stall until the tag group reports completion */
    spu_writech(MFC_WrTagMask, tag_mask);
    mfc_read_tag_status_all();

    /* keep a copy of the SPE id for debugging purposes */
    speid = info->speId;

#if defined(_DEBUG) && _DEBUG > 1
    printf("Program info:\n\tSpe ID: %d\n\tNum Pixels: %d\n\tSpp: %d\n\tNum Spes %d\n\tDepth: %d\n",
           info->speId,
           info->numPixels,
           info->samplesPerPixel,
           info->numSpes,
           info->depth);
#endif
}
/*
 * SPE entry point for the shared-counter example.
 *
 * Repeats 10000 times: GET the int at effective address argp into counter,
 * increment it locally, PUT it back to the same address. Each transfer is
 * waited on before the next statement runs. The read-modify-write is made
 * of separate GET and PUT transfers. spe and envp are not used.
 * Always returns 0.
 */
int main(unsigned long long spe, unsigned long long argp, unsigned long long envp)
{
    enum { ROUNDS = 10000, DMA_TAG = 0 };
    int round = 0;

    while (round < ROUNDS) {
        /* read the current value */
        spu_mfcdma64(&counter, mfc_ea2h(argp), mfc_ea2l(argp),
                     sizeof(int), DMA_TAG, MFC_GET_CMD);
        spu_writech(MFC_WrTagMask, 1 << DMA_TAG);
        spu_mfcstat(MFC_TAG_UPDATE_ALL);

        counter++;

        /* write the updated value back */
        spu_mfcdma64(&counter, mfc_ea2h(argp), mfc_ea2l(argp),
                     sizeof(int), DMA_TAG, MFC_PUT_CMD);
        spu_writech(MFC_WrTagMask, 1 << DMA_TAG);
        spu_mfcstat(MFC_TAG_UPDATE_ALL);

        round++;
    }

    return 0;
}
static inline void restore_srr0(void) { unsigned int offset; unsigned int srr0; /* Restore, Step 14: * Restore the SPU SRR0 data from the LSCSA. */ offset = LSCSA_QW_OFFSET(srr0); srr0 = regs_spill[offset].slot[0]; spu_writech(SPU_WrSRR0, srr0); }
static inline void restore_event_mask(void) { unsigned int offset; unsigned int event_mask; /* Restore, Step 15: * Restore the SPU_RdEventMsk data from the LSCSA. */ offset = LSCSA_QW_OFFSET(event_mask); event_mask = regs_spill[offset].slot[0]; spu_writech(SPU_WrEventMask, event_mask); }
static inline void restore_tag_mask(void) { unsigned int offset; unsigned int tag_mask; /* Restore, Step 16: * Restore the SPU_RdTagMsk data from the LSCSA. */ offset = LSCSA_QW_OFFSET(tag_mask); tag_mask = regs_spill[offset].slot[0]; spu_writech(MFC_WrTagMask, tag_mask); }
int main(unsigned long long spu_id __attribute__ ((unused)), unsigned long long parm) { int i, j; int left, cnt; float time; unsigned int tag_id; vector float dt_v, dt_inv_mass_v; // Reserve a tag ID tag_id = mfc_tag_reserve(); spu_writech(MFC_WrTagMask, -1); // Input parameter parm is a pointer to the particle parameter context. // Fetch the context, waiting for it to complete. spu_mfcdma32((void *)(&ctx), (unsigned int)parm, sizeof(parm_context), tag_id, MFC_GET_CMD); (void)spu_mfcstat(MFC_TAG_UPDATE_ALL); dt_v = spu_splats(ctx.dt); // For each step in time for (time=0; time<END_OF_TIME; time += ctx.dt) { // For each block of particles for (i=0; i<ctx.particles; i+=PARTICLES_PER_BLOCK) { // Determine the number of particles in this block. left = ctx.particles - i; cnt = (left < PARTICLES_PER_BLOCK) ? left : PARTICLES_PER_BLOCK; // Fetch the data - position, velocity and inverse_mass. Wait for the DMA to complete // before performing computation. spu_mfcdma32((void *)(pos), (unsigned int)(ctx.pos_v+i), cnt * sizeof(vector float), tag_id, MFC_GETB_CMD); spu_mfcdma32((void *)(vel), (unsigned int)(ctx.vel_v+i), cnt * sizeof(vector float), tag_id, MFC_GET_CMD); spu_mfcdma32((void *)(inv_mass), (unsigned int)(ctx.inv_mass+i), cnt * sizeof(float), tag_id, MFC_GET_CMD); (void)spu_mfcstat(MFC_TAG_UPDATE_ALL); // Compute the step in time for the block of particles for (j=0; j<cnt; j++) { pos[j] = spu_madd(vel[j], dt_v, pos[j]); dt_inv_mass_v = spu_mul(dt_v, spu_splats(inv_mass[j])); vel[j] = spu_madd(dt_inv_mass_v, ctx.force_v, vel[j]); } // Put the position and velocity data back into system memory spu_mfcdma32((void *)(pos), (unsigned int)(ctx.pos_v+i), cnt * sizeof(vector float), tag_id, MFC_PUT_CMD); spu_mfcdma32((void *)(vel), (unsigned int)(ctx.vel_v+i), cnt * sizeof(vector float), tag_id, MFC_PUT_CMD); } } // Wait for final DMAs to complete before terminating SPU thread. (void)spu_mfcstat(MFC_TAG_UPDATE_ALL); return (0); }
/*
 * Restore, Step 12:
 *    Write the MFC_WrInt_MB channel with the PPUINT_MB
 *    data from LSCSA.
 *
 * Reads slot 0 of the ppuint_mb quadword in regs_spill and writes it to
 * the SPU_WrOutIntrMbox channel.
 */
static inline void write_ppuint_mb(void)
{
    const unsigned int qw_index = LSCSA_QW_OFFSET(ppuint_mb);
    const unsigned int saved = regs_spill[qw_index].slot[0];

    spu_writech(SPU_WrOutIntrMbox, saved);
}
void GetSPEAddr( unsigned int ea, unsigned int *PPE_addr ) { //#ifdef _DEBUG //#if _DBGLVL > 1 // printf( "Getting SPE address @ %#x to %#x\n", ea, (unsigned int)PPE_addr ); //#endif //#endif Printf1( "SPE[%u]: Getting SPE address @ %#x to %#x\n", SPE_id, ea, (unsigned int)PPE_addr ); // Get STRUCTURE spu_mfcdma32( PPE_addr, ea, 16, 30, MFC_GET_CMD ); spu_writech( MFC_WrTagMask, 1 << 30 ); spu_mfcstat( MFC_TAG_UPDATE_ALL ); }
void GetShader( unsigned int EA, unsigned int size, unsigned int *shader ) { //#ifdef _DEBUG //#if _DBGLVL > 1 // printf( "Getting shader @ %#x to %#x(%u)\n", EA, (unsigned int)shader, size ); // printf( "Shader size is %u\n", size ); //#endif //#endif Printf1( "SPE[%u]: Getting shader @ %#x to %#x(%u)\n", SPE_id, EA, (unsigned int)shader, size ); // Get shader size = ( size + 15 ) &~ 15; spu_mfcdma32( shader, EA, size, 29, MFC_GET_CMD ); spu_writech( MFC_WrTagMask, 1 << 29 ); spu_mfcstat( MFC_TAG_UPDATE_ALL ); }
/*
 * Receive an event from SPU event queue `spuq`.
 *
 * The queue number is written to the outbound mailbox and the SPU is
 * stopped with code 0x110. After it resumes, a status word is read from
 * the inbound mailbox. When the status is 0, three further words are read
 * into *data0, *data1 and *data2; otherwise the outputs are left untouched.
 *
 * Returns the status word, or 0x8001000A without doing anything if the
 * inbound mailbox already holds data on entry.
 */
int spu_thread_receive_event(uint32_t spuq, uint32_t *data0, uint32_t *data1, uint32_t *data2)
{
    int status;

    if (spu_readchcnt(SPU_RdInMbox) > 0) {
        return 0x8001000A;
    }

    spu_writech(SPU_WrOutMbox, spuq);
    spu_stop(0x110);

    status = spu_readch(SPU_RdInMbox);
    if (status == 0) {
        *data0 = spu_readch(SPU_RdInMbox);
        *data1 = spu_readch(SPU_RdInMbox);
        *data2 = spu_readch(SPU_RdInMbox);
    }

    return status;
}
static inline void restore_decr(void) { unsigned int offset; unsigned int decr_running; unsigned int decr; /* Restore, Step 6(moved): * If the LSCSA "decrementer running" flag is set * then write the SPU_WrDec channel with the * decrementer value from LSCSA. */ offset = LSCSA_QW_OFFSET(decr_status); decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING; if (decr_running) { offset = LSCSA_QW_OFFSET(decr); decr = regs_spill[offset].slot[0]; spu_writech(SPU_WrDec, decr); } }
/*
 * Enqueue a GET command (tag 0) that transfers sizeof(regs_spill) bytes
 * from the effective address lscsa_ea into regs_spill.
 *
 * lscsa_ea.ui[0] supplies the high word of the address and lscsa_ea.ui[1]
 * the low word. The function only enqueues the command; it does not wait
 * for the transfer to complete.
 */
static inline void fetch_regs_from_mem(addr64 lscsa_ea)
{
    /* Local-store address of the spill area (was mangled to "®s_spill"). */
    unsigned int ls = (unsigned int)&regs_spill[0];
    unsigned int size = sizeof(regs_spill);
    unsigned int tag_id = 0;
    unsigned int cmd = 0x40;	/* GET */

    spu_writech(MFC_LSA, ls);
    spu_writech(MFC_EAH, lscsa_ea.ui[0]);
    spu_writech(MFC_EAL, lscsa_ea.ui[1]);
    spu_writech(MFC_Size, size);
    spu_writech(MFC_TagID, tag_id);
    /* Writing the command opcode is the final step of the sequence. */
    spu_writech(MFC_Cmd, cmd);
}
/*
 * Fetches an operation descriptor from effective address `ea` into `data`
 * with a DMA GET on tag 30, and returns once the transfer has completed.
 *
 * NOTE(review): the transfer length is the literal 32 while the trace
 * message reports sizeof( Operation_t ) - confirm the two agree.
 */
void GetOperation( unsigned int ea, Operation_t *data )
{
	// sizeof yields size_t; cast so the argument matches the %u conversion
	Printf1( "SPE[%u]: Getting operation @ %#x to %#x(%u)\n", SPE_id, ea, (unsigned int)data, (unsigned int)sizeof( Operation_t ) );

	// Get STRUCTURE
	spu_mfcdma32( data, ea, 32, 30, MFC_GET_CMD );

	// Waiting
	spu_writech( MFC_WrTagMask, 1 << 30 );
	spu_mfcstat( MFC_TAG_UPDATE_ALL );
}
/*
 * Save, Step 13:
 *    Enqueue a PUT command (tag 0) to send the LSCSA
 *    to the CSA.
 *
 * Transfers sizeof(regs_spill) bytes from regs_spill to the effective
 * address lscsa_ea (ui[0] = high word, ui[1] = low word). The function
 * only enqueues the command; it does not wait for completion.
 */
static inline void spill_regs_to_mem(addr64 lscsa_ea)
{
    /* Local-store address of the spill area (was mangled to "®s_spill"). */
    unsigned int ls = (unsigned int)&regs_spill[0];
    unsigned int size = sizeof(regs_spill);
    unsigned int tag_id = 0;
    unsigned int cmd = 0x20;	/* PUT */

    spu_writech(MFC_LSA, ls);
    spu_writech(MFC_EAH, lscsa_ea.ui[0]);
    spu_writech(MFC_EAL, lscsa_ea.ui[1]);
    spu_writech(MFC_Size, size);
    spu_writech(MFC_TagID, tag_id);
    /* Writing the command opcode is the final step of the sequence. */
    spu_writech(MFC_Cmd, cmd);
}
/*
 * Restore, Step 4:
 *    Enqueue the GETL command (tag 0) to the MFC SPU command
 *    queue to transfer the upper 240 kb of LS from CSA.
 *
 * The list command takes the local-store address of dma_list in the EAL
 * channel and sizeof(dma_list) as its size; the data is placed starting
 * at local-store offset 16384. Only the high word of lscsa_ea is used.
 */
static inline void restore_upper_240kb(addr64 lscsa_ea)
{
    const unsigned int dest_ls    = 16384;
    const unsigned int list_addr  = (unsigned int)&dma_list[0];
    const unsigned int list_bytes = sizeof(dma_list);
    const unsigned int tag        = 0;
    const unsigned int getl_cmd   = 0x44;	/* GETL */

    spu_writech(MFC_LSA, dest_ls);
    spu_writech(MFC_EAH, lscsa_ea.ui[0]);
    spu_writech(MFC_EAL, list_addr);
    spu_writech(MFC_Size, list_bytes);
    spu_writech(MFC_TagID, tag);
    spu_writech(MFC_Cmd, getl_cmd);
}
/*
 * Save, Step 7:
 *    Enqueue the PUTL command (tag 0) to the MFC SPU command
 *    queue to transfer the remaining 240 kb of LS to CSA.
 *
 * The list command takes the local-store address of dma_list in the EAL
 * channel and sizeof(dma_list) as its size; the data is read starting at
 * local-store offset 16384. Only the high word of lscsa_ea is used.
 */
static inline void save_upper_240kb(addr64 lscsa_ea)
{
    const unsigned int src_ls     = 16384;
    const unsigned int list_addr  = (unsigned int)&dma_list[0];
    const unsigned int list_bytes = sizeof(dma_list);
    const unsigned int tag        = 0;
    const unsigned int putl_cmd   = 0x24;	/* PUTL */

    spu_writech(MFC_LSA, src_ls);
    spu_writech(MFC_EAH, lscsa_ea.ui[0]);
    spu_writech(MFC_EAL, list_addr);
    spu_writech(MFC_Size, list_bytes);
    spu_writech(MFC_TagID, tag);
    spu_writech(MFC_Cmd, putl_cmd);
}
/*
 * Loads the scene using DMA - blocks until done.
 *
 * ea    : effective address of the scene_t to fetch
 * scene : destination in local store
 *
 * Steps:
 *   1. GET the scene_t itself and wait for it.
 *   2. Allocate local arrays for the objects and the lights and start a GET
 *      for each (the two transfers run concurrently).
 *   3. Wait for the objects, then for every polygon object allocate a
 *      vertex array and start a GET for it.
 *   4. Wait for the lights and all vertex transfers.
 *
 * On return scene->objects, scene->lights and each polygon's
 * poly_obj.vertex point at the local-store copies.
 *
 * NOTE(review): the results of _malloc_align are used without a NULL
 * check, and the buffers are not released here.
 */
void load_scene(unsigned long long ea, scene_t *scene)
{
    unsigned int i = 0;
    object3d_t *objects = 0;
    pointlight_t *lights = 0;
    point_t *v = 0;

#if defined(_DEBUG) && _DEBUG > 2
    /* Print the DMA destination (scene), not the address of the parameter;
     * arguments are cast to match the conversions in the format string. */
    printf("Transferring %d bytes to LSaddr(%8lX) from EAadd(%8lX:%8lX) for SCENE\n",
           (int)sizeof(scene_t), (unsigned long)scene,
           (unsigned long)mfc_ea2h(ea), (unsigned long)mfc_ea2l(ea));
#endif

    /* DMA request for scene */
    spu_mfcdma64(scene, mfc_ea2h(ea), mfc_ea2l(ea),
                 sizeof(scene_t), SPUDMA_SCENE, MFC_GET_CMD);

    /* wait for request to complete */
    spu_writech(MFC_WrTagMask, 1 << SPUDMA_SCENE);
    mfc_read_tag_status_all();

    /* copy over objects */
    objects = _malloc_align(sizeof(object3d_t) * scene->nObjects, 4);

#if defined(_DEBUG) && _DEBUG > 2
    printf("Transferring %d bytes to LSaddr(%8lX) from EAadd(%8lX:%8lX) for OBJECTS\n",
           (int)(sizeof(object3d_t) * scene->nObjects), (unsigned long)objects,
           (unsigned long)mfc_ea2h(scene->objects_ea),
           (unsigned long)mfc_ea2l(scene->objects_ea));
#endif

    /* initiate DMA */
    spu_mfcdma64(objects, mfc_ea2h(scene->objects_ea), mfc_ea2l(scene->objects_ea),
                 sizeof(object3d_t) * scene->nObjects, SPUDMA_OBJECTS, MFC_GET_CMD);

    /* copy over lights */
    lights = _malloc_align(sizeof(pointlight_t) * scene->nLights, 4);

#if defined(_DEBUG) && _DEBUG > 2
    printf("Transferring %d bytes to LSaddr(%8X) from EAadd(%8lX:%8lX) for LIGHTS\n",
           (int)(sizeof(pointlight_t) * scene->nLights),
           (unsigned int)(unsigned long)lights,
           (unsigned long)mfc_ea2h(scene->lights_ea),
           (unsigned long)mfc_ea2l(scene->lights_ea));
#endif

    /* initiate DMA for lights */
    spu_mfcdma64(lights, mfc_ea2h(scene->lights_ea), mfc_ea2l(scene->lights_ea),
                 sizeof(pointlight_t) * scene->nLights, SPUDMA_LIGHTS, MFC_GET_CMD);

    /* wait for objects to complete; the lights transfer keeps running */
    spu_writech(MFC_WrTagMask, 1 << SPUDMA_OBJECTS);
    mfc_read_tag_status_all();

    /* assign local store pointer to objects */
    scene->objects = objects;

    /* iterate each object locally */
    for (; i < scene->nObjects; ++i) {
        if (objects[i].geometryType == GEOMETRY_POLYGON) {
            /* allocate memory for vertex */
            v = _malloc_align(sizeof(point_t) * objects[i].poly_obj.nVerticies, 4);

            /* initiate DMA to get verticies (all share the SPUDMA_VERTEXES tag) */
            spu_mfcdma64(v,
                         mfc_ea2h(objects[i].poly_obj.vertex_ea),
                         mfc_ea2l(objects[i].poly_obj.vertex_ea),
                         sizeof(point_t) * objects[i].poly_obj.nVerticies,
                         SPUDMA_VERTEXES, MFC_GET_CMD);

            /* The object array has already arrived, so storing the pointer
             * now only touches local data; the vertex data itself is not
             * valid until the wait below. */
            objects[i].poly_obj.vertex = v;
        }
    }

    /* wait for all DMA to finish (vertexes, lights) */
    spu_writech(MFC_WrTagMask, 1 << SPUDMA_LIGHTS | 1 << SPUDMA_VERTEXES);
    mfc_read_tag_status_all();

    /* assign local store lights pointer */
    scene->lights = lights;
}
/*
 * Advance the particles held in one local buffer by a single time step.
 *
 * buffer : index of the local buffer (first index of pos/vel/inv_mass)
 * cnt    : number of particles in the buffer
 * dt_v   : the time step replicated across a vector
 *
 * For every particle: pos += vel * dt, then vel += (dt * inv_mass) * force.
 * The loop handles four particles per iteration and always touches
 * elements i..i+3, so when cnt is not a multiple of four it also reads and
 * writes up to three entries past cnt.
 */
void process_buffer(int buffer, int cnt, vector float dt_v)
{
  int i;
  volatile vector float *p_inv_mass_v;
  vector float force_v, inv_mass_v;
  vector float pos0, pos1, pos2, pos3;
  vector float vel0, vel1, vel2, vel3;
  vector float dt_inv_mass_v, dt_inv_mass_v_0, dt_inv_mass_v_1, dt_inv_mass_v_2, dt_inv_mass_v_3;
  // Shuffle patterns: splat_word_N repeats bytes 4N..4N+3 (word N) in all four word slots.
  vector unsigned char splat_word_0 = (vector unsigned char){0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3};
  vector unsigned char splat_word_1 = (vector unsigned char){4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7};
  vector unsigned char splat_word_2 = (vector unsigned char){8, 9,10,11, 8, 9,10,11, 8, 9,10,11, 8, 9,10,11};
  vector unsigned char splat_word_3 = (vector unsigned char){12,13,14,15,12,13,14,15,12,13,14,15,12,13,14,15};

  // View the buffer's inverse masses as vectors so four are loaded at once.
  p_inv_mass_v = (volatile vector float *)&inv_mass[buffer][0];
  force_v = ctx.force_v;

  // Compute the step in time for the block of particles, four
  // particle at a time.
  for (i=0; i<cnt; i+=4) {
    // Load four inverse masses plus the matching positions and velocities.
    inv_mass_v = *p_inv_mass_v++;

    pos0 = pos[buffer][i+0];
    pos1 = pos[buffer][i+1];
    pos2 = pos[buffer][i+2];
    pos3 = pos[buffer][i+3];

    vel0 = vel[buffer][i+0];
    vel1 = vel[buffer][i+1];
    vel2 = vel[buffer][i+2];
    vel3 = vel[buffer][i+3];

    // dt * inv_mass for all four particles at once.
    dt_inv_mass_v = spu_mul(dt_v, inv_mass_v);

    // pos += vel * dt (uses the velocity from before this step).
    pos0 = spu_madd(vel0, dt_v, pos0);
    pos1 = spu_madd(vel1, dt_v, pos1);
    pos2 = spu_madd(vel2, dt_v, pos2);
    pos3 = spu_madd(vel3, dt_v, pos3);

    // Broadcast each particle's dt * inv_mass across its own vector.
    dt_inv_mass_v_0 = spu_shuffle(dt_inv_mass_v, dt_inv_mass_v, splat_word_0);
    dt_inv_mass_v_1 = spu_shuffle(dt_inv_mass_v, dt_inv_mass_v, splat_word_1);
    dt_inv_mass_v_2 = spu_shuffle(dt_inv_mass_v, dt_inv_mass_v, splat_word_2);
    dt_inv_mass_v_3 = spu_shuffle(dt_inv_mass_v, dt_inv_mass_v, splat_word_3);

    // vel += (dt * inv_mass) * force.
    vel0 = spu_madd(dt_inv_mass_v_0, force_v, vel0);
    vel1 = spu_madd(dt_inv_mass_v_1, force_v, vel1);
    vel2 = spu_madd(dt_inv_mass_v_2, force_v, vel2);
    vel3 = spu_madd(dt_inv_mass_v_3, force_v, vel3);

    // Store the results back into the same buffer.
    pos[buffer][i+0] = pos0;
    pos[buffer][i+1] = pos1;
    pos[buffer][i+2] = pos2;
    pos[buffer][i+3] = pos3;

    vel[buffer][i+0] = vel0;
    vel[buffer][i+1] = vel1;
    vel[buffer][i+2] = vel2;
    vel[buffer][i+3] = vel3;
  }
}

/*
 * SPE entry point for the double-buffered particle simulation.
 *
 * argv is the address of the parm_context describing the particle system.
 * For every time step from 0 up to END_OF_TIME (advancing by ctx.dt) the
 * particles are streamed through two local buffers: while one block is
 * being computed by process_buffer, the next block is already being
 * fetched into the other buffer. Each buffer has its own DMA tag so the
 * two can be waited on independently. Always returns 0.
 */
int main(unsigned long long spu_id __attribute__ ((unused)), unsigned long long argv)
{
  int buffer, next_buffer;
  int cnt, next_cnt, left;
  float time, dt;
  vector float dt_v;
  volatile vector float *ctx_pos_v, *ctx_vel_v;
  volatile vector float *next_ctx_pos_v, *next_ctx_vel_v;
  volatile float *ctx_inv_mass, *next_ctx_inv_mass;
  unsigned int tags[2];

  // Reserve a pair of DMA tag IDs
  // NOTE(review): the reservation results are used without being checked.
  tags[0] = mfc_tag_reserve();
  tags[1] = mfc_tag_reserve();

  // Input parameter argv is a pointer to the particle context.
  // Fetch the parameter context, waiting for it to complete.
  spu_writech(MFC_WrTagMask, 1 << tags[0]);
  spu_mfcdma32((void *)(&ctx), (unsigned int)argv, sizeof(parm_context), tags[0], MFC_GET_CMD);
  (void)spu_mfcstat(MFC_TAG_UPDATE_ALL);

  dt = ctx.dt;
  dt_v = spu_splats(dt);

  // For each step in time
  for (time=0; time<END_OF_TIME; time += dt) {
    // For each double buffered block of particles
    left = ctx.particles;

    cnt = (left < PARTICLES_PER_BLOCK) ? left : PARTICLES_PER_BLOCK;

    ctx_pos_v = ctx.pos_v;
    ctx_vel_v = ctx.vel_v;
    ctx_inv_mass = ctx.inv_mass;

    // Prefetch first buffer of input data.
    // pos, vel and inv_mass are passed without an index, i.e. the start of buffer 0.
    buffer = 0;
    spu_mfcdma32((void *)(pos), (unsigned int)(ctx_pos_v), cnt * sizeof(vector float), tags[0], MFC_GETB_CMD);
    spu_mfcdma32((void *)(vel), (unsigned int)(ctx_vel_v), cnt * sizeof(vector float), tags[0], MFC_GET_CMD);
    spu_mfcdma32((void *)(inv_mass), (unsigned int)(ctx_inv_mass), cnt * sizeof(float), tags[0], MFC_GET_CMD);

    // Loop while more particles remain after the block currently in flight.
    while (cnt < left) {
      left -= cnt;

      next_ctx_pos_v = ctx_pos_v + cnt;
      next_ctx_vel_v = ctx_vel_v + cnt;
      next_ctx_inv_mass = ctx_inv_mass + cnt;
      next_cnt = (left < PARTICLES_PER_BLOCK) ? left : PARTICLES_PER_BLOCK;

      // Prefetch next buffer so the data is available for computation on next loop iteration.
      // The first DMA is barriered so that we don't GET data before the previous iteration's
      // data is PUT.
      next_buffer = buffer^1;

      spu_mfcdma32((void *)(&pos[next_buffer][0]), (unsigned int)(next_ctx_pos_v), next_cnt * sizeof(vector float), tags[next_buffer], MFC_GETB_CMD);
      spu_mfcdma32((void *)(&vel[next_buffer][0]), (unsigned int)(next_ctx_vel_v), next_cnt * sizeof(vector float), tags[next_buffer], MFC_GET_CMD);
      spu_mfcdma32((void *)(&inv_mass[next_buffer][0]), (unsigned int)(next_ctx_inv_mass), next_cnt * sizeof(float), tags[next_buffer], MFC_GET_CMD);

      // Wait for previously prefetched data
      spu_writech(MFC_WrTagMask, 1 << tags[buffer]);
      (void)spu_mfcstat(MFC_TAG_UPDATE_ALL);

      process_buffer(buffer, cnt, dt_v);

      // Put the buffer's position and velocity data back into system memory
      // (not waited on here; the PUTs share this buffer's tag).
      spu_mfcdma32((void *)(&pos[buffer][0]), (unsigned int)(ctx_pos_v), cnt * sizeof(vector float), tags[buffer], MFC_PUT_CMD);
      spu_mfcdma32((void *)(&vel[buffer][0]), (unsigned int)(ctx_vel_v), cnt * sizeof(vector float), tags[buffer], MFC_PUT_CMD);

      // The prefetched block becomes the current block.
      ctx_pos_v = next_ctx_pos_v;
      ctx_vel_v = next_ctx_vel_v;
      ctx_inv_mass = next_ctx_inv_mass;

      buffer = next_buffer;
      cnt = next_cnt;
    }

    // Last block of this time step: nothing left to prefetch.
    // Wait for previously prefetched data
    spu_writech(MFC_WrTagMask, 1 << tags[buffer]);
    (void)spu_mfcstat(MFC_TAG_UPDATE_ALL);

    process_buffer(buffer, cnt, dt_v);

    // Put the buffer's position and velocity data back into system memory
    spu_mfcdma32((void *)(&pos[buffer][0]), (unsigned int)(ctx_pos_v), cnt * sizeof(vector float), tags[buffer], MFC_PUT_CMD);
    spu_mfcdma32((void *)(&vel[buffer][0]), (unsigned int)(ctx_vel_v), cnt * sizeof(vector float), tags[buffer], MFC_PUT_CMD);

    // Wait for DMAs to complete before starting the next step in time.
    // Only the tag of the last buffer used is waited on here.
    spu_writech(MFC_WrTagMask, 1 << tags[buffer]);
    (void)spu_mfcstat(MFC_TAG_UPDATE_ALL);
  }

  return (0);
}
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) { int tgiy0[2]; int tgiy1[2]; int tgiu0[2]; int tgiu1[2]; int tgiv0[2]; int tgiv1[2]; int tgo0[2]; int tgo1[2]; tgiu1[0]=1; tgiu1[1]=2; tgo0[0]=3; tgo0[1]=4; tgiy0[0]=5; tgiy0[1]=6; tgiy1[0]=7; tgiy1[1]=8; tgiu0[0]=9; tgiu0[1]=10; tgiv0[0]=11; tgiv0[1]=12; tgiv1[1]=13; tgiv1[1]=14; tgo1[0]=15; tgo1[1]=16; int selOut = 0; int selIn = 0; int tag = 31; int LineSelIn=0; int LineSelOut=0; int selY0In = 0; int selY1In = 0; int selCrIn = 0; struct img_args *iargs; iargs =(struct img_args*)memalign(128,sizeof(*iargs)); unsigned long long Cp; int first=1; int waiting=0; unsigned long long Op; unsigned int msg; unsigned long long YIp,UIp,VIp,YOp; int crblock0; int crblock1; int srcsmallcroma=0; ; int noscale=1; static int crblockdst1; static int crblockdst0; scaler_settings_t sc; while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg==RUN){ fprintf(stderr,"spu_yuv2argb_scaler: Starting Up\n"); } dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process image printf("spu_yuv2argb_scaler: SRC width %d,DST width %d\n",iargs->srcW,iargs->dstW); printf("spu_yuv2argb_scaler: SRC height %d,DST height %d\n",iargs->srcH,iargs->dstH); printf("spu_yuv2argb_scaler: DST offset %d\n",iargs->offset); // bad fix for centering image on 1080p) //iargs->offset=(iargs->maxwidth-iargs->dstW)/2 + iargs->maxwidth*(1080-iargs->dstH)/2; vector unsigned char *widthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*4+16); vector unsigned char *widthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*4+16); vector unsigned char *crwidthfilter0=(vector unsigned char*)memalign(128,MAXWIDTH*2+16); vector unsigned char *crwidthfilter1=(vector unsigned char*)memalign(128,MAXWIDTH*2+16); vector float * weightWfilter0=(vector float*)memalign(128,MAXWIDTH*4+16); vector float * weightWfilter1=(vector float*)memalign(128,MAXWIDTH*4+16); float weightHfilter[MAXHEIGHT+1]; unsigned long 
long dmapos[MAXHEIGHT+2]; unsigned long long dmacromapos[MAXHEIGHT+2]; vector float * Ytemp0=(vector float *)memalign(128,MAXWIDTH*4+16); vector float * Ytemp1=(vector float *)memalign(128,MAXWIDTH*4+16); vector float * Utemp=(vector float *)memalign(128,MAXWIDTH*2+16); vector float * Vtemp=(vector float *)memalign(128,MAXWIDTH*2+16); int wfilterpos[MAXWIDTH+2]; int hfilterpos0[MAXHEIGHT+2]; int hfilterpos1[MAXHEIGHT+2]; int crwfilterpos[MAXWIDTH/2+2]; vector unsigned char *InputY0[2]; InputY0[0]=(vector unsigned char*)memalign(128,MAXWIDTH); InputY0[1]=(vector unsigned char*)memalign(128,MAXWIDTH); vector unsigned char *InputU0[2]; InputU0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputU0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputV0[2]; InputV0[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputV0[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputY1[2]; InputY1[0]=(vector unsigned char*)memalign(128,MAXWIDTH); InputY1[1]=(vector unsigned char*)memalign(128,MAXWIDTH); vector unsigned char *InputU1[2]; InputU1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputU1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char *InputV1[2]; InputV1[0]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); InputV1[1]=(vector unsigned char*)memalign(128,MAXWIDTH/2+16); vector unsigned char* Output0[2]; Output0[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output Output0[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output vector unsigned char* Output1[2]; Output1[0]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output Output1[1]=(vector unsigned char*)memalign(128,MAXWIDTH*4); // 1line output while (msg!=STOP) { int h=0; int i; if (first) { crblock0=(iargs->srcW>>1)&~15; // rounded down crblock1=((iargs->srcW>>1) + 15)&~15; //rounded up crblockdst1=((iargs->dstW>>1) + 15)&~15;//destination size rounded up. 
crblockdst0=((iargs->dstW>>1) + 7)&~7;//destination size rounded up. initHFilter(iargs->srcW,iargs->srcH,iargs->dstH,hfilterpos0,hfilterpos1,weightHfilter,dmapos,dmacromapos); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[1]]/16.0,dmacromapos[hfilterpos1[0]]/16.0); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[1]]/16.0,dmacromapos[hfilterpos1[1]]/16.0); // // for (i=0;i < iargs->dstH>>1;i++) // { // // printf("Hfilterpos0 dst: %d, src:%d, weight:%f\n",i,hfilterpos0[i],weightHfilter[i]); // // printf("Hfilterpos1 dst: %d, src:%d, weight:%f\n",i,hfilterpos1[i],1.0-weightHfilter[i]); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[2*i+2]]/16.0,dmacromapos[hfilterpos1[2*i+2]]/16.0); // printf("line :%d, dmapos :%f, dmacromapos :%f \n",i,dmapos[hfilterpos1[2*i+3]]/16.0,dmacromapos[hfilterpos1[2*i+3]]/16.0); // } if ((iargs->srcW==iargs->dstW)&&(iargs->srcH==iargs->dstH)) { printf("spu_yuv2argb_scaler: No scaling proceeding with direct csc\n"); noscale=1; if ((iargs->srcW%32) != 0) { srcsmallcroma=1; sc.smallcroma=1; } } else { noscale=0; printf("spu_yuv2argb_scaler: Scaling, computing shuffle filters\n"); initWFilter(iargs->srcW,iargs->dstW,1,wfilterpos,widthfilter0,widthfilter1,weightWfilter0,weightWfilter1); /* for (i=0;i < iargs->dstW/4;i++) { printf("filterpos dst: %d, src:%d\n",i,wfilterpos[i]); printcharvec("widthfilter0",widthfilter0[i]); printcharvec("widthfilter1",widthfilter1[i]); printfvec("weightWfilter0",weightWfilter0[i]); printfvec("weightWfilter1",weightWfilter1[i]); }*/ srcsmallcroma=0; sc.smallcroma=0; if ((iargs->srcW%32) != 0) { sc.smallcroma=1; srcsmallcroma=1; initWcrFilter(iargs->srcW/2,iargs->dstW/2,1,crwfilterpos,crwidthfilter0,crwidthfilter1); printf("spu_yuv2argb_scaler: Computing Crshuffle filter\n"); // for (i=0;i < (iargs->dstW>>1)/4;i++) // { // printf("crwfilterpos dst: %d, src:%d, weight:%f\n",i,crwfilterpos[i]); // 
printcharvec("crwidthfilter0",crwidthfilter0[i]); // printcharvec("crwidthfilter1",crwidthfilter1[i]); // printfvec("weightWfilter0",weightWfilter0[i]); // printfvec("weightWfilter1",weightWfilter1[i]); // // } } sc.wWfilter0=weightWfilter0; sc.wWfilter1=weightWfilter1; sc.wfilterpos=wfilterpos; sc.sWfilter0=widthfilter0; sc.sWfilter1=widthfilter1; sc.crsWfilter0=crwidthfilter0; sc.crsWfilter1=crwidthfilter1; sc.crfilterpos=crwfilterpos; sc.smallcromaline0=0; sc.smallcromaline1=0; } first=0; printf("spu_yuv2argb_scaler: Initiation completed\n"); } YIp = iargs->Ystart[selIn]; UIp = iargs->Ustart[selIn]; VIp = iargs->Vstart[selIn]; Op = iargs->Output[selOut] + iargs->offset*4; LineSelOut=0; selY0In=0; selY1In=0; selCrIn=0; dmaGet(InputY0[0],YIp+dmapos[hfilterpos0[0]],iargs->srcW,tgiy0[0]); dmaGet(InputY1[0],YIp+dmapos[hfilterpos1[0]],iargs->srcW,tgiy1[0]); dmaGet(InputY0[1],YIp+dmapos[hfilterpos0[1]],iargs->srcW,tgiy0[1]); dmaGet(InputY1[1],YIp+dmapos[hfilterpos1[1]],iargs->srcW,tgiy1[1]); dmaGet(InputU0[0],UIp+dmacromapos[hfilterpos0[0]],crblock1,tgiu0[0]); dmaGet(InputU0[1],UIp+dmacromapos[hfilterpos0[1]],crblock1,tgiu0[1]); dmaGet(InputU1[0],UIp+dmacromapos[hfilterpos1[0]],crblock1,tgiu1[0]); dmaGet(InputU1[1],UIp+dmacromapos[hfilterpos1[1]],crblock1,tgiu1[1]); // dmaGet(InputV0[0],VIp+dmacromapos[hfilterpos0[0]],crblock1,tgiv0[0]); dmaGet(InputV0[1],VIp+dmacromapos[hfilterpos0[1]],crblock1,tgiv0[1]); dmaGet(InputV1[0],VIp+dmacromapos[hfilterpos1[0]],crblock1,tgiv1[0]); dmaGet(InputV1[1],VIp+dmacromapos[hfilterpos1[1]],crblock1,tgiv1[1]); LineSelOut=0; selY0In=0; selY1In=0; selCrIn=0; // printf("New image\n"); for (h=0; h < iargs->dstH>>1; h++) //we asume that output is allways h/2 { sc.width=iargs->dstW; sc.smallcroma=0; sc.smallcromaline0=0; sc.smallcromaline1=0; sc.wHfilter=weightHfilter[2*h]; dmaWaitTag(tgiy0[selY0In]); // printf("dma: %d\n",2*h+2); dmaWaitTag(tgiy1[selY1In]); // printf("dma: %d\n",2*h+2); sc.source00=InputY0[selY0In]; 
sc.source01=InputY1[selY1In]; sc.Output=Ytemp0; if (noscale) { unpack(&sc); } else { scale(&sc); } //first Y line scaled dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+2]],iargs->srcW,tgiy0[selY0In]); // printf("dma: %d\n",2*h+2); if (!noscale) { //if we are scaling we also need the second line dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+2]],iargs->srcW,tgiy1[selY1In]); } // printf("dma: %d\n",2*h+2); selY0In=selY0In^1; selY1In=selY1In^1; sc.wHfilter=weightHfilter[2*h+1]; dmaWaitTag(tgiy0[selY0In]); dmaWaitTag(tgiy1[selY0In]); sc.source00=InputY0[selY0In]; sc.source01=InputY1[selY0In]; sc.Output=Ytemp1; if (noscale) { unpack(&sc); } else { scale(&sc); } //second Y line scaled dmaGet(InputY0[selY0In],YIp+dmapos[hfilterpos0[2*h+3]],iargs->srcW,tgiy0[selY0In]); if(!noscale) { //if we are scaling we also need the second line dmaGet(InputY1[selY1In],YIp+dmapos[hfilterpos1[2*h+3]],iargs->srcW,tgiy1[selY1In]); } selY0In=selY0In^1; selY1In=selY1In^1; // printf("dma: %d\n",2*h+3); if (srcsmallcroma) //these settings applly for both U and V { sc.smallcroma=1; if ((hfilterpos0[h]&1)==1) { sc.smallcromaline0=1; } else { sc.smallcromaline0=0; } if ((hfilterpos1[h]&1)==1){ sc.smallcromaline1=1; } else { sc.smallcromaline1=0; } if (((hfilterpos0[h]&1)==0)&&((hfilterpos1[h]&1)==0)) { sc.smallcroma=0; //both lines are 128 bit alligned only when doing extreme downscaling can this happen } } // if (noscale) { // sc.width=crblockdst0;//crblockdst1; // } else { // sc.width=crblockdst0; // } sc.width=iargs->dstW>>1; sc.wHfilter=weightHfilter[h]; dmaWaitTag(tgiu0[selCrIn]); dmaWaitTag(tgiu1[selCrIn]); sc.Output=Utemp; sc.source00=InputU0[selCrIn]; sc.source01=InputU1[selCrIn]; if (noscale) { unpack(&sc); } else { scale(&sc); } dmaWaitTag(tgiv0[selCrIn]); dmaWaitTag(tgiv1[selCrIn]); sc.Output=Vtemp; sc.source00=InputV0[selCrIn]; sc.source01=InputV1[selCrIn]; if (noscale) { unpack(&sc); } else { scale(&sc); } 
dmaGet(InputV0[selCrIn],VIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiu0[selCrIn]); //this is allways pos 0 dmaGet(InputU0[selCrIn],UIp+dmacromapos[hfilterpos0[h+2]],crblock1,tgiv0[selCrIn]); if(!noscale) { //if we are scaling we also need the second line dmaGet(InputV1[selCrIn],VIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiu1[selCrIn]); dmaGet(InputU1[selCrIn],UIp+dmacromapos[hfilterpos1[h+2]],crblock1,tgiv1[selCrIn]); } selCrIn=selCrIn^1; dmaWaitTag(tgo0[LineSelOut]); dmaWaitTag(tgo1[LineSelOut]); yuv420toARGBfloat(Ytemp0,Ytemp1,Utemp,Vtemp,Output0[LineSelOut],Output1[LineSelOut],iargs->dstW,iargs->maxwidth); //colorspace convert results dmaPut(Output0[LineSelOut],Op,iargs->dstW*4,tgo0[LineSelOut]); Op=Op+iargs->maxwidth*4; dmaPut(Output1[LineSelOut],Op,iargs->dstW*4,tgo1[LineSelOut]); Op=Op+iargs->maxwidth*4; LineSelOut=LineSelOut^1; } dmaWaitTag(tgo0[LineSelOut^1]); //wait for last write. dmaWaitTag(tgo1[LineSelOut^1]); //wait for last write. // printf("Image done\n"); if (iargs->MessageForm==INTR) { while (spu_stat_out_intr_mbox() == 0); msg=RDY; spu_writech(SPU_WrOutIntrMbox, msg); waiting=1; } if (iargs->MessageForm==HARD) { while (spu_stat_out_mbox() == 0); msg=RDY; spu_write_out_mbox(msg); waiting=1; } // fprintf(stderr,"spu_yuvscaler: Waiting\n"); while (waiting){ while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg == RUN){ selOut = selOut ^ 1; // flips the output buffer pointers selIn = selIn ^ 1; // flips the input buffer pointers waiting=0; } else if (msg == STOP) { // fprintf(stderr,"spu_yuvscaler: Stopping\n"); waiting=0; } else if (msg == UPDATE) { // fprintf(stderr,"spu_yuvscaler: Update\n"); dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process the new image first=1; // update filters to reflect the new image! // selOut=0; // no need to change these. that can be done by the run. // selIn=0; } } } return 0; }
int main( unsigned long long id __attribute__ ((unused)), unsigned long long argv ) { unsigned int i, EA = argv; /* * Allocate memory * */ // Info LS_ShaderInfo shaderinfo; // Memory for the shader unsigned int *shader[__NUMBER_OF_SHADERS]; for( i = 0 ; i < __NUMBER_OF_SHADERS ; i++ ) { shader[i] = (unsigned int *)_malloc_align( __SHADER_SIZE, 7 ); } // Memory for the blocks char *blocks[(__NUMBER_OF_BLOCKS_IN_MEM) * 2]; for( i = 0 ; i < __NUMBER_OF_BLOCKS_IN_MEM * 2 ; i++ ) { blocks[i] = (char *)_malloc_align( __BLOCK_DATA_SIZE, 7 ); shaderinfo.LS_blockDataArea[i] = blocks[i]; } // Memory for metadata char *meta = (char *)_malloc_align( __META_DATA_SIZE, 7 ); shaderinfo.LS_shaderMemory = meta; Operation_t myop[__NUMBER_OF_SHADERS]; unsigned int PPE_addr[__NUMBER_OF_SHADERS] __attribute__((aligned(16))); unsigned int EA_result[__NUMBER_OF_SHADERS]; // Getting SPE id SPE_id = mb_getmbox( ); // SELF CHECK //if ( SPE_id == 0 ) SelfCheck(); //printf( "SPE[%u]: SPE_* is at %#x\n", SPE_id, EA ); //printf( "[SPE(%u)]Check!!!\n", SPE_id ); // SPU program fragment prototype void (*run)( unsigned int SPE_id, LS_ShaderInfo *info, Operation_t *myop, Functions_t *funcs ) = NULL; void (*runr)( unsigned int SPE_id, unsigned int EA_result, LS_ShaderInfo *info, Operation_t *myop, Functions_t *funcs ) = NULL; // Getting SPE id unsigned int seed = mb_getmbox( ); // Init random mc_rand_ks_init( seed ); // Common Functions Functions_t funcs; funcs.printuint = PrintUInt; funcs.printint = PrintInt; funcs.printe = PrintE; funcs.printchar = PrintChar; funcs.printfloat = PrintFloat; funcs.printaddr = PrintAddr; funcs.printstr = PrintString; funcs.printfloat3 = PrintFloat3; funcs.printfloat3v = PrintFloat3v; funcs.printfloatv = PrintFloatv; funcs.printfloatrow = PrintFloatRow; //funcs.printf = printf2; funcs.rand_0_to_1_f = mc_rand_ks_0_to_1_f4; //funcs.rand_0_to_1_fm = mc_rand_ks_0_to_1_array_f4; unsigned int running = 1, task = 0; // prof_clear(); // prof_start(); unsigned int idt; // THA 
LOOP while( running ) { task = mb_getmbox( ); Printf1( "[SPE(%u)]Got state %u\n", SPE_id, task ); // Transfer operation and run shader with no return value // 0 -> 99 if( task < 100 ) { idt = task; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Get shader for slot %u\n", SPE_id, idt ); GetSPEAddr( EA, PPE_addr ); GetOperation( PPE_addr[idt], &myop[idt] ); GetShader( (unsigned int)myop[idt].EA_shader, myop[idt].shaderSize, shader[idt] ); Printf1( "SPE[%u]: Shader recieved for slot\n", SPE_id ); run = (void *)shader[0]; Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); run( SPE_id, &shaderinfo, &myop[idt], &funcs ); Printf1( "SPE[%u]: --- End of shader! -----------\n", SPE_id ); spu_writech( SPU_WrOutMbox, 1 ); } // Transfer operation and run shader WITH return value // 100 -> 199 else if( task < 200 ) { idt = task - 100; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Get shader with return value for slot %u\n", SPE_id, idt ); EA_result[idt] = mb_getmbox( ); GetSPEAddr( EA, PPE_addr ); GetOperation( PPE_addr[idt], &myop[idt] ); GetShader( (unsigned int)myop[idt].EA_shader, myop[idt].shaderSize, shader[idt] ); runr = (void *)shader[idt]; Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); runr( SPE_id, EA_result[idt], &shaderinfo, &myop[idt], &funcs ); Printf1( "SPE[%u]: --- End of shader! 
-----------\n", SPE_id ); spu_writech( SPU_WrOutMbox, 1 ); } // Transfer operation and shader WITHOUT return value // 200 -> 299 else if( task < 300 ) { idt = task - 200; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Get shader for slot %u\n", SPE_id, idt ); GetSPEAddr( EA, PPE_addr ); GetOperation( PPE_addr[idt], &myop[idt] ); GetShader( (unsigned int)myop[idt].EA_shader, myop[idt].shaderSize, shader[idt] ); spu_writech( SPU_WrOutMbox, 1 ); } // Run shader with no return value // 300 -> 399 else if( task < 400 ) { idt = task - 300; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Run shader from slot %u\n", SPE_id, idt ); run = (void *)shader[idt]; run( SPE_id, &shaderinfo, &myop[idt], &funcs ); spu_writech( SPU_WrOutMbox, 1 ); } // Transfer operation and shader WITH return value // 400 -> 499 else if( task < 500 ) { idt = task - 400; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Get shader with return value for slot %u\n", SPE_id, idt ); EA_result[idt] = mb_getmbox( ); GetSPEAddr( EA, PPE_addr ); GetOperation( PPE_addr[idt], &myop[idt] ); GetShader( (unsigned int)myop[idt].EA_shader, myop[idt].shaderSize, shader[idt] ); Printf1( "SPE[%u]: Done getting operation %u\n", SPE_id, idt ); spu_writech( SPU_WrOutMbox, 1 ); } // Run shader WITH return value // 500 -> 599 else if( task < 600 ) { idt = task - 500; CHECKBOUNDS( idt ); Printf1( "SPE[%u]: Run shader with return value from slot %u\n", SPE_id, idt ); runr = (void *)shader[0]; runr( SPE_id, EA_result[idt], &shaderinfo, &myop[0], &funcs ); spu_writech( SPU_WrOutMbox, 1 ); } // Sanity check!!! 
else if( task == 1000 ) { spu_writech( SPU_WrOutMbox, 123 ); } // DEFAULT else { Printf1( "[SPE(%u)]No such instruction, quitting\n", SPE_id ); running = 0; } // switch( task ) // { // case 0: // QUIT // Printf1( "[SPE(%u)]Quitting\n", SPE_id ); // // running = 0; // break; // // /* // * Get an operation with no return value // * // */ // case 1: // Get operation and run // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // run = (void *)shader[0]; // // Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); // run( SPE_id, &shaderinfo, &myop[0], &funcs ); // Printf1( "SPE[%u]: --- End of shader! -----------\n", SPE_id ); // // spu_writech( SPU_WrOutMbox, 1 ); // break; // // /* // * Get an operation with a return value // * // */ // case 2: // Get operation with return value and run // EA_result = mb_getmbox( ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // runr = (void *)shader[0]; // // Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); // runr( SPE_id, EA_result, &shaderinfo, &myop[0], &funcs ); // // Printf1( "SPE[%u]: --- End of shader! -----------\n", SPE_id ); // // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 3: // Get operation without return value NO RUN! // // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // run = (void *)shader[0]; // // spu_writech( SPU_WrOutMbox, 1 ); // break; // // // case 4: // Get operation with return value NO RUN! 
// // EA_result = mb_getmbox( ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // runr = (void *)shader[0]; // // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 5: // RUN!!!! with _NO_ return value // Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); // // run( SPE_id, &shaderinfo, &myop[0], &funcs ); // Printf1( "SPE[%u]: --- End of shader! -----------\n", SPE_id ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 6: // RUN!!!! with return value // // Printf1( "SPE[%u]: --- Running shader! ----------\n", SPE_id ); // // runr( SPE_id, EA_result, &shaderinfo, &myop[0], &funcs ); // // Printf1( "SPE[%u]: --- End of shader! -----------\n", SPE_id ); // // spu_writech( SPU_WrOutMbox, 1 ); // break; // // /* // * Operations with no return values // */ // case 100: // Get a shader for slot 0 // Printf1( "SPE[%u]: Get shader for slot 0\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // break; // // case 101: // Get a shader for slot 1 // Printf1( "SPE[%u]: Get shader for slot 1\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[1], &myop[1] ); // GetShader( (unsigned int)myop[1].EA_shader, myop[1].shaderSize, shader[1] ); // break; // // case 102: // Get a shader for slot 2 // Printf1( "SPE[%u]: Get shader for slot 2\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[2], &myop[2] ); // GetShader( (unsigned int)myop[2].EA_shader, myop[2].shaderSize, shader[2] ); // break; // // case 103: // Get a shader for slot 3 // Printf1( "SPE[%u]: Get shader for slot 3\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[3], &myop[3] ); // GetShader( (unsigned int)myop[3].EA_shader, myop[3].shaderSize, shader[3] ); // break; // // case 110: // Run slot 0 // Printf1( "SPE[%u]: 
Run shader from slot 0\n", SPE_id ); // run = (void *)shader[0]; // run( SPE_id, &shaderinfo, &myop[0], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 111: // Run slot 1 // Printf1( "SPE[%u]: Run shader from slot 1\n", SPE_id ); // run = (void *)shader[1]; // run( SPE_id, &shaderinfo, &myop[1], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 112: // Run slot 2 // Printf1( "SPE[%u]: Run shader from slot 2\n", SPE_id ); // run = (void *)shader[2]; // run( SPE_id, &shaderinfo, &myop[2], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 113: // Run slot 3 // Printf1( "SPE[%u]: Run shader from slot 3\n", SPE_id ); // run = (void *)shader[3]; // Printf1( "SPE[%u]: Run shader from slot 3\n", SPE_id ); // run( SPE_id, &shaderinfo, &myop[3], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // /* // * Update operations // */ // // case 120: // Update operation for slot 0 // Printf1( "SPE[%u]: Update operation for slot 0\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[0], &myop[0] ); // break; // // case 121: // Update operation for slot 1 // Printf1( "SPE[%u]: Update operation for slot 1\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[1], &myop[1] ); // break; // // case 122: // Update operation for slot 2 // Printf1( "SPE[%u]: Update operation for slot 2\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[2], &myop[2] ); // break; // // case 123: // Update operation for slot 3 // Printf1( "SPE[%u]: Update operation for slot 3\n", SPE_id ); // GetSPEAddr( EA, PPE_addr ); // GetOperation( PPE_addr[3], &myop[3] ); // break; // // /* // * Operations with return values // */ // case 200: // Get a shader for slot 0 // Printf1( "SPE[%u]: Run shader with return value from slot 0\n", SPE_id ); // EA_result = mb_getmbox( ); // GetOperation( PPE_addr[0], &myop[0] ); // GetShader( (unsigned int)myop[0].EA_shader, myop[0].shaderSize, shader[0] ); // break; 
// // case 201: // Get a shader for slot 1 // Printf1( "SPE[%u]: Run shader with return value from slot 1\n", SPE_id ); // EA_result = mb_getmbox( ); // GetOperation( PPE_addr[0], &myop[1] ); // GetShader( (unsigned int)myop[1].EA_shader, myop[1].shaderSize, shader[1] ); // break; // // case 210: // Run slot 0 // runr = (void *)shader[0]; // runr( SPE_id, EA_result, &shaderinfo, &myop[0], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 211: // Run slot 1 // runr = (void *)shader[1]; // runr( SPE_id, EA_result, &shaderinfo, &myop[1], &funcs ); // spu_writech( SPU_WrOutMbox, 1 ); // break; // // case 1000: // Sanity check // spu_writech( SPU_WrOutMbox, 123 ); // break; // // // default: // Printf1( "[SPE(%u)]No such instruction, quitting\n", SPE_id ); // running = 0; // } } //prof_stop(); return 1; }
/*
 * Publish `status` on the SPU outbound mailbox channel and then execute a
 * stop-and-signal with code 0x101.
 *
 * NOTE(review): presumably the PPU-side runtime interprets stop code 0x101 as
 * "terminate the whole thread group" and picks the exit status up from the
 * mailbox -- confirm against the runtime's documentation.
 */
void spu_thread_group_exit(int status)
{
	/* The status must be queued before the stop is executed. */
	spu_writech(SPU_WrOutMbox, status);

	spu_stop(0x101);
}
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp) { int tgi0[2]; int tgo0[2]; int tgio0[2]; tgi0[0]=1; tgi0[1]=2; tgio0[0]=11; tgio0[1]=12; tgo0[0]=13; tgo0[1]=14; /* tgo1[0]=15; tgo1[1]=16;*/ int selOut = 0; int selIn = 0; int msg=RUN; int waiting=0; int tag = 31; struct img_args *iargs; iargs =(struct img_args*)memalign(128,sizeof(*iargs)); dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); printf("spu_blit_yuv422_to_argb: SRC width %d,DST width %d\n",iargs->src_w,iargs->drw_w); printf("spu_blit_yuv422_to_argb: SRC height %d,DST height %d\n",iargs->src_h,iargs->drw_h); while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); // first=0; vector unsigned char *InOutbuffer[2]; vector unsigned char *Inbuffer[2]; vector unsigned char *Outbuffer[2]; int Outwidth=(4*iargs->drw_w+3)&~3; int Inwidth=(2*iargs->src_w+7)&~7; Inbuffer[0]=(vector unsigned char*)memalign(128,Inwidth); Inbuffer[1]=(vector unsigned char*)memalign(128,Inwidth); if (iargs->BLEND) { InOutbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); InOutbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); } Outbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); Outbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); unsigned long long Inp,Outp,InOutp; int i=0; // int update=1; while (msg!=STOP) { selOut = 0; selIn = 0; Inp=iargs->Inp0[0]; InOutp=iargs->Outp0[0]; Outp=iargs->Outp0[0]; dmaGet(Inbuffer[0],Inp,Inwidth,tgi0[0]); Inp=Inp+iargs->Istride[0]*2; dmaGet(Inbuffer[1],Inp,Inwidth,tgi0[1]); Inp=Inp+iargs->Istride[0]*2; // if (iargs->BLEND) // { // dmaGet(InOutbuffer[0],InOutp,Outwidth,tgio0[0]); // InOutp=InOutp+iargs->Ostride[0]*4; // dmaGet(InOutbuffer[1],InOutp,Outwidth,tgio0[1]); // InOutp=InOutp+iargs->Ostride[0]*4; // } selIn=0; selOut=0; for (i=0;i < iargs->drw_h ;i++) { dmaWaitTag(tgi0[selIn]); // if (iargs->BLEND) // dmaWaitTag(tgio0[selIn]); dmaWaitTag(tgo0[selOut]); if (iargs->SourceFormat==YUY2||iargs->SourceFormat==YUYV422) { 
yuv422_to_argb(Inbuffer[selIn],Outbuffer[selOut],iargs->drw_w); // printf("spe_blitter: YUV422->ARGB\n"); } //yuv420_to_yuv2(Yinbuffer[selIn],Uinbuffer[selIn],Vinbuffer[selIn],Outbuffer[selOut],iargs->Istride[0]); // if (iargs->BLEND) // blend(InOutbuffer[selIn],OutBuffer[selOut],iargs->ALPHA,iargs->SourceFormat); dmaPut(Outbuffer[selOut],Outp,Outwidth,tgo0[selOut]); // if (iargs->BLEND){ // dmaGet(InOutbuffer[selIn],InOutp,Outwidth,tgio0[selIn]); // InOutp=InOutp+iargs->Ostride[0]; // // } dmaGet(Inbuffer[selIn],Inp,Inwidth,tgi0[selIn]); Inp=Inp+iargs->Istride[0]*2; Outp=Outp+iargs->Ostride[0]*4; selIn=selIn^1; selOut=selOut^1; } while (spu_stat_out_intr_mbox() == 0); msg=RDY; spu_writech(SPU_WrOutIntrMbox, msg); waiting=1; while (waiting){ while (spu_stat_in_mbox() == 0); msg=spu_read_in_mbox(); if (msg == RUN){ waiting=0; } else if (msg == STOP) { waiting=0; } else if (msg == UPDATE) { tag=30; dmaGetnWait(iargs,(unsigned int)argp,(int)envp,tag); //getting neccesary data to process the new image // // update=1; // update filters to reflect the new image! // Outwidth=(iargs->drw_w+3)&~3; // Inwidth=(iargs->src_w+7)&~7; // free(Inbuffer[0]); // free(Inbuffer[1]); // // free(Outbuffer[0]); // free(Outbuffer[1]); // // Inbuffer[0]=(vector unsigned char*)memalign(128,Inwidth); // Inbuffer[1]=(vector unsigned char*)memalign(128,Inwidth); // // if (iargs->BLEND) // { // free(InOutbuffer[0]); // free(InOutbuffer[1]); // InOutbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); // InOutbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); // } // // Outbuffer[0]=(vector unsigned char*)memalign(128,Outwidth); // Outbuffer[1]=(vector unsigned char*)memalign(128,Outwidth); } } } return 0; }