//really hacky ~ //Isn't this now obsolete anyway ? (constprop pass should include it ..) // -> constprop has some small stability issues still, not ready to be used on ip/bios fully yet void PromoteConstAddress(RuntimeBlockInfo* blk) { bool is_const=false; u32 value; total_blocks++; for (size_t i=0;i<blk->oplist.size();i++) { shil_opcode* op=&blk->oplist[i]; if (is_const && op->op==shop_readm && op->rs1.is_reg() && op->rs1._reg==reg_r0) { u32 val=value; if (op->rs3.is_imm()) { val+=op->rs3._imm; op->rs3=shil_param(); } op->rs1=shil_param(FMT_IMM,val); } if (op->op==shop_mov32 && op->rs1.is_imm() && isdst(op,reg_r0) ) { is_const=true; value=op->rs1._imm; } else if (is_const && (isdst(op,reg_r0) || op->op==shop_ifb || op->op==shop_sync_sr) ) is_const=false; } }
// Appends a shop_ifb op for the raw opcode `op` to the current block.
//   rs1 = OpDesc[op]->NeedPC() as an immediate
//   rs2 = state.cpu.rpc+2 as an immediate
//   rs3 = the raw opcode as an immediate
// (shop_ifb is presumably the "interpreter fallback" op - confirm against the shil definitions.)
void dec_fallback(u32 op)
{
	shil_opcode fallback;

	fallback.op = shop_ifb;

	fallback.rs1 = shil_param(FMT_IMM, OpDesc[op]->NeedPC());
	fallback.rs2 = shil_param(FMT_IMM, state.cpu.rpc + 2);
	fallback.rs3 = shil_param(FMT_IMM, op);

	blk->oplist.push_back(fallback);
}
// Builds a shil_opcode from the given operation, operands and flags and
// appends it to the current block's op list. The op's guest_offs is set to
// the current decode position (state.cpu.rpc) relative to the block's start
// address. Operands that are not supplied default to an empty shil_param.
void Emit(shilop op,shil_param rd=shil_param(),shil_param rs1=shil_param(),shil_param rs2=shil_param(),u32 flags=0,shil_param rs3=shil_param(),shil_param rd2=shil_param())
{
	shil_opcode emitted;

	emitted.op = op;
	emitted.flags = flags;

	// destinations
	emitted.rd = rd;
	emitted.rd2 = rd2;

	// sources
	emitted.rs1 = rs1;
	emitted.rs2 = rs2;
	emitted.rs3 = rs3;

	emitted.guest_offs = state.cpu.rpc - blk->addr;

	blk->oplist.push_back(emitted);
}
//"links" consts to each other void constlink(RuntimeBlockInfo* blk) { Sh4RegType def=NoReg; s32 val; for (size_t i=0;i<blk->oplist.size();i++) { shil_opcode* op=&blk->oplist[i]; if (op->op!=shop_mov32) def=NoReg; else { if (def!=NoReg && op->rs1.is_imm() && op->rs1._imm==val) { op->rs1=shil_param(def); } else if (def==NoReg && op->rs1.is_imm() && op->rs1._imm==0) { //def=op->rd._reg; val=op->rs1._imm; } } } }
//enjcond //this is a normally slower //however, cause of reg alloc stuff in arm, this //speeds up access to SR_T (pc_dyn is stored in reg, not mem) //This is temporary til that limitation is fixed on the reg alloc logic void enjcond(RuntimeBlockInfo* blk) { u32 rv[16]; bool isi[16]={0}; if (!blk->has_jcond && (blk->BlockType==BET_Cond_0||blk->BlockType==BET_Cond_1)) { shil_opcode jcnd; jcnd.op=shop_jcond; jcnd.rs1=shil_param(reg_sr_T); jcnd.rd=shil_param(reg_pc_dyn); jcnd.flags=0; blk->oplist.push_back(jcnd); blk->has_jcond=true; } }
// Table-driven decoder: translates the guest opcode `op` into one or more
// SHIL ops (via Emit) using the 64-bit descriptor in OpDesc[op]->decode.
//
// Descriptor layout, as unpacked below:
//   bits 63..32  e      mode-specific extra value (often interpreted as signed)
//   bits 31..24  mode   DecMode, selects the case in the switch
//   bits 23..16  d      DecParam for the destination-side operand (decoded into rs1)
//   bits 15..8   s      DecParam for the source-side operand (decoded into rs2)
//   bits  7..0   natop  the SHIL op to emit
//
// Returns false when the opcode has no descriptor (decode==0) or when
// op>=0xF000 and state.cpu.FPR64 is set; returns true after emitting
// otherwise. May also advance state.cpu.rpc and blk->guest_cycles (DM_DIV0)
// and sets state.info.has_fpu / has_readm / has_writem.
bool dec_generic(u32 op)
{
	DecMode mode;DecParam d;DecParam s;shilop natop;u32 e;
	if (OpDesc[op]->decode==0)
		return false;

	u64 inf=OpDesc[op]->decode;

	// Unpack the descriptor fields (see the layout in the header comment).
	e=(u32)(inf>>32);
	mode=(DecMode)((inf>>24)&0xFF);
	d=(DecParam)((inf>>16)&0xFF);
	s=(DecParam)((inf>>8)&0xFF);
	natop=(shilop)((inf>>0)&0xFF);

	/*
	if ((op&0xF00F)==0x300E)
	{
		return false;
	}*/

	/*
	if (mode==DM_ADC)
		return false;
	*/

	bool transfer_64=false;
	// Opcodes >= 0xF000 are treated as FPU opcodes here.
	if (op>=0xF000)
	{
		state.info.has_fpu=true;
		//return false;//FPU off for now
		if (state.cpu.FPR64 /*|| state.cpu.FSZ64*/)
			return false;

		// With FSZ64 set, the size-dependent FP register params select 64-bit transfers.
		if (state.cpu.FSZ64 && (d==PRM_FRN_SZ || d==PRM_FRM_SZ || s==PRM_FRN_SZ || s==PRM_FRM_SZ))
		{
			transfer_64=true;
		}
	}

	shil_param rs1,rs2,rs3,rd;

	// Note: both calls may write rs3; the second call (for d) runs last.
	dec_param(s,rs2,rs3,op);
	dec_param(d,rs1,rs3,op);

	switch(mode)
	{
	case DM_ReadSRF:
		// rs1 = sr_status | sr_T
		Emit(shop_mov32,rs1,reg_sr_status);
		Emit(shop_or,rs1,rs1,reg_sr_T);
		break;

	case DM_WriteTOp:
		// sr_T = rs1 natop rs2
		Emit(natop,reg_sr_T,rs1,rs2);
		break;

	case DM_DT:
		// rs1 = rs1 - rs2, then sr_T = (rs1 == 0)
		verify(natop==shop_sub);
		Emit(natop,rs1,rs1,rs2);
		Emit(shop_seteq,mk_reg(reg_sr_T),rs1,mk_imm(0));
		break;

	case DM_Shift:
		// For single-bit shifts the bit about to be shifted out is captured in
		// sr_T first (bit 31 for shl, bit 0 otherwise).
		if (natop==shop_shl && e==1)
			Emit(shop_shr,mk_reg(reg_sr_T),rs1,mk_imm(31));
		else if (e==1)
			Emit(shop_and,mk_reg(reg_sr_T),rs1,mk_imm(1));

		Emit(natop,rs1,rs1,mk_imm(e));
		break;

	case DM_Rot:
		// Unless bit 0x1000 of |e| is set, sr_T is loaded from rs2 before the
		// rotate: bit 31 when e is negative (left), bit 0 otherwise (right).
		if (!(((s32)e>=0?e:-e)&0x1000))
		{
			if ((s32)e<0)
			{
				//left rotate
				Emit(shop_shr,mk_reg(reg_sr_T),rs2,mk_imm(31));
				e=-e;
			}
			else
			{
				//right rotate
				Emit(shop_and,mk_reg(reg_sr_T),rs2,mk_imm(1));
			}
		}
		// Only the low 5 bits are used as the rotate amount.
		e&=31;

		Emit(natop,rs1,rs2,mk_imm(e));
		break;

	case DM_BinaryOp://d=d op s
		// Bit 0 of e selects whether the result is written back to rs1 or discarded.
		if (e&1)
			Emit(natop,rs1,rs1,rs2,0,rs3);
		else
			Emit(natop,shil_param(),rs1,rs2,0,rs3);
		break;

	case DM_UnaryOp: //d= op s
		// Promote to a 64-bit move for 64-bit FP transfers.
		if (transfer_64 && natop==shop_mov32)
			natop=shop_mov64;

		// Use the round-to-zero variant of int->float when RoundToZero is set.
		if (natop==shop_cvt_i2f_n && state.cpu.RoundToZero)
			natop=shop_cvt_i2f_z;

		// Bit 0 of e: the op has no destination and takes rs1 as its only source.
		if (e&1)
			Emit(natop,shil_param(),rs1);
		else
			Emit(natop,rs1,rs2);
		break;

	case DM_WriteM: //write(d,s)
		{
			//0 has no effect, so get rid of it
			if (rs3.is_imm() && rs3._imm==0)
				rs3=shil_param();

			state.info.has_writem=true;
			// |e| is passed to shop_writem in the flags slot; doubled for 64-bit transfers.
			if (transfer_64) e=(s32)e*2;
			bool update_after=false;
			// Negative e: the address register is decremented by |e| as part of the store.
			if ((s32)e<0)
			{
				if (rs1._reg!=rs2._reg) //reg shouldn't be updated if its written
				{
					Emit(shop_sub,rs1,rs1,mk_imm(-e));
				}
				else
				{
					// Address and data register are the same: store with an
					// offset of e and decrement the register afterwards.
					verify(rs3.is_null());
					rs3=mk_imm(e);
					update_after=true;
				}
			}

			Emit(shop_writem,shil_param(),rs1,rs2,(s32)e<0?-e:e,rs3);

			if (update_after)
			{
				Emit(shop_sub,rs1,rs1,mk_imm(-e));
			}
		}
		break;

	case DM_ReadM:
		//0 has no effect, so get rid of it
		if (rs3.is_imm() && rs3._imm==0)
			rs3=shil_param();

		state.info.has_readm=true;
		// |e| is passed to shop_readm in the flags slot; doubled for 64-bit transfers.
		if (transfer_64) e=(s32)e*2;

		Emit(shop_readm,rs1,rs2,shil_param(),(s32)e<0?-e:e,rs3);
		// Negative e: the address register is incremented by |e| after the load.
		if ((s32)e<0)
		{
			if (rs1._reg!=rs2._reg)//the reg shouldn't be updated if it was just read.
				Emit(shop_add,rs2,rs2,mk_imm(-e));
		}
		break;

	case DM_fiprOp:
		{
			// The result goes to the register three past rs1.
			shil_param rdd=mk_regi(rs1._reg+3);
			Emit(natop,rdd,rs1,rs2);
		}
		break;

	case DM_EXTOP:
		{
			// e==1 selects an 8-bit mask, anything else a 16-bit mask.
			Emit(natop,rs1,rs2,mk_imm(e==1?0xFF:0xFFFF));
		}
		break;

	case DM_MUL:
		{
			// NOTE: this local `op` shadows the function parameter `op`.
			shilop op;
			shil_param rd=mk_reg(reg_macl);
			shil_param rd2=shil_param();

			// e selects the multiply op: 16/-16/-32 write macl only,
			// 64/-64 additionally write mach through rd2.
			switch((s32)e)
			{
			case 16:
				op=shop_mul_u16;
				break;
			case -16:
				op=shop_mul_s16;
				break;

			case -32:
				op=shop_mul_i32;
				break;

			case 64:
				op=shop_mul_u64;
				rd2 = mk_reg(reg_mach);
				break;

			case -64:
				op=shop_mul_s64;
				rd2 = mk_reg(reg_mach);
				break;

			default:
				die("DM_MUL: Failed to classify opcode");
			}

			Emit(op,rd,rs1,rs2,0,shil_param(),rd2);
		}
		break;

	case DM_DIV0:
		{
			// e==1 takes the unsigned path (MatchDiv32u), anything else the signed path (MatchDiv32s).
			if (e==1)
			{
				// If the matcher recognises a full 32-bit divide sequence starting
				// here, it is replaced by dedicated div ops. The div_som_reg*
				// operands are presumably filled in by the matcher - confirm.
				if (MatchDiv32u(op,state.cpu.rpc))
				{
					verify(!state.cpu.is_delayslot);
					//div32u
					Emit(shop_div32u,mk_reg(div_som_reg1),mk_reg(div_som_reg1),mk_reg(div_som_reg2),0,shil_param(),mk_reg(div_som_reg3));

					Emit(shop_and,mk_reg(reg_sr_T),mk_reg(div_som_reg1),mk_imm(1));
					Emit(shop_shr,mk_reg(div_som_reg1),mk_reg(div_som_reg1),mk_imm(1));

					Emit(shop_div32p2,mk_reg(div_som_reg3),mk_reg(div_som_reg3),mk_reg(div_som_reg2),0,shil_param(reg_sr_T));

					//skip the aggregated opcodes
					// (128 bytes of guest code, accounted for as 64 opcodes' worth of cycles)
					state.cpu.rpc+=128;
					blk->guest_cycles+=CPU_RATIO*64;
				}
				else
				{
					//clear QM (bits 8,9)
					u32 qm=(1<<8)|(1<<9);
					Emit(shop_and,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_imm(~qm));
					//clear T !
					Emit(shop_mov32,mk_reg(reg_sr_T),mk_imm(0));
				}
			}
			else
			{
				if (MatchDiv32s(op,state.cpu.rpc))
				{
					verify(!state.cpu.is_delayslot);
					//div32s
					Emit(shop_div32s,mk_reg(div_som_reg1),mk_reg(div_som_reg1),mk_reg(div_som_reg2),0,shil_param(),mk_reg(div_som_reg3));

					Emit(shop_and,mk_reg(reg_sr_T),mk_reg(div_som_reg1),mk_imm(1));
					Emit(shop_sar,mk_reg(div_som_reg1),mk_reg(div_som_reg1),mk_imm(1));

					Emit(shop_div32p2,mk_reg(div_som_reg3),mk_reg(div_som_reg3),mk_reg(div_som_reg2),0,shil_param(reg_sr_T));

					//skip the aggregated opcodes
					// (128 bytes of guest code, accounted for as 64 opcodes' worth of cycles)
					state.cpu.rpc+=128;
					blk->guest_cycles+=CPU_RATIO*64;
				}
				else
				{
					//sr.Q=r[n]>>31;
					//sr.M=r[m]>>31;
					//sr.T=sr.M^sr.Q;

					//This is nasty because there isn't a temp reg ..
					//VERY NASTY
					// sr_T is used as scratch for Q and M and only gets its real value at the end.

					//Clear Q & M
					Emit(shop_and,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_imm(~((1<<8)|(1<<9))));

					//sr.Q=r[n]>>31;
					Emit(shop_sar,mk_reg(reg_sr_T),rs1,mk_imm(31));
					Emit(shop_and,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(1<<8));
					Emit(shop_or,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_reg(reg_sr_T));

					//sr.M=r[m]>>31;
					Emit(shop_sar,mk_reg(reg_sr_T),rs2,mk_imm(31));
					Emit(shop_and,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(1<<9));
					Emit(shop_or,mk_reg(reg_sr_status),mk_reg(reg_sr_status),mk_reg(reg_sr_T));

					//sr.T=sr.M^sr.Q;
					Emit(shop_xor,mk_reg(reg_sr_T),rs1,rs2);
					Emit(shop_shr,mk_reg(reg_sr_T),mk_reg(reg_sr_T),mk_imm(31));
				}
			}
		}
		break;

	case DM_ADC:
		{
			// sr_T is both the third source (rs3) and the second destination (rd2).
			Emit(natop,rs1,rs1,rs2,0,mk_reg(reg_sr_T),mk_reg(reg_sr_T));
		}
		break;

	default:
		// Unknown decode mode in the descriptor table.
		verify(false);
	}

	return true;
}
// Wraps a register id in a shil_param.
shil_param mk_reg(Sh4RegType reg)
{
	shil_param reg_param(reg);
	return reg_param;
}
// Wraps a 32-bit immediate value in a shil_param (FMT_IMM).
shil_param mk_imm(u32 immv)
{
	shil_param imm_param(FMT_IMM, immv);
	return imm_param;
}
//read_v4m3z1 void read_v4m3z1(RuntimeBlockInfo* blk) { int state=0; int st_sta=0; Sh4RegType reg_a; Sh4RegType reg_fb; for (size_t i=0;i<blk->oplist.size();i++) { shil_opcode* op=&blk->oplist[i]; bool a=false,b=false; if ((i+6)>blk->oplist.size()) break; if (state==0 && op->op==shop_readm && op->rd.is_r32f() && op->rs1.is_r32i() && op->rs3.is_null()) { if (op->rd._reg==reg_fr_0 || op->rd._reg==reg_fr_4 || op->rd._reg==reg_fr_8 || op->rd._reg==reg_fr_12) { reg_a=op->rs1._reg; reg_fb=op->rd._reg; st_sta=i; goto _next_st; } goto _fail; } else if (state < 8 && state & 1 && op->op==shop_add && op->rd._reg==reg_a && op->rs1.is_reg() && op->rs1._reg==reg_a && op->rs2.is_imm() && op->rs2._imm==4) { if (state==7) { u32 start=st_sta; for (int j=0;j<6;j++) { blk->oplist.erase(blk->oplist.begin()+start); } i=start+1; op=&blk->oplist[start+0]; op->op=shop_readm; op->flags=0x440; op->rd=shil_param(reg_fb==reg_fr_0?regv_fv_0: reg_fb==reg_fr_4?regv_fv_4: reg_fb==reg_fr_8?regv_fv_8: reg_fb==reg_fr_12?regv_fv_12:reg_sr_T); op->rd2=shil_param(); op->rs1=shil_param(reg_a); op->rs2=shil_param(); op->rs3=shil_param(); op=&blk->oplist[start+1]; op->op=shop_add; op->flags=0; op->rd=shil_param(reg_a); op->rd2=shil_param(); op->rs1=shil_param(reg_a); op->rs2=shil_param(FMT_IMM,16); op->rs3=shil_param(); goto _end; } else goto _next_st; } else if (state >1 && op->op==shop_readm && op->rd.is_r32f() && op->rd._reg==(reg_fb+state/2) && op->rs1.is_r32i() && op->rs1._reg==reg_a && op->rs3.is_null()) { goto _next_st; } else if ((a=(op->op==shop_mov32 && op->rd._reg==(reg_fb+3) && op->rs1.is_imm() && (op->rs1._imm==0x3f800000 /*|| op->rs1._imm==0*/))) || (b=(i>7 && op[-7].op==shop_mov32 && op[-7].rd._reg==(reg_fb+3) && op[-7].rs1.is_imm() && (op[-7].rs1._imm==0x3f800000 /*|| op[-7].rs1._imm==0*/))) ) { if (state==6) { if (b) st_sta--; if (a) printf("NOT B\b"); u32 start=st_sta; for (int j=0;j<5;j++) { blk->oplist.erase(blk->oplist.begin()+start); } i=start+1; op=&blk->oplist[start+0]; 
op->op=shop_readm; op->flags=0x431; op->rd=shil_param(reg_fb==reg_fr_0?regv_fv_0: reg_fb==reg_fr_4?regv_fv_4: reg_fb==reg_fr_8?regv_fv_8: reg_fb==reg_fr_12?regv_fv_12:reg_sr_T); op->rd2=shil_param(); op->rs1=shil_param(reg_a); op->rs2=shil_param(); op->rs3=shil_param(); op=&blk->oplist[start+1]; op->op=shop_add; op->flags=0; op->rd=shil_param(reg_a); op->rd2=shil_param(); op->rs1=shil_param(reg_a); op->rs2=shil_param(FMT_IMM,12); op->rs3=shil_param(); goto _end; } else goto _fail; } else goto _fail; die("wth"); _next_st: state ++; continue; _fail: if (state) i=st_sta; _end: state=0; } }