/*********************************************
 * Load a constant into XMM register xreg by routing it through a
 * general-purpose scratch register.
 * Params:
 *      xreg  = target XMM register (XMM0..XMMn; must be in XMMREGS)
 *      sz    = size of the constant, 4 or 8 bytes
 *      value = the constant bits to load
 *      flags = unused in this implementation (kept for interface
 *              compatibility with callers)
 * Returns:
 *      generated code sequence
 */
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    /* Generate:
     *    MOV reg,value
     *    MOV xreg,reg
     * Not so efficient. We should at least do a PXOR for 0.
     */
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);
    CodeBuilder cdb;
    if (I32 && sz == 8)
    {
        // 32-bit mode has no 64-bit GP registers: store the constant into
        // floatreg 32 bits at a time, then load the XMM register from memory.
        unsigned r;
        regm_t rm = ALLREGS;
        cdb.append(allocreg(&rm,&r,TYint));         // allocate scratch register
        union { targ_size_t s; targ_long l[2]; } u;
        u.l[1] = 0;
        u.s = value;
        targ_long *p = &u.l[0];
        cdb.append(movregconst(CNIL,r,p[0],0));
        cdb.genfltreg(STO,r,0);                     // MOV floatreg,r
        cdb.append(movregconst(CNIL,r,p[1],0));
        cdb.genfltreg(STO,r,4);                     // MOV floatreg+4,r
        unsigned op = xmmload(TYdouble, true);
        cdb.genxmmreg(op,xreg,0,TYdouble);          // MOVSD XMMreg,floatreg
    }
    else
    {
        // Materialize the constant in a GP register, then MOVD/MOVQ it over.
        unsigned reg;
        // was mojibake "®," — restored to "&reg,"
        cdb.append(regwithvalue(CNIL,ALLREGS,value,&reg,(sz == 8) ? 64 : 0));
        cdb.gen2(LODD,modregxrmx(3,xreg-XMM0,reg)); // MOVD xreg,reg
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);          // promote MOVD to MOVQ
        checkSetVex(cdb.last(), TYulong);
    }
    return cdb.finish();
}
/*********************************************
 * Load a constant into XMM register xreg via a GP scratch register.
 * NOTE(review): this is a second definition of movxmmconst — it appears to
 * be an older revision of the function above (raw code* chaining instead of
 * CodeBuilder). Two identical-signature definitions cannot coexist in one
 * translation unit; confirm which revision is live and remove the other.
 * Params:
 *      xreg  = target XMM register (must be in XMMREGS)
 *      sz    = size of the constant, 4 or 8 bytes
 *      value = the constant bits to load
 *      flags = unused in this implementation
 * Returns:
 *      generated code sequence
 */
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    /* Generate:
     *    MOV reg,value
     *    MOV xreg,reg
     * Not so efficient. We should at least do a PXOR for 0.
     */
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);
    code *c;
    if (I32 && sz == 8)
    {
        // 32-bit mode: build the 8-byte constant in floatreg one half at a time
        unsigned r;
        regm_t rm = ALLREGS;
        c = allocreg(&rm,&r,TYint);                 // allocate scratch register
        union { targ_size_t s; targ_long l[2]; } u;
        u.l[1] = 0;
        u.s = value;
        targ_long *p = &u.l[0];
        c = movregconst(c,r,p[0],0);
        c = genfltreg(c,0x89,r,0);                  // MOV floatreg,r
        c = movregconst(c,r,p[1],0);
        c = genfltreg(c,0x89,r,4);                  // MOV floatreg+4,r
        unsigned op = xmmload(TYdouble);
        c = genfltreg(c,op,xreg - XMM0,0);          // MOVSD XMMreg,floatreg
    }
    else
    {
        unsigned reg;
        // was mojibake "®," — restored to "&reg,"
        c = regwithvalue(CNIL,ALLREGS,value,&reg,(sz == 8) ? 64 : 0);
        c = gen2(c,LODD,modregxrmx(3,xreg-XMM0,reg)); // MOVD xreg,reg
        if (sz == 8)
            code_orrex(c, REX_W);                   // promote MOVD to MOVQ
    }
    return c;
}
/*********************************************
 * Generate code for an int <-> float/double conversion using the SSE
 * CVTxx2xx instructions, fusing chained conversion nodes where possible
 * so no precision is lost (e.g. OPd_f over OPs32_d becomes one CVTSI2SS).
 * Params:
 *      e        = conversion expression node (OPd_f, OPs32_d, OPd_s64, ...)
 *      pretregs = in: desired result registers; out: may be updated by fixresult
 * Returns:
 *      generated code sequence
 */
code *xmmcnvt(elem *e,regm_t *pretregs)
{
    unsigned op=0, regs;
    tym_t ty;
    unsigned char rex = 0;
    bool zx = false; // zero extend uint
    /* There are no ops for integer <-> float/real conversions
     * but there are instructions for them. In order to use these
     * try to fuse chained conversions. Be careful not to loose
     * precision for real to long.
     */
    elem *e1 = e->E1;
    switch (e->Eoper)
    {
    case OPd_f:
        // int-to-double-to-float chains collapse into a single CVTSI2SS
        if (e1->Eoper == OPs32_d)
            ;
        else if (I64 && e1->Eoper == OPs64_d)
            rex = REX_W;
        else if (I64 && e1->Eoper == OPu32_d)
        {   rex = REX_W;
            zx = true;          // uint source: zero-extend to 64 bits first
        }
        else
        {   // plain double -> float
            regs = XMMREGS;
            op = CVTSD2SS;
            ty = TYfloat;
            break;
        }
        // directly use si2ss
        regs = ALLREGS;
        e1 = e1->E1;            // skip the inner int->double node
        op = CVTSI2SS;
        ty = TYfloat;
        break;

    case OPs32_d:              goto Litod;
    case OPs64_d: rex = REX_W; goto Litod;
    case OPu32_d: rex = REX_W; zx = true; goto Litod;
    Litod:
        regs = ALLREGS;
        op = CVTSI2SD;
        ty = TYdouble;
        break;

    case OPd_s32: ty = TYint;  goto Ldtoi;
    case OPd_u32: ty = TYlong; if (I64) rex = REX_W; goto Ldtoi;
    case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi;
    Ldtoi:
        regs = XMMREGS;
        switch (e1->Eoper)
        {
        case OPf_d:
            // float-to-double-to-int chain: convert straight from float
            e1 = e1->E1;
            op = CVTTSS2SI;
            break;
        case OPld_d:
            if (e->Eoper == OPd_s64)
                return cnvt87(e,pretregs); // precision
            /* FALL-THROUGH */
        default:
            op = CVTTSD2SI;
            break;
        }
        break;

    case OPf_d:
        regs = XMMREGS;
        op = CVTSS2SD;
        ty = TYdouble;
        break;
    }
    assert(op);
    CodeBuilder cdb;
    // was mojibake "®s" — restored to "&regs"
    cdb.append(codelem(e1, &regs, FALSE));
    unsigned reg = findreg(regs);
    if (reg >= XMM0)
        reg -= XMM0;            // use XMM-relative register number in ModRM
    else if (zx)
    {   assert(I64);
        cdb.append(getregs(regs));
        cdb.append(genregs(CNIL,STO,reg,reg)); // MOV reg,reg to zero upper 32-bit
        code_orflag(cdb.last(),CFvolatile);
    }
    unsigned retregs = *pretregs;
    if (tyxmmreg(ty)) // target is XMM
    {   if (!(*pretregs & XMMREGS))
            retregs = XMMREGS;
    }
    else              // source is XMM
    {   assert(regs & XMMREGS);
        if (!(retregs & ALLREGS))
            retregs = ALLREGS;
    }
    unsigned rreg;
    cdb.append(allocreg(&retregs,&rreg,ty));
    if (rreg >= XMM0)
        rreg -= XMM0;

    cdb.gen2(op, modregxrmx(3,rreg,reg));
    assert(I64 || !rex);        // REX prefixes only exist in 64-bit mode
    if (rex)
        code_orrex(cdb.last(), rex);

    if (*pretregs != retregs)
        cdb.append(fixresult(e,retregs,pretregs));
    return cdb.finish();
}
/*********************************************
 * Generate code for an assignment whose destination is held in an XMM
 * register or is an XMM-store to memory (e.g. float/double assignment).
 * Handles direct evaluation into a register variable, the (*p++ = ...)
 * post-increment special case, and CSE bookkeeping.
 * Params:
 *      e        = the assignment expression node
 *      op       = OPeq for a plain store, otherwise the store opcode to use
 *      e1       = lvalue subtree
 *      e2       = rvalue subtree
 *      pretregs = in: desired result registers; out: may be updated
 * Returns:
 *      generated code sequence
 */
code *xmmeq(elem *e, unsigned op, elem *e1, elem *e2,regm_t *pretregs)
{
    tym_t tymll;            // NOTE(review): unused here — possibly vestigial
    unsigned reg;
    int i;                  // NOTE(review): unused here — possibly vestigial
    code cs;                // instruction template being assembled
    elem *e11;
    bool regvar; /* TRUE means evaluate into register variable */
    regm_t varregm;
    unsigned varreg;
    targ_int postinc;

    //printf("xmmeq(e1 = %p, e2 = %p, *pretregs = %s)\n", e1, e2, regm_str(*pretregs));
    int e2oper = e2->Eoper;
    tym_t tyml = tybasic(e1->Ety); /* type of lvalue */
    regm_t retregs = *pretregs;

    if (!(retregs & XMMREGS))
        retregs = XMMREGS; // pick any XMM reg

    bool aligned = xmmIsAligned(e1);
    // For a plain assignment pick the aligned/unaligned store opcode;
    // otherwise the caller supplied the opcode directly.
    cs.Iop = (op == OPeq) ? xmmstore(tyml, aligned) : op;
    regvar = FALSE;
    varregm = 0;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2) &&    // and we can compute directly into it
            varregm & XMMREGS
           )
        {   regvar = TRUE;
            retregs = varregm;
            reg = varreg; /* evaluate directly in target register */
        }
    }
    if (*pretregs & mPSW && !EOP(e1)) // if evaluating e1 couldn't change flags
    {   // Be careful that this lines up with jmpopcode()
        retregs |= mPSW;
        *pretregs &= ~mPSW;
    }
    CodeBuilder cdb;
    cdb.append(scodelem(e2,&retregs,0,TRUE)); // get rvalue

    // Look for special case of (*p++ = ...), where p is a register variable
    if (e1->Eoper == OPind &&
        ((e11 = e1->E1)->Eoper == OPpostinc || e11->Eoper == OPpostdec) &&
        e11->E1->Eoper == OPvar &&
        e11->E1->EV.sp.Vsym->Sfl == FLreg
       )
    {
        postinc = e11->E2->EV.Vint;
        if (e11->Eoper == OPpostdec)
            postinc = -postinc;         // post-decrement: step backwards
        cdb.append(getlvalue(&cs,e11,RMstore | retregs));
        freenode(e11->E2);
    }
    else
    {   postinc = 0;
        cdb.append(getlvalue(&cs,e1,RMstore | retregs)); // get lvalue (cl == CNIL if regvar)
    }

    cdb.append(getregs_imm(regvar ? varregm : 0));

    // Encode the XMM source register into the reg field of the ModRM byte,
    // with REX.R carrying the high bit for XMM8..XMM15.
    reg = findreg(retregs & XMMREGS);
    cs.Irm |= modregrm(0,(reg - XMM0) & 7,0);
    if ((reg - XMM0) & 8)
        cs.Irex |= REX_R;

    // Do not generate mov from register onto itself
    if (!(regvar && reg == XMM0 + ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))))
    {
        cdb.gen(&cs); // MOV EA+offset,reg
        if (op == OPeq)
            checkSetVex(cdb.last(), tyml);
    }

    if (e1->Ecount ||          // if lvalue is a CSE or
        regvar)                // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs)); // necessary if both lvalue and
                                          // rvalue are CSEs (since a reg
                                          // can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));       // if lvalue is a CSE
    }
    cdb.append(fixresult(e,retregs,pretregs));
Lp: // NOTE(review): label has no visible goto in this chunk — may be targeted
    // by code outside view, or vestigial; confirm before removing.
    if (postinc)
    {
        // Apply the deferred pointer increment/decrement for (*p++ = ...)
        int reg = findreg(idxregm(&cs));
        if (*pretregs & mPSW)
        {   // Use LEA to avoid touching the flags
            unsigned rm = cs.Irm & 7;
            if (cs.Irex & REX_B)
                rm |= 8;
            cdb.genc1(0x8D,buildModregrm(2,reg,rm),FLconst,postinc); // LEA reg,[reg+postinc]
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (I64)
        {
            cdb.genc2(0x81,modregrmx(3,0,reg),postinc); // ADD reg,postinc
            if (tysize(e11->E1->Ety) == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
        {
            if (postinc == 1)
                cdb.gen1(0x40 + reg); // INC reg
            else if (postinc == -(targ_int)1)
                cdb.gen1(0x48 + reg); // DEC reg
            else
            {
                cdb.genc2(0x81,modregrm(3,0,reg),postinc); // ADD reg,postinc
            }
        }
    }
    freenode(e1);
    return cdb.finish();
}