code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    /* Generate:
     *    MOV reg,value
     *    MOV xreg,reg
     * Not so efficient. We should at least do a PXOR for 0.
     */
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);
    CodeBuilder cdb;
    if (I32 && sz == 8)
    {
        unsigned r;
        regm_t rm = ALLREGS;
        cdb.append(allocreg(&rm,&r,TYint));         // allocate scratch register
        union { targ_size_t s; targ_long l[2]; } u;
        u.l[1] = 0;
        u.s = value;
        targ_long *p = &u.l[0];
        cdb.append(movregconst(CNIL,r,p[0],0));
        cdb.genfltreg(STO,r,0);                     // MOV floatreg,r
        cdb.append(movregconst(CNIL,r,p[1],0));
        cdb.genfltreg(STO,r,4);                     // MOV floatreg+4,r

        unsigned op = xmmload(TYdouble, true);
        cdb.genxmmreg(op,xreg,0,TYdouble);          // MOVSD XMMreg,floatreg
    }
    else
    {
        unsigned reg;
        cdb.append(regwithvalue(CNIL,ALLREGS,value,&reg,(sz == 8) ? 64 : 0));
        cdb.gen2(LODD,modregxrmx(3,xreg-XMM0,reg));     // MOVD xreg,reg
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);
        checkSetVex(cdb.last(), TYulong);
    }
    return cdb.finish();
}
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    /* Generate:
     *    MOV reg,value
     *    MOV xreg,reg
     * Not so efficient. We should at least do a PXOR for 0.
     */
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);
    code *c;
    if (I32 && sz == 8)
    {
        unsigned r;
        regm_t rm = ALLREGS;
        c = allocreg(&rm,&r,TYint);         // allocate scratch register
        union { targ_size_t s; targ_long l[2]; } u;
        u.l[1] = 0;
        u.s = value;
        targ_long *p = &u.l[0];
        c = movregconst(c,r,p[0],0);
        c = genfltreg(c,0x89,r,0);          // MOV floatreg,r
        c = movregconst(c,r,p[1],0);
        c = genfltreg(c,0x89,r,4);          // MOV floatreg+4,r

        unsigned op = xmmload(TYdouble);
        c = genfltreg(c,op,xreg - XMM0,0);  // MOVSD XMMreg,floatreg
    }
    else
    {
        unsigned reg;
        c = regwithvalue(CNIL,ALLREGS,value,&reg,(sz == 8) ? 64 : 0);
        c = gen2(c,LODD,modregxrmx(3,xreg-XMM0,reg));   // MOVD xreg,reg
        if (sz == 8)
            code_orrex(c, REX_W);
    }
    return c;
}
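The I32 path of both versions views the 64-bit constant as two 32-bit words and stores them to the float scratch area with two 4-byte MOVs. Below is a standalone sketch of that union trick, assuming a little-endian target as on x86; the constant and the names are illustrative only and not part of the backend.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    // Same layout as in movxmmconst: the 64-bit bit pattern overlays two 32-bit words.
    union { uint64_t s; uint32_t l[2]; } u;
    u.l[1] = 0;                         // pre-clear the high word, as the backend does
                                        // (matters when the stored value is narrower than 64 bits)
    u.s = 0x400921FB54442D18ULL;        // example: bit pattern of the double 3.141592653589793
    printf("low  word: 0x%08x\n", (unsigned)u.l[0]);   // would go to floatreg+0
    printf("high word: 0x%08x\n", (unsigned)u.l[1]);   // would go to floatreg+4
    return 0;
}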
code *xmmpost(elem *e,regm_t *pretregs)
{
    elem *e1 = e->E1;
    elem *e2 = e->E2;
    tym_t ty1 = tybasic(e1->Ety);

    CodeBuilder cdb;
    regm_t retregs;
    unsigned reg;
    bool regvar = FALSE;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2)          // and we can compute directly into it
           )
        {
            regvar = TRUE;
            retregs = varregm;
            reg = varreg;                       // evaluate directly in target register
            cdb.append(getregs(retregs));       // destroy these regs
        }
    }

    code cs;
    if (!regvar)
    {
        code *c = getlvalue(&cs,e1,0);          // get EA
        cdb.append(c);
        retregs = XMMREGS & ~*pretregs;
        if (!retregs)
            retregs = XMMREGS;
        c = allocreg(&retregs,&reg,ty1);
        cdb.append(c);
        cs.Iop = xmmload(ty1, true);            // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    // Result register
    regm_t resultregs = XMMREGS & *pretregs & ~retregs;
    if (!resultregs)
        resultregs = XMMREGS & ~retregs;
    unsigned resultreg;
    code *c = allocreg(&resultregs, &resultreg, ty1);
    cdb.append(c);

    cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0));  // MOVSS/D resultreg,reg
    checkSetVex(cdb.last(), ty1);

    regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs);
    if (!rretregs)
        rretregs = XMMREGS & ~(retregs | resultregs);
    c = codelem(e2,&rretregs,FALSE);            // eval right leaf
    cdb.append(c);
    unsigned rreg = findreg(rretregs);

    unsigned op = xmmoperator(e1->Ety, e->Eoper);
    cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0));  // ADD reg,rreg
    checkSetVex(cdb.last(), e1->Ety);

    if (!regvar)
    {
        cs.Iop = xmmstore(ty1,true);            // reverse operand order of MOVS[SD]
        cdb.gen(&cs);
        checkSetVex(cdb.last(), ty1);
    }

    if (e1->Ecount ||                           // if lvalue is a CSE or
        regvar)                                 // rvalue can't be a CSE
    {
        cdb.append(getregs_imm(retregs));       // necessary if both lvalue and
                                                //  rvalue are CSEs (since a reg
                                                //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));             // if lvalue is a CSE
    }

    cdb.append(fixresult(e,resultregs,pretregs));
    freenode(e1);
    return cdb.finish();
}
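For reference, a standalone sketch (not backend code) of the value flow xmmpost arranges for a floating-point post-increment: the old value is copied into a separate result register (MOVSS/MOVSD resultreg,reg), the operation is applied in the working register (e.g. ADDSD reg,rreg), and the updated value is written back unless the lvalue already lives in a register.

#include <stdio.h>

int main(void)
{
    double x = 1.5;            // lvalue, loaded into the working register
    double rvalue = 2.0;       // right leaf, evaluated into rreg
    double result = x;         // resultreg gets a copy of the old value
    x = x + rvalue;            // old value combined with rvalue, then stored back to the lvalue
    printf("result = %g, x = %g\n", result, x);   // result = 1.5, x = 3.5
    return 0;
}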
code *xmmopass(elem *e,regm_t *pretregs)
{
    elem *e1 = e->E1;
    elem *e2 = e->E2;
    tym_t ty1 = tybasic(e1->Ety);
    unsigned sz1 = tysize[ty1];
    regm_t rretregs = XMMREGS & ~*pretregs;
    if (!rretregs)
        rretregs = XMMREGS;

    code *cr = codelem(e2,&rretregs,FALSE);     // eval right leaf
    unsigned rreg = findreg(rretregs);

    code cs;
    code *cl,*cg;
    regm_t retregs;
    unsigned reg;
    bool regvar = FALSE;
    if (config.flags4 & CFG4optimized)
    {
        // Be careful of cases like (x = x+x+x). We cannot evaluate in
        // x if x is in a register.
        unsigned varreg;
        regm_t varregm;
        if (isregvar(e1,&varregm,&varreg) &&    // if lvalue is register variable
            doinreg(e1->EV.sp.Vsym,e2)          // and we can compute directly into it
           )
        {
            regvar = TRUE;
            retregs = varregm;
            reg = varreg;                       // evaluate directly in target register
            cl = NULL;
            cg = getregs(retregs);              // destroy these regs
        }
    }

    if (!regvar)
    {
        cl = getlvalue(&cs,e1,rretregs);        // get EA
        retregs = *pretregs & XMMREGS & ~rretregs;
        if (!retregs)
            retregs = XMMREGS & ~rretregs;
        cg = allocreg(&retregs,&reg,ty1);
        cs.Iop = xmmload(ty1);                  // MOVSD xmm,xmm_m64
        code_newreg(&cs,reg - XMM0);
        cg = gen(cg,&cs);
    }

    unsigned op = xmmoperator(e1->Ety, e->Eoper);
    code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0));

    if (!regvar)
    {
        cs.Iop = xmmstore(ty1);                 // reverse operand order of MOVS[SD]
        gen(co,&cs);
    }

    if (e1->Ecount ||                           // if lvalue is a CSE or
        regvar)                                 // rvalue can't be a CSE
    {
        cl = cat(cl,getregs_imm(retregs));      // necessary if both lvalue and
                                                //  rvalue are CSEs (since a reg
                                                //  can hold only one e at a time)
        cssave(e1,retregs,EOP(e1));             // if lvalue is a CSE
    }

    co = cat(co,fixresult(e,retregs,pretregs));
    freenode(e1);
    return cat4(cr,cl,cg,co);
}
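Both xmmopass and xmmpost pick scratch XMM registers with the same mask idiom: prefer registers the caller did not request, and fall back to the full XMM set when that leaves nothing. Below is a standalone sketch with made-up mask values; the real regm_t masks and the XMMREGS constant are defined by the backend.

#include <stdio.h>

int main(void)
{
    unsigned XMMREGS  = 0xFF;                   // hypothetical mask for XMM0..XMM7
    unsigned pretregs = 0xFF;                   // caller will accept a result in any XMM register
    unsigned rretregs = XMMREGS & ~pretregs;    // prefer registers the caller did not ask for
    if (!rretregs)
        rretregs = XMMREGS;                     // nothing left over: fall back to the full set
    printf("rretregs = %#x\n", rretregs);       // prints 0xff
    return 0;
}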