code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags)
{
    /* Generate:
     *    MOV reg,value
     *    MOV xreg,reg
     * Not so efficient. We should at least do a PXOR for 0.
     */
    assert(mask[xreg] & XMMREGS);
    assert(sz == 4 || sz == 8);
    CodeBuilder cdb;
    if (I32 && sz == 8)
    {
        unsigned r;
        regm_t rm = ALLREGS;
        cdb.append(allocreg(&rm,&r,TYint));         // allocate scratch register
        union { targ_size_t s; targ_long l[2]; } u;
        u.l[1] = 0;
        u.s = value;
        targ_long *p = &u.l[0];
        cdb.append(movregconst(CNIL,r,p[0],0));
        cdb.genfltreg(STO,r,0);                     // MOV floatreg,r
        cdb.append(movregconst(CNIL,r,p[1],0));
        cdb.genfltreg(STO,r,4);                     // MOV floatreg+4,r

        unsigned op = xmmload(TYdouble, true);
        cdb.genxmmreg(op,xreg,0,TYdouble);          // MOVSD XMMreg,floatreg
    }
    else
    {
        unsigned reg;
        cdb.append(regwithvalue(CNIL,ALLREGS,value,&reg,(sz == 8) ? 64 : 0));
        cdb.gen2(LODD,modregxrmx(3,xreg-XMM0,reg)); // MOVD xreg,reg
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);
        checkSetVex(cdb.last(), TYulong);
    }
    return cdb.finish();
}
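/* The "PXOR for 0" note above suggests an obvious fast path. A minimal
 * sketch follows -- not part of the original function -- assuming the PXOR
 * opcode (66 0F EF) is available from xmm.h and that gen2/modregxrmx emit
 * the usual reg,reg ModRM form. It would slot in right after the asserts,
 * before any scratch register is allocated, and skips the GP-register
 * round trip entirely when the constant is zero.
 */
#if 0   // illustrative sketch only
    if (value == 0)
    {
        cdb.gen2(PXOR, modregxrmx(3, xreg-XMM0, xreg-XMM0)); // PXOR xreg,xreg
        checkSetVex(cdb.last(), TYulong);       // VEX-encode under AVX
        return cdb.finish();
    }
#endif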
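/* Hypothetical call site (names illustrative, not from the original):
 * materialize the bit pattern of 1.0f in XMM1. On the sz == 4 path this
 * emits MOV reg,0x3F800000 followed by MOVD XMM1,reg.
 *
 *     union { float f; targ_long i; } u;
 *     u.f = 1.0f;
 *     code *c = movxmmconst(XMM1, 4, u.i, 0);
 */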