STATIC void ecom(elem **pe) { int i,op; HCSArray hcsarraySave; unsigned hash; elem *e,*ehash; tym_t tym; e = *pe; assert(e); elem_debug(e); #ifdef DEBUG assert(e->Ecount == 0); //assert(e->Ecomsub == 0); #endif tym = tybasic(e->Ety); op = e->Eoper; switch (op) { case OPconst: case OPvar: case OPrelconst: break; case OPstreq: case OPpostinc: case OPpostdec: case OPeq: case OPaddass: case OPminass: case OPmulass: case OPdivass: case OPmodass: case OPshrass: case OPashrass: case OPshlass: case OPandass: case OPxorass: case OPorass: case OPvecsto: #if TX86 /* Reverse order of evaluation for double op=. This is so that */ /* the pushing of the address of the second operand is easier. */ /* However, with the 8087 we don't need the kludge. */ if (op != OPeq && tym == TYdouble && !config.inline8087) { if (EOP(e->E1)) ecom(&e->E1->E1); ecom(&e->E2); } else #endif { /* Don't mark the increment of an i++ or i-- as a CSE, if it */ /* can be done with an INC or DEC instruction. */ if (!(OTpost(op) && elemisone(e->E2))) ecom(&e->E2); /* evaluate 2nd operand first */ case OPnegass: if (EOP(e->E1)) /* if lvalue is an operator */ { if (e->E1->Eoper != OPind) elem_print(e); assert(e->E1->Eoper == OPind); ecom(&(e->E1->E1)); } } touchlvalue(e->E1); if (!OTpost(op)) /* lvalue of i++ or i-- is not a cse*/ { hash = cs_comphash(e->E1); vec_setbit(hash % CSVECDIM,csvec); addhcstab(e->E1,hash); // add lvalue to hcstab[] } return; case OPbtc: case OPbts: case OPbtr: case OPcmpxchg: ecom(&e->E1); ecom(&e->E2); touchfunc(0); // indirect assignment return; case OPandand: case OPoror: ecom(&e->E1); hcsarraySave = hcsarray; ecom(&e->E2); hcsarray = hcsarraySave; // no common subs by E2 return; /* if comsub then logexp() will */ /* break */ case OPcond: ecom(&e->E1); hcsarraySave = hcsarray; ecom(&e->E2->E1); // left condition hcsarray = hcsarraySave; // no common subs by E2 ecom(&e->E2->E2); // right condition hcsarray = hcsarraySave; // no common subs by E2 return; // can't be a common sub case OPcall: case OPcallns: ecom(&e->E2); /* eval right first */ /* FALL-THROUGH */ case OPucall: case OPucallns: ecom(&e->E1); touchfunc(1); return; case OPstrpar: /* so we don't break logexp() */ #if TX86 case OPinp: /* never CSE the I/O instruction itself */ #endif case OPprefetch: // don't CSE E2 or the instruction ecom(&e->E1); /* FALL-THROUGH */ case OPasm: case OPstrthis: // don't CSE these case OPframeptr: case OPgot: case OPctor: case OPdtor: case OPdctor: case OPmark: return; case OPddtor: touchall(); ecom(&e->E1); touchall(); return; case OPparam: #if TX86 case OPoutp: #endif ecom(&e->E1); case OPinfo: ecom(&e->E2); return; case OPcomma: case OPremquo: ecom(&e->E1); ecom(&e->E2); break; case OPvp_fp: case OPcvp_fp: ecom(&e->E1); touchaccess(e); break; case OPind: ecom(&e->E1); /* Generally, CSEing a *(double *) results in worse code */ if (tyfloating(tym)) return; break; case OPstrcpy: case OPstrcat: case OPmemcpy: case OPmemset: ecom(&e->E2); case OPsetjmp: ecom(&e->E1); touchfunc(0); return; default: /* other operators */ if (!EBIN(e)) WROP(e->Eoper); assert(EBIN(e)); case OPadd: case OPmin: case OPmul: case OPdiv: case OPor: case OPxor: case OPand: case OPeqeq: case OPne: #if TX86 case OPscale: case OPyl2x: case OPyl2xp1: #endif ecom(&e->E1); ecom(&e->E2); break; case OPstring: case OPaddr: case OPbit: WROP(e->Eoper); elem_print(e); assert(0); /* optelem() should have removed these */ /* NOTREACHED */ // Explicitly list all the unary ops for speed case OPnot: case OPcom: case OPneg: case OPuadd: case OPabs: case OPrndtol: case OPrint: case OPpreinc: case OPpredec: case OPbool: case OPstrlen: case OPs16_32: case OPu16_32: case OPd_s32: case OPd_u32: case OPs32_d: case OPu32_d: case OPd_s16: case OPs16_d: case OP32_16: case OPd_f: case OPf_d: case OPd_ld: case OPld_d: case OPc_r: case OPc_i: case OPu8_16: case OPs8_16: case OP16_8: case OPu32_64: case OPs32_64: case OP64_32: case OPmsw: case OPu64_128: case OPs64_128: case OP128_64: case OPd_s64: case OPs64_d: case OPd_u64: case OPu64_d: case OPstrctor: case OPu16_d: case OPd_u16: case OParrow: case OPvoid: case OPbsf: case OPbsr: case OPbswap: case OPpopcnt: case OPvector: case OPld_u64: #if TX86 case OPsqrt: case OPsin: case OPcos: #endif case OPoffset: case OPnp_fp: case OPnp_f16p: case OPf16p_np: case OPvecfill: ecom(&e->E1); break; case OPhalt: return; } /* don't CSE structures or unions or volatile stuff */ if (tym == TYstruct || tym == TYvoid || e->Ety & mTYvolatile #if TX86 || tyxmmreg(tym) // don't CSE doubles if inline 8087 code (code generator can't handle it) || (tyfloating(tym) && config.inline8087) #endif ) return; hash = cs_comphash(e); /* must be AFTER leaves are done */ /* Search for a match in hcstab[]. * Search backwards, as most likely matches will be towards the end * of the list. */ #ifdef DEBUG if (debugx) dbg_printf("elem: %p hash: %6d\n",e,hash); #endif int csveci = hash % CSVECDIM; if (vec_testbit(csveci,csvec)) { for (i = hcsarray.top; i--;) { #ifdef DEBUG if (debugx) dbg_printf("i: %2d Hhash: %6d Helem: %p\n", i,hcstab[i].Hhash,hcstab[i].Helem); #endif if (hash == hcstab[i].Hhash && (ehash = hcstab[i].Helem) != NULL) { /* if elems are the same and we still have room for more */ if (el_match(e,ehash) && ehash->Ecount < 0xFF) { /* Make sure leaves are also common subexpressions * to avoid false matches. */ if (!OTleaf(op)) { if (!e->E1->Ecount) continue; if (OTbinary(op) && !e->E2->Ecount) continue; } ehash->Ecount++; *pe = ehash; #ifdef DEBUG if (debugx) dbg_printf("**MATCH** %p with %p\n",e,*pe); #endif el_free(e); return; } } } } else vec_setbit(csveci,csvec); addhcstab(e,hash); // add this elem to hcstab[] }
void sliceStructs() { if (debugc) printf("sliceStructs()\n"); size_t sia_length = globsym.top; /* 3 is because it is used for two arrays, sia[] and sia2[]. * sia2[] can grow to twice the size of sia[], as symbols can get split into two. */ SymInfo *sia = (SymInfo *)malloc(3 * sia_length * sizeof(SymInfo)); assert(sia); SymInfo *sia2 = sia + sia_length; bool anySlice = false; for (int si = 0; si < globsym.top; si++) { Symbol *s = globsym.tab[si]; //printf("slice1: %s\n", s->Sident); if ((s->Sflags & (GTregcand | SFLunambig)) != (GTregcand | SFLunambig)) { sia[si].canSlice = false; continue; } targ_size_t sz = type_size(s->Stype); if (sz != 2 * REGSIZE || tyfv(s->Stype->Tty) || tybasic(s->Stype->Tty) == TYhptr) // because there is no TYseg { sia[si].canSlice = false; continue; } switch (s->Sclass) { case SCfastpar: case SCregister: case SCauto: case SCshadowreg: case SCparameter: anySlice = true; sia[si].canSlice = true; sia[si].accessSlice = false; // We can't slice whole XMM registers if (tyxmmreg(s->Stype->Tty) && s->Spreg >= XMM0 && s->Spreg <= XMM15 && s->Spreg2 == NOREG) { sia[si].canSlice = false; } break; case SCstack: case SCpseudo: case SCstatic: case SCbprel: sia[si].canSlice = false; break; default: symbol_print(s); assert(0); } } if (!anySlice) goto Ldone; for (block *b = startblock; b; b = b->Bnext) { if (b->BC == BCasm) goto Ldone; if (b->Belem) sliceStructs_Gather(sia, b->Belem); } { // scope needed because of goto skipping declarations bool any = false; int n = 0; // the number of symbols added for (int si = 0; si < sia_length; si++) { sia2[si + n].canSlice = false; if (sia[si].canSlice) { // If never did access it as a slice, don't slice if (!sia[si].accessSlice) { sia[si].canSlice = false; continue; } /* Split slice-able symbol sold into two symbols, * (sold,snew) in adjacent slots in the symbol table. */ Symbol *sold = globsym.tab[si + n]; size_t idlen = 2 + strlen(sold->Sident) + 2; char *id = (char *)malloc(idlen + 1); assert(id); sprintf(id, "__%s_%d", sold->Sident, REGSIZE); if (debugc) printf("creating slice symbol %s\n", id); Symbol *snew = symbol_calloc(id, idlen); free(id); snew->Sclass = sold->Sclass; snew->Sfl = sold->Sfl; snew->Sflags = sold->Sflags; if (snew->Sclass == SCfastpar || snew->Sclass == SCshadowreg) { snew->Spreg = sold->Spreg2; snew->Spreg2 = NOREG; sold->Spreg2 = NOREG; } type_free(sold->Stype); sold->Stype = type_fake(sia[si].ty0); sold->Stype->Tcount++; snew->Stype = type_fake(sia[si].ty1); snew->Stype->Tcount++; SYMIDX sinew = symbol_add(snew); for (int i = sinew; i > si + n + 1; --i) { globsym.tab[i] = globsym.tab[i - 1]; globsym.tab[i]->Ssymnum += 1; } globsym.tab[si + n + 1] = snew; snew->Ssymnum = si + n + 1; sia2[si + n].canSlice = true; sia2[si + n].si0 = si + n; sia2[si + n].ty0 = sia[si].ty0; sia2[si + n].ty1 = sia[si].ty1; ++n; any = true; } } if (!any) goto Ldone; } for (int si = 0; si < globsym.top; si++) { Symbol *s = globsym.tab[si]; assert(s->Ssymnum == si); } for (block *b = startblock; b; b = b->Bnext) { if (b->Belem) sliceStructs_Replace(sia2, b->Belem); } Ldone: free(sia); }
void FuncDeclaration::toObjFile(int multiobj) { FuncDeclaration *func = this; ClassDeclaration *cd = func->parent->isClassDeclaration(); int reverse; int has_arguments; //printf("FuncDeclaration::toObjFile(%p, %s.%s)\n", func, parent->toChars(), func->toChars()); //if (type) printf("type = %s\n", func->type->toChars()); #if 0 //printf("line = %d\n",func->getWhere() / LINEINC); EEcontext *ee = env->getEEcontext(); if (ee->EEcompile == 2) { if (ee->EElinnum < (func->getWhere() / LINEINC) || ee->EElinnum > (func->endwhere / LINEINC) ) return; // don't compile this function ee->EEfunc = func->toSymbol(); } #endif if (semanticRun >= PASSobj) // if toObjFile() already run return; // If errors occurred compiling it, such as bugzilla 6118 if (type && type->ty == Tfunction && ((TypeFunction *)type)->next->ty == Terror) return; if (!func->fbody) { return; } if (func->isUnitTestDeclaration() && !global.params.useUnitTests) return; if (multiobj && !isStaticDtorDeclaration() && !isStaticCtorDeclaration()) { obj_append(this); return; } assert(semanticRun == PASSsemantic3done); semanticRun = PASSobj; if (global.params.verbose) printf("function %s\n",func->toChars()); Symbol *s = func->toSymbol(); func_t *f = s->Sfunc; #if TARGET_WINDOS /* This is done so that the 'this' pointer on the stack is the same * distance away from the function parameters, so that an overriding * function can call the nested fdensure or fdrequire of its overridden function * and the stack offsets are the same. */ if (isVirtual() && (fensure || frequire)) f->Fflags3 |= Ffakeeh; #endif #if TARGET_OSX s->Sclass = SCcomdat; #else s->Sclass = SCglobal; #endif for (Dsymbol *p = parent; p; p = p->parent) { if (p->isTemplateInstance()) { s->Sclass = SCcomdat; break; } } /* Vector operations should be comdat's */ if (isArrayOp) s->Sclass = SCcomdat; if (isNested()) { // if (!(config.flags3 & CFG3pic)) // s->Sclass = SCstatic; f->Fflags3 |= Fnested; } else { const char *libname = (global.params.symdebug) ? global.params.debuglibname : global.params.defaultlibname; // Pull in RTL startup code if (func->isMain()) { objextdef("_main"); #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS obj_ehsections(); // initialize exception handling sections #endif #if TARGET_WINDOS objextdef("__acrtused_con"); #endif obj_includelib(libname); s->Sclass = SCglobal; } else if (strcmp(s->Sident, "main") == 0 && linkage == LINKc) { #if TARGET_WINDOS objextdef("__acrtused_con"); // bring in C startup code obj_includelib("snn.lib"); // bring in C runtime library #endif s->Sclass = SCglobal; } else if (func->isWinMain()) { objextdef("__acrtused"); obj_includelib(libname); s->Sclass = SCglobal; } // Pull in RTL startup code else if (func->isDllMain()) { objextdef("__acrtused_dll"); obj_includelib(libname); s->Sclass = SCglobal; } } cstate.CSpsymtab = &f->Flocsym; // Find module m for this function Module *m = NULL; for (Dsymbol *p = parent; p; p = p->parent) { m = p->isModule(); if (m) break; } IRState irs(m, func); Dsymbols deferToObj; // write these to OBJ file later irs.deferToObj = &deferToObj; TypeFunction *tf; enum RET retmethod; symbol *shidden = NULL; Symbol *sthis = NULL; tym_t tyf; tyf = tybasic(s->Stype->Tty); //printf("linkage = %d, tyf = x%x\n", linkage, tyf); reverse = tyrevfunc(s->Stype->Tty); assert(func->type->ty == Tfunction); tf = (TypeFunction *)(func->type); has_arguments = (tf->linkage == LINKd) && (tf->varargs == 1); retmethod = tf->retStyle(); if (retmethod == RETstack) { // If function returns a struct, put a pointer to that // as the first argument ::type *thidden = tf->next->pointerTo()->toCtype(); char hiddenparam[5+4+1]; static int hiddenparami; // how many we've generated so far sprintf(hiddenparam,"__HID%d",++hiddenparami); shidden = symbol_name(hiddenparam,SCparameter,thidden); shidden->Sflags |= SFLtrue | SFLfree; #if DMDV1 if (func->nrvo_can && func->nrvo_var && func->nrvo_var->nestedref) #else if (func->nrvo_can && func->nrvo_var && func->nrvo_var->nestedrefs.dim) #endif type_setcv(&shidden->Stype, shidden->Stype->Tty | mTYvolatile); irs.shidden = shidden; this->shidden = shidden; } else { // Register return style cannot make nrvo. // Auto functions keep the nrvo_can flag up to here, // so we should eliminate it before entering backend. nrvo_can = 0; } if (vthis) { assert(!vthis->csym); sthis = vthis->toSymbol(); irs.sthis = sthis; if (!(f->Fflags3 & Fnested)) f->Fflags3 |= Fmember; } Symbol **params; unsigned pi; // Estimate number of parameters, pi pi = (v_arguments != NULL); if (parameters) pi += parameters->dim; // Allow extra 2 for sthis and shidden params = (Symbol **)alloca((pi + 2) * sizeof(Symbol *)); // Get the actual number of parameters, pi, and fill in the params[] pi = 0; if (v_arguments) { params[pi] = v_arguments->toSymbol(); pi += 1; } if (parameters) { for (size_t i = 0; i < parameters->dim; i++) { VarDeclaration *v = (*parameters)[i]; if (v->csym) { error("compiler error, parameter '%s', bugzilla 2962?", v->toChars()); assert(0); } params[pi + i] = v->toSymbol(); } pi += parameters->dim; } if (reverse) { // Reverse params[] entries for (size_t i = 0; i < pi/2; i++) { Symbol *sptmp = params[i]; params[i] = params[pi - 1 - i]; params[pi - 1 - i] = sptmp; } } if (shidden) { #if 0 // shidden becomes last parameter params[pi] = shidden; #else // shidden becomes first parameter memmove(params + 1, params, pi * sizeof(params[0])); params[0] = shidden; #endif pi++; } if (sthis) { #if 0 // sthis becomes last parameter params[pi] = sthis; #else // sthis becomes first parameter memmove(params + 1, params, pi * sizeof(params[0])); params[0] = sthis; #endif pi++; } if ((global.params.isLinux || global.params.isOSX || global.params.isFreeBSD || global.params.isSolaris) && linkage != LINKd && shidden && sthis) { /* swap shidden and sthis */ Symbol *sp = params[0]; params[0] = params[1]; params[1] = sp; } for (size_t i = 0; i < pi; i++) { Symbol *sp = params[i]; sp->Sclass = SCparameter; sp->Sflags &= ~SFLspill; sp->Sfl = FLpara; symbol_add(sp); } // Determine register assignments if (pi) { size_t numintegerregs = 0, numfloatregs = 0; const unsigned char* argregs = getintegerparamsreglist(tyf, &numintegerregs); const unsigned char* floatregs = getfloatparamsreglist(tyf, &numfloatregs); // Order of assignment of pointer or integer parameters int r = 0; int xmmcnt = 0; for (size_t i = 0; i < pi; i++) { Symbol *sp = params[i]; tym_t ty = tybasic(sp->Stype->Tty); // BUG: doesn't work for structs if (r < numintegerregs) { if ((I64 || (i == 0 && (tyf == TYjfunc || tyf == TYmfunc))) && type_jparam(sp->Stype)) { sp->Sclass = SCfastpar; sp->Spreg = argregs[r]; sp->Sfl = FLauto; ++r; } } if (xmmcnt < numfloatregs) { if (tyxmmreg(ty)) { sp->Sclass = SCfastpar; sp->Spreg = floatregs[xmmcnt]; sp->Sfl = FLauto; ++xmmcnt; } } } } if (func->fbody) { block *b; Blockx bx; Statement *sbody; localgot = NULL; sbody = func->fbody; memset(&bx,0,sizeof(bx)); bx.startblock = block_calloc(); bx.curblock = bx.startblock; bx.funcsym = s; bx.scope_index = -1; bx.classdec = cd; bx.member = func; bx.module = getModule(); irs.blx = &bx; #if DMDV2 buildClosure(&irs); #endif #if 0 if (func->isSynchronized()) { if (cd) { elem *esync; if (func->isStatic()) { // monitor is in ClassInfo esync = el_ptr(cd->toSymbol()); } else { // 'this' is the monitor esync = el_var(sthis); } if (func->isStatic() || sbody->usesEH() || !(config.flags2 & CFG2seh)) { // BUG: what if frequire or fensure uses EH? sbody = new SynchronizedStatement(func->loc, esync, sbody); } else { #if TARGET_WINDOS if (config.flags2 & CFG2seh) { /* The "jmonitor" uses an optimized exception handling frame * which is a little shorter than the more general EH frame. * It isn't strictly necessary. */ s->Sfunc->Fflags3 |= Fjmonitor; } #endif el_free(esync); } } else { error("synchronized function %s must be a member of a class", func->toChars()); } } #elif TARGET_WINDOS if (func->isSynchronized() && cd && config.flags2 & CFG2seh && !func->isStatic() && !sbody->usesEH()) { /* The "jmonitor" hack uses an optimized exception handling frame * which is a little shorter than the more general EH frame. */ s->Sfunc->Fflags3 |= Fjmonitor; } #endif sbody->toIR(&irs); bx.curblock->BC = BCret; f->Fstartblock = bx.startblock; // einit = el_combine(einit,bx.init); if (isCtorDeclaration()) { assert(sthis); for (b = f->Fstartblock; b; b = b->Bnext) { if (b->BC == BCret) { b->BC = BCretexp; b->Belem = el_combine(b->Belem, el_var(sthis)); } } } } // If static constructor #if DMDV2 if (isSharedStaticCtorDeclaration()) // must come first because it derives from StaticCtorDeclaration { ssharedctors.push(s); } else #endif if (isStaticCtorDeclaration()) { sctors.push(s); } // If static destructor #if DMDV2 if (isSharedStaticDtorDeclaration()) // must come first because it derives from StaticDtorDeclaration { SharedStaticDtorDeclaration *f = isSharedStaticDtorDeclaration(); assert(f); if (f->vgate) { /* Increment destructor's vgate at construction time */ esharedctorgates.push(f); } sshareddtors.shift(s); } else #endif if (isStaticDtorDeclaration()) { StaticDtorDeclaration *f = isStaticDtorDeclaration(); assert(f); if (f->vgate) { /* Increment destructor's vgate at construction time */ ectorgates.push(f); } sdtors.shift(s); } // If unit test if (isUnitTestDeclaration()) { stests.push(s); } if (global.errors) return; writefunc(s); if (isExport()) obj_export(s, Poffset); for (size_t i = 0; i < irs.deferToObj->dim; i++) { Dsymbol *s = (*irs.deferToObj)[i]; FuncDeclaration *fd = s->isFuncDeclaration(); if (fd) { FuncDeclaration *fdp = fd->toParent2()->isFuncDeclaration(); if (fdp && fdp->semanticRun < PASSobj) { /* Bugzilla 7595 * FuncDeclaration::buildClosure() relies on nested functions * being toObjFile'd after the outer function. Otherwise, the * v->offset's for the closure variables are wrong. * So, defer fd until after fdp is done. */ fdp->deferred.push(fd); continue; } } s->toObjFile(0); } for (size_t i = 0; i < deferred.dim; i++) { FuncDeclaration *fd = deferred[i]; fd->toObjFile(0); } #if TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_SOLARIS // A hack to get a pointer to this function put in the .dtors segment if (ident && memcmp(ident->toChars(), "_STD", 4) == 0) obj_staticdtor(s); #endif #if DMDV2 if (irs.startaddress) { printf("Setting start address\n"); obj_startaddress(irs.startaddress); } #endif }
code *xmmcnvt(elem *e,regm_t *pretregs) { unsigned op=0, regs; tym_t ty; unsigned char rex = 0; bool zx = false; // zero extend uint /* There are no ops for integer <-> float/real conversions * but there are instructions for them. In order to use these * try to fuse chained conversions. Be careful not to loose * precision for real to long. */ elem *e1 = e->E1; switch (e->Eoper) { case OPd_f: if (e1->Eoper == OPs32_d) ; else if (I64 && e1->Eoper == OPs64_d) rex = REX_W; else if (I64 && e1->Eoper == OPu32_d) { rex = REX_W; zx = true; } else { regs = XMMREGS; op = CVTSD2SS; ty = TYfloat; break; } // directly use si2ss regs = ALLREGS; e1 = e1->E1; op = CVTSI2SS; ty = TYfloat; break; case OPs32_d: goto Litod; case OPs64_d: rex = REX_W; goto Litod; case OPu32_d: rex = REX_W; zx = true; goto Litod; Litod: regs = ALLREGS; op = CVTSI2SD; ty = TYdouble; break; case OPd_s32: ty = TYint; goto Ldtoi; case OPd_u32: ty = TYlong; if (I64) rex = REX_W; goto Ldtoi; case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi; Ldtoi: regs = XMMREGS; switch (e1->Eoper) { case OPf_d: e1 = e1->E1; op = CVTTSS2SI; break; case OPld_d: if (e->Eoper == OPd_s64) return cnvt87(e,pretregs); // precision /* FALL-THROUGH */ default: op = CVTTSD2SI; break; } break; case OPf_d: regs = XMMREGS; op = CVTSS2SD; ty = TYdouble; break; } assert(op); CodeBuilder cdb; cdb.append(codelem(e1, ®s, FALSE)); unsigned reg = findreg(regs); if (reg >= XMM0) reg -= XMM0; else if (zx) { assert(I64); cdb.append(getregs(regs)); cdb.append(genregs(CNIL,STO,reg,reg)); // MOV reg,reg to zero upper 32-bit code_orflag(cdb.last(),CFvolatile); } unsigned retregs = *pretregs; if (tyxmmreg(ty)) // target is XMM { if (!(*pretregs & XMMREGS)) retregs = XMMREGS; } else // source is XMM { assert(regs & XMMREGS); if (!(retregs & ALLREGS)) retregs = ALLREGS; } unsigned rreg; cdb.append(allocreg(&retregs,&rreg,ty)); if (rreg >= XMM0) rreg -= XMM0; cdb.gen2(op, modregxrmx(3,rreg,reg)); assert(I64 || !rex); if (rex) code_orrex(cdb.last(), rex); if (*pretregs != retregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }