code *xmmneg(elem *e,regm_t *pretregs) { //printf("xmmneg()\n"); //elem_print(e); assert(*pretregs); tym_t tyml = tybasic(e->E1->Ety); int sz = tysize[tyml]; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; /* Generate: * MOV reg,e1 * MOV rreg,signbit * XOR reg,rreg */ code *cl = codelem(e->E1,&retregs,FALSE); cl = cat(cl,getregs(retregs)); unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; unsigned rreg; cl = cat(cl,allocreg(&rretregs,&rreg,tyml)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; code *c = movxmmconst(rreg, sz, signbit, 0); code *cg = getregs(retregs); unsigned op = (sz == 8) ? XORPD : XORPS; // XORPD/S reg,rreg code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); co = cat(co,fixresult(e,retregs,pretregs)); return cat4(cl,c,cg,co); }
code *xmmneg(elem *e,regm_t *pretregs) { //printf("xmmneg()\n"); //elem_print(e); assert(*pretregs); tym_t tyml = tybasic(e->E1->Ety); int sz = _tysize[tyml]; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; /* Generate: * MOV reg,e1 * MOV rreg,signbit * XOR reg,rreg */ CodeBuilder cdb; cdb.append(codelem(e->E1,&retregs,FALSE)); cdb.append(getregs(retregs)); unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; unsigned rreg; cdb.append(allocreg(&rretregs,&rreg,tyml)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; cdb.append(movxmmconst(rreg, sz, signbit, 0)); cdb.append(getregs(retregs)); unsigned op = (sz == 8) ? XORPD : XORPS; // XORPD/S reg,rreg cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }
code *orthxmm(elem *e, regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; code *c = codelem(e1,&retregs,FALSE); // eval left leaf unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; code *cr = scodelem(e2, &rretregs, retregs, TRUE); // eval right leaf unsigned op = xmmoperator(e1->Ety, e->Eoper); unsigned rreg = findreg(rretregs); // float + ifloat is not actually addition if ((e->Eoper == OPadd || e->Eoper == OPmin) && ((tyreal(e1->Ety) && tyimaginary(e2->Ety)) || (tyreal(e2->Ety) && tyimaginary(e1->Ety)))) { retregs |= rretregs; c = cat(c, cr); if (e->Eoper == OPmin) { unsigned nretregs = XMMREGS & ~retregs; unsigned sreg; // hold sign bit unsigned sz = tysize[tybasic(e1->Ety)]; c = cat(c,allocreg(&nretregs,&sreg,e2->Ety)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; c = cat(c, movxmmconst(sreg, sz, signbit, 0)); c = cat(c, getregs(nretregs)); unsigned xop = (sz == 8) ? XORPD : XORPS; // XORPD/S rreg,sreg c = cat(c, gen2(CNIL,xop,modregxrmx(3,rreg-XMM0,sreg-XMM0))); } if (retregs != *pretregs) c = cat(c, fixresult(e,retregs,pretregs)); return c; } /* We should take advantage of mem addressing modes for OP XMM,MEM * but we do not at the moment. */ code *cg; if (OTrel(e->Eoper)) { retregs = mPSW; cg = NULL; code *cc = gen2(CNIL,op,modregxrmx(3,rreg-XMM0,reg-XMM0)); return cat4(c,cr,cg,cc); } else cg = getregs(retregs); code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); if (retregs != *pretregs) co = cat(co,fixresult(e,retregs,pretregs)); return cat4(c,cr,cg,co); }
int main (int argc, char *argv[]) { printf ("%s %s\n", __TIME__, __DATE__); char data[256] = {0}; long i = -1; long addr = 0x10001c10; struct pt_regs regs = {{0}}; pid_t traced_process = 0; if(argc != 2) { printf("Usage: %s <pid to be traced>\n", argv[0], argv[1]); exit(1); } traced_process = atoi(argv[1]); lib_init (traced_process); getregs(traced_process, ®s); long *ptr = (long *)®s; for (i = 0;i < sizeof (struct pt_regs) / sizeof (long);i ++) { printf ("0x%08x\n", *(ptr ++)); } getdata(traced_process, addr, data, 256); i = -1; while (++i < 256) { printf ("0x%02x\n", *(data+i)); } lib_destroy (traced_process); return 0; }
code *nteh_monitor_epilog(regm_t retregs) { /* * CALL _d_monitor_epilog * POP FS:__except_list */ assert(config.exe == EX_WIN32); // BUG: figure out how to implement for other EX's Symbol *s = getRtlsym(RTLSYM_MONITOR_EPILOG); //desregs = ~s->Sregsaved & ALLREGS; regm_t desregs = 0; code *cs1; code *cs2; gensaverestore(retregs& desregs,&cs1,&cs2); CodeBuilder cdb(getregs(desregs)); cdb.gencs(0xE8,0,FLfunc,s); // CALL __d_monitor_epilog CodeBuilder cdb1(cs1); CodeBuilder cdb2(cs2); cdb1.append(cdb, cdb2); code cs; cs.Iop = 0x8F; cs.Irm = modregrm(0,0,BPRM); cs.Iflags = CFfs; cs.Irex = 0; cs.IFL1 = FLextern; cs.IEVsym1 = getRtlsym(RTLSYM_EXCEPT_LIST); cs.IEVoffset1 = 0; cdb1.gen(&cs); // POP FS:__except_list return cdb1.finish(); }
/*! * @brief Allocates memory on a debugged process. * @param pid Process identifier of the debugged process. * @param regs Pointer to the \c user_regs_struct of the debugged process. * @param addr Address to allocate. * @param length Size to allocate. * @returns Indication of success or failure. * @retval 0 indicates success. */ LONG allocate(LONG pid, struct user_regs_struct *regs, unsigned long addr, size_t length) { unsigned long *alloc_code = (unsigned long *)mmap_stub; unsigned long *addr_ptr = (unsigned long *)(mmap_stub + MMAP_ADDR_POS); size_t *length_ptr = (size_t *)(mmap_stub + MMAP_LENGTH_POS); ULONG code_size = MMAP_STUB_SIZE; LONG result = 0; if (regs == NULL) { return ERROR_INVALID_PARAMETER; } // Fix mmap stub with allocation data addr_ptr[0] = addr; length_ptr[0] = length; result = execute_stub(pid, regs->eip, alloc_code, code_size); if (result != 0) return result; if (wait_trap(pid) == FALSE) return ECANCELED; // We don't know what failed in the remote process result = getregs(pid, regs); if (result != 0) return result; return 0; }
/*! * @brief Executes an stub of code on a debugged process. * @param pid Process identifier where to execute code. * @param addr Process addr to store the code. * @param stub Pointer to code stub to execute. * @param stub_size ptrace friendly length of the stub to execute. * @returns Indication of success or failure. * @retval 0 indicates success. */ LONG execute_stub(LONG pid, unsigned long addr, unsigned long *stub, ULONG stub_size) { LONG i = 0; LONG result = 0; struct user_regs_struct stub_regs; if (stub_size == 0 || stub == NULL) return ERROR_INVALID_PARAMETER; result = write_memory(pid, addr, stub, stub_size); if (result != 0) return result; // Jump into the nops stub, makes code modification // more reliable. result = getregs(pid, &stub_regs); if (result != 0) return result; dprintf("[EXECUTE_STUB] Original EIP 0x%x", stub_regs.eip); stub_regs.eip = stub_regs.eip + 8; result = setregs(pid, &stub_regs); if (result != 0) return result; dprintf("[EXECUTE_STUB] Redirecting to 0x%x", stub_regs.eip); result = cont(pid); if (result != 0) return result; return 0; }
code *nteh_monitor_prolog(Symbol *shandle) { /* * PUSH handle * PUSH offset _d_monitor_handler * PUSH FS:__except_list * MOV FS:__except_list,ESP * CALL _d_monitor_prolog */ CodeBuilder cdb1; CodeBuilder cdb; assert(config.exe == EX_WIN32); // BUG: figure out how to implement for other EX's if (shandle->Sclass == SCfastpar) { assert(shandle->Spreg != DX); assert(shandle->Spreg2 == NOREG); cdb.gen1(0x50 + shandle->Spreg); // PUSH shandle } else { // PUSH shandle useregs(mCX); cdb.genc1(0x8B,modregrm(2,CX,4),FLconst,4 * (1 + needframe) + shandle->Soffset + localsize); cdb.last()->Isib = modregrm(0,4,SP); cdb.gen1(0x50 + CX); // PUSH ECX } Symbol *smh = getRtlsym(RTLSYM_MONITOR_HANDLER); cdb.gencs(0x68,0,FLextern,smh); // PUSH offset _d_monitor_handler makeitextern(smh); code cs; useregs(mDX); cs.Iop = 0x8B; cs.Irm = modregrm(0,DX,BPRM); cs.Iflags = CFfs; cs.Irex = 0; cs.IFL1 = FLextern; cs.IEVsym1 = getRtlsym(RTLSYM_EXCEPT_LIST); cs.IEVoffset1 = 0; cdb1.gen(&cs); // MOV EDX,FS:__except_list cdb.gen1(0x50 + DX); // PUSH EDX Symbol *s = getRtlsym(RTLSYM_MONITOR_PROLOG); regm_t desregs = ~s->Sregsaved & ALLREGS; cdb.append(getregs(desregs)); cdb.gencs(0xE8,0,FLfunc,s); // CALL _d_monitor_prolog cs.Iop = 0x89; NEWREG(cs.Irm,SP); cdb.gen(&cs); // MOV FS:__except_list,ESP cdb1.append(cdb); return cdb1.finish(); }
code *nteh_unwind(regm_t retregs,unsigned index) { code cs; regm_t desregs; int reg; int local_unwind; // Shouldn't this always be CX? #if SCPP reg = AX; #else reg = CX; #endif #if MARS local_unwind = RTLSYM_D_LOCAL_UNWIND2; #else local_unwind = RTLSYM_LOCAL_UNWIND2; #endif desregs = (~getRtlsym(local_unwind)->Sregsaved & (ALLREGS)) | mask[reg]; code *cs1; code *cs2; gensaverestore(retregs & desregs,&cs1,&cs2); CodeBuilder cdb(getregs(desregs)); cs.Iop = 0x8D; cs.Irm = modregrm(2,reg,BPRM); cs.Iflags = 0; cs.Irex = 0; cs.IFL1 = FLconst; // EBP offset of __context.prev cs.IEV1.Vint = nteh_EBPoffset_prev(); cdb.gen(&cs); // LEA ECX,contextsym cdb.genc2(0x68,0,index); // PUSH index cdb.gen1(0x50 + reg); // PUSH ECX #if MARS //cdb.gencs(0xB8+AX,0,FLextern,nteh_scopetable()); // MOV EAX,&scope_table cdb.gencs(0x68,0,FLextern,nteh_scopetable()); // PUSH &scope_table cdb.gencs(0xE8,0,FLfunc,getRtlsym(local_unwind)); // CALL __d_local_unwind2() cdb.append(cod3_stackadj(NULL, -12)); #else cdb.gencs(0xE8,0,FLfunc,getRtlsym(local_unwind)); // CALL __local_unwind2() cdb.append(cod3_stackadj(NULL, -8)); #endif CodeBuilder cdb1(cs1); CodeBuilder cdb2(cs2); cdb1.append(cdb, cdb2); return cdb1.finish(); }
static int tile_gpr_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { struct pt_regs regs; getregs(target, ®s); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, ®s, 0, sizeof(regs)); }
code *nteh_unwind(regm_t retregs,unsigned index) { code *c; code cs; code *cs1; code *cs2; regm_t desregs; int reg; int local_unwind; // Shouldn't this always be CX? #if SCPP reg = AX; #else reg = CX; #endif #if MARS local_unwind = RTLSYM_D_LOCAL_UNWIND2; #else local_unwind = RTLSYM_LOCAL_UNWIND2; #endif desregs = (~rtlsym[local_unwind]->Sregsaved & (ALLREGS)) | mask[reg]; gensaverestore(retregs & desregs,&cs1,&cs2); c = getregs(desregs); cs.Iop = 0x8D; cs.Irm = modregrm(2,reg,BPRM); cs.Iflags = 0; cs.Irex = 0; cs.IFL1 = FLconst; // EBP offset of __context.prev cs.IEV1.Vint = nteh_EBPoffset_prev(); c = gen(c,&cs); // LEA ECX,contextsym genc2(c,0x68,0,index); // PUSH index gen1(c,0x50 + reg); // PUSH ECX #if MARS //gencs(c,0xB8+AX,0,FLextern,nteh_scopetable()); // MOV EAX,&scope_table gencs(c,0x68,0,FLextern,nteh_scopetable()); // PUSH &scope_table gencs(c,0xE8,0,FLfunc,rtlsym[local_unwind]); // CALL __d_local_unwind2() genc2(c,0x81,modregrm(3,0,SP),12); // ADD ESP,12 #else gencs(c,0xE8,0,FLfunc,rtlsym[local_unwind]); // CALL __local_unwind2() genc2(c,0x81,modregrm(3,0,SP),8); // ADD ESP,8 #endif c = cat4(cs1,c,cs2,NULL); return c; }
/*! * @brief Saves the process state. * @param pid Process identifier to save. * @param s Pointer to \c state to store code and registers. * @returns Indication of success or failure. * @retval 0 Indicates success. */ LONG save_state(LONG pid, state *s) { LONG result = 0; LONG i = 0; if (s == NULL) return ERROR_INVALID_PARAMETER; result = getregs(pid, &(s->regs)); if (result != 0) return result; result = read_memory(pid, s->regs.eip, s->memory, MMAP_STUB_SIZE); if (result != 0) return result; return 0; }
code *linux_unwind(regm_t retregs,unsigned index) { int i; regm_t desregs; int reg; int local_unwind; // Shouldn't this always be CX? #if SCPP reg = AX; #else reg = CX; #endif #if MARS local_unwind = RTLSYM_D_LOCAL_UNWIND2; #else local_unwind = RTLSYM_LOCAL_UNWIND2; #endif desregs = (~getRtlsym(local_unwind)->Sregsaved & (ALLREGS)) | mask[reg]; code *cs1; code *cs2; gensaverestore(retregs & desregs,&cs1,&cs2); CodeBuilder cdb(getregs(desregs)); cdb.genc2(0x68,0,index); // PUSH index #if MARS // cdb.gencs(0x68,0,FLextern,nteh_scopetable()); // PUSH &scope_table cdb.gencs(0xE8,0,FLfunc,getRtlsym(local_unwind)); // CALL __d_local_unwind2() cdb.append(cod3_stackadj(NULL, -4)); #else cdb.gencs(0xE8,0,FLfunc,getRtlsym(local_unwind)); // CALL __local_unwind2() cdb.append(cod3_stackadj(NULL, -8)); #endif CodeBuilder cdb1(cs1); CodeBuilder cdb2(cs2); cdb1.append(cdb, cdb2); return cdb1.finish(); }
void kmain(ol_mmap_register_t mmr) { textinit(); clearscreen(); println("The openLoader kernel is executing. \n"); print("Current stack pointer: "); ol_registers_t regs = getregs(); printnum(regs->esp, 16, FALSE, FALSE); putc(0xa); char status = inb(0x60); if((status & 2) == 2) { println("The A20 gate is open."); putc(0xa); } pic_init(); setIDT(); outb(OL_KBC_COMMAND, OL_KB_INIT); // enable the keyboard // display mmap init_mmap(mmr); println("Multiboot memory map:\n"); display_mmap(mmr); #if 0 uint8_t active = ide_init(bootdrive); ide_read(0x100, 1<<20, &bootdrive[active], 60); uint8_t eax = ata_identify(); printnum(active, 16, FALSE, FALSE); #endif putc(0xa); println("Waiting for service interrupts.."); while(1) halt(); println("End of program reached!"); endprogram(); }
code *linux_unwind(regm_t retregs,unsigned index) { code *c; code *cs1; code *cs2; int i; regm_t desregs; int reg; int local_unwind; // Shouldn't this always be CX? #if SCPP reg = AX; #else reg = CX; #endif #if MARS local_unwind = RTLSYM_D_LOCAL_UNWIND2; #else local_unwind = RTLSYM_LOCAL_UNWIND2; #endif desregs = (~rtlsym[local_unwind]->Sregsaved & (ALLREGS)) | mask[reg]; gensaverestore(retregs & desregs,&cs1,&cs2); c = getregs(desregs); c = genc2(c,0x68,0,index); // PUSH index #if MARS // gencs(c,0x68,0,FLextern,nteh_scopetable()); // PUSH &scope_table gencs(c,0xE8,0,FLfunc,rtlsym[local_unwind]); // CALL __d_local_unwind2() genc2(c,0x81,modregrm(3,0,SP),4); // ADD ESP,12 #else gencs(c,0xE8,0,FLfunc,rtlsym[local_unwind]); // CALL __local_unwind2() genc2(c,0x81,modregrm(3,0,SP),8); // ADD ESP,8 #endif c = cat4(cs1,c,cs2,NULL); return c; }
code *nteh_filter(block *b) { code cs; CodeBuilder cdb; assert(b->BC == BC_filter); if (b->Bflags & BFLehcode) // if referenced __ecode { /* Generate: mov EAX,__context[EBP].info mov EAX,[EAX] mov EAX,[EAX] mov __ecode[EBP],EAX */ cdb.append(getregs(mAX)); cs.Iop = 0x8B; cs.Irm = modregrm(2,AX,BPRM); cs.Iflags = 0; cs.Irex = 0; cs.IFL1 = FLconst; // EBP offset of __context.info cs.IEV1.Vint = nteh_EBPoffset_info(); cdb.gen(&cs); // MOV EAX,__context[EBP].info cs.Irm = modregrm(0,AX,0); cdb.gen(&cs); // MOV EAX,[EAX] cdb.gen(&cs); // MOV EAX,[EAX] cs.Iop = 0x89; cs.Irm = modregrm(2,AX,BPRM); cs.IFL1 = FLauto; cs.IEVsym1 = nteh_ecodesym(); cs.IEVoffset1 = 0; cdb.gen(&cs); // MOV __ecode[EBP],EAX } return cdb.finish(); }
code *nteh_monitor_epilog(regm_t retregs) { /* * CALL _d_monitor_epilog * POP FS:__except_list */ code cs; code *c; code *cs1; code *cs2; code *cpop; regm_t desregs; Symbol *s; assert(config.flags2 & CFG2seh); // BUG: figure out how to implement for other EX's s = rtlsym[RTLSYM_MONITOR_EPILOG]; //desregs = ~s->Sregsaved & ALLREGS; desregs = 0; gensaverestore(retregs & desregs,&cs1,&cs2); c = getregs(desregs); c = gencs(c,0xE8,0,FLfunc,s); // CALL __d_monitor_epilog cs.Iop = 0x8F; cs.Irm = modregrm(0,0,BPRM); cs.Iflags = CFfs; cs.Irex = 0; cs.IFL1 = FLextern; cs.IEVsym1 = rtlsym[RTLSYM_EXCEPT_LIST]; cs.IEVoffset1 = 0; cpop = gen(NULL,&cs); // POP FS:__except_list c = cat4(cs1,c,cs2,cpop); return c; }
code *nteh_monitor_prolog(Symbol *shandle) { /* * PUSH handle * PUSH offset _d_monitor_handler * PUSH FS:__except_list * MOV FS:__except_list,ESP * CALL _d_monitor_prolog */ code *c1 = NULL; code *c; code cs; Symbol *s; regm_t desregs; assert(config.flags2 & CFG2seh); // BUG: figure out how to implement for other EX's if (shandle->Sclass == SCfastpar) { assert(shandle->Spreg != DX); c = gen1(NULL,0x50 + shandle->Spreg); // PUSH shandle } else { // PUSH shandle #if 0 c = genc1(NULL,0xFF,modregrm(2,6,4),FLconst,4 * (1 + needframe) + shandle->Soffset + localsize); c->Isib = modregrm(0,4,SP); #else useregs(mCX); c = genc1(NULL,0x8B,modregrm(2,CX,4),FLconst,4 * (1 + needframe) + shandle->Soffset + localsize); c->Isib = modregrm(0,4,SP); gen1(c,0x50 + CX); // PUSH ECX #endif } s = rtlsym[RTLSYM_MONITOR_HANDLER]; c = gencs(c,0x68,0,FLextern,s); // PUSH offset _d_monitor_handler makeitextern(s); #if 0 cs.Iop = 0xFF; cs.Irm = modregrm(0,6,BPRM); cs.Iflags = CFfs; cs.Irex = 0; cs.IFL1 = FLextern; cs.IEVsym1 = rtlsym[RTLSYM_EXCEPT_LIST]; cs.IEVoffset1 = 0; gen(c,&cs); // PUSH FS:__except_list #else useregs(mDX); cs.Iop = 0x8B; cs.Irm = modregrm(0,DX,BPRM); cs.Iflags = CFfs; cs.Irex = 0; cs.IFL1 = FLextern; cs.IEVsym1 = rtlsym[RTLSYM_EXCEPT_LIST]; cs.IEVoffset1 = 0; c1 = gen(c1,&cs); // MOV EDX,FS:__except_list gen1(c,0x50 + DX); // PUSH EDX #endif s = rtlsym[RTLSYM_MONITOR_PROLOG]; desregs = ~s->Sregsaved & ALLREGS; c = cat(c,getregs(desregs)); c = gencs(c,0xE8,0,FLfunc,s); // CALL _d_monitor_prolog cs.Iop = 0x89; NEWREG(cs.Irm,SP); gen(c,&cs); // MOV FS:__except_list,ESP return cat(c1,c); }
code *xmmopass(elem *e,regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; tym_t ty1 = tybasic(e1->Ety); unsigned sz1 = tysize[ty1]; regm_t rretregs = XMMREGS & ~*pretregs; if (!rretregs) rretregs = XMMREGS; code *cr = codelem(e2,&rretregs,FALSE); // eval right leaf unsigned rreg = findreg(rretregs); code cs; code *cl,*cg; regm_t retregs; unsigned reg; bool regvar = FALSE; if (config.flags4 & CFG4optimized) { // Be careful of cases like (x = x+x+x). We cannot evaluate in // x if x is in a register. unsigned varreg; regm_t varregm; if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable doinreg(e1->EV.sp.Vsym,e2) // and we can compute directly into it ) { regvar = TRUE; retregs = varregm; reg = varreg; // evaluate directly in target register cl = NULL; cg = getregs(retregs); // destroy these regs } } if (!regvar) { cl = getlvalue(&cs,e1,rretregs); // get EA retregs = *pretregs & XMMREGS & ~rretregs; if (!retregs) retregs = XMMREGS & ~rretregs; cg = allocreg(&retregs,®,ty1); cs.Iop = xmmload(ty1); // MOVSD xmm,xmm_m64 code_newreg(&cs,reg - XMM0); cg = gen(cg,&cs); } unsigned op = xmmoperator(e1->Ety, e->Eoper); code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); if (!regvar) { cs.Iop = xmmstore(ty1); // reverse operand order of MOVS[SD] gen(co,&cs); } if (e1->Ecount || // if lvalue is a CSE or regvar) // rvalue can't be a CSE { cl = cat(cl,getregs_imm(retregs)); // necessary if both lvalue and // rvalue are CSEs (since a reg // can hold only one e at a time) cssave(e1,retregs,EOP(e1)); // if lvalue is a CSE } co = cat(co,fixresult(e,retregs,pretregs)); freenode(e1); return cat4(cr,cl,cg,co); }
/*************** * Generate code for OPvecfill (broadcast). * OPvecfill takes the single value in e1 and * fills the vector type with it. */ code *cdvecfill(elem *e, regm_t *pretregs) { //printf("cdvecfill(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; CodeBuilder cdb; code *c; code cs; elem *e1 = e->E1; #if 0 if ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar) { cr = getlvalue(&cs, e1, RMload | retregs); // get addressing mode } else { unsigned rretregs = XMMREGS & ~retregs; cr = scodelem(op2, &rretregs, retregs, TRUE); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } #endif unsigned reg; unsigned rreg; unsigned varreg; regm_t varregm; tym_t ty = tybasic(e->Ety); switch (ty) { case TYfloat4: case TYfloat8: if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg) ) { Lint: if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VBROADCASTSS XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode assert((cs.Irm & 0xC0) != 0xC0); // AVX1 doesn't have register source operands cdb.append(allocreg(&retregs,®,ty)); cs.Iop = VBROADCASTSS; cs.Irex &= ~REX_W; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { // SHUFPS XMM0,XMM0,0 0F C6 /r ib c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); reg = findreg(retregs) - XMM0; cdb.append(getregs(retregs)); cs.Iop = SHUFPS; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSS XMM,XMM cs.Iop = VBROADCASTSS; checkSetVex(&cs, ty); } cdb.gen(&cs); } break; case TYdouble2: case TYdouble4: if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg) ) { if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VBROADCASTSD XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode assert((cs.Irm & 0xC0) != 0xC0); // AVX1 doesn't have register source operands cdb.append(allocreg(&retregs,®,ty)); cs.Iop = VBROADCASTSD; cs.Irex &= ~REX_W; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { // UNPCKLPD XMM0,XMM0 66 0F 14 /r c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); reg = findreg(retregs) - XMM0; cdb.append(getregs(retregs)); cs.Iop = UNPCKLPD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSD XMM,XMM cs.Iop = VBROADCASTSD; checkSetVex(&cs, ty); } cdb.gen(&cs); } break; case TYschar16: case TYuchar16: case TYschar32: case TYuchar32: { /* MOVD XMM0,r * PUNPCKLBW XMM0,XMM0 * PUNPCKLWD XMM0,XMM0 * PSHUFD XMM0,XMM0,0 */ regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r checkSetVex(cdb.last(),TYschar16); cs.Iop = PUNPCKLBW; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); cs.Iop = PUNPCKLWD; cdb.gen(&cs); cs.Iop = PSHUFD; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; checkSetVex(&cs,TYschar16); cdb.gen(&cs); if (tysize(ty) == 32) { // VINSERTF128 YMM0,YMM0,XMM0,1 cs.Iop = VINSERTF128; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 1; checkSetVex(&cs,ty); cdb.gen(&cs); } break; } case TYshort8: case TYushort8: case TYshort16: case TYushort16: { regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); if (config.avx || tysize(ty) == 32) { /* * VPXOR XMM0,XMM0,XMM0 * VPINSRW XMM0,XMM0,r,0 * VPINSRW XMM0,XMM0,r,1 * VPINSRW XMM0,XMM0,r,2 * VPINSRW XMM0,XMM0,r,3 */ cdb.append(allocreg(&retregs,®, ty)); cdb.gen2(PXOR,modregxrmx(3,reg-XMM0,reg-XMM0)); checkSetVex(cdb.last(), TYshort8); for (int i = 0; i < tysize(ty) / 4; ++i) { cdb.genc2(PINSRW,modregxrmx(3,reg-XMM0,r),i); checkSetVex(cdb.last(), TYshort8); } if (tysize(ty) == 32) { // VINSERTF128 YMM0,YMM0,XMM0,1 cs.Iop = VINSERTF128; cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 1; checkSetVex(&cs,ty); cdb.gen(&cs); } else { // VPSHUFD XMM0,XMM0,0 cs.Iop = PSHUFD; cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; checkSetVex(&cs,ty); cdb.gen(&cs); } } else { /* MOVD XMM0,r * PUNPCKLWD XMM0,XMM0 * PSHUFD XMM0,XMM0,0 */ c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r checkSetVex(cdb.last(),e->Ety); cs.Iop = PUNPCKLWD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); cs.Iop = PSHUFD; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; cdb.gen(&cs); } break; } case TYlong8: case TYulong8: case TYlong4: case TYulong4: { if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)) { goto Lint; } /* MOVD XMM1,r * PSHUFD XMM0,XMM1,0 */ regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r cs.Iop = PSHUFD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSS XMM,XMM cs.Iop = VBROADCASTSS; checkSetVex(&cs, ty); } cdb.gen(&cs); break; } case TYllong2: case TYullong2: case TYllong4: case TYullong4: if (config.avx || tysize(ty) >= 32) { if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VMOVDDUP XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode if ((cs.Irm & 0xC0) == 0xC0) { unsigned sreg = ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)); regm_t sregm = XMMREGS; cdb.append(fixresult(e1, mask[sreg], &sregm)); unsigned rmreg = findreg(sregm); cs.Irm = (cs.Irm & ~7) | ((rmreg - XMM0) & 7); if ((rmreg - XMM0) & 8) cs.Irex |= REX_B; else cs.Irex &= ~REX_B; } cdb.append(allocreg(&retregs,®,ty)); if (config.avx >= 2 || tysize(ty) >= 32) { cs.Iop = VBROADCASTSD; cs.Irex &= ~REX_W; } else cs.Iop = MOVDDUP; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { /* MOVQ XMM0,mem128 * PUNPCKLQDQ XMM0,XMM0 */ c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); unsigned reg = findreg(retregs); reg -= XMM0; //cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVQ reg,r cs.Iop = PUNPCKLQDQ; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); } break; default: assert(0); } c = fixresult(e,retregs,pretregs); cdb.append(c); return cdb.finish(); }
code *cdvector(elem *e, regm_t *pretregs) { /* e should look like one of: * vector * | * param * / \ * param op2 * / \ * op op1 */ if (!config.fpxmmregs) { printf("SIMD operations not supported on this platform\n"); exit(1); } unsigned n = el_nparams(e->E1); elem **params = (elem **)malloc(n * sizeof(elem *)); assert(params); elem **tmp = params; el_paramArray(&tmp, e->E1); #if 0 printf("cdvector()\n"); for (int i = 0; i < n; i++) { printf("[%d]: ", i); elem_print(params[i]); } #endif if (*pretregs == 0) { /* Evaluate for side effects only */ CodeBuilder cdb; for (int i = 0; i < n; i++) { cdb.append(codelem(params[i], pretregs, FALSE)); *pretregs = 0; // in case they got set } return cdb.finish(); } assert(n >= 2 && n <= 4); elem *eop = params[0]; elem *op1 = params[1]; elem *op2 = NULL; tym_t ty2 = 0; if (n >= 3) { op2 = params[2]; ty2 = tybasic(op2->Ety); } unsigned op = el_tolong(eop); #ifdef DEBUG assert(!isXMMstore(op)); #endif tym_t ty1 = tybasic(op1->Ety); unsigned sz1 = _tysize[ty1]; // assert(sz1 == 16); // float or double regm_t retregs; CodeBuilder cdb; if (n == 3 && ty2 == TYuchar && op2->Eoper == OPconst) { // Handle: op xmm,imm8 retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); int r; switch (op) { case PSLLD: r = 6; op = 0x660F72; break; case PSLLQ: r = 6; op = 0x660F73; break; case PSLLW: r = 6; op = 0x660F71; break; case PSRAD: r = 4; op = 0x660F72; break; case PSRAW: r = 4; op = 0x660F71; break; case PSRLD: r = 2; op = 0x660F72; break; case PSRLQ: r = 2; op = 0x660F73; break; case PSRLW: r = 2; op = 0x660F71; break; case PSRLDQ: r = 3; op = 0x660F73; break; case PSLLDQ: r = 7; op = 0x660F73; break; default: printf("op = x%x\n", op); assert(0); break; } cdb.append(getregs(retregs)); cdb.genc2(op,modregrmx(3,r,reg-XMM0), el_tolong(op2)); } else if (n == 2) { /* Handle: op xmm,mem * where xmm is written only, not read */ code cs; if ((op1->Eoper == OPind && !op1->Ecount) || op1->Eoper == OPvar) { cdb.append(getlvalue(&cs, op1, RMload)); // get addressing mode } else { regm_t rretregs = XMMREGS; cdb.append(codelem(op1, &rretregs, FALSE)); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; unsigned reg; cdb.append(allocreg(&retregs, ®, e->Ety)); code_newreg(&cs, reg - XMM0); cs.Iop = op; cdb.gen(&cs); } else if (n == 3 || n == 4) { /* Handle: * op xmm,mem // n = 3 * op xmm,mem,imm8 // n = 4 * Both xmm and mem are operands, evaluate xmm first. */ code cs; retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); if ((op2->Eoper == OPind && !op2->Ecount) || op2->Eoper == OPvar) { cdb.append(getlvalue(&cs, op2, RMload | retregs)); // get addressing mode } else { unsigned rretregs = XMMREGS & ~retregs; cdb.append(scodelem(op2, &rretregs, retregs, TRUE)); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } cdb.append(getregs(retregs)); if (n == 4) { switch (op) { case CMPPD: case CMPSS: case CMPSD: case CMPPS: case PSHUFD: case PSHUFHW: case PSHUFLW: case BLENDPD: case BLENDPS: case DPPD: case DPPS: case MPSADBW: case PBLENDW: case ROUNDPD: case ROUNDPS: case ROUNDSD: case ROUNDSS: case SHUFPD: case SHUFPS: break; default: printf("op = x%x\n", op); assert(0); break; } elem *imm8 = params[3]; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = el_tolong(imm8); } code_newreg(&cs, reg - XMM0); cs.Iop = op; cdb.gen(&cs); } else assert(0); cdb.append(fixresult(e,retregs,pretregs)); free(params); freenode(e); return cdb.finish(); }
code *cdsetjmp(elem *e,regm_t *pretregs) { code cs; code *c; regm_t retregs; unsigned stackpushsave; unsigned flag; c = NULL; stackpushsave = stackpush; #if SCPP if (CPP && (funcsym_p->Sfunc->Fflags3 & Fcppeh || usednteh & NTEHcpp)) { /* If in C++ try block If the frame that is calling setjmp has a try,catch block then the call to setjmp3 is as follows: __setjmp3(environment,3,__cpp_longjmp_unwind,trylevel,funcdata); __cpp_longjmp_unwind is a routine in the RTL. This is a stdcall routine that will deal with unwinding for CPP Frames. trylevel is the value that gets incremented at each catch, constructor invocation. funcdata is the same value that you put into EAX prior to cppframehandler getting called. */ symbol *s; s = except_gensym(); if (!s) goto L1; c = gencs(c,0x68,0,FLextern,s); // PUSH &scope_table stackpush += 4; genadjesp(c,4); c = genc1(c,0xFF,modregrm(1,6,BP),FLconst,(targ_uns)-4); // PUSH trylevel stackpush += 4; genadjesp(c,4); cs.Iop = 0x68; cs.Iflags = CFoff; cs.Irex = 0; cs.IFL2 = FLextern; cs.IEVsym2 = rtlsym[RTLSYM_CPP_LONGJMP]; cs.IEVoffset2 = 0; c = gen(c,&cs); // PUSH &_cpp_longjmp_unwind stackpush += 4; genadjesp(c,4); flag = 3; } else #endif if (funcsym_p->Sfunc->Fflags3 & Fnteh) { /* If in NT SEH try block If the frame that is calling setjmp has a try, except block then the call to setjmp3 is as follows: __setjmp3(environment,2,__seh_longjmp_unwind,trylevel); __seth_longjmp_unwind is supplied by the RTL and is a stdcall function. It is the name that MSOFT uses, we should probably use the same one. trylevel is the value that you increment at each try and decrement at the close of the try. This corresponds to the index field of the ehrec. */ int sindex_off; sindex_off = 20; // offset of __context.sindex cs.Iop = 0xFF; cs.Irm = modregrm(2,6,BPRM); cs.Iflags = 0; cs.Irex = 0; cs.IFL1 = FLbprel; cs.IEVsym1 = nteh_contextsym(); cs.IEVoffset1 = sindex_off; c = gen(c,&cs); // PUSH scope_index stackpush += 4; genadjesp(c,4); cs.Iop = 0x68; cs.Iflags = CFoff; cs.Irex = 0; cs.IFL2 = FLextern; cs.IEVsym2 = rtlsym[RTLSYM_LONGJMP]; cs.IEVoffset2 = 0; c = gen(c,&cs); // PUSH &_seh_longjmp_unwind stackpush += 4; genadjesp(c,4); flag = 2; } else { /* If the frame calling setjmp has neither a try..except, nor a try..catch, then call setjmp3 as follows: _setjmp3(environment,0) */ L1: flag = 0; } cs.Iop = 0x68; cs.Iflags = 0; cs.Irex = 0; cs.IFL2 = FLconst; cs.IEV2.Vint = flag; c = gen(c,&cs); // PUSH flag stackpush += 4; genadjesp(c,4); c = cat(c,params(e->E1,REGSIZE)); c = cat(c,getregs(~rtlsym[RTLSYM_SETJMP3]->Sregsaved & (ALLREGS | mES))); gencs(c,0xE8,0,FLfunc,rtlsym[RTLSYM_SETJMP3]); // CALL __setjmp3 c = genc2(c,0x81,modregrm(3,0,SP),stackpush - stackpushsave); // ADD ESP,8 genadjesp(c,-(stackpush - stackpushsave)); stackpush = stackpushsave; retregs = regmask(e->Ety, TYnfunc); return cat(c,fixresult(e,retregs,pretregs)); }
code *xmmcnvt(elem *e,regm_t *pretregs) { unsigned op=0, regs; tym_t ty; unsigned char rex = 0; bool zx = false; // zero extend uint /* There are no ops for integer <-> float/real conversions * but there are instructions for them. In order to use these * try to fuse chained conversions. Be careful not to loose * precision for real to long. */ elem *e1 = e->E1; switch (e->Eoper) { case OPd_f: if (e1->Eoper == OPs32_d) ; else if (I64 && e1->Eoper == OPs64_d) rex = REX_W; else if (I64 && e1->Eoper == OPu32_d) { rex = REX_W; zx = true; } else { regs = XMMREGS; op = CVTSD2SS; ty = TYfloat; break; } // directly use si2ss regs = ALLREGS; e1 = e1->E1; op = CVTSI2SS; ty = TYfloat; break; case OPs32_d: goto Litod; case OPs64_d: rex = REX_W; goto Litod; case OPu32_d: rex = REX_W; zx = true; goto Litod; Litod: regs = ALLREGS; op = CVTSI2SD; ty = TYdouble; break; case OPd_s32: ty = TYint; goto Ldtoi; case OPd_u32: ty = TYlong; if (I64) rex = REX_W; goto Ldtoi; case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi; Ldtoi: regs = XMMREGS; switch (e1->Eoper) { case OPf_d: e1 = e1->E1; op = CVTTSS2SI; break; case OPld_d: if (e->Eoper == OPd_s64) return cnvt87(e,pretregs); // precision /* FALL-THROUGH */ default: op = CVTTSD2SI; break; } break; case OPf_d: regs = XMMREGS; op = CVTSS2SD; ty = TYdouble; break; } assert(op); CodeBuilder cdb; cdb.append(codelem(e1, ®s, FALSE)); unsigned reg = findreg(regs); if (reg >= XMM0) reg -= XMM0; else if (zx) { assert(I64); cdb.append(getregs(regs)); cdb.append(genregs(CNIL,STO,reg,reg)); // MOV reg,reg to zero upper 32-bit code_orflag(cdb.last(),CFvolatile); } unsigned retregs = *pretregs; if (tyxmmreg(ty)) // target is XMM { if (!(*pretregs & XMMREGS)) retregs = XMMREGS; } else // source is XMM { assert(regs & XMMREGS); if (!(retregs & ALLREGS)) retregs = ALLREGS; } unsigned rreg; cdb.append(allocreg(&retregs,&rreg,ty)); if (rreg >= XMM0) rreg -= XMM0; cdb.gen2(op, modregxrmx(3,rreg,reg)); assert(I64 || !rex); if (rex) code_orrex(cdb.last(), rex); if (*pretregs != retregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }
long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; long ret = -EIO; char *childreg; struct pt_regs copyregs; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = getregs(child, ©regs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; ret = put_user(*(compat_long_t *)childreg, (compat_long_t __user *)datap); } else #endif { if (addr & (sizeof(long)-1)) break; ret = put_user(*(long *)childreg, datap); } break; case PTRACE_POKEUSR: /* Write register in pt_regs. */ if (addr >= PTREGS_SIZE) break; childreg = getregs(child, ©regs) + addr; #ifdef CONFIG_COMPAT if (is_compat_task()) { if (addr & (sizeof(compat_long_t)-1)) break; *(compat_long_t *)childreg = data; } else #endif { if (addr & (sizeof(long)-1)) break; *(long *)childreg = data; } putregs(child, ©regs); ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ ret = copy_regset_to_user(child, &tile_user_regset_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_SETREGS: /* Set all registers in the child. */ ret = copy_regset_from_user(child, &tile_user_regset_view, REGSET_GPR, 0, sizeof(struct pt_regs), datap); break; case PTRACE_GETFPREGS: /* Get the child FPU state. */ case PTRACE_SETFPREGS: /* Set the child FPU state. */ break; case PTRACE_SETOPTIONS: /* Support TILE-specific ptrace options. */ BUILD_BUG_ON(PTRACE_O_MASK_TILE & PTRACE_O_MASK); tmp = data & PTRACE_O_MASK_TILE; data &= ~PTRACE_O_MASK_TILE; ret = ptrace_request(child, request, addr, data); if (ret == 0) { unsigned int flags = child->ptrace; flags &= ~(PTRACE_O_MASK_TILE << PT_OPT_FLAG_SHIFT); flags |= (tmp << PT_OPT_FLAG_SHIFT); child->ptrace = flags; } break; default: #ifdef CONFIG_COMPAT if (task_thread_info(current)->status & TS_COMPAT) { ret = compat_ptrace_request(child, request, addr, data); break; } #endif ret = ptrace_request(child, request, addr, data); break; } return ret; }
code *xmmpost(elem *e,regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; tym_t ty1 = tybasic(e1->Ety); CodeBuilder cdb; regm_t retregs; unsigned reg; bool regvar = FALSE; if (config.flags4 & CFG4optimized) { // Be careful of cases like (x = x+x+x). We cannot evaluate in // x if x is in a register. unsigned varreg; regm_t varregm; if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable doinreg(e1->EV.sp.Vsym,e2) // and we can compute directly into it ) { regvar = TRUE; retregs = varregm; reg = varreg; // evaluate directly in target register cdb.append(getregs(retregs)); // destroy these regs } } code cs; if (!regvar) { code *c = getlvalue(&cs,e1,0); // get EA cdb.append(c); retregs = XMMREGS & ~*pretregs; if (!retregs) retregs = XMMREGS; c = allocreg(&retregs,®,ty1); cdb.append(c); cs.Iop = xmmload(ty1, true); // MOVSD xmm,xmm_m64 code_newreg(&cs,reg - XMM0); cdb.gen(&cs); checkSetVex(cdb.last(), ty1); } // Result register regm_t resultregs = XMMREGS & *pretregs & ~retregs; if (!resultregs) resultregs = XMMREGS & ~retregs; unsigned resultreg; code *c = allocreg(&resultregs, &resultreg, ty1); cdb.append(c); cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0)); // MOVSS/D resultreg,reg checkSetVex(cdb.last(), ty1); regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs); if (!rretregs) rretregs = XMMREGS & ~(retregs | resultregs); c = codelem(e2,&rretregs,FALSE); // eval right leaf cdb.append(c); unsigned rreg = findreg(rretregs); unsigned op = xmmoperator(e1->Ety, e->Eoper); cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); // ADD reg,rreg checkSetVex(cdb.last(), e1->Ety); if (!regvar) { cs.Iop = xmmstore(ty1,true); // reverse operand order of MOVS[SD] cdb.gen(&cs); checkSetVex(cdb.last(), ty1); } if (e1->Ecount || // if lvalue is a CSE or regvar) // rvalue can't be a CSE { cdb.append(getregs_imm(retregs)); // necessary if both lvalue and // rvalue are CSEs (since a reg // can hold only one e at a time) cssave(e1,retregs,EOP(e1)); // if lvalue is a CSE } cdb.append(fixresult(e,resultregs,pretregs)); freenode(e1); return cdb.finish(); }
code *orthxmm(elem *e, regm_t *pretregs) { //printf("orthxmm(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); elem *e1 = e->E1; elem *e2 = e->E2; // float + ifloat is not actually addition if ((e->Eoper == OPadd || e->Eoper == OPmin) && ((tyreal(e1->Ety) && tyimaginary(e2->Ety)) || (tyreal(e2->Ety) && tyimaginary(e1->Ety)))) { regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; unsigned reg; regm_t rretregs; unsigned rreg; if (tyreal(e1->Ety)) { reg = findreg(retregs); rreg = findreg(retregs & ~mask[reg]); retregs = mask[reg]; rretregs = mask[rreg]; } else { // Pick the second register, not the first rreg = findreg(retregs); rretregs = mask[rreg]; reg = findreg(retregs & ~rretregs); retregs = mask[reg]; } assert(retregs && rretregs); CodeBuilder cdb; cdb.append(codelem(e1,&retregs,FALSE)); // eval left leaf cdb.append(scodelem(e2, &rretregs, retregs, TRUE)); // eval right leaf retregs |= rretregs; if (e->Eoper == OPmin) { unsigned nretregs = XMMREGS & ~retregs; unsigned sreg; // hold sign bit unsigned sz = tysize(e1->Ety); cdb.append(allocreg(&nretregs,&sreg,e2->Ety)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; cdb.append(movxmmconst(sreg, sz, signbit, 0)); cdb.append(getregs(nretregs)); unsigned xop = (sz == 8) ? XORPD : XORPS; // XORPD/S rreg,sreg cdb.gen2(xop,modregxrmx(3,rreg-XMM0,sreg-XMM0)); } if (retregs != *pretregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); } regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; CodeBuilder cdb; cdb.append(codelem(e1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; cdb.append(scodelem(e2, &rretregs, retregs, TRUE)); // eval right leaf unsigned rreg = findreg(rretregs); unsigned op = xmmoperator(e1->Ety, e->Eoper); /* We should take advantage of mem addressing modes for OP XMM,MEM * but we do not at the moment. */ if (OTrel(e->Eoper)) { retregs = mPSW; cdb.gen2(op,modregxrmx(3,rreg-XMM0,reg-XMM0)); checkSetVex(cdb.last(), e1->Ety); return cdb.finish(); } else cdb.append(getregs(retregs)); cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); checkSetVex(cdb.last(), e1->Ety); if (retregs != *pretregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }