void function(sSymbol_t f, sSymbol_t caller[], sSymbol_t callee[], int ncalls) { int i; localsize=offset=tmpsize=nbregs=0; funame=f->x.name; for (i=8;i<32;i++) temp[i]->x.name="******"; for (i = 0; caller[i] && callee[i]; i++) { caller[i]->x.name=stringf("(ap),%d",offset); caller[i]->x.adrmode='A'; offset+=caller[i]->type->size; if (optimizelevel>1 && callee[i]->sclass==REGISTER && allocreg(callee[i])) ; /* allocreg ok */ else { callee[i]->x.adrmode=caller[i]->x.adrmode; callee[i]->x.name=caller[i]->x.name; callee[i]->sclass=AUTO; } } busy=localsize=0; offset=6; gencode(caller,callee); omit_frame=(i==0 && localsize==6); print("%s\n",funame); if (optimizelevel>1 && omit_frame && nbregs==0) ; else print("\tENTER(%d,%d)\n",nbregs,localsize); if (isstruct(freturn(f->type))) print("\tMOVW_DI(op1,(fp),6)\n"); emitcode(); }
code *xmmneg(elem *e,regm_t *pretregs) { //printf("xmmneg()\n"); //elem_print(e); assert(*pretregs); tym_t tyml = tybasic(e->E1->Ety); int sz = _tysize[tyml]; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; /* Generate: * MOV reg,e1 * MOV rreg,signbit * XOR reg,rreg */ CodeBuilder cdb; cdb.append(codelem(e->E1,&retregs,FALSE)); cdb.append(getregs(retregs)); unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; unsigned rreg; cdb.append(allocreg(&rretregs,&rreg,tyml)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; cdb.append(movxmmconst(rreg, sz, signbit, 0)); cdb.append(getregs(retregs)); unsigned op = (sz == 8) ? XORPD : XORPS; // XORPD/S reg,rreg cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }
code *orthxmm(elem *e, regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; code *c = codelem(e1,&retregs,FALSE); // eval left leaf unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; code *cr = scodelem(e2, &rretregs, retregs, TRUE); // eval right leaf unsigned op = xmmoperator(e1->Ety, e->Eoper); unsigned rreg = findreg(rretregs); // float + ifloat is not actually addition if ((e->Eoper == OPadd || e->Eoper == OPmin) && ((tyreal(e1->Ety) && tyimaginary(e2->Ety)) || (tyreal(e2->Ety) && tyimaginary(e1->Ety)))) { retregs |= rretregs; c = cat(c, cr); if (e->Eoper == OPmin) { unsigned nretregs = XMMREGS & ~retregs; unsigned sreg; // hold sign bit unsigned sz = tysize[tybasic(e1->Ety)]; c = cat(c,allocreg(&nretregs,&sreg,e2->Ety)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; c = cat(c, movxmmconst(sreg, sz, signbit, 0)); c = cat(c, getregs(nretregs)); unsigned xop = (sz == 8) ? XORPD : XORPS; // XORPD/S rreg,sreg c = cat(c, gen2(CNIL,xop,modregxrmx(3,rreg-XMM0,sreg-XMM0))); } if (retregs != *pretregs) c = cat(c, fixresult(e,retregs,pretregs)); return c; } /* We should take advantage of mem addressing modes for OP XMM,MEM * but we do not at the moment. */ code *cg; if (OTrel(e->Eoper)) { retregs = mPSW; cg = NULL; code *cc = gen2(CNIL,op,modregxrmx(3,rreg-XMM0,reg-XMM0)); return cat4(c,cr,cg,cc); } else cg = getregs(retregs); code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); if (retregs != *pretregs) co = cat(co,fixresult(e,retregs,pretregs)); return cat4(c,cr,cg,co); }
code *xmmneg(elem *e,regm_t *pretregs) { //printf("xmmneg()\n"); //elem_print(e); assert(*pretregs); tym_t tyml = tybasic(e->E1->Ety); int sz = tysize[tyml]; regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; /* Generate: * MOV reg,e1 * MOV rreg,signbit * XOR reg,rreg */ code *cl = codelem(e->E1,&retregs,FALSE); cl = cat(cl,getregs(retregs)); unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; unsigned rreg; cl = cat(cl,allocreg(&rretregs,&rreg,tyml)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; code *c = movxmmconst(rreg, sz, signbit, 0); code *cg = getregs(retregs); unsigned op = (sz == 8) ? XORPD : XORPS; // XORPD/S reg,rreg code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); co = cat(co,fixresult(e,retregs,pretregs)); return cat4(cl,c,cg,co); }
void local(sSymbol_t p) { if (optimizelevel>1 && p->sclass==REGISTER && allocreg(p)) return; /* allocreg ok */ if (p->x.name && p->x.name[0]!='*') return; /* keep previous local (it isn't busy) */ p->x.name = stringf("(fp),%d",offset); p->x.adrmode = 'A'; p->sclass = AUTO; offset+=p->type->size; }
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags) { /* Generate: * MOV reg,value * MOV xreg,reg * Not so efficient. We should at least do a PXOR for 0. */ assert(mask[xreg] & XMMREGS); assert(sz == 4 || sz == 8); CodeBuilder cdb; if (I32 && sz == 8) { unsigned r; regm_t rm = ALLREGS; cdb.append(allocreg(&rm,&r,TYint)); // allocate scratch register union { targ_size_t s; targ_long l[2]; } u; u.l[1] = 0; u.s = value; targ_long *p = &u.l[0]; cdb.append(movregconst(CNIL,r,p[0],0)); cdb.genfltreg(STO,r,0); // MOV floatreg,r cdb.append(movregconst(CNIL,r,p[1],0)); cdb.genfltreg(STO,r,4); // MOV floatreg+4,r unsigned op = xmmload(TYdouble, true); cdb.genxmmreg(op,xreg,0,TYdouble); // MOVSD XMMreg,floatreg } else { unsigned reg; cdb.append(regwithvalue(CNIL,ALLREGS,value,®,(sz == 8) ? 64 : 0)); cdb.gen2(LODD,modregxrmx(3,xreg-XMM0,reg)); // MOVD xreg,reg if (sz == 8) code_orrex(cdb.last(), REX_W); checkSetVex(cdb.last(), TYulong); } return cdb.finish(); }
code *movxmmconst(unsigned xreg, unsigned sz, targ_size_t value, regm_t flags) { /* Generate: * MOV reg,value * MOV xreg,reg * Not so efficient. We should at least do a PXOR for 0. */ assert(mask[xreg] & XMMREGS); assert(sz == 4 || sz == 8); code *c; if (I32 && sz == 8) { unsigned r; regm_t rm = ALLREGS; c = allocreg(&rm,&r,TYint); // allocate scratch register union { targ_size_t s; targ_long l[2]; } u; u.l[1] = 0; u.s = value; targ_long *p = &u.l[0]; c = movregconst(c,r,p[0],0); c = genfltreg(c,0x89,r,0); // MOV floatreg,r c = movregconst(c,r,p[1],0); c = genfltreg(c,0x89,r,4); // MOV floatreg+4,r unsigned op = xmmload(TYdouble); c = genfltreg(c,op,xreg - XMM0,0); // MOVSD XMMreg,floatreg } else { unsigned reg; c = regwithvalue(CNIL,ALLREGS,value,®,(sz == 8) ? 64 : 0); c = gen2(c,LODD,modregxrmx(3,xreg-XMM0,reg)); // MOVD xreg,reg if (sz == 8) code_orrex(c, REX_W); } return c; }
/* * Loads an ELF 32 executable. */ PRIVATE addr_t load_elf32(struct inode *inode) { int i; /* Loop index. */ addr_t addr; /* Region address. */ addr_t entry; /* Program entry point. */ struct elf32_fhdr *elf; /* ELF file header. */ struct elf32_phdr *seg; /* ELF Program header. */ block_t blk; /* Working block number. */ buffer_t header; /* File headers block buffer. */ struct region *reg; /* Working memory region. */ struct pregion *preg; /* Working process memory region. */ blk = block_map(inode, 0, 0); /* Empty file. */ if (blk == BLOCK_NULL) { curr_proc->errno = -ENOEXEC; return (0); } /* Read ELF file header. */ header = bread(inode->dev, blk); elf = buffer_data(header); /* Bad ELF file. */ if (!is_elf(elf)) { brelse(header); curr_proc->errno = -ENOEXEC; return (0); } /* Bad ELF file. */ if (elf->e_phoff + elf->e_phnum*elf->e_phentsize > BLOCK_SIZE) { brelse(header); curr_proc->errno = -ENOEXEC; return (0); } seg = (struct elf32_phdr *)((char *)buffer_data(header) + elf->e_phoff); /* Load segments. */ for (i = 0; i < elf->e_phnum; i++) { /* Not loadable. */ if (seg[i].p_type != PT_LOAD) continue; /* Broken executable. */ if (seg[i].p_filesz > seg[i].p_memsz) { kprintf("broken executable"); brelse(header); curr_proc->errno = -ENOEXEC; return (0); } addr = ALIGN(seg[i].p_vaddr, seg[i].p_align); /* Text section. */ if (!(seg[i].p_flags ^ (PF_R | PF_X))) { preg = TEXT(curr_proc); reg = allocreg(S_IRUSR | S_IXUSR, seg[i].p_memsz, 0); } /* Data section. */ else { preg = DATA(curr_proc); reg = allocreg(S_IRUSR | S_IWUSR, seg[i].p_memsz, 0); } /* Failed to allocate region. */ if (reg == NULL) { brelse(header); curr_proc->errno = -ENOMEM; return (0); } /* Attach memory region. */ if (attachreg(curr_proc, preg, addr, reg)) { freereg(reg); brelse(header); curr_proc->errno = -ENOMEM; return (0); } loadreg(inode, reg, seg[i].p_offset, seg[i].p_filesz); unlockreg(reg); } entry = elf->e_entry; brelse(header); return (entry); }
/* * Executes a program. */ PUBLIC int sys_execve(const char *filename, const char **argv, const char **envp) { int i; /* Loop index. */ struct inode *inode; /* File inode. */ struct region *reg; /* Process region. */ addr_t entry; /* Program entry point. */ addr_t sp; /* User stack pointer. */ char *name; /* File name. */ char stack[ARG_MAX]; /* Stack size. */ /* Get file name. */ if ((name = getname(filename)) == NULL) return (curr_proc->errno); /* Build arguments before freeing user memory. */ kmemset(stack, 0, ARG_MAX); if (!(sp = buildargs(stack, ARG_MAX, argv, envp))) { putname(name); return (curr_proc->errno); } /* Get file's inode. */ if ((inode = inode_name(name)) == NULL) { putname(name); return (curr_proc->errno); } /* Not a regular file. */ if (!S_ISREG(inode->mode)) { putname(name); inode_put(inode); return (-EACCES); } /* Not allowed. */ if (!permission(inode->mode, inode->uid, inode->gid, curr_proc, MAY_EXEC, 0)) { putname(name); inode_put(inode); return (-EACCES); } /* Close file descriptors. */ for (i = 0; i < OPEN_MAX; i++) { if (curr_proc->close & (1 << i)) do_close(i); } /* Detach process memory regions. */ for (i = 0; i < NR_PREGIONS; i++) detachreg(curr_proc, &curr_proc->pregs[i]); /* Reset signal handlers. */ curr_proc->restorer = NULL; for (i = 0; i < NR_SIGNALS; i++) { if (curr_proc->handlers[i] != SIG_DFL) { if (curr_proc->handlers[i] != SIG_IGN) curr_proc->handlers[i] = SIG_DFL; } } /* Load executable. */ if (!(entry = load_elf32(inode))) goto die0; /* Attach stack region. */ if ((reg = allocreg(S_IRUSR | S_IWUSR, PAGE_SIZE, REGION_DOWNWARDS)) == NULL) goto die0; if (attachreg(curr_proc, STACK(curr_proc), USTACK_ADDR - 1, reg)) goto die1; unlockreg(reg); /* Attach heap region. */ if ((reg = allocreg(S_IRUSR | S_IWUSR, PAGE_SIZE, REGION_UPWARDS)) == NULL) goto die0; if (attachreg(curr_proc, HEAP(curr_proc), UHEAP_ADDR, reg)) goto die1; unlockreg(reg); inode_put(inode); putname(name); kmemcpy((void *)(USTACK_ADDR - ARG_MAX), stack, ARG_MAX); user_mode(entry, sp); /* Will not return. */ return (0); die1: unlockreg(reg); freereg(reg); die0: inode_put(inode); putname(name); die(((SIGSEGV & 0xff) << 16) | (1 << 9)); return (-1); }
code *orthxmm(elem *e, regm_t *pretregs) { //printf("orthxmm(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); elem *e1 = e->E1; elem *e2 = e->E2; // float + ifloat is not actually addition if ((e->Eoper == OPadd || e->Eoper == OPmin) && ((tyreal(e1->Ety) && tyimaginary(e2->Ety)) || (tyreal(e2->Ety) && tyimaginary(e1->Ety)))) { regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; unsigned reg; regm_t rretregs; unsigned rreg; if (tyreal(e1->Ety)) { reg = findreg(retregs); rreg = findreg(retregs & ~mask[reg]); retregs = mask[reg]; rretregs = mask[rreg]; } else { // Pick the second register, not the first rreg = findreg(retregs); rretregs = mask[rreg]; reg = findreg(retregs & ~rretregs); retregs = mask[reg]; } assert(retregs && rretregs); CodeBuilder cdb; cdb.append(codelem(e1,&retregs,FALSE)); // eval left leaf cdb.append(scodelem(e2, &rretregs, retregs, TRUE)); // eval right leaf retregs |= rretregs; if (e->Eoper == OPmin) { unsigned nretregs = XMMREGS & ~retregs; unsigned sreg; // hold sign bit unsigned sz = tysize(e1->Ety); cdb.append(allocreg(&nretregs,&sreg,e2->Ety)); targ_size_t signbit = 0x80000000; if (sz == 8) signbit = 0x8000000000000000LL; cdb.append(movxmmconst(sreg, sz, signbit, 0)); cdb.append(getregs(nretregs)); unsigned xop = (sz == 8) ? XORPD : XORPS; // XORPD/S rreg,sreg cdb.gen2(xop,modregxrmx(3,rreg-XMM0,sreg-XMM0)); } if (retregs != *pretregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); } regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; CodeBuilder cdb; cdb.append(codelem(e1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); regm_t rretregs = XMMREGS & ~retregs; cdb.append(scodelem(e2, &rretregs, retregs, TRUE)); // eval right leaf unsigned rreg = findreg(rretregs); unsigned op = xmmoperator(e1->Ety, e->Eoper); /* We should take advantage of mem addressing modes for OP XMM,MEM * but we do not at the moment. */ if (OTrel(e->Eoper)) { retregs = mPSW; cdb.gen2(op,modregxrmx(3,rreg-XMM0,reg-XMM0)); checkSetVex(cdb.last(), e1->Ety); return cdb.finish(); } else cdb.append(getregs(retregs)); cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); checkSetVex(cdb.last(), e1->Ety); if (retregs != *pretregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }
code *xmmpost(elem *e,regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; tym_t ty1 = tybasic(e1->Ety); CodeBuilder cdb; regm_t retregs; unsigned reg; bool regvar = FALSE; if (config.flags4 & CFG4optimized) { // Be careful of cases like (x = x+x+x). We cannot evaluate in // x if x is in a register. unsigned varreg; regm_t varregm; if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable doinreg(e1->EV.sp.Vsym,e2) // and we can compute directly into it ) { regvar = TRUE; retregs = varregm; reg = varreg; // evaluate directly in target register cdb.append(getregs(retregs)); // destroy these regs } } code cs; if (!regvar) { code *c = getlvalue(&cs,e1,0); // get EA cdb.append(c); retregs = XMMREGS & ~*pretregs; if (!retregs) retregs = XMMREGS; c = allocreg(&retregs,®,ty1); cdb.append(c); cs.Iop = xmmload(ty1, true); // MOVSD xmm,xmm_m64 code_newreg(&cs,reg - XMM0); cdb.gen(&cs); checkSetVex(cdb.last(), ty1); } // Result register regm_t resultregs = XMMREGS & *pretregs & ~retregs; if (!resultregs) resultregs = XMMREGS & ~retregs; unsigned resultreg; code *c = allocreg(&resultregs, &resultreg, ty1); cdb.append(c); cdb.gen2(xmmload(ty1,true),modregxrmx(3,resultreg-XMM0,reg-XMM0)); // MOVSS/D resultreg,reg checkSetVex(cdb.last(), ty1); regm_t rretregs = XMMREGS & ~(*pretregs | retregs | resultregs); if (!rretregs) rretregs = XMMREGS & ~(retregs | resultregs); c = codelem(e2,&rretregs,FALSE); // eval right leaf cdb.append(c); unsigned rreg = findreg(rretregs); unsigned op = xmmoperator(e1->Ety, e->Eoper); cdb.gen2(op,modregxrmx(3,reg-XMM0,rreg-XMM0)); // ADD reg,rreg checkSetVex(cdb.last(), e1->Ety); if (!regvar) { cs.Iop = xmmstore(ty1,true); // reverse operand order of MOVS[SD] cdb.gen(&cs); checkSetVex(cdb.last(), ty1); } if (e1->Ecount || // if lvalue is a CSE or regvar) // rvalue can't be a CSE { cdb.append(getregs_imm(retregs)); // necessary if both lvalue and // rvalue are CSEs (since a reg // can hold only one e at a time) cssave(e1,retregs,EOP(e1)); // if lvalue is a CSE } cdb.append(fixresult(e,resultregs,pretregs)); freenode(e1); return cdb.finish(); }
code *xmmcnvt(elem *e,regm_t *pretregs) { unsigned op=0, regs; tym_t ty; unsigned char rex = 0; bool zx = false; // zero extend uint /* There are no ops for integer <-> float/real conversions * but there are instructions for them. In order to use these * try to fuse chained conversions. Be careful not to loose * precision for real to long. */ elem *e1 = e->E1; switch (e->Eoper) { case OPd_f: if (e1->Eoper == OPs32_d) ; else if (I64 && e1->Eoper == OPs64_d) rex = REX_W; else if (I64 && e1->Eoper == OPu32_d) { rex = REX_W; zx = true; } else { regs = XMMREGS; op = CVTSD2SS; ty = TYfloat; break; } // directly use si2ss regs = ALLREGS; e1 = e1->E1; op = CVTSI2SS; ty = TYfloat; break; case OPs32_d: goto Litod; case OPs64_d: rex = REX_W; goto Litod; case OPu32_d: rex = REX_W; zx = true; goto Litod; Litod: regs = ALLREGS; op = CVTSI2SD; ty = TYdouble; break; case OPd_s32: ty = TYint; goto Ldtoi; case OPd_u32: ty = TYlong; if (I64) rex = REX_W; goto Ldtoi; case OPd_s64: ty = TYlong; rex = REX_W; goto Ldtoi; Ldtoi: regs = XMMREGS; switch (e1->Eoper) { case OPf_d: e1 = e1->E1; op = CVTTSS2SI; break; case OPld_d: if (e->Eoper == OPd_s64) return cnvt87(e,pretregs); // precision /* FALL-THROUGH */ default: op = CVTTSD2SI; break; } break; case OPf_d: regs = XMMREGS; op = CVTSS2SD; ty = TYdouble; break; } assert(op); CodeBuilder cdb; cdb.append(codelem(e1, ®s, FALSE)); unsigned reg = findreg(regs); if (reg >= XMM0) reg -= XMM0; else if (zx) { assert(I64); cdb.append(getregs(regs)); cdb.append(genregs(CNIL,STO,reg,reg)); // MOV reg,reg to zero upper 32-bit code_orflag(cdb.last(),CFvolatile); } unsigned retregs = *pretregs; if (tyxmmreg(ty)) // target is XMM { if (!(*pretregs & XMMREGS)) retregs = XMMREGS; } else // source is XMM { assert(regs & XMMREGS); if (!(retregs & ALLREGS)) retregs = ALLREGS; } unsigned rreg; cdb.append(allocreg(&retregs,&rreg,ty)); if (rreg >= XMM0) rreg -= XMM0; cdb.gen2(op, modregxrmx(3,rreg,reg)); assert(I64 || !rex); if (rex) code_orrex(cdb.last(), rex); if (*pretregs != retregs) cdb.append(fixresult(e,retregs,pretregs)); return cdb.finish(); }
/*************** * Generate code for OPvecfill (broadcast). * OPvecfill takes the single value in e1 and * fills the vector type with it. */ code *cdvecfill(elem *e, regm_t *pretregs) { //printf("cdvecfill(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); regm_t retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; CodeBuilder cdb; code *c; code cs; elem *e1 = e->E1; #if 0 if ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar) { cr = getlvalue(&cs, e1, RMload | retregs); // get addressing mode } else { unsigned rretregs = XMMREGS & ~retregs; cr = scodelem(op2, &rretregs, retregs, TRUE); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } #endif unsigned reg; unsigned rreg; unsigned varreg; regm_t varregm; tym_t ty = tybasic(e->Ety); switch (ty) { case TYfloat4: case TYfloat8: if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg) ) { Lint: if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VBROADCASTSS XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode assert((cs.Irm & 0xC0) != 0xC0); // AVX1 doesn't have register source operands cdb.append(allocreg(&retregs,®,ty)); cs.Iop = VBROADCASTSS; cs.Irex &= ~REX_W; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { // SHUFPS XMM0,XMM0,0 0F C6 /r ib c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); reg = findreg(retregs) - XMM0; cdb.append(getregs(retregs)); cs.Iop = SHUFPS; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSS XMM,XMM cs.Iop = VBROADCASTSS; checkSetVex(&cs, ty); } cdb.gen(&cs); } break; case TYdouble2: case TYdouble4: if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg) ) { if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VBROADCASTSD XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode assert((cs.Irm & 0xC0) != 0xC0); // AVX1 doesn't have register source operands cdb.append(allocreg(&retregs,®,ty)); cs.Iop = VBROADCASTSD; cs.Irex &= ~REX_W; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { // UNPCKLPD XMM0,XMM0 66 0F 14 /r c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); reg = findreg(retregs) - XMM0; cdb.append(getregs(retregs)); cs.Iop = UNPCKLPD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSD XMM,XMM cs.Iop = VBROADCASTSD; checkSetVex(&cs, ty); } cdb.gen(&cs); } break; case TYschar16: case TYuchar16: case TYschar32: case TYuchar32: { /* MOVD XMM0,r * PUNPCKLBW XMM0,XMM0 * PUNPCKLWD XMM0,XMM0 * PSHUFD XMM0,XMM0,0 */ regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r checkSetVex(cdb.last(),TYschar16); cs.Iop = PUNPCKLBW; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); cs.Iop = PUNPCKLWD; cdb.gen(&cs); cs.Iop = PSHUFD; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; checkSetVex(&cs,TYschar16); cdb.gen(&cs); if (tysize(ty) == 32) { // VINSERTF128 YMM0,YMM0,XMM0,1 cs.Iop = VINSERTF128; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 1; checkSetVex(&cs,ty); cdb.gen(&cs); } break; } case TYshort8: case TYushort8: case TYshort16: case TYushort16: { regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); if (config.avx || tysize(ty) == 32) { /* * VPXOR XMM0,XMM0,XMM0 * VPINSRW XMM0,XMM0,r,0 * VPINSRW XMM0,XMM0,r,1 * VPINSRW XMM0,XMM0,r,2 * VPINSRW XMM0,XMM0,r,3 */ cdb.append(allocreg(&retregs,®, ty)); cdb.gen2(PXOR,modregxrmx(3,reg-XMM0,reg-XMM0)); checkSetVex(cdb.last(), TYshort8); for (int i = 0; i < tysize(ty) / 4; ++i) { cdb.genc2(PINSRW,modregxrmx(3,reg-XMM0,r),i); checkSetVex(cdb.last(), TYshort8); } if (tysize(ty) == 32) { // VINSERTF128 YMM0,YMM0,XMM0,1 cs.Iop = VINSERTF128; cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 1; checkSetVex(&cs,ty); cdb.gen(&cs); } else { // VPSHUFD XMM0,XMM0,0 cs.Iop = PSHUFD; cs.Irm = modregxrmx(3,reg-XMM0,reg-XMM0); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; checkSetVex(&cs,ty); cdb.gen(&cs); } } else { /* MOVD XMM0,r * PUNPCKLWD XMM0,XMM0 * PSHUFD XMM0,XMM0,0 */ c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r checkSetVex(cdb.last(),e->Ety); cs.Iop = PUNPCKLWD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); cs.Iop = PSHUFD; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; cdb.gen(&cs); } break; } case TYlong8: case TYulong8: case TYlong4: case TYulong4: { if (config.avx && ((e1->Eoper == OPind && !e1->Ecount) || e1->Eoper == OPvar && !isregvar(e1,&varregm,&varreg)) || tysize(ty) == 32 && !isregvar(e1,&varregm,&varreg)) { goto Lint; } /* MOVD XMM1,r * PSHUFD XMM0,XMM1,0 */ regm_t regm = ALLREGS; c = codelem(e1,®m,FALSE); // eval left leaf cdb.append(c); unsigned r = findreg(regm); c = allocreg(&retregs,®, e->Ety); cdb.append(c); reg -= XMM0; cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVD reg,r cs.Iop = PSHUFD; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = 0; if (config.avx >= 2 || tysize(ty) == 32) { // VBROADCASTSS XMM,XMM cs.Iop = VBROADCASTSS; checkSetVex(&cs, ty); } cdb.gen(&cs); break; } case TYllong2: case TYullong2: case TYllong4: case TYullong4: if (config.avx || tysize(ty) >= 32) { if (e1->Eoper == OPvar) e1->EV.sp.Vsym->Sflags &= ~GTregcand; // VMOVDDUP XMM,MEM cdb.append(getlvalue(&cs, e1, 0)); // get addressing mode if ((cs.Irm & 0xC0) == 0xC0) { unsigned sreg = ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0)); regm_t sregm = XMMREGS; cdb.append(fixresult(e1, mask[sreg], &sregm)); unsigned rmreg = findreg(sregm); cs.Irm = (cs.Irm & ~7) | ((rmreg - XMM0) & 7); if ((rmreg - XMM0) & 8) cs.Irex |= REX_B; else cs.Irex &= ~REX_B; } cdb.append(allocreg(&retregs,®,ty)); if (config.avx >= 2 || tysize(ty) >= 32) { cs.Iop = VBROADCASTSD; cs.Irex &= ~REX_W; } else cs.Iop = MOVDDUP; code_newreg(&cs,reg - XMM0); checkSetVex(&cs,ty); cdb.gen(&cs); } else { /* MOVQ XMM0,mem128 * PUNPCKLQDQ XMM0,XMM0 */ c = codelem(e1,&retregs,FALSE); // eval left leaf cdb.append(c); unsigned reg = findreg(retregs); reg -= XMM0; //cdb.gen2(LODD,modregxrmx(3,reg,r)); // MOVQ reg,r cs.Iop = PUNPCKLQDQ; cs.Irm = modregxrmx(3,reg,reg); cs.Iflags = 0; cdb.gen(&cs); } break; default: assert(0); } c = fixresult(e,retregs,pretregs); cdb.append(c); return cdb.finish(); }
code *cdvector(elem *e, regm_t *pretregs) { /* e should look like one of: * vector * | * param * / \ * param op2 * / \ * op op1 */ if (!config.fpxmmregs) { printf("SIMD operations not supported on this platform\n"); exit(1); } unsigned n = el_nparams(e->E1); elem **params = (elem **)malloc(n * sizeof(elem *)); assert(params); elem **tmp = params; el_paramArray(&tmp, e->E1); #if 0 printf("cdvector()\n"); for (int i = 0; i < n; i++) { printf("[%d]: ", i); elem_print(params[i]); } #endif if (*pretregs == 0) { /* Evaluate for side effects only */ CodeBuilder cdb; for (int i = 0; i < n; i++) { cdb.append(codelem(params[i], pretregs, FALSE)); *pretregs = 0; // in case they got set } return cdb.finish(); } assert(n >= 2 && n <= 4); elem *eop = params[0]; elem *op1 = params[1]; elem *op2 = NULL; tym_t ty2 = 0; if (n >= 3) { op2 = params[2]; ty2 = tybasic(op2->Ety); } unsigned op = el_tolong(eop); #ifdef DEBUG assert(!isXMMstore(op)); #endif tym_t ty1 = tybasic(op1->Ety); unsigned sz1 = _tysize[ty1]; // assert(sz1 == 16); // float or double regm_t retregs; CodeBuilder cdb; if (n == 3 && ty2 == TYuchar && op2->Eoper == OPconst) { // Handle: op xmm,imm8 retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); int r; switch (op) { case PSLLD: r = 6; op = 0x660F72; break; case PSLLQ: r = 6; op = 0x660F73; break; case PSLLW: r = 6; op = 0x660F71; break; case PSRAD: r = 4; op = 0x660F72; break; case PSRAW: r = 4; op = 0x660F71; break; case PSRLD: r = 2; op = 0x660F72; break; case PSRLQ: r = 2; op = 0x660F73; break; case PSRLW: r = 2; op = 0x660F71; break; case PSRLDQ: r = 3; op = 0x660F73; break; case PSLLDQ: r = 7; op = 0x660F73; break; default: printf("op = x%x\n", op); assert(0); break; } cdb.append(getregs(retregs)); cdb.genc2(op,modregrmx(3,r,reg-XMM0), el_tolong(op2)); } else if (n == 2) { /* Handle: op xmm,mem * where xmm is written only, not read */ code cs; if ((op1->Eoper == OPind && !op1->Ecount) || op1->Eoper == OPvar) { cdb.append(getlvalue(&cs, op1, RMload)); // get addressing mode } else { regm_t rretregs = XMMREGS; cdb.append(codelem(op1, &rretregs, FALSE)); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; unsigned reg; cdb.append(allocreg(&retregs, ®, e->Ety)); code_newreg(&cs, reg - XMM0); cs.Iop = op; cdb.gen(&cs); } else if (n == 3 || n == 4) { /* Handle: * op xmm,mem // n = 3 * op xmm,mem,imm8 // n = 4 * Both xmm and mem are operands, evaluate xmm first. */ code cs; retregs = *pretregs & XMMREGS; if (!retregs) retregs = XMMREGS; cdb.append(codelem(op1,&retregs,FALSE)); // eval left leaf unsigned reg = findreg(retregs); if ((op2->Eoper == OPind && !op2->Ecount) || op2->Eoper == OPvar) { cdb.append(getlvalue(&cs, op2, RMload | retregs)); // get addressing mode } else { unsigned rretregs = XMMREGS & ~retregs; cdb.append(scodelem(op2, &rretregs, retregs, TRUE)); unsigned rreg = findreg(rretregs) - XMM0; cs.Irm = modregrm(3,0,rreg & 7); cs.Iflags = 0; cs.Irex = 0; if (rreg & 8) cs.Irex |= REX_B; } cdb.append(getregs(retregs)); if (n == 4) { switch (op) { case CMPPD: case CMPSS: case CMPSD: case CMPPS: case PSHUFD: case PSHUFHW: case PSHUFLW: case BLENDPD: case BLENDPS: case DPPD: case DPPS: case MPSADBW: case PBLENDW: case ROUNDPD: case ROUNDPS: case ROUNDSD: case ROUNDSS: case SHUFPD: case SHUFPS: break; default: printf("op = x%x\n", op); assert(0); break; } elem *imm8 = params[3]; cs.IFL2 = FLconst; cs.IEV2.Vsize_t = el_tolong(imm8); } code_newreg(&cs, reg - XMM0); cs.Iop = op; cdb.gen(&cs); } else assert(0); cdb.append(fixresult(e,retregs,pretregs)); free(params); freenode(e); return cdb.finish(); }
code *xmmopass(elem *e,regm_t *pretregs) { elem *e1 = e->E1; elem *e2 = e->E2; tym_t ty1 = tybasic(e1->Ety); unsigned sz1 = tysize[ty1]; regm_t rretregs = XMMREGS & ~*pretregs; if (!rretregs) rretregs = XMMREGS; code *cr = codelem(e2,&rretregs,FALSE); // eval right leaf unsigned rreg = findreg(rretregs); code cs; code *cl,*cg; regm_t retregs; unsigned reg; bool regvar = FALSE; if (config.flags4 & CFG4optimized) { // Be careful of cases like (x = x+x+x). We cannot evaluate in // x if x is in a register. unsigned varreg; regm_t varregm; if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable doinreg(e1->EV.sp.Vsym,e2) // and we can compute directly into it ) { regvar = TRUE; retregs = varregm; reg = varreg; // evaluate directly in target register cl = NULL; cg = getregs(retregs); // destroy these regs } } if (!regvar) { cl = getlvalue(&cs,e1,rretregs); // get EA retregs = *pretregs & XMMREGS & ~rretregs; if (!retregs) retregs = XMMREGS & ~rretregs; cg = allocreg(&retregs,®,ty1); cs.Iop = xmmload(ty1); // MOVSD xmm,xmm_m64 code_newreg(&cs,reg - XMM0); cg = gen(cg,&cs); } unsigned op = xmmoperator(e1->Ety, e->Eoper); code *co = gen2(CNIL,op,modregxrmx(3,reg-XMM0,rreg-XMM0)); if (!regvar) { cs.Iop = xmmstore(ty1); // reverse operand order of MOVS[SD] gen(co,&cs); } if (e1->Ecount || // if lvalue is a CSE or regvar) // rvalue can't be a CSE { cl = cat(cl,getregs_imm(retregs)); // necessary if both lvalue and // rvalue are CSEs (since a reg // can hold only one e at a time) cssave(e1,retregs,EOP(e1)); // if lvalue is a CSE } co = cat(co,fixresult(e,retregs,pretregs)); freenode(e1); return cat4(cr,cl,cg,co); }