/*
 * findinc looks forward from r, along the chain of unique successors,
 * for an ADD of a constant to v whose value lies strictly between
 * -4096 and 4096 (the immed_12 range).
 *
 * The search stops without a result (nil) when r2 or the end of the
 * chain is reached, when a step is not unique in both directions, or
 * when v is touched in any way other than by such an ADD.
 */
static Flow*
findinc(Flow *r, Flow *r2, Adr *v)
{
	Flow *next;
	Prog *ins;
	int how;

	next = uniqs(r);
	while(next != nil && next != r2) {
		/* next must have r as its only predecessor */
		if(uniqp(next) != r)
			return nil;

		how = copyu(next->prog, v, A);
		if(how != 0) {
			/* only "set and used" (4) can be the increment */
			if(how != 4)
				return nil;
			ins = next->prog;
			if(ins->as == AADD && isdconst(&ins->from) &&
			   ins->from.offset > -4096 && ins->from.offset < 4096)
				return next;
			return nil;
		}

		/* v not touched: keep walking */
		r = next;
		next = uniqs(r);
	}
	return nil;
}
/*
 * findinc looks forward from r, along the chain of unique successors,
 * for an ADD whose source is a D_CONST strictly between -4096 and 4096
 * and which both sets and uses v.
 *
 * Returns the Reg node of that ADD, or R when r2 or the end of the
 * chain is reached, when a step is not unique in both directions, or
 * when v is touched in any other way first.
 */
Reg*
findinc(Reg *r, Reg *r2, Adr *v)
{
	Reg *next;
	Prog *ins;
	int how;

	next = uniqs(r);
	while(next != R && next != r2) {
		/* next must have r as its only predecessor */
		if(uniqp(next) != r)
			return R;

		how = copyu(next->prog, v, A);
		if(how != 0) {
			/* only "set and used" (4) can be the increment */
			if(how != 4)
				return R;
			ins = next->prog;
			if(ins->as == AADD && ins->from.type == D_CONST &&
			   ins->from.offset > -4096 && ins->from.offset < 4096)
				return next;
			return R;
		}

		/* v not touched: keep walking */
		r = next;
		next = uniqs(r);
	}
	return R;
}
/*
 * nochange reports whether the register inputs of p survive from r up
 * to (but not including) r2 along the chain of unique successors.
 *
 * The registers checked are p->reg (when set and different from the
 * destination register), and for the source operand: the register in
 * the low four bits of the offset of a D_SHIFT plus from.reg, or just
 * from.reg for a D_REG.
 *
 * Returns 1 when r == r2, when there is nothing to check, or when no
 * instruction on the way yields copyu() > 1 for any of them;
 * returns 0 otherwise.
 */
int
nochange(Reg *r, Reg *r2, Prog *p)
{
	Adr regs[3];
	int cnt, k;

	if(r == r2)
		return 1;

	cnt = 0;
	if(p->reg != NREG && p->reg != p->to.reg) {
		regs[cnt].type = D_REG;
		regs[cnt].reg = p->reg;
		cnt++;
	}
	if(p->from.type == D_SHIFT) {
		regs[cnt].type = D_REG;
		regs[cnt].reg = p->from.offset&0xf;
		cnt++;
	}
	/* a D_SHIFT operand also contributes from.reg, like a D_REG */
	if(p->from.type == D_SHIFT || p->from.type == D_REG) {
		regs[cnt].type = D_REG;
		regs[cnt].reg = p->from.reg;
		cnt++;
	}
	if(cnt == 0)
		return 1;

	while(r != R && r != r2) {
		for(k = 0; k < cnt; k++)
			if(copyu(r->prog, &regs[k], A) > 1)
				return 0;
		r = uniqs(r);
	}
	return 1;
}
// nilwalkfwd marks the nil check rcheck as removable (rcheck->kill = 1)
// when a later instruction on the chain of unique successors accesses
// memory through the checked address (smallindir) before anything
// forces the walk to stop.
//
// Only the straight-line chain is considered, to avoid problems like:
//	_ = *x // should panic
//	for {} // no writes but infinite loop may be considered visible
static void
nilwalkfwd(NilFlow *rcheck)
{
	NilFlow *cur;
	Prog *chk, *ins;
	ProgInfo info;

	chk = rcheck->f.prog;
	cur = (NilFlow*)uniqs(&rcheck->f);
	while(cur != nil) {
		ins = cur->f.prog;
		proginfo(&info, ins);

		// An access through the checked pointer, on either operand,
		// makes the explicit check redundant.
		if(((info.flags & LeftRead) && smallindir(&ins->from, &chk->from)) ||
		   ((info.flags & (RightRead|RightWrite)) && smallindir(&ins->to, &chk->from))) {
			rcheck->kill = 1;
			return;
		}

		// Stop if another nil check happens.
		if(ins->as == ACHECKNIL)
			return;

		if(info.flags & RightWrite) {
			// Stop if value is lost.
			if(sameaddr(&ins->to, &chk->from))
				return;
			// Stop if memory write.
			if(!regtyp(&ins->to))
				return;
		}

		cur = (NilFlow*)uniqs(&cur->f);
	}
}
// conprop follows the chain of unique successors of r0 and excises
// every later instruction that sets r0's destination again with the
// same opcode and an identical source operand.
//
// The walk ends when the chain ends or loops back to r0, when a node
// has no unique predecessor, or when the destination is touched in
// any way other than a plain use or an identical set.
static void
conprop(Reg *r0)
{
	Reg *cur;
	Prog *def, *ins;
	Adr *dst;

	def = r0->prog;
	dst = &def->to;

	for(cur = uniqs(r0); cur != R && cur != r0; cur = uniqs(cur)) {
		if(uniqp(cur) == R)
			return;
		ins = cur->prog;

		switch(copyu(ins, dst, A)) {
		case 0:	// miss
		case 1:	// use
			continue;

		case 3:	// set
			if(ins->as != def->as ||
			   ins->from.type != def->from.type ||
			   ins->from.sym != def->from.sym ||
			   ins->from.offset != def->from.offset ||
			   ins->from.scale != def->from.scale ||
			   ins->from.dval != def->from.dval ||
			   ins->from.index != def->from.index)
				return;
			// identical reload: drop it and keep looking
			excise(cur);
			continue;

		default:	// rar (2), use and set (4), anything else
			return;
		}
	}
}
/*
 * rnops skips forward over empty NOPs (ANOP with both operands D_NONE)
 * along the chain of unique successors.
 *
 * Returns the first node that is not such a NOP, or the last NOP when
 * it has no unique successor.  A nil argument is returned unchanged.
 */
static Flow*
rnops(Flow *r)
{
	Flow *next;
	Prog *ins;

	while(r != nil) {
		ins = r->prog;
		if(!(ins->as == ANOP && ins->from.type == D_NONE && ins->to.type == D_NONE))
			break;
		next = uniqs(r);
		if(next == nil)
			break;
		r = next;
	}
	return r;
}
/*
 * rnops skips forward over empty NOPs (ANOP with both operands D_NONE)
 * along the chain of unique successors.
 *
 * Returns the first node that is not such a NOP, or the last NOP when
 * it has no unique successor.  An R argument is returned unchanged.
 */
static Reg*
rnops(Reg *r)
{
	Reg *next;
	Prog *ins;

	while(r != R) {
		ins = r->prog;
		if(!(ins->as == ANOP && ins->from.type == D_NONE && ins->to.type == D_NONE))
			break;
		next = uniqs(r);
		if(next == R)
			break;
		r = next;
	}
	return r;
}
// conprop follows the chain of unique successors of r0 and excises
// every later instruction that sets r0's destination again with the
// same opcode and an identical source operand.
//
// The walk ends when the chain ends or loops back to r0, when a node
// has no unique predecessor, or when the destination is touched in
// any way other than a plain use or an identical set.
//
// The floating-point value lives in the union from.u, so it is only
// compared when the operand is a D_FCONST.  Previously that guard was
// part of the AND chain, which meant nothing but float constants could
// ever match and integer constants / addresses were never propagated.
// Because non-float operands can now match, the symbol is compared as
// well: two address operands with the same (possibly nil) node but
// different symbols must not be treated as equal.
static void
conprop(Flow *r0)
{
	Flow *r;
	Prog *p, *p0;
	Adr *v0;

	p0 = r0->prog;
	v0 = &p0->to;

	for(r = uniqs(r0); r != nil && r != r0; r = uniqs(r)) {
		if(uniqp(r) == nil)
			return;
		p = r->prog;

		switch(copyu(p, v0, nil)) {
		case 0:	// miss
		case 1:	// use
			continue;

		case 3:	// set
			if(p->as != p0->as ||
			   p->from.type != p0->from.type ||
			   p->from.sym != p0->from.sym ||
			   p->from.node != p0->from.node ||
			   p->from.offset != p0->from.offset ||
			   p->from.scale != p0->from.scale ||
			   p->from.index != p0->from.index)
				return;
			// u.dval is only meaningful for float constants
			if(p->from.type == D_FCONST && p->from.u.dval != p0->from.u.dval)
				return;
			// identical reload: drop it and keep looking
			excise(r);
			continue;

		default:	// rar (2), use and set (4), anything else
			return;
		}
	}
}
/*
 * findpre walks backward from r along the chain of unique
 * predecessors and returns the closest instruction that mentions v.
 * That instruction must set v (copyu result 3 or 4); if v is merely
 * used or read-alter-rewritten first, or if the path back is not
 * unique in both directions, nil is returned.
 */
static Flow*
findpre(Flow *r, Adr *v)
{
	Flow *prev;
	int how;

	prev = uniqp(r);
	while(prev != nil) {
		/* r must be the only successor of prev */
		if(uniqs(prev) != r)
			return nil;

		how = copyu(prev->prog, v, A);
		if(how == 1 || how == 2)	/* used, read-alter-rewrite */
			return nil;
		if(how == 3 || how == 4)	/* set, set and used */
			return prev;

		r = prev;
		prev = uniqp(r);
	}
	return nil;
}
/*
 * findpre walks backward from r along the chain of unique
 * predecessors and returns the closest instruction that mentions v.
 * That instruction must set v (copyu result 3 or 4); if v is merely
 * used or read-alter-rewritten first, or if the path back is not
 * unique in both directions, R is returned.
 */
Reg*
findpre(Reg *r, Adr *v)
{
	Reg *prev;
	int how;

	prev = uniqp(r);
	while(prev != R) {
		/* r must be the only successor of prev */
		if(uniqs(prev) != r)
			return R;

		how = copyu(prev->prog, v, A);
		if(how == 1 || how == 2)	/* used, read-alter-rewrite */
			return R;
		if(how == 3 || how == 4)	/* set, set and used */
			return prev;

		r = prev;
		prev = uniqp(r);
	}
	return R;
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.  With debug['P'] (and debug['v'] for the search phase)
 * every decision is traced with print.
 */
int
subprop(Reg *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Reg *r;
	int t;

	if(debug['P'] && debug['v'])
		print("subprop %P\n", r0->prog);
	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v1);
		return 0;
	}
	v2 = &p->to;
	if(!regtyp(v2)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v2);
		return 0;
	}
	/* search backward along unique predecessors for the MOV that sets v1 */
	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
		if(debug['P'] && debug['v'])
			print("\t? %P\n", r->prog);
		if(uniqs(r) == R) {
			if(debug['P'] && debug['v'])
				print("\tno unique successor\n");
			break;
		}
		p = r->prog;
		switch(p->as) {
		case ACALL:
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;

		/* IMUL with an explicit destination is handled like any other instruction */
		case AIMULL:
		case AIMULQ:
		case AIMULW:
			if(p->to.type != D_NONE)
				break;
			goto giveup;

		/* rotates and shifts are only acceptable with a constant count */
		case ARCLB:
		case ARCLL:
		case ARCLQ:
		case ARCLW:
		case ARCRB:
		case ARCRL:
		case ARCRQ:
		case ARCRW:
		case AROLB:
		case AROLL:
		case AROLQ:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORQ:
		case ARORW:
		case ASALB:
		case ASALL:
		case ASALQ:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARQ:
		case ASARW:
		case ASHLB:
		case ASHLL:
		case ASHLQ:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRQ:
		case ASHRW:
			if(p->from.type == D_CONST)
				break;
			goto giveup;

		/* these opcodes always abandon the substitution */
		case ADIVB:
		case ADIVL:
		case ADIVQ:
		case ADIVW:
		case AIDIVB:
		case AIDIVL:
		case AIDIVQ:
		case AIDIVW:
		case AIMULB:
		case AMULB:
		case AMULL:
		case AMULQ:
		case AMULW:

		case AREP:
		case AREPN:

		case ACWD:
		case ACDQ:
		case ACQO:

		case ASTOSB:
		case ASTOSL:
		case ASTOSQ:
		case AMOVSB:
		case AMOVSL:
		case AMOVSQ:
		giveup:
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;

		/* a MOV whose destination has the same type field as v1 is the one we want */
		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			if(p->to.type == v1->type)
				goto gotit;
			break;
		}
		/* stop searching if copyau reports v2 in either operand */
		if(copyau(&p->from, v2) ||
		   copyau(&p->to, v2)) {
			if(debug['P'] && debug['v'])
				print("\tcopyau %D failed\n", v2);
			break;
		}
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0)) {
			if(debug['P'] && debug['v'])
				print("\tcopysub failed\n");
			break;
		}
	}
	if(debug['P'] && debug['v'])
		print("\tran off end; return 0\n");
	return 0;

gotit:
	/* rewrite the defining MOV, then every instruction between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the operands of the MOV at r0 by exchanging their type fields */
	t = v1->type;
	v1->type = v2->type;
	v2->type = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
/*
 * pushback moves the instruction at r0 earlier in the instruction
 * stream.  It walks backward over unique predecessors for as long as
 * each instruction is either an ANOP or a regconsttyp-source /
 * regtyp-destination instruction for which copyu reports no
 * interaction with r0's destination in either direction, and is not
 * an ACALL.  b is the earliest such node.
 *
 * If a position was found, the as/lineno/from/to fields of the
 * instructions from b up to r0 are shifted one slot later and r0's
 * original contents are stored into b's slot.  With debug['v'] the
 * affected instructions are printed before and after.
 */
static void
pushback(Reg *r0)
{
	Reg *r, *b;
	Prog *p0, *p, t;

	b = R;
	p0 = r0->prog;
	for(r=uniqp(r0); r!=R && uniqs(r)!=R; r=uniqp(r)) {
		p = r->prog;
		if(p->as != ANOP) {
			if(!regconsttyp(&p->from) || !regtyp(&p->to))
				break;
			/* neither instruction may touch the other's destination */
			if(copyu(p, &p0->to, A) || copyu(p0, &p->to, A))
				break;
		}
		if(p->as == ACALL)
			break;
		b = r;
	}

	/* nothing to move past */
	if(b == R) {
		if(debug['v']) {
			print("no pushback: %P\n", r0->prog);
			if(r)
				print("\t%P [%d]\n", r->prog, uniqs(r)!=R);
		}
		return;
	}

	if(debug['v']) {
		print("pushback\n");
		for(r=b;; r=r->link) {
			print("\t%P\n", r->prog);
			if(r == r0)
				break;
		}
	}

	/* save r0's instruction, then slide b..pred(r0) down by one slot */
	t = *r0->prog;
	for(r=uniqp(r0);; r=uniqp(r)) {
		/* NOTE(review): uses r->link as the slot following r — presumably the same node as its unique successor; confirm */
		p0 = r->link->prog;
		p = r->prog;
		p0->as = p->as;
		p0->lineno = p->lineno;
		p0->from = p->from;
		p0->to = p->to;

		if(r == b)
			break;
	}
	/* r == b here: drop the saved instruction into the vacated slot */
	p0 = r->prog;
	p0->as = t.as;
	p0->lineno = t.lineno;
	p0->from = t.from;
	p0->to = t.to;

	if(debug['v']) {
		print("\tafter\n");
		for(r=b;; r=r->link) {
			print("\t%P\n", r->prog);
			if(r == r0)
				break;
		}
	}
}
// peep is the peephole optimizer entry point.  It builds a flow graph
// for the instructions starting at firstp and runs, in order:
// short-move elimination, constant propagation, an iterated
// copy/substitution propagation loop with a few local rewrites,
// MOVLQZX/MOVSD cleanup, and load pushback.
void
peep(Prog *firstp)
{
	Flow *node, *succ;
	Graph *g;
	Prog *ins, *nxt;
	int changed, seq;

	g = flowstart(firstp, sizeof(Flow));
	if(g == nil)
		return;

	// number the nodes in list order
	seq = 0;
	for(node = g->start; node != nil; node = node->link)
		node->active = seq++;

	// byte, word arithmetic elimination.
	elimshortmov(g);

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	for(node = g->start; node != nil; node = node->link) {
		ins = node->prog;
		switch(ins->as) {
		case ALEAL:
		case ALEAQ:
			if(regtyp(&ins->to) && ins->from.sym != S &&
			   (ins->from.index == D_NONE || ins->from.index == D_CONST))
				conprop(node);
			break;

		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			if(regtyp(&ins->to) && ins->from.type == D_CONST)
				conprop(node);
			break;
		}
	}

	// repeat the main pass until it makes no change
	do {
		if(debug['P'] && debug['v'])
			dumpit("loop1", g->start, 0);

		changed = 0;
		for(node = g->start; node != nil; node = node->link) {
			ins = node->prog;
			switch(ins->as) {
			case AMOVL:
			case AMOVQ:
			case AMOVSS:
			case AMOVSD:
				// register-to-register move: try to make it dead
				if(!regtyp(&ins->to) || !regtyp(&ins->from))
					break;
				if(copyprop(g, node) || (subprop(node) && copyprop(g, node))) {
					excise(node);
					changed++;
				}
				break;

			case AMOVBLZX:
			case AMOVWLZX:
			case AMOVBLSX:
			case AMOVWLSX:
				// same extension applied twice in a row: second becomes MOVL
				if(!regtyp(&ins->to))
					break;
				succ = rnops(uniqs(node));
				if(succ == nil)
					break;
				nxt = succ->prog;
				if(ins->as == nxt->as && ins->to.type == nxt->from.type) {
					nxt->as = AMOVL;
					changed++;
				}
				break;

			case AMOVBQSX:
			case AMOVBQZX:
			case AMOVWQSX:
			case AMOVWQZX:
			case AMOVLQSX:
			case AMOVLQZX:
			case AMOVQL:
				// same conversion applied twice in a row: second becomes MOVQ
				if(!regtyp(&ins->to))
					break;
				succ = rnops(uniqs(node));
				if(succ == nil)
					break;
				nxt = succ->prog;
				if(ins->as == nxt->as && ins->to.type == nxt->from.type) {
					nxt->as = AMOVQ;
					changed++;
				}
				break;

			case AADDL:
			case AADDQ:
			case AADDW:
				// ADD $-1 / ADD $1 become DEC / INC unless needc objects
				if(ins->from.type != D_CONST || needc(ins->link))
					break;
				if(ins->from.offset == -1) {
					ins->as = ins->as == AADDQ ? ADECQ : ins->as == AADDL ? ADECL : ADECW;
					ins->from = zprog.from;
				} else if(ins->from.offset == 1) {
					ins->as = ins->as == AADDQ ? AINCQ : ins->as == AADDL ? AINCL : AINCW;
					ins->from = zprog.from;
				}
				break;

			case ASUBL:
			case ASUBQ:
			case ASUBW:
				// SUB $-1 / SUB $1 become INC / DEC unless needc objects
				if(ins->from.type != D_CONST || needc(ins->link))
					break;
				if(ins->from.offset == -1) {
					ins->as = ins->as == ASUBQ ? AINCQ : ins->as == ASUBL ? AINCL : AINCW;
					ins->from = zprog.from;
				} else if(ins->from.offset == 1) {
					ins->as = ins->as == ASUBQ ? ADECQ : ins->as == ASUBL ? ADECL : ADECW;
					ins->from = zprog.from;
				}
				break;
			}
		}
	} while(changed);

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop.  Now that copyprop is done, remove MOVLQZX R1, R1
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one.  We only use the lower one, but
	// the processor can do better if we do moves using both.
	for(node = g->start; node != nil; node = node->link) {
		ins = node->prog;
		if(ins->as == AMOVLQZX && regtyp(&ins->from) &&
		   ins->from.type == ins->to.type && prevl(node, ins->from.type))
			excise(node);

		if(ins->as == AMOVSD && regtyp(&ins->from) && regtyp(&ins->to))
			ins->as = AMOVAPD;
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for(node = g->start; node != nil; node = node->link) {
		ins = node->prog;
		switch(ins->as) {
		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVLQZX:
			if(regtyp(&ins->to) && !regconsttyp(&ins->from))
				pushback(node);
		}
	}

	flowend(g);
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.
 */
int
subprop(Reg *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Reg *r;
	int t;

	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1))
		return 0;
	v2 = &p->to;
	if(!regtyp(v2))
		return 0;
	/* search backward along unique predecessors for the instruction that sets v1 */
	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
		if(uniqs(r) == R)
			break;
		p = r->prog;
		switch(p->as) {
		case ABL:
			return 0;

		case AMULLU:
		case AMULA:
		case AMVN:
			return 0;

		/*
		 * unconditional arithmetic writing v1: make the middle
		 * operand explicit (p->reg) so the destination can be renamed
		 */
		case ACMN:
		case AADD:
		case ASUB:
		case ASBC:
		case ARSB:
		case ASLL:
		case ASRL:
		case ASRA:
		case AORR:
		case AAND:
		case AEOR:
		case AMUL:
		case AMULU:
		case ADIV:
		case ADIVU:
		case AMOD:
		case AMODU:

		case AADDD:
		case AADDF:
		case ASUBD:
		case ASUBF:
		case AMULD:
		case AMULF:
		case ADIVD:
		case ADIVF:
			if(p->to.type == v1->type)
			if(p->to.reg == v1->reg)
			if(p->scond == C_SCOND_NONE) {
				if(p->reg == NREG)
					p->reg = p->to.reg;
				goto gotit;
			}
			break;

		/* unconditional move into v1 */
		case AMOVF:
		case AMOVD:
		case AMOVW:
			if(p->to.type == v1->type)
			if(p->to.reg == v1->reg)
			if(p->scond == C_SCOND_NONE)
				goto gotit;
			break;

		/* a MOVM whose D_CONST operand has bit v2->reg set stops the substitution */
		case AMOVM:
			t = 1<<v2->reg;
			if((p->from.type == D_CONST && (p->from.offset&t)) ||
			   (p->to.type == D_CONST && (p->to.offset&t)))
				return 0;
			break;
		}
		/* stop searching if v2 is reported in any operand */
		if(copyau(&p->from, v2) ||
		   copyau1(p, v2) ||
		   copyau(&p->to, v2))
			break;
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub1(p, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0))
			break;
	}
	return 0;

gotit:
	/* rewrite the defining instruction, then everything between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub1(p, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the registers of the MOV at r0 */
	t = v1->reg;
	v1->reg = v2->reg;
	v2->reg = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.  With debug['P'] (and debug['v'] for the search phase)
 * every decision is traced with print.
 */
static int
subprop(Flow *r0)
{
	Prog *p;
	ProgInfo info;
	Adr *v1, *v2;
	Flow *r;
	int t;

	if(debug['P'] && debug['v'])
		print("subprop %P\n", r0->prog);
	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v1);
		return 0;
	}
	v2 = &p->to;
	if(!regtyp(v2)) {
		if(debug['P'] && debug['v'])
			print("\tnot regtype %D; return 0\n", v2);
		return 0;
	}
	/* search backward along unique predecessors for the MOV that sets v1 */
	for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
		if(debug['P'] && debug['v'])
			print("\t? %P\n", r->prog);
		if(uniqs(r) == nil) {
			if(debug['P'] && debug['v'])
				print("\tno unique successor\n");
			break;
		}
		p = r->prog;
		proginfo(&info, p);
		/* calls abandon the substitution */
		if(info.flags & Call) {
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;
		}

		/* so does any instruction with a non-empty reguse or regset in its ProgInfo */
		if(info.reguse | info.regset) {
			if(debug['P'] && debug['v'])
				print("\tfound %P; return 0\n", p);
			return 0;
		}

		/* a Move of size L, Q, F or D whose destination has the same type field as v1 */
		if((info.flags & Move) && (info.flags & (SizeL|SizeQ|SizeF|SizeD)) && p->to.type == v1->type)
			goto gotit;

		/* stop searching if copyau reports v2 in either operand */
		if(copyau(&p->from, v2) ||
		   copyau(&p->to, v2)) {
			if(debug['P'] && debug['v'])
				print("\tcopyau %D failed\n", v2);
			break;
		}
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0)) {
			if(debug['P'] && debug['v'])
				print("\tcopysub failed\n");
			break;
		}
	}
	if(debug['P'] && debug['v'])
		print("\tran off end; return 0\n");
	return 0;

gotit:
	/* rewrite the defining MOV, then every instruction between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the operands of the MOV at r0 by exchanging their type fields */
	t = v1->type;
	v1->type = v2->type;
	v2->type = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.
 */
int
subprop(Reg *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Reg *r;
	int t;

	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1))
		return 0;
	v2 = &p->to;
	if(!regtyp(v2))
		return 0;
	/* search backward along unique predecessors for the instruction that sets v1 */
	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
		if(uniqs(r) == R)
			break;
		p = r->prog;
		switch(p->as) {
		case AJMPL:
			return 0;

		/*
		 * arithmetic writing v1: make the middle operand
		 * explicit (p->reg) so the destination can be renamed
		 */
		case AADD:
		case ASUB:
		case ASLL:
		case ASRL:
		case ASRA:
		case AOR:
		case AAND:
		case AXOR:
		case AMUL:
		case ADIV:
		case ADIVL:
		case AMOD:
		case AMODL:

		case AFADDD:
		case AFADDF:
		case AFSUBD:
		case AFSUBF:
		case AFMULD:
		case AFMULF:
		case AFDIVD:
		case AFDIVF:
			if(p->to.type == v1->type)
			if(p->to.reg == v1->reg) {
				if(p->reg == NREG)
					p->reg = p->to.reg;
				goto gotit;
			}
			break;

		/* move into v1 */
		case AFMOVF:
		case AFMOVD:
		case AMOVW:
			if(p->to.type == v1->type)
			if(p->to.reg == v1->reg)
				goto gotit;
			break;
		}
		/* stop searching if v2 is reported in any operand */
		if(copyau(&p->from, v2) ||
		   copyau1(p, v2) ||
		   copyau(&p->to, v2))
			break;
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub1(p, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0))
			break;
	}
	return 0;

gotit:
	/* rewrite the defining instruction, then everything between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub1(p, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the registers of the MOV at r0 */
	t = v1->reg;
	v1->reg = v2->reg;
	v2->reg = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
/*
 * xtramodes tries to fold a neighbouring ADD on the base register of
 * the memory operand a (an operand of the instruction at r) into the
 * operand itself, using an offset form with C_WBIT (marked
 * pre-indexing below) or C_PBIT (marked post-indexing below).
 *
 * Three shapes are recognised:
 *   - an ADD that sets the base register before r (register, shifted
 *     register or constant in (-4096, 4096)): becomes the offset of a,
 *     with C_WBIT if the register is used afterwards;
 *   - a MOVW that copies a register into the base before r, with an
 *     increment of the source register immediately before r:
 *     becomes C_PBIT on r;
 *   - an increment of the base register found after r: C_PBIT on r.
 *
 * Returns 1 if r was rewritten (and the folded instruction excised),
 * 0 otherwise.
 */
int
xtramodes(Reg *r, Adr *a)
{
	Reg *r1, *r2, *r3;
	Prog *p, *p1;
	Adr v;

	p = r->prog;
	/* with debug['h'] set, byte loads from D_OREG operands are left alone */
	if(debug['h'] && p->as == AMOVB && p->from.type == D_OREG)	/* byte load */
		return 0;
	/* v names the base register of a as a plain D_REG operand */
	v = *a;
	v.type = D_REG;
	r1 = findpre(r, &v);
	if(r1 != R) {
		p1 = r1->prog;
		if(p1->to.type == D_REG && p1->to.reg == v.reg)
		switch(p1->as) {
		case AADD:
			/*
			 * acceptable addends: a register; a shift with bit 4 of the
			 * offset clear (with extra restrictions for AMOVB); or a
			 * constant in (-4096, 4096)
			 */
			if(p1->from.type == D_REG ||
			   (p1->from.type == D_SHIFT && (p1->from.offset&(1<<4)) == 0 &&
			    (p->as != AMOVB || (a == &p->from && (p1->from.offset&~0xf) == 0))) ||
			   (p1->from.type == D_CONST &&
			    p1->from.offset > -4096 && p1->from.offset < 4096))
			if(nochange(uniqs(r1), r, p1)) {
				/* unless r itself overwrites the base, a later use forces writeback */
				if(a != &p->from || v.reg != p->to.reg)
				if (finduse(r->s1, &v)) {
					if(p1->reg == NREG || p1->reg == v.reg)
						/* pre-indexing */
						p->scond |= C_WBIT;
					else return 0;
				}
				switch (p1->from.type) {
				case D_REG:
					/* register offset */
					a->type = D_SHIFT;
					a->offset = p1->from.reg;
					break;
				case D_SHIFT:
					/* scaled register offset */
					a->type = D_SHIFT;
					/* fallthrough: offset is copied below */
				case D_CONST:
					/* immediate offset */
					a->offset = p1->from.offset;
					break;
				}
				if(p1->reg != NREG)
					a->reg = p1->reg;
				excise(r1);
				return 1;
			}
			break;
		case AMOVW:
			if(p1->from.type == D_REG)
			if((r2 = findinc(r1, r, &p1->from)) != R) {
				/* skip NOPs between the increment and r */
				for(r3=uniqs(r2); r3->prog->as==ANOP; r3=uniqs(r3))
					;
				if(r3 == r) {
					/* post-indexing */
					p1 = r2->prog;
					a->reg = p1->to.reg;
					a->offset = p1->from.offset;
					p->scond |= C_PBIT;
					/* the copy is dead unless its destination is used after r */
					if(!finduse(r, &r1->prog->to))
						excise(r1);
					excise(r2);
					return 1;
				}
			}
			break;
		}
	}
	/* otherwise look for an increment of the base after r */
	if(a != &p->from || a->reg != p->to.reg)
	if((r1 = findinc(r, R, &v)) != R) {
		/* post-indexing */
		p1 = r1->prog;
		a->offset = p1->from.offset;
		p->scond |= C_PBIT;
		excise(r1);
		return 1;
	}
	return 0;
}
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.  Note that p->reg of scanned instructions may be
 * filled in even when 0 is returned.
 */
static int
subprop(Flow *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Flow *r;
	int t;
	ProgInfo info;

	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1))
		return 0;
	v2 = &p->to;
	if(!regtyp(v2))
		return 0;
	/* search backward along unique predecessors for the instruction that sets v1 */
	for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
		if(uniqs(r) == nil)
			break;
		p = r->prog;
		proginfo(&info, p);
		if(info.flags & Call)
			return 0;

		/*
		 * CanRegRead with a register destination: make the middle
		 * operand explicit (p->reg = p->to.reg) and adjust the local
		 * flags to RegRead without RightRead
		 */
		if((info.flags & CanRegRead) && p->to.type == D_REG) {
			info.flags |= RegRead;
			info.flags &= ~(CanRegRead | RightRead);
			p->reg = p->to.reg;
		}

		switch(p->as) {
		case AMULLU:
		case AMULA:
		case AMVN:
			return 0;
		}
		
		/* an unconditional pure write (written, not read) of v1 */
		if((info.flags & (RightRead|RightWrite)) == RightWrite) {
			if(p->to.type == v1->type)
			if(p->to.reg == v1->reg)
			if(p->scond == C_SCOND_NONE)
				goto gotit;
		}

		/* stop searching if v2 is reported in any operand */
		if(copyau(&p->from, v2) ||
		   copyau1(p, v2) ||
		   copyau(&p->to, v2))
			break;
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub1(p, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0))
			break;
	}
	return 0;

gotit:
	/* rewrite the defining instruction, then everything between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub1(p, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the registers of the MOV at r0 */
	t = v1->reg;
	v1->reg = v2->reg;
	v2->reg = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } 
if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } }
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.
 */
static int
subprop(Flow *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Flow *r;
	int t;
	ProgInfo info;

	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1))
		return 0;
	v2 = &p->to;
	if(!regtyp(v2))
		return 0;
	/* search backward along unique predecessors for the MOV that sets v1 */
	for(r=uniqp(r0); r!=nil; r=uniqp(r)) {
		if(debug['P'] && debug['v'])
			print("\t? %P\n", r->prog);
		if(uniqs(r) == nil)
			break;
		p = r->prog;
		proginfo(&info, p);
		/* calls abandon the substitution */
		if(info.flags & Call)
			return 0;

		/* so does any instruction with a non-empty reguse or regset in its ProgInfo */
		if(info.reguse | info.regset)
			return 0;

		/* a Move of size L, Q, F or D whose destination has the same type field as v1 */
		if((info.flags & Move) && (info.flags & (SizeL|SizeQ|SizeF|SizeD)) && p->to.type == v1->type)
			goto gotit;

		/* stop searching if copyau reports v2 in either operand */
		if(copyau(&p->from, v2) || copyau(&p->to, v2))
			break;
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) || copysub(&p->to, v1, v2, 0))
			break;
	}
	return 0;

gotit:
	/* rewrite the defining MOV, then every instruction between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the operands of the MOV at r0 by exchanging their type fields */
	t = v1->type;
	v1->type = v2->type;
	v2->type = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } 
break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; }
/*
 * the idea is to substitute
 * one register for another
 * from one MOV to another
 *	MOV	a, R0
 *	ADD	b, R0	/ no use of R1
 *	MOV	R0, R1
 * would be converted to
 *	MOV	a, R1
 *	ADD	b, R1
 *	MOV	R1, R0
 * hopefully, then the former or latter MOV
 * will be eliminated by copy propagation.
 *
 * r0 is the node of the candidate MOV; v1 is its source operand and
 * v2 its destination.  Returns 1 after performing the substitution,
 * 0 otherwise.
 */
int
subprop(Reg *r0)
{
	Prog *p;
	Adr *v1, *v2;
	Reg *r;
	int t;

	p = r0->prog;
	/* both operands of the MOV must satisfy regtyp */
	v1 = &p->from;
	if(!regtyp(v1))
		return 0;
	v2 = &p->to;
	if(!regtyp(v2))
		return 0;
	/* search backward along unique predecessors for the MOV that sets v1 */
	for(r=uniqp(r0); r!=R; r=uniqp(r)) {
		if(uniqs(r) == R)
			break;
		p = r->prog;
		switch(p->as) {
		case ACALL:
			return 0;

		/* IMUL with an explicit destination is handled like any other instruction */
		case AIMULL:
		case AIMULQ:
		case AIMULW:
			if(p->to.type != D_NONE)
				break;
			/* fallthrough: otherwise give up like the opcodes below */

		/* these opcodes always abandon the substitution */
		case ADIVB:
		case ADIVL:
		case ADIVQ:
		case ADIVW:
		case AIDIVB:
		case AIDIVL:
		case AIDIVQ:
		case AIDIVW:
		case AIMULB:
		case AMULB:
		case AMULL:
		case AMULQ:
		case AMULW:

		case ARCLB:
		case ARCLL:
		case ARCLQ:
		case ARCLW:
		case ARCRB:
		case ARCRL:
		case ARCRQ:
		case ARCRW:
		case AROLB:
		case AROLL:
		case AROLQ:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORQ:
		case ARORW:
		case ASALB:
		case ASALL:
		case ASALQ:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARQ:
		case ASARW:
		case ASHLB:
		case ASHLL:
		case ASHLQ:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRQ:
		case ASHRW:

		case AREP:
		case AREPN:

		case ACWD:
		case ACDQ:
		case ACQO:

		case ASTOSB:
		case ASTOSL:
		case ASTOSQ:
		case AMOVSB:
		case AMOVSL:
		case AMOVSQ:
			return 0;

		/* a MOV whose destination has the same type field as v1 is the one we want */
		case AMOVL:
		case AMOVQ:
			if(p->to.type == v1->type)
				goto gotit;
			break;
		}
		/* stop searching if copyau reports v2 in either operand */
		if(copyau(&p->from, v2) ||
		   copyau(&p->to, v2))
			break;
		/* trial substitution (last argument 0); nonzero means it cannot be done here */
		if(copysub(&p->from, v1, v2, 0) ||
		   copysub(&p->to, v1, v2, 0))
			break;
	}
	return 0;

gotit:
	/* rewrite the defining MOV, then every instruction between it and r0 */
	copysub(&p->to, v1, v2, 1);
	if(debug['P']) {
		print("gotit: %D->%D\n%P", v1, v2, r->prog);
		if(p->from.type == v2->type)
			print(" excise");
		print("\n");
	}
	for(r=uniqs(r); r!=r0; r=uniqs(r)) {
		p = r->prog;
		copysub(&p->from, v1, v2, 1);
		copysub(&p->to, v1, v2, 1);
		if(debug['P'])
			print("%P\n", r->prog);
	}
	/* finally swap the operands of the MOV at r0 by exchanging their type fields */
	t = v1->type;
	v1->type = v2->type;
	v2->type = t;
	if(debug['P'])
		print("%P last\n", r->prog);
	return 1;
}
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } pc = 0; /* speculating it won't kill */ loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLSX: case AMOVBLZX: case AMOVWLSX: case AMOVWLZX: if(regtyp(&p->to)) { r1 = uniqs(r); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type) p1->as = AMOVL; } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } break; } } if(t) goto loop1; }
// peep is the peephole optimizer entry point.  It builds a flow graph
// for the instructions starting at firstp and runs, in order:
// short-move elimination, constant propagation, an iterated
// copy/substitution propagation loop with a few local rewrites, and
// the MOVSD -> MOVAPD replacement.
void
peep(Prog *firstp)
{
	Flow *node, *succ;
	Graph *g;
	Prog *ins, *nxt;
	int changed, seq;

	g = flowstart(firstp, sizeof(Flow));
	if(g == nil)
		return;

	// number the nodes in list order
	seq = 0;
	for(node = g->start; node != nil; node = node->link)
		node->active = seq++;

	// byte, word arithmetic elimination.
	elimshortmov(g);

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	for(node = g->start; node != nil; node = node->link) {
		ins = node->prog;
		switch(ins->as) {
		case ALEAL:
			if(regtyp(&ins->to) && ins->from.sym != S &&
			   (ins->from.index == D_NONE || ins->from.index == D_CONST))
				conprop(node);
			break;

		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVSS:
		case AMOVSD:
			if(regtyp(&ins->to) && ins->from.type == D_CONST)
				conprop(node);
			break;
		}
	}

	// repeat the main pass until it makes no change
	do {
		if(debug['P'] && debug['v'])
			dumpit("loop1", g->start, 0);

		changed = 0;
		for(node = g->start; node != nil; node = node->link) {
			ins = node->prog;
			switch(ins->as) {
			case AMOVL:
			case AMOVSS:
			case AMOVSD:
				// register-to-register move: try to make it dead
				if(!regtyp(&ins->to) || !regtyp(&ins->from))
					break;
				if(copyprop(g, node) || (subprop(node) && copyprop(g, node))) {
					excise(node);
					changed++;
				}
				break;

			case AMOVBLZX:
			case AMOVWLZX:
			case AMOVBLSX:
			case AMOVWLSX:
				// same extension applied twice in a row: second becomes MOVL
				if(!regtyp(&ins->to))
					break;
				succ = rnops(uniqs(node));
				if(succ == nil)
					break;
				nxt = succ->prog;
				if(ins->as == nxt->as && ins->to.type == nxt->from.type) {
					nxt->as = AMOVL;
					changed++;
				}
				break;

			case AADDL:
			case AADDW:
				// ADD $-1 / ADD $1 become DEC / INC unless needc objects
				if(ins->from.type != D_CONST || needc(ins->link))
					break;
				if(ins->from.offset == -1) {
					ins->as = ins->as == AADDL ? ADECL : ADECW;
					ins->from = zprog.from;
				} else if(ins->from.offset == 1) {
					ins->as = ins->as == AADDL ? AINCL : AINCW;
					ins->from = zprog.from;
				}
				break;

			case ASUBL:
			case ASUBW:
				// SUB $-1 / SUB $1 become INC / DEC unless needc objects
				if(ins->from.type != D_CONST || needc(ins->link))
					break;
				if(ins->from.offset == -1) {
					ins->as = ins->as == ASUBL ? AINCL : AINCW;
					ins->from = zprog.from;
				} else if(ins->from.offset == 1) {
					ins->as = ins->as == ASUBL ? ADECL : ADECW;
					ins->from = zprog.from;
				}
				break;
			}
		}
	} while(changed);

	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one.  We only use the lower one, but
	// the processor can do better if we do moves using both.
	for(node = g->start; node != nil; node = node->link) {
		ins = node->prog;
		if(ins->as == AMOVSD && regtyp(&ins->from) && regtyp(&ins->to))
			ins->as = AMOVAPD;
	}

	flowend(g);
}
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: case ALOCALS: case ATYPE: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } 
if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } }
/*
 * shiftprop tries to fold the shift instruction held by r into the
 * instruction that first uses its result, by rewriting that instruction's
 * from operand as a D_SHIFT operand.  It does not itself remove the shift
 * instruction.
 *
 * Returns 1 once the substitution has been made.  Every abort goes through
 * FAIL(...), a macro defined outside this block.
 * NOTE(review): FAIL presumably reports its message under debug['H'] and
 * returns 0 from this function -- confirm against its definition.
 */
int
shiftprop(Reg *r)
{
	Reg *r1;
	Prog *p, *p1, *p2;
	int n, o;
	Adr a;

	p = r->prog;
	if(p->to.type != D_REG)
		FAIL("BOTCH: result not reg");
	/* n is the register that receives the shift result */
	n = p->to.reg;
	/*
	 * a describes p->reg as a register operand when it is set and differs
	 * from the destination; otherwise a stays equal to zprog.from
	 */
	a = zprog.from;
	if(p->reg != NREG && p->reg != p->to.reg) {
		a.type = D_REG;
		a.reg = p->reg;
	}
	if(debug['H'])
		print("shiftprop\n%P", p);
	r1 = r;
	for(;;) {
		/* find first use of shift result; abort if shift operands or result are changed */
		r1 = uniqs(r1);
		if(r1 == R)
			FAIL("branch");
		if(uniqp(r1) == R)
			FAIL("merge");
		p1 = r1->prog;
		if(debug['H'])
			print("\n%P", p1);
		switch(copyu(p1, &p->to, A)) {
		case 0:	/* not used or set */
			/*
			 * NOTE(review): a copyu result above 1 is treated here as
			 * "operand modified" -- confirm copyu's return codes
			 */
			if((p->from.type == D_REG && copyu(p1, &p->from, A) > 1) ||
			   (a.type == D_REG && copyu(p1, &a, A) > 1))
				FAIL("args modified");
			continue;
		case 3:	/* set, not used */
			FAIL("BOTCH: noref");
		}
		/* any other copyu result: p1 is the instruction to examine */
		break;
	}
	/* check whether substitution can be done */
	switch(p1->as) {
	default:
		FAIL("non-dpi");
	case AAND:
	case AEOR:
	case AADD:
	case AADC:
	case AORR:
	case ASUB:
	case ARSB:
	case ASBC:
	case ARSC:
		/*
		 * n appears as p1->reg (or as p1->to.reg while p1->reg is NREG):
		 * exchange it with the from register so that n ends up in from;
		 * the subtract opcodes are exchanged with their reverse forms
		 */
		if(p1->reg == n || (p1->reg == NREG && p1->to.type == D_REG && p1->to.reg == n)) {
			if(p1->from.type != D_REG)
				FAIL("can't swap");
			p1->reg = p1->from.reg;
			p1->from.reg = n;
			switch(p1->as) {
			case ASUB:
				p1->as = ARSB;
				break;
			case ARSB:
				p1->as = ASUB;
				break;
			case ASBC:
				p1->as = ARSC;
				break;
			case ARSC:
				p1->as = ASBC;
				break;
			}
			if(debug['H'])
				print("\t=>%P", p1);
		}
		/* fall through: the checks below also apply to the opcodes above */
	case ABIC:
	case ACMP:
	case ACMN:
		if(p1->reg == n)
			FAIL("can't swap");
		if(p1->reg == NREG && p1->to.reg == n)
			FAIL("shift result used twice");
		/* fall through */
	case AMVN:
		if(p1->from.type == D_SHIFT)
			FAIL("shift result used in shift");
		if(p1->from.type != D_REG || p1->from.reg != n)
			FAIL("BOTCH: where is it used?");
		break;
	}
	/* check whether shift result is used subsequently */
	/* p2 remembers the instruction to rewrite; p1 is reused by the scan */
	p2 = p1;
	/* the scan is skipped when the using instruction's to.reg is n */
	if(p1->to.reg != n)
	for (;;) {
		r1 = uniqs(r1);
		if(r1 == R)
			FAIL("inconclusive");
		p1 = r1->prog;
		if(debug['H'])
			print("\n%P", p1);
		switch(copyu(p1, &p->to, A)) {
		case 0:	/* not used or set */
			continue;
		case 3:	/* set, not used */
			break;
		default:/* used */
			FAIL("reused");
		}
		break;
	}
	/* make the substitution */
	p2->from.type = D_SHIFT;
	p2->from.reg = NREG;
	/* o starts as the register being shifted: p->reg, or p->to.reg when p->reg is NREG */
	o = p->reg;
	if(o == NREG)
		o = p->to.reg;
	switch(p->from.type){
	case D_CONST:
		/* constant count: its low 5 bits are placed at bit 7 */
		o |= (p->from.offset&0x1f)<<7;
		break;
	case D_REG:
		/* register count: bit 4 is set and the count register number is placed at bit 8 */
		o |= (1<<4) | (p->from.reg<<8);
		break;
	}
	/* shift kind is placed at bit 5: 0 for ASLL, 1 for ASRL, 2 for ASRA */
	switch(p->as){
	case ASLL:
		o |= 0<<5;
		break;
	case ASRL:
		o |= 1<<5;
		break;
	case ASRA:
		o |= 2<<5;
		break;
	}
	p2->from.offset = o;
	if(debug['H'])
		print("\t=>%P\tSUCCEED\n", p2);
	return 1;
}