void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: case ALOCALS: case ATYPE: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } 
if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } }
/*
 * peep runs the peephole optimizer on the instruction list starting at
 * firstp.  It builds a flow graph with flowstart (returning at once if
 * that yields nil), numbers the nodes, then applies short-op widening,
 * constant propagation, the iterated copy-propagation / simplification
 * pass, MOVLQZX and MOVSD cleanup and load pushback, and finally
 * releases the graph with flowend.
 */
void
peep(Prog *firstp)
{
	Flow *r, *succ;
	Graph *g;
	Prog *p, *q;
	int n;

	g = flowstart(firstp, sizeof(Flow));
	if(g == nil)
		return;

	// number the flow nodes in list order.
	n = 0;
	for(r = g->start; r != nil; r = r->link) {
		r->active = n;
		n++;
	}

	// byte, word arithmetic elimination.
	elimshortmov(g);

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	for(r = g->start; r != nil; r = r->link) {
		p = r->prog;
		switch(p->as) {
		case ALEAL:
		case ALEAQ:
			// only LEAs of a symbol whose index is D_NONE or D_CONST.
			if(regtyp(&p->to) && p->from.sym != S &&
			   (p->from.index == D_NONE || p->from.index == D_CONST))
				conprop(r);
			break;

		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVSS:
		case AMOVSD:
			if(regtyp(&p->to) && p->from.type == D_CONST)
				conprop(r);
			break;
		}
	}

	// Repeat the simplification pass until a full sweep makes no change.
	do {
		if(debug['P'] && debug['v'])
			dumpit("loop1", g->start, 0);

		n = 0;
		for(r = g->start; r != nil; r = r->link) {
			p = r->prog;
			switch(p->as) {
			case AMOVL:
			case AMOVQ:
			case AMOVSS:
			case AMOVSD:
				// register-to-register move: drop it if copyprop
				// succeeds, directly or after subprop.
				if(regtyp(&p->to) && regtyp(&p->from) &&
				   (copyprop(g, r) || (subprop(r) && copyprop(g, r)))) {
					excise(r);
					n++;
				}
				break;

			case AMOVBLZX:
			case AMOVWLZX:
			case AMOVBLSX:
			case AMOVWLSX:
				// the same extension applied again to our result
				// becomes a plain MOVL.
				if(!regtyp(&p->to))
					break;
				succ = rnops(uniqs(r));
				if(succ == nil)
					break;
				q = succ->prog;
				if(q->as == p->as && q->from.type == p->to.type) {
					q->as = AMOVL;
					n++;
				}
				break;

			case AMOVBQSX:
			case AMOVBQZX:
			case AMOVWQSX:
			case AMOVWQZX:
			case AMOVLQSX:
			case AMOVLQZX:
			case AMOVQL:
				// as above, but the repeated op becomes a MOVQ.
				if(!regtyp(&p->to))
					break;
				succ = rnops(uniqs(r));
				if(succ == nil)
					break;
				q = succ->prog;
				if(q->as == p->as && q->from.type == p->to.type) {
					q->as = AMOVQ;
					n++;
				}
				break;

			case AADDL:
			case AADDQ:
			case AADDW:
				// ADD $-1 -> DEC, ADD $1 -> INC, unless needc(p->link).
				if(p->from.type == D_CONST && !needc(p->link)) {
					if(p->from.offset == -1) {
						p->as = (p->as == AADDQ) ? ADECQ :
							(p->as == AADDL) ? ADECL : ADECW;
						p->from = zprog.from;
					} else if(p->from.offset == 1) {
						p->as = (p->as == AADDQ) ? AINCQ :
							(p->as == AADDL) ? AINCL : AINCW;
						p->from = zprog.from;
					}
				}
				break;

			case ASUBL:
			case ASUBQ:
			case ASUBW:
				// SUB $-1 -> INC, SUB $1 -> DEC, unless needc(p->link).
				if(p->from.type == D_CONST && !needc(p->link)) {
					if(p->from.offset == -1) {
						p->as = (p->as == ASUBQ) ? AINCQ :
							(p->as == ASUBL) ? AINCL : AINCW;
						p->from = zprog.from;
					} else if(p->from.offset == 1) {
						p->as = (p->as == ASUBQ) ? ADECQ :
							(p->as == ASUBL) ? ADECL : ADECW;
						p->from = zprog.from;
					}
				}
				break;
			}
		}
	} while(n != 0);

	// MOVLQZX removal.
	// The MOVLQZX exists to avoid being confused for a
	// MOVL that is just copying 32-bit data around during
	// copyprop.  Now that copyprop is done, remove MOVLQZX R1, R2
	// if it is dominated by an earlier ADDL/MOVL/etc into R1 that
	// will have already cleared the high bits.
	//
	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one.  We only use the lower one, but
	// the processor can do better if we do moves using both.
	for(r = g->start; r != nil; r = r->link) {
		p = r->prog;
		if(p->as == AMOVLQZX && regtyp(&p->from) &&
		   p->from.type == p->to.type && prevl(r, p->from.type))
			excise(r);

		if(p->as == AMOVSD && regtyp(&p->from) && regtyp(&p->to))
			p->as = AMOVAPD;
	}

	// load pipelining
	// push any load from memory as early as possible
	// to give it time to complete before use.
	for(r = g->start; r != nil; r = r->link) {
		p = r->prog;
		switch(p->as) {
		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVQ:
		case AMOVLQZX:
			if(regtyp(&p->to) && !regconsttyp(&p->from))
				pushback(r);
			break;
		}
	}

	flowend(g);
}
// movb elimination. // movb is simulated by the linker // when a register other than ax, bx, cx, dx // is used, so rewrite to other instructions // when possible. a movb into a register // can smash the entire 32-bit register without // causing any trouble. static void elimshortmov(Reg *r) { Prog *p; USED(r); for(r=firstr; r!=R; r=r->link) { p = r->prog; if(regtyp(&p->to)) { switch(p->as) { case AINCB: case AINCW: p->as = AINCQ; break; case ADECB: case ADECW: p->as = ADECQ; break; case ANEGB: case ANEGW: p->as = ANEGQ; break; case ANOTB: case ANOTW: p->as = ANOTQ; break; } if(regtyp(&p->from) || p->from.type == D_CONST) { // move or artihmetic into partial register. // from another register or constant can be movl. // we don't switch to 64-bit arithmetic if it can // change how the carry bit is set (and the carry bit is needed). switch(p->as) { case AMOVB: case AMOVW: p->as = AMOVQ; break; case AADDB: case AADDW: if(!needc(p->link)) p->as = AADDQ; break; case ASUBB: case ASUBW: if(!needc(p->link)) p->as = ASUBQ; break; case AMULB: case AMULW: p->as = AMULQ; break; case AIMULB: case AIMULW: p->as = AIMULQ; break; case AANDB: case AANDW: p->as = AANDQ; break; case AORB: case AORW: p->as = AORQ; break; case AXORB: case AXORW: p->as = AXORQ; break; case ASHLB: case ASHLW: p->as = ASHLQ; break; } } else if(p->from.type >= D_NONE) { // explicit zero extension, but don't // do that if source is a byte register // (only AH can occur and it's forbidden). switch(p->as) { case AMOVB: p->as = AMOVBQZX; break; case AMOVW: p->as = AMOVWQZX; break; } } } } }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } 
break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } pc = 0; /* speculating it won't kill */ loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLSX: case AMOVBLZX: case AMOVWLSX: case AMOVWLZX: if(regtyp(&p->to)) { r1 = uniqs(r); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type) p1->as = AMOVL; } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } break; } } if(t) goto loop1; }
/*
 * peep runs the peephole optimizer on the instruction list starting at
 * firstp.  It builds a flow graph with flowstart (returning at once if
 * that yields nil), numbers the nodes, then applies short-op widening,
 * constant propagation, the iterated copy-propagation / simplification
 * pass and the MOVSD -> MOVAPD rewrite, and finally releases the graph
 * with flowend.
 */
void
peep(Prog *firstp)
{
	Flow *r, *succ;
	Graph *g;
	Prog *p, *q;
	int n;

	g = flowstart(firstp, sizeof(Flow));
	if(g == nil)
		return;

	// number the flow nodes in list order.
	n = 0;
	for(r = g->start; r != nil; r = r->link) {
		r->active = n;
		n++;
	}

	// byte, word arithmetic elimination.
	elimshortmov(g);

	// constant propagation
	// find MOV $con,R followed by
	// another MOV $con,R without
	// setting R in the interim
	for(r = g->start; r != nil; r = r->link) {
		p = r->prog;
		switch(p->as) {
		case ALEAL:
			// only LEAs of a symbol whose index is D_NONE or D_CONST.
			if(regtyp(&p->to) && p->from.sym != S &&
			   (p->from.index == D_NONE || p->from.index == D_CONST))
				conprop(r);
			break;

		case AMOVB:
		case AMOVW:
		case AMOVL:
		case AMOVSS:
		case AMOVSD:
			if(regtyp(&p->to) && p->from.type == D_CONST)
				conprop(r);
			break;
		}
	}

	// Repeat the simplification pass until a full sweep makes no change.
	do {
		if(debug['P'] && debug['v'])
			dumpit("loop1", g->start, 0);

		n = 0;
		for(r = g->start; r != nil; r = r->link) {
			p = r->prog;
			switch(p->as) {
			case AMOVL:
			case AMOVSS:
			case AMOVSD:
				// register-to-register move: drop it if copyprop
				// succeeds, directly or after subprop.
				if(regtyp(&p->to) && regtyp(&p->from) &&
				   (copyprop(g, r) || (subprop(r) && copyprop(g, r)))) {
					excise(r);
					n++;
				}
				break;

			case AMOVBLZX:
			case AMOVWLZX:
			case AMOVBLSX:
			case AMOVWLSX:
				// the same extension applied again to our result
				// becomes a plain MOVL.
				if(!regtyp(&p->to))
					break;
				succ = rnops(uniqs(r));
				if(succ == nil)
					break;
				q = succ->prog;
				if(q->as == p->as && q->from.type == p->to.type) {
					q->as = AMOVL;
					n++;
				}
				break;

			case AADDL:
			case AADDW:
				// ADD $-1 -> DEC, ADD $1 -> INC, unless needc(p->link).
				if(p->from.type == D_CONST && !needc(p->link)) {
					if(p->from.offset == -1) {
						p->as = (p->as == AADDL) ? ADECL : ADECW;
						p->from = zprog.from;
					} else if(p->from.offset == 1) {
						p->as = (p->as == AADDL) ? AINCL : AINCW;
						p->from = zprog.from;
					}
				}
				break;

			case ASUBL:
			case ASUBW:
				// SUB $-1 -> INC, SUB $1 -> DEC, unless needc(p->link).
				if(p->from.type == D_CONST && !needc(p->link)) {
					if(p->from.offset == -1) {
						p->as = (p->as == ASUBL) ? AINCL : AINCW;
						p->from = zprog.from;
					} else if(p->from.offset == 1) {
						p->as = (p->as == ASUBL) ? ADECL : ADECW;
						p->from = zprog.from;
					}
				}
				break;
			}
		}
	} while(n != 0);

	// MOVSD removal.
	// We never use packed registers, so a MOVSD between registers
	// can be replaced by MOVAPD, which moves the pair of float64s
	// instead of just the lower one.  We only use the lower one, but
	// the processor can do better if we do moves using both.
	for(r = g->start; r != nil; r = r->link) {
		p = r->prog;
		if(p->as == AMOVSD && regtyp(&p->from) && regtyp(&p->to))
			p->as = AMOVAPD;
	}

	flowend(g);
}
// movb elimination. // movb is simulated by the linker // when a register other than ax, bx, cx, dx // is used, so rewrite to other instructions // when possible. a movb into a register // can smash the entire 64-bit register without // causing any trouble. static void elimshortmov(Graph *g) { Prog *p; Flow *r; for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(regtyp(&p->to)) { switch(p->as) { case AINCB: case AINCW: p->as = AINCL; break; case ADECB: case ADECW: p->as = ADECL; break; case ANEGB: case ANEGW: p->as = ANEGL; break; case ANOTB: case ANOTW: p->as = ANOTL; break; } if(regtyp(&p->from) || p->from.type == D_CONST) { // move or artihmetic into partial register. // from another register or constant can be movl. // we don't switch to 32-bit arithmetic if it can // change how the carry bit is set (and the carry bit is needed). switch(p->as) { case AMOVB: case AMOVW: p->as = AMOVL; break; case AADDB: case AADDW: if(!needc(p->link)) p->as = AADDL; break; case ASUBB: case ASUBW: if(!needc(p->link)) p->as = ASUBL; break; case AMULB: case AMULW: p->as = AMULL; break; case AIMULB: case AIMULW: p->as = AIMULL; break; case AANDB: case AANDW: p->as = AANDL; break; case AORB: case AORW: p->as = AORL; break; case AXORB: case AXORW: p->as = AXORL; break; case ASHLB: case ASHLW: p->as = ASHLL; break; } } else { // explicit zero extension switch(p->as) { case AMOVB: p->as = AMOVBLZX; break; case AMOVW: p->as = AMOVWLZX; break; } } } } }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } 
if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } }