void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: case ALOCALS: case ATYPE: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } }
void peep(Prog *firstp) { Flow *r, *r1; Graph *g; Prog *p, *p1; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; for(r=g->start, t=0; r!=nil; r=r->link, t++) r->active = t; // byte, word arithmetic elimination. elimshortmov(g); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(g, r)) { excise(r); t++; } else if(subprop(r) && copyprop(g, r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } flowend(g); }
void peep(Prog *firstp) { Flow *r, *r1; Graph *g; Prog *p, *p1; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; for(r=g->start, t=0; r!=nil; r=r->link, t++) r->active = t; // byte, word arithmetic elimination. elimshortmov(g); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(g, r)) { excise(r); t++; } else if(subprop(r) && copyprop(g, r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } flowend(g); }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } }