// do we need the carry bit static int needc(Prog *p) { ProgInfo info; while(p != P) { proginfo(&info, p); if(info.flags & UseCarry) return 1; if(info.flags & (SetCarry|KillCarry)) return 0; p = p->link; } return 0; }
// is reg guaranteed to be truncated by a previous L instruction? static int prevl(Flow *r0, int reg) { Prog *p; Flow *r; ProgInfo info; for(r=uniqp(r0); r!=nil; r=uniqp(r)) { p = r->prog; if(p->to.type == reg) { proginfo(&info, p); if(info.flags & RightWrite) { if(info.flags & SizeL) return 1; return 0; } } } return 0; }
static void nilwalkfwd(NilFlow *rcheck) { NilFlow *r; Prog *p; ProgInfo info; // If the path down from rcheck dereferences the address // (possibly with a small offset) before writing to memory // and before any subsequent checks, it's okay to wait for // that implicit check. Only consider this basic block to // avoid problems like: // _ = *x // should panic // for {} // no writes but infinite loop may be considered visible for(r = (NilFlow*)uniqs(&rcheck->f); r != nil; r = (NilFlow*)uniqs(&r->f)) { p = r->f.prog; proginfo(&info, p); if((info.flags & LeftRead) && smallindir(&p->from, &rcheck->f.prog->from)) { rcheck->kill = 1; return; } if((info.flags & (RightRead|RightWrite)) && smallindir(&p->to, &rcheck->f.prog->from)) { rcheck->kill = 1; return; } // Stop if another nil check happens. if(p->as == ACHECKNIL) return; // Stop if value is lost. if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) return; // Stop if memory write. if((info.flags & RightWrite) && !regtyp(&p->to)) return; } }
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; Graph *g; ProgInfo info; int i, z; uint32 vreg; Bits bit; if(first) { fmtinstall('Q', Qconv); exregoffset = D_R15; first = 0; } mergetemp(firstp); /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) { if(regnodes[i] == N) regnodes[i] = newname(lookup(regname[i])); var[i].node = regnodes[i]; } regbits = RtoB(D_SP); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ g = flowstart(firstp, sizeof(Reg)); if(g == nil) return; firstr = (Reg*)g->start; for(r = firstr; r != R; r = (Reg*)r->f.link) { p = r->f.prog; if(p->as == AVARDEF) continue; proginfo(&info, p); // Avoid making variables for direct-called functions. if(p->as == ACALL && p->to.type == D_EXTERN) continue; r->use1.b[0] |= info.reguse | info.regindex; r->set.b[0] |= info.regset; bit = mkvar(r, &p->from); if(bany(&bit)) { if(info.flags & LeftAddr) setaddrs(bit); if(info.flags & LeftRead) for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; if(info.flags & LeftWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } bit = mkvar(r, &p->to); if(bany(&bit)) { if(info.flags & RightAddr) setaddrs(bit); if(info.flags & RightRead) for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; if(info.flags & RightWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } } for(i=0; i<nvar; i++) { Var *v = var+i; if(v->addr) { bit = blsh(i); for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; } if(debug['R'] && debug['v']) print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", i, v->addr, v->etype, v->width, v->node, v->offset); } if(debug['R'] && debug['v']) dumpit("pass1", &firstr->f, 1); /* * pass 2 * find looping structure */ flowrpo(g); if(debug['R'] && debug['v']) dumpit("pass2", &firstr->f, 1); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) if(r->f.prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = (Reg*)r->f.link; if(r1 && r1->f.active && !r->f.active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; if(debug['R'] && debug['v']) dumpit("pass3", &firstr->f, 1); /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; synch(firstr, zbits); if(change) goto loop2; if(debug['R'] && debug['v']) dumpit("pass4", &firstr->f, 1); /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = (Reg*)r->f.link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) && !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; } } for(r = firstr; r != R; r = (Reg*)r->f.link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) { for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->f.refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; excise(&r->f); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) continue; rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] && debug['v']) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); if(debug['R'] && debug['v']) dumpit("pass5", &firstr->f, 1); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(rgp->regno != 0) { if(debug['R'] && debug['v']) { Var *v; v = var + rgp->varno; print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", v->node, v->offset, rgp->varno, v->etype, rgp->regno); } paint3(rgp->enter, rgp->varno, vreg, rgp->regno); } rgp++; } if(debug['R'] && debug['v']) dumpit("pass6", &firstr->f, 1); /* * free aux structures. peep allocates new ones. */ flowend(g); firstr = R; /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) peep(firstp); /* * eliminate nops */ for(p=firstp; p!=P; p=p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.u.branch != P && p->to.u.branch->as == ANOP) p->to.u.branch = p->to.u.branch->link; } if(debug['R']) { if(ostats.ncvtreg || ostats.nspill || ostats.nreload || ostats.ndelmov || ostats.nvar || ostats.naddr || 0) print("\nstats\n"); if(ostats.ncvtreg) print(" %4d cvtreg\n", ostats.ncvtreg); if(ostats.nspill) print(" %4d spill\n", ostats.nspill); if(ostats.nreload) print(" %4d reload\n", ostats.nreload); if(ostats.ndelmov) print(" %4d delmov\n", ostats.ndelmov); if(ostats.nvar) print(" %4d var\n", ostats.nvar); if(ostats.naddr) print(" %4d addr\n", ostats.naddr); memset(&ostats, 0, sizeof(ostats)); } }
/* * return * 1 if v only used (and substitute), * 2 if read-alter-rewrite * 3 if set * 4 if set and used * 0 otherwise (not touched) */ int copyu(Prog *p, Adr *v, Adr *s) { ProgInfo info; switch(p->as) { case AJMP: if(s != A) { if(copysub(&p->to, v, s, 1)) return 1; return 0; } if(copyau(&p->to, v)) return 1; return 0; case ARET: if(s != A) return 1; return 3; case ACALL: if(REGEXT && v->type <= REGEXT && v->type > exregoffset) return 2; if(REGARG >= 0 && v->type == (uchar)REGARG) return 2; if(v->type == p->from.type) return 2; if(s != A) { if(copysub(&p->to, v, s, 1)) return 1; return 0; } if(copyau(&p->to, v)) return 4; return 3; case ATEXT: if(REGARG >= 0 && v->type == (uchar)REGARG) return 3; return 0; } proginfo(&info, p); if((info.reguse|info.regset) & RtoB(v->type)) return 2; if(info.flags & LeftAddr) if(copyas(&p->from, v)) return 2; if((info.flags & (RightRead|RightWrite)) == (RightRead|RightWrite)) if(copyas(&p->to, v)) return 2; if(info.flags & RightWrite) { if(copyas(&p->to, v)) { if(s != A) return copysub(&p->from, v, s, 1); if(copyau(&p->from, v)) return 4; return 3; } } if(info.flags & (LeftAddr|LeftRead|LeftWrite|RightAddr|RightRead|RightWrite)) { if(s != A) { if(copysub(&p->from, v, s, 1)) return 1; return copysub(&p->to, v, s, 1); } if(copyau(&p->from, v)) return 1; if(copyau(&p->to, v)) return 1; } return 0; }
/* * the idea is to substitute * one register for another * from one MOV to another * MOV a, R0 * ADD b, R0 / no use of R1 * MOV R0, R1 * would be converted to * MOV a, R1 * ADD b, R1 * MOV R1, R0 * hopefully, then the former or latter MOV * will be eliminated by copy propagation. */ static int subprop(Flow *r0) { Prog *p; ProgInfo info; Adr *v1, *v2; Flow *r; int t; if(debug['P'] && debug['v']) print("subprop %P\n", r0->prog); p = r0->prog; v1 = &p->from; if(!regtyp(v1)) { if(debug['P'] && debug['v']) print("\tnot regtype %D; return 0\n", v1); return 0; } v2 = &p->to; if(!regtyp(v2)) { if(debug['P'] && debug['v']) print("\tnot regtype %D; return 0\n", v2); return 0; } for(r=uniqp(r0); r!=nil; r=uniqp(r)) { if(debug['P'] && debug['v']) print("\t? %P\n", r->prog); if(uniqs(r) == nil) { if(debug['P'] && debug['v']) print("\tno unique successor\n"); break; } p = r->prog; proginfo(&info, p); if(info.flags & Call) { if(debug['P'] && debug['v']) print("\tfound %P; return 0\n", p); return 0; } if(info.reguse | info.regset) { if(debug['P'] && debug['v']) print("\tfound %P; return 0\n", p); return 0; } if((info.flags & Move) && (info.flags & (SizeL|SizeQ|SizeF|SizeD)) && p->to.type == v1->type) goto gotit; if(copyau(&p->from, v2) || copyau(&p->to, v2)) { if(debug['P'] && debug['v']) print("\tcopyau %D failed\n", v2); break; } if(copysub(&p->from, v1, v2, 0) || copysub(&p->to, v1, v2, 0)) { if(debug['P'] && debug['v']) print("\tcopysub failed\n"); break; } } if(debug['P'] && debug['v']) print("\tran off end; return 0\n"); return 0; gotit: copysub(&p->to, v1, v2, 1); if(debug['P']) { print("gotit: %D->%D\n%P", v1, v2, r->prog); if(p->from.type == v2->type) print(" excise"); print("\n"); } for(r=uniqs(r); r!=r0; r=uniqs(r)) { p = r->prog; copysub(&p->from, v1, v2, 1); copysub(&p->to, v1, v2, 1); if(debug['P']) print("%P\n", r->prog); } t = v1->type; v1->type = v2->type; v2->type = t; if(debug['P']) print("%P last\n", r->prog); return 1; }
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; Graph *g; int i, z; uint32 vreg; Bits bit; ProgInfo info; if(first) { fmtinstall('Q', Qconv); first = 0; } fixjmp(firstp); mergetemp(firstp); /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) { if(regnodes[i] == N) regnodes[i] = newname(lookup(regname[i])); var[i].node = regnodes[i]; } regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ g = flowstart(firstp, sizeof(Reg)); if(g == nil) return; firstr = (Reg*)g->start; for(r = firstr; r != R; r = (Reg*)r->f.link) { p = r->f.prog; proginfo(&info, p); // Avoid making variables for direct-called functions. if(p->as == ABL && p->to.type == D_EXTERN) continue; bit = mkvar(r, &p->from); if(info.flags & LeftRead) for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; if(info.flags & LeftAddr) setaddrs(bit); if(info.flags & RegRead) { if(p->from.type != D_FREG) r->use1.b[0] |= RtoB(p->reg); else r->use1.b[0] |= FtoB(p->reg); } if(info.flags & (RightAddr | RightRead | RightWrite)) { bit = mkvar(r, &p->to); if(info.flags & RightAddr) setaddrs(bit); if(info.flags & RightRead) for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; if(info.flags & RightWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } } if(firstr == R) return; for(i=0; i<nvar; i++) { Var *v = var+i; if(v->addr) { bit = blsh(i); for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; } if(debug['R'] && debug['v']) print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", i, v->addr, v->etype, v->width, v->node, v->offset); } if(debug['R'] && debug['v']) dumpit("pass1", &firstr->f, 1); /* * pass 2 * find looping structure */ flowrpo(g); if(debug['R'] && debug['v']) dumpit("pass2", &firstr->f, 1); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) if(r->f.prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = (Reg*)r->f.link; if(r1 && r1->f.active && !r->f.active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; if(debug['R'] && debug['v']) dumpit("pass3", &firstr->f, 1); /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; synch(firstr, zbits); if(change) goto loop2; addsplits(); if(debug['R'] && debug['v']) dumpit("pass4", &firstr->f, 1); if(debug['R'] > 1) { print("\nprop structure:\n"); for(r = firstr; r != R; r = (Reg*)r->f.link) { print("%d:%P", r->f.loop, r->f.prog); for(z=0; z<BITS; z++) { bit.b[z] = r->set.b[z] | r->refahead.b[z] | r->calahead.b[z] | r->refbehind.b[z] | r->calbehind.b[z] | r->use1.b[z] | r->use2.b[z]; bit.b[z] &= ~addrs.b[z]; } if(bany(&bit)) { print("\t"); if(bany(&r->use1)) print(" u1=%Q", r->use1); if(bany(&r->use2)) print(" u2=%Q", r->use2); if(bany(&r->set)) print(" st=%Q", r->set); if(bany(&r->refahead)) print(" ra=%Q", r->refahead); if(bany(&r->calahead)) print(" ca=%Q", r->calahead); if(bany(&r->refbehind)) print(" rb=%Q", r->refbehind); if(bany(&r->calbehind)) print(" cb=%Q", r->calbehind); } print("\n"); } } /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = (Reg*)r->f.link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } if(debug['R'] && debug['v']) dumpit("pass4.5", &firstr->f, 1); /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) & !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; } } for(r = firstr; r != R; r = (Reg*)r->f.link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) { for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->f.refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; excise(&r->f); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; if(debug['R'] > 1) print("\n"); paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) { if(debug['R']) print("%L $%d: %Q\n", r->f.prog->lineno, change, blsh(i)); continue; } rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] > 1) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); if(debug['R'] && debug['v']) dumpit("pass5", &firstr->f, 1); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(debug['R']) { if(rgp->regno >= NREG) print("%L $%d F%d: %Q\n", rgp->enter->f.prog->lineno, rgp->cost, rgp->regno-NREG, bit); else print("%L $%d R%d: %Q\n", rgp->enter->f.prog->lineno, rgp->cost, rgp->regno, bit); } if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } if(debug['R'] && debug['v']) dumpit("pass6", &firstr->f, 1); /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) { peep(firstp); } if(debug['R'] && debug['v']) dumpit("pass7", &firstr->f, 1); /* * last pass * eliminate nops * free aux structures * adjust the stack pointer * MOVW.W R1,-12(R13) <<- start * MOVW R0,R1 * MOVW R1,8(R13) * MOVW $0,R1 * MOVW R1,4(R13) * BL ,runtime.newproc+0(SB) * MOVW &ft+-32(SP),R7 <<- adjust * MOVW &j+-40(SP),R6 <<- adjust * MOVW autotmp_0003+-24(SP),R5 <<- adjust * MOVW $12(R13),R13 <<- finish */ vreg = 0; for(p = firstp; p != P; p = p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.u.branch != P && p->to.u.branch->as == ANOP) p->to.u.branch = p->to.u.branch->link; if(p->as == AMOVW && p->to.reg == 13) { if(p->scond & C_WBIT) { vreg = -p->to.offset; // in adjust region // print("%P adjusting %d\n", p, vreg); continue; } if(p->from.type == D_CONST && p->to.type == D_REG) { if(p->from.offset != vreg) print("in and out different\n"); // print("%P finish %d\n", p, vreg); vreg = 0; // done adjust region continue; } // print("%P %d %d from type\n", p, p->from.type, D_CONST); // print("%P %d %d to type\n\n", p, p->to.type, D_REG); } if(p->as == AMOVW && vreg != 0) { if(p->from.sym != S) if(p->from.name == D_AUTO || p->from.name == D_PARAM) { p->from.offset += vreg; // print("%P adjusting from %d %d\n", p, vreg, p->from.type); } if(p->to.sym != S) if(p->to.name == D_AUTO || p->to.name == D_PARAM) { p->to.offset += vreg; // print("%P adjusting to %d %d\n", p, vreg, p->from.type); } } } flowend(g); }
/* * the idea is to substitute * one register for another * from one MOV to another * MOV a, R0 * ADD b, R0 / no use of R1 * MOV R0, R1 * would be converted to * MOV a, R1 * ADD b, R1 * MOV R1, R0 * hopefully, then the former or latter MOV * will be eliminated by copy propagation. */ static int subprop(Flow *r0) { Prog *p; Adr *v1, *v2; Flow *r; int t; ProgInfo info; p = r0->prog; v1 = &p->from; if(!regtyp(v1)) return 0; v2 = &p->to; if(!regtyp(v2)) return 0; for(r=uniqp(r0); r!=nil; r=uniqp(r)) { if(debug['P'] && debug['v']) print("\t? %P\n", r->prog); if(uniqs(r) == nil) break; p = r->prog; proginfo(&info, p); if(info.flags & Call) return 0; if(info.reguse | info.regset) return 0; if((info.flags & Move) && (info.flags & (SizeL|SizeQ|SizeF|SizeD)) && p->to.type == v1->type) goto gotit; if(copyau(&p->from, v2) || copyau(&p->to, v2)) break; if(copysub(&p->from, v1, v2, 0) || copysub(&p->to, v1, v2, 0)) break; } return 0; gotit: copysub(&p->to, v1, v2, 1); if(debug['P']) { print("gotit: %D->%D\n%P", v1, v2, r->prog); if(p->from.type == v2->type) print(" excise"); print("\n"); } for(r=uniqs(r); r!=r0; r=uniqs(r)) { p = r->prog; copysub(&p->from, v1, v2, 1); copysub(&p->to, v1, v2, 1); if(debug['P']) print("%P\n", r->prog); } t = v1->type; v1->type = v2->type; v2->type = t; if(debug['P']) print("%P last\n", r->prog); return 1; }
static void nilwalkback(NilFlow *rcheck) { Prog *p; ProgInfo info; NilFlow *r; for(r = rcheck; r != nil; r = (NilFlow*)uniqp(&r->f)) { p = r->f.prog; proginfo(&info, p); if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) { // Found initialization of value we're checking for nil. // without first finding the check, so this one is unchecked. return; } if(r != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) { rcheck->kill = 1; return; } } // Here is a more complex version that scans backward across branches. // It assumes rcheck->kill = 1 has been set on entry, and its job is to find a reason // to keep the check (setting rcheck->kill = 0). // It doesn't handle copying of aggregates as well as I would like, // nor variables with their address taken, // and it's too subtle to turn on this late in Go 1.2. Perhaps for Go 1.3. /* for(r1 = r0; r1 != nil; r1 = (NilFlow*)r1->f.p1) { if(r1->f.active == gen) break; r1->f.active = gen; p = r1->f.prog; // If same check, stop this loop but still check // alternate predecessors up to this point. if(r1 != rcheck && p->as == ACHECKNIL && sameaddr(&p->from, &rcheck->f.prog->from)) break; proginfo(&info, p); if((info.flags & RightWrite) && sameaddr(&p->to, &rcheck->f.prog->from)) { // Found initialization of value we're checking for nil. // without first finding the check, so this one is unchecked. rcheck->kill = 0; return; } if(r1->f.p1 == nil && r1->f.p2 == nil) { print("lost pred for %P\n", rcheck->f.prog); for(r1=r0; r1!=nil; r1=(NilFlow*)r1->f.p1) { proginfo(&info, r1->f.prog); print("\t%P %d %d %D %D\n", r1->f.prog, info.flags&RightWrite, sameaddr(&r1->f.prog->to, &rcheck->f.prog->from), &r1->f.prog->to, &rcheck->f.prog->from); } fatal("lost pred trail"); } } for(r = r0; r != r1; r = (NilFlow*)r->f.p1) for(r2 = (NilFlow*)r->f.p2; r2 != nil; r2 = (NilFlow*)r2->f.p2link) nilwalkback(rcheck, r2, gen); */ }
void mergetemp(Prog *firstp) { int i, j, nvar, ninuse, nfree, nkill; TempVar *var, *v, *v1, **bystart, **inuse; TempFlow *r; NodeList *l, **lp; Node *n; Prog *p, *p1; Type *t; ProgInfo info, info1; int32 gen; Graph *g; enum { Debug = 0 }; g = flowstart(firstp, sizeof(TempFlow)); if(g == nil) return; // Build list of all mergeable variables. nvar = 0; for(l = curfn->dcl; l != nil; l = l->next) if(canmerge(l->n)) nvar++; var = calloc(nvar*sizeof var[0], 1); nvar = 0; for(l = curfn->dcl; l != nil; l = l->next) { n = l->n; if(canmerge(n)) { v = &var[nvar++]; n->opt = v; v->node = n; } } // Build list of uses. // We assume that the earliest reference to a temporary is its definition. // This is not true of variables in general but our temporaries are all // single-use (that's why we have so many!). for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { p = r->f.prog; proginfo(&info, p); if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt) fatal("double node %P", p); if((n = p->from.node) != N && (v = n->opt) != nil || (n = p->to.node) != N && (v = n->opt) != nil) { if(v->def == nil) v->def = r; r->uselink = v->use; v->use = r; if(n == p->from.node && (info.flags & LeftAddr)) v->addr = 1; } } if(Debug > 1) dumpit("before", g->start, 0); nkill = 0; // Special case. for(v = var; v < var+nvar; v++) { if(v->addr) continue; // Used in only one instruction, which had better be a write. if((r = v->use) != nil && r->uselink == nil) { p = r->f.prog; proginfo(&info, p); if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) { p->as = ANOP; p->to = zprog.to; v->removed = 1; if(Debug) print("drop write-only %S\n", v->node->sym); } else fatal("temp used and not set: %P", p); nkill++; continue; } // Written in one instruction, read in the next, otherwise unused, // no jumps to the next instruction. Happens mainly in 386 compiler. if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) { p = r->f.prog; proginfo(&info, p); p1 = r->f.link->prog; proginfo(&info1, p1); enum { SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD, }; if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) && !((info.flags|info1.flags) & (LeftAddr|RightAddr)) && (info.flags & SizeAny) == (info1.flags & SizeAny)) { p1->from = p->from; excise(&r->f); v->removed = 1; if(Debug) print("drop immediate-use %S\n", v->node->sym); } nkill++; continue; } } // Traverse live range of each variable to set start, end. // Each flood uses a new value of gen so that we don't have // to clear all the r->f.active words after each variable. gen = 0; for(v = var; v < var+nvar; v++) { gen++; for(r = v->use; r != nil; r = r->uselink) mergewalk(v, r, gen); } // Sort variables by start. bystart = malloc(nvar*sizeof bystart[0]); for(i=0; i<nvar; i++) bystart[i] = &var[i]; qsort(bystart, nvar, sizeof bystart[0], startcmp); // List of in-use variables, sorted by end, so that the ones that // will last the longest are the earliest ones in the array. // The tail inuse[nfree:] holds no-longer-used variables. // In theory we should use a sorted tree so that insertions are // guaranteed O(log n) and then the loop is guaranteed O(n log n). // In practice, it doesn't really matter. inuse = malloc(nvar*sizeof inuse[0]); ninuse = 0; nfree = nvar; for(i=0; i<nvar; i++) { v = bystart[i]; if(v->addr || v->removed) continue; // Expire no longer in use. while(ninuse > 0 && inuse[ninuse-1]->end < v->start) { v1 = inuse[--ninuse]; inuse[--nfree] = v1; } // Find old temp to reuse if possible. t = v->node->type; for(j=nfree; j<nvar; j++) { v1 = inuse[j]; if(eqtype(t, v1->node->type)) { inuse[j] = inuse[nfree++]; if(v1->merge) v->merge = v1->merge; else v->merge = v1; nkill++; break; } } // Sort v into inuse. j = ninuse++; while(j > 0 && inuse[j-1]->end < v->end) { inuse[j] = inuse[j-1]; j--; } inuse[j] = v; } if(Debug) { print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill); for(v=var; v<var+nvar; v++) { print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end); if(v->addr) print(" addr=1"); if(v->removed) print(" dead=1"); if(v->merge) print(" merge %#N", v->merge->node); if(v->start == v->end) print(" %P", v->def->f.prog); print("\n"); } if(Debug > 1) dumpit("after", g->start, 0); } // Update node references to use merged temporaries. for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { p = r->f.prog; if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil) p->from.node = v->merge->node; if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil) p->to.node = v->merge->node; } // Delete merged nodes from declaration list. for(lp = &curfn->dcl; (l = *lp); ) { curfn->dcl->end = l; n = l->n; v = n->opt; if(v && (v->merge || v->removed)) { *lp = l->next; continue; } lp = &l->next; } // Clear aux structures. for(v=var; v<var+nvar; v++) v->node->opt = nil; free(var); free(bystart); free(inuse); flowend(g); }
Graph* flowstart(Prog *firstp, int size) { int nf; Flow *f, *f1, *start, *last; Graph *graph; Prog *p; ProgInfo info; // Count and mark instructions to annotate. nf = 0; for(p = firstp; p != P; p = p->link) { p->opt = nil; // should be already, but just in case proginfo(&info, p); if(info.flags & Skip) continue; p->opt = (void*)1; nf++; } if(nf == 0) return nil; if(nf >= 20000) { // fatal("%S is too big (%d instructions)", curfn->nname->sym, nf); return nil; } // Allocate annotations and assign to instructions. graph = calloc(sizeof *graph + size*nf, 1); if(graph == nil) fatal("out of memory"); start = (Flow*)(graph+1); last = nil; f = start; for(p = firstp; p != P; p = p->link) { if(p->opt == nil) continue; p->opt = f; f->prog = p; if(last) last->link = f; last = f; f = (Flow*)((uchar*)f + size); } // Fill in pred/succ information. for(f = start; f != nil; f = f->link) { p = f->prog; proginfo(&info, p); if(!(info.flags & Break)) { f1 = f->link; f->s1 = f1; f1->p1 = f; } if(p->to.type == D_BRANCH) { if(p->to.u.branch == P) fatal("pnil %P", p); f1 = p->to.u.branch->opt; if(f1 == nil) fatal("fnil %P / %P", p, p->to.u.branch); if(f1 == f) { //fatal("self loop %P", p); continue; } f->s2 = f1; f->p2link = f1->p2; f1->p2 = f; } } graph->start = start; graph->num = nf; return graph; }
/* * the idea is to substitute * one register for another * from one MOV to another * MOV a, R0 * ADD b, R0 / no use of R1 * MOV R0, R1 * would be converted to * MOV a, R1 * ADD b, R1 * MOV R1, R0 * hopefully, then the former or latter MOV * will be eliminated by copy propagation. */ static int subprop(Flow *r0) { Prog *p; Adr *v1, *v2; Flow *r; int t; ProgInfo info; p = r0->prog; v1 = &p->from; if(!regtyp(v1)) return 0; v2 = &p->to; if(!regtyp(v2)) return 0; for(r=uniqp(r0); r!=nil; r=uniqp(r)) { if(uniqs(r) == nil) break; p = r->prog; proginfo(&info, p); if(info.flags & Call) return 0; if((info.flags & CanRegRead) && p->to.type == D_REG) { info.flags |= RegRead; info.flags &= ~(CanRegRead | RightRead); p->reg = p->to.reg; } switch(p->as) { case AMULLU: case AMULA: case AMVN: return 0; } if((info.flags & (RightRead|RightWrite)) == RightWrite) { if(p->to.type == v1->type) if(p->to.reg == v1->reg) if(p->scond == C_SCOND_NONE) goto gotit; } if(copyau(&p->from, v2) || copyau1(p, v2) || copyau(&p->to, v2)) break; if(copysub(&p->from, v1, v2, 0) || copysub1(p, v1, v2, 0) || copysub(&p->to, v1, v2, 0)) break; } return 0; gotit: copysub(&p->to, v1, v2, 1); if(debug['P']) { print("gotit: %D->%D\n%P", v1, v2, r->prog); if(p->from.type == v2->type) print(" excise"); print("\n"); } for(r=uniqs(r); r!=r0; r=uniqs(r)) { p = r->prog; copysub(&p->from, v1, v2, 1); copysub1(p, v1, v2, 1); copysub(&p->to, v1, v2, 1); if(debug['P']) print("%P\n", r->prog); } t = v1->reg; v1->reg = v2->reg; v2->reg = t; if(debug['P']) print("%P last\n", r->prog); return 1; }