/*
 * proginfo fills *info with the optimizer's description of instruction p.
 *
 * The description starts as a copy of the progtable entry for p->as and
 * is then refined from the operands of p:
 *   - an entry whose flags are zero is reported through fatal();
 *   - a ShiftCX instruction whose left operand is not D_CONST also uses CX;
 *   - an ImulAXDX instruction with no right operand uses AX and sets
 *     AX and DX; with a right operand, that operand becomes read+write;
 *   - registers named by the operands' addressing go into regindex.
 */
void
proginfo(ProgInfo *info, Prog *p)
{
	*info = progtable[p->as];
	if(!info->flags)
		fatal("unknown instruction %P", p);

	if(p->from.type != D_CONST && (info->flags & ShiftCX))
		info->reguse |= CX;

	if(info->flags & ImulAXDX) {
		if(p->to.type != D_NONE)
			info->flags |= RightRdwr;
		else {
			info->reguse |= AX;
			info->regset |= AX | DX;
		}
	}

	// Addressing makes some registers used.
	// An operand type at or above D_INDIR names register type-D_INDIR.
	if(p->from.type >= D_INDIR)
		info->regindex |= RtoB(p->from.type-D_INDIR);
	if(p->to.type >= D_INDIR)
		info->regindex |= RtoB(p->to.type-D_INDIR);

	// So does an explicit index register on either operand.
	if(p->from.index != D_NONE)
		info->regindex |= RtoB(p->from.index);
	if(p->to.index != D_NONE)
		info->regindex |= RtoB(p->to.index);
}
/*
 * doregbits returns the register bit for operand code r, or 0 when r
 * does not fall in any of the ranges handled below.
 *
 * Codes at or above D_INDIR are first reduced by D_INDIR.  Byte
 * register codes (D_AL.., D_AH..) are translated to the matching code
 * counted from D_AX before conversion with RtoB; the sixteen codes
 * starting at D_X0 are converted with FtoB.
 */
uint32
doregbits(int r)
{
	int reg;

	reg = r;
	if(reg >= D_INDIR)
		reg -= D_INDIR;

	if(D_AX <= reg && reg <= D_R15)
		return RtoB(reg);
	if(D_AL <= reg && reg <= D_R15B)
		return RtoB(reg-D_AL+D_AX);
	if(D_AH <= reg && reg <= D_BH)
		return RtoB(reg-D_AH+D_AX);
	if(D_X0 <= reg && reg <= D_X0+15)
		return FtoB(reg);
	return 0;
}
/*
 * doregbits returns the register bit for operand code r, or 0 when r
 * does not fall in any of the ranges handled below.
 *
 * Codes at or above D_INDIR are first reduced by D_INDIR.  Byte
 * register codes (D_AL..D_BL, D_AH..D_BH) are translated to the
 * matching code counted from D_AX before conversion with RtoB.
 */
uint32_t
doregbits(int r)
{
	int reg;

	reg = r;
	if(reg >= D_INDIR)
		reg -= D_INDIR;

	if(D_AX <= reg && reg <= D_DI)
		return RtoB(reg);
	if(D_AL <= reg && reg <= D_BL)
		return RtoB(reg-D_AL+D_AX);
	if(D_AH <= reg && reg <= D_BH)
		return RtoB(reg-D_AH+D_AX);
	return 0;
}
/*
 * allreg tries to pick a register for region r.
 *
 * b is a mask of register bits; the candidate is chosen from its
 * complement with BtoR (integer/pointer etypes) or BtoF (float etypes).
 * The choice is accepted only when a register was found and r->cost is
 * not negative.  On success r->regno is set (float registers are stored
 * offset by NREG) and the chosen register's bit is returned.  In every
 * other case r->regno stays 0 and 0 is returned.  An etype not listed
 * in the switch is reported through fatal().
 */
uint32
allreg(uint32 b, Rgn *r)
{
	Var *vp;
	int reg;

	vp = &var[r->varno];
	r->regno = 0;

	switch(vp->etype) {
	case TINT8:
	case TUINT8:
	case TINT16:
	case TUINT16:
	case TINT32:
	case TUINT32:
	case TINT:
	case TUINT:
	case TUINTPTR:
	case TBOOL:
	case TPTR32:
		reg = BtoR(~b);
		if(reg == 0 || r->cost < 0)
			break;
		r->regno = reg;
		return RtoB(reg);

	case TFLOAT32:
	case TFLOAT64:
		reg = BtoF(~b);
		if(reg == 0 || r->cost < 0)
			break;
		r->regno = reg+NREG;
		return FtoB(reg);

	case TINT64:
	case TUINT64:
	case TPTR64:
	case TINTER:
	case TSTRUCT:
	case TARRAY:
		// known etypes that never get a register here
		break;

	default:
		fatal("unknown etype %d/%E", bitno(b), vp->etype);
		break;
	}
	return 0;
}
/*
 * allreg tries to pick a register for region r.
 *
 * b is a mask of register bits; the candidate is chosen from its
 * complement with BtoR (integer, pointer and array etypes) or BtoF
 * (TDOUBLE/TFLOAT).  The choice is accepted only when a register was
 * found and r->cost is strictly positive.  On success r->regno is set
 * and the chosen register's bit is returned; otherwise r->regno stays 0
 * and 0 is returned.  An etype not listed in the switch is reported
 * through diag().
 */
uint32
allreg(uint32 b, Rgn *r)
{
	Var *vp;
	int reg;

	vp = &var[r->varno];
	r->regno = 0;

	switch(vp->etype) {
	case TCHAR:
	case TUCHAR:
	case TSHORT:
	case TUSHORT:
	case TINT:
	case TUINT:
	case TLONG:
	case TULONG:
	case TVLONG:
	case TUVLONG:
	case TIND:
	case TARRAY:
		reg = BtoR(~b);
		if(reg == 0 || r->cost <= 0)
			break;
		r->regno = reg;
		return RtoB(reg);

	case TDOUBLE:
	case TFLOAT:
		reg = BtoF(~b);
		if(reg == 0 || r->cost <= 0)
			break;
		r->regno = reg;
		return FtoB(reg);

	default:
		diag(Z, "unknown etype %d/%d", bitno(b), vp->etype);
		break;
	}
	return 0;
}
/*
 * regopt drives register optimization over the instruction list that
 * starts at p.  The work follows the numbered passes marked in the body:
 *
 *   pass 1    allocate one Reg per instruction (ADATA/AGLOBL/ANAME/
 *             ASIGNAME are skipped), number it, and record which
 *             variable bits the instruction uses and sets;
 *   pass 2    resolve D_BRANCH operands to the target Reg;
 *   pass 2.5  call loopit;
 *   pass 3    call prop repeatedly until `change` stays zero;
 *   pass 4    call synch repeatedly until `change` stays zero;
 *   pass 5    collect candidate regions with paint1, then sort them;
 *   pass 6    pick registers with paint2/allreg and apply with paint3;
 *   pass 7    call peep;
 *   pass 8    renumber pcs, rewrite branch offsets, unlink ANOPs and
 *             hand the Reg list back to the free list `freer`.
 *
 * Returns right after pass 1 when no Reg was created.
 */
void
regopt(Prog *p)
{
	Reg *r, *r1, *r2;
	Prog *p1;
	int i, z;
	int32_t initpc, val, npc;
	uint32_t vreg;
	Bits bit;
	struct {
		int32_t m;	/* reload value for c */
		int32_t c;	/* countdown until the next Reg is recorded */
		Reg* p;		/* last Reg recorded at this level */
	} log5[6], *lp;

	/* reset the per-function optimizer state */
	firstr = R;
	lastr = R;
	nvar = 0;
	regbits = RtoB(D_SP) | RtoB(D_AX);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
	}

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	/* levels 0..4 get reload values 5^5, 5^4, ..., 5 */
	val = 5L * 5L * 5L * 5L * 5L;
	lp = log5;
	for(i=0; i<5; i++) {
		lp->m = val;
		lp->c = 0;
		lp->p = R;
		val /= 5L;
		lp++;
	}
	val = 0;
	for(; p != P; p = p->link) {
		switch(p->as) {
		case ADATA:
		case AGLOBL:
		case ANAME:
		case ASIGNAME:
			continue;
		}
		/* append a new Reg to the firstr..lastr list */
		r = rega();
		if(firstr == R) {
			firstr = r;
			lastr = r;
		} else {
			lastr->link = r;
			r->p1 = lastr;
			lastr->s1 = r;
			lastr = r;
		}
		r->prog = p;
		r->pc = val;
		val++;

		/*
		 * The first level whose countdown has expired records r:
		 * the Reg previously recorded there gets its log5 field
		 * pointed at r.  Pass 2 follows these log5 links to skip
		 * ahead while searching for a branch target.
		 */
		lp = log5;
		for(i=0; i<5; i++) {
			lp->c--;
			if(lp->c <= 0) {
				lp->c = lp->m;
				if(lp->p != R)
					lp->p->log5 = r;
				lp->p = r;
				(lp+1)->c = 0;
				break;
			}
			lp++;
		}

		/* after ARET/AJMP/AIRETL, clear the p1/s1 links to the previous Reg */
		r1 = r->p1;
		if(r1 != R)
		switch(r1->prog->as) {
		case ARET:
		case AJMP:
		case AIRETL:
			r->p1 = R;
			r1->s1 = R;
		}

		bit = mkvar(r, &p->from, p->as==AMOVL);
		if(bany(&bit))
		switch(p->as) {
		/*
		 * funny
		 */
		case ALEAL:
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
			break;
		/*
		 * left side read
		 */
		default:
			for(z=0; z<BITS; z++)
				r->use1.b[z] |= bit.b[z];
			break;
		}

		bit = mkvar(r, &p->to, 0);
		if(bany(&bit))
		switch(p->as) {
		default:
			diag(Z, "reg: unknown op: %A", p->as);
			break;
		/*
		 * right side read
		 */
		case ACMPB:
		case ACMPL:
		case ACMPW:
			for(z=0; z<BITS; z++)
				r->use2.b[z] |= bit.b[z];
			break;
		/*
		 * right side write
		 */
		case ANOP:
		case AMOVL:
		case AMOVB:
		case AMOVW:
		case AMOVBLSX:
		case AMOVBLZX:
		case AMOVWLSX:
		case AMOVWLZX:
			for(z=0; z<BITS; z++)
				r->set.b[z] |= bit.b[z];
			break;
		/*
		 * right side read+write
		 */
		case AADDB:
		case AADDL:
		case AADDW:
		case AANDB:
		case AANDL:
		case AANDW:
		case ASUBB:
		case ASUBL:
		case ASUBW:
		case AORB:
		case AORL:
		case AORW:
		case AXORB:
		case AXORL:
		case AXORW:
		case ASALB:
		case ASALL:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARW:
		case AROLB:
		case AROLL:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORW:
		case ASHLB:
		case ASHLL:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRW:
		case AIMULL:
		case AIMULW:
		case ANEGL:
		case ANOTL:
		case AADCL:
		case ASBBL:
			for(z=0; z<BITS; z++) {
				r->set.b[z] |= bit.b[z];
				r->use2.b[z] |= bit.b[z];
			}
			break;
		/*
		 * funny
		 */
		case AFMOVDP:
		case AFMOVFP:
		case AFMOVLP:
		case AFMOVVP:
		case AFMOVWP:
		case ACALL:
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
			break;
		}

		/* record fixed registers named by the opcode itself in r->regu */
		switch(p->as) {
		case AIMULL:
		case AIMULW:
			if(p->to.type != D_NONE)
				break;
			/* no destination operand: falls through to the AX/DX group */
		case AIDIVB:
		case AIDIVL:
		case AIDIVW:
		case AIMULB:
		case ADIVB:
		case ADIVL:
		case ADIVW:
		case AMULB:
		case AMULL:
		case AMULW:
		case ACWD:
		case ACDQ:
			r->regu |= RtoB(D_AX) | RtoB(D_DX);
			break;
		case AREP:
		case AREPN:
		case ALOOP:
		case ALOOPEQ:
		case ALOOPNE:
			r->regu |= RtoB(D_CX);
			break;
		case AMOVSB:
		case AMOVSL:
		case AMOVSW:
		case ACMPSB:
		case ACMPSL:
		case ACMPSW:
			r->regu |= RtoB(D_SI) | RtoB(D_DI);
			break;
		case ASTOSB:
		case ASTOSL:
		case ASTOSW:
		case ASCASB:
		case ASCASL:
		case ASCASW:
			r->regu |= RtoB(D_AX) | RtoB(D_DI);
			break;
		case AINSB:
		case AINSL:
		case AINSW:
		case AOUTSB:
		case AOUTSL:
		case AOUTSW:
			r->regu |= RtoB(D_DI) | RtoB(D_DX);
			break;
		case AFSTSW:
		case ASAHF:
			r->regu |= RtoB(D_AX);
			break;
		}
	}
	if(firstr == R)
		return;
	/* val is now the number of Regs; initpc is the global pc minus that count */
	initpc = pc - val;
	npc = val;

	/*
	 * pass 2
	 * turn branch references to pointers
	 * build back pointers
	 */
	for(r = firstr; r != R; r = r->link) {
		p = r->prog;
		if(p->to.type == D_BRANCH) {
			/* target expressed as a Reg pc (0-based within this list) */
			val = p->to.offset - initpc;
			r1 = firstr;
			while(r1 != R) {
				/* take the log5 shortcut while it does not overshoot */
				r2 = r1->log5;
				if(r2 != R && val >= r2->pc) {
					r1 = r2;
					continue;
				}
				if(r1->pc == val)
					break;
				r1 = r1->link;
			}
			if(r1 == R) {
				nearln = p->lineno;
				diag(Z, "ref not found\n%P", p);
				continue;
			}
			if(r1 == r) {
				nearln = p->lineno;
				diag(Z, "ref to self\n%P", p);
				continue;
			}
			/* s2 is the branch target; push r on the target's p2 list */
			r->s2 = r1;
			r->p2link = r1->p2;
			r1->p2 = r;
		}
	}
	if(debug['R']) {
		p = firstr->prog;
		print("\n%L %D\n", p->lineno, &p->from);
	}

	/*
	 * pass 2.5
	 * find looping structure
	 */
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	change = 0;
	loopit(firstr, npc);
	if(debug['R'] && debug['v']) {
		print("\nlooping structure:\n");
		for(r = firstr; r != R; r = r->link) {
			print("%ld:%P", r->loop, r->prog);
			for(z=0; z<BITS; z++)
				bit.b[z] = r->use1.b[z] |
					r->use2.b[z] | r->set.b[z];
			if(bany(&bit)) {
				print("\t");
				if(bany(&r->use1))
					print(" u1=%B", r->use1);
				if(bany(&r->use2))
					print(" u2=%B", r->use2);
				if(bany(&r->set))
					print(" st=%B", r->set);
			}
			print("\n");
		}
	}

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	/* start propagation from every ARET */
	for(r = firstr; r != R; r = r->link)
		if(r->prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = r->link;
		/* r not yet active but its list successor is: propagate from r too */
		if(r1 && r1->active && !r->active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		/*
		 * bits in refahead/calahead of the first Reg that are not
		 * externs, params, addrs or consts
		 */
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit)) {
			nearln = r->prog->lineno;
			warn(Z, "used and not set: %B", bit);
			if(debug['R'] && !debug['w'])
				print("used and not set: %B\n", bit);
		}
	}
	if(debug['R'] && debug['v'])
		print("\nprop structure:\n");
	for(r = firstr; r != R; r = r->link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = r->link) {
		if(debug['R'] && debug['v']) {
			print("%P\t", r->prog);
			if(bany(&r->set))
				print("s:%B ", r->set);
			if(bany(&r->refahead))
				print("ra:%B ", r->refahead);
			if(bany(&r->calahead))
				print("ca:%B ", r->calahead);
			print("\n");
		}
		/* bits set here that are in neither refahead, calahead nor addrs */
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit)) {
			nearln = r->prog->lineno;
			warn(Z, "set and not used: %B", bit);
			if(debug['R'])
				print("set and not used: %B\n", bit);
			excise(r);
		}
		/* LOAD(r) bits not already in r->act and not in addrs */
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			/* `change` is read back after paint1 as the region's cost */
			change = 0;
			if(debug['R'] && debug['v'])
				print("\n");
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			if(change <= 0) {
				/* not worth a region: rgp is reused for the next candidate */
				if(debug['R'])
					print("%L$%d: %B\n",
						r->prog->lineno, change, blsh(i));
				continue;
			}
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				warn(Z, "too many regions");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		if(debug['R']) {
			print("%L$%d %R: %B\n",
				rgp->enter->prog->lineno,
				rgp->cost,
				rgp->regno,
				bit);
		}
		/* regno 0 means allreg found no register for this region */
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}
	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P'])
		peep();

	/*
	 * pass 8
	 * recalculate pc
	 */
	val = initpc;
	for(r = firstr; r != R; r = r1) {
		r->pc = val;
		p = r->prog;
		p1 = P;
		r1 = r->link;
		if(r1 != R)
			p1 = r1->prog;
		/* count every Prog up to the next Reg's Prog, except the listed pseudo-ops */
		for(; p != p1; p = p->link) {
			switch(p->as) {
			default:
				val++;
				break;

			case ANOP:
			case ADATA:
			case AGLOBL:
			case ANAME:
			case ASIGNAME:
				break;
			}
		}
	}
	pc = val;

	/*
	 * fix up branches
	 */
	if(debug['R'])
		if(bany(&addrs))
			print("addrs: %B\n", addrs);

	r1 = 0; /* set */
	for(r = firstr; r != R; r = r->link) {
		p = r->prog;
		/* NOTE(review): r->s2 is dereferenced unchecked; pass 2 leaves it unset after a diag — confirm */
		if(p->to.type == D_BRANCH)
			p->to.offset = r->s2->pc;
		r1 = r;
	}

	/*
	 * last pass
	 * eliminate nops
	 * free aux structures
	 */
	for(p = firstr->prog; p != P; p = p->link){
		while(p->link && p->link->as == ANOP)
			p->link = p->link->link;
	}
	/* r1 is the last Reg: splice the whole list onto the front of freer */
	if(r1 != R) {
		r1->link = freer;
		freer = firstr;
	}
}
// Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include <u.h> #include <libc.h> #include "gg.h" #include "opt.h" // Matches real RtoB but can be used in global initializer. #define RtoB(r) (1<<((r)-D_AX)) enum { AX = RtoB(D_AX), BX = RtoB(D_BX), CX = RtoB(D_CX), DX = RtoB(D_DX), DI = RtoB(D_DI), SI = RtoB(D_SI), LeftRdwr = LeftRead | LeftWrite, RightRdwr = RightRead | RightWrite, }; #undef RtoB // This table gives the basic information about instruction // generated by the compiler and processed in the optimizer. // See opt.h for bit definitions. // // Instructions not generated need not be listed.
/*
 * copyu classifies how instruction p touches operand v.
 *
 * When s is not A, the occurrences of v that p merely uses are
 * rewritten to s through copysub (called with a final argument of 1),
 * and the result reflects copysub's answer.
 *
 * return
 * 1 if v only used (and substitute),
 * 2 if read-alter-rewrite
 * 3 if set
 * 4 if set and used
 * 0 otherwise (not touched)
 */
int
copyu(Prog *p, Adr *v, Adr *s)
{
	ProgInfo info;

	/* opcodes that are classified without consulting proginfo */
	switch(p->as) {
	case AJMP:
		if(s != A)
			return copysub(&p->to, v, s, 1) ? 1 : 0;
		return copyau(&p->to, v) ? 1 : 0;

	case ARET:
		return s != A ? 1 : 3;

	case ACALL:
		if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
			return 2;
		if(REGARG >= 0 && v->type == (uchar)REGARG)
			return 2;
		if(v->type == p->from.type)
			return 2;
		if(s != A)
			return copysub(&p->to, v, s, 1) ? 1 : 0;
		return copyau(&p->to, v) ? 4 : 3;

	case ATEXT:
		if(REGARG >= 0 && v->type == (uchar)REGARG)
			return 3;
		return 0;
	}

	proginfo(&info, p);

	/* v is one of the registers the instruction uses or sets implicitly */
	if((info.reguse|info.regset) & RtoB(v->type))
		return 2;

	/* left operand has its address taken and is exactly v */
	if((info.flags & LeftAddr) && copyas(&p->from, v))
		return 2;

	/* right operand is both read and written and is exactly v */
	if((info.flags & (RightRead|RightWrite)) == (RightRead|RightWrite)
	&& copyas(&p->to, v))
		return 2;

	/* right operand is written and is exactly v */
	if((info.flags & RightWrite) && copyas(&p->to, v)) {
		if(s != A)
			return copysub(&p->from, v, s, 1);
		return copyau(&p->from, v) ? 4 : 3;
	}

	/* any remaining operand access: v can only be used */
	if(info.flags & (LeftAddr|LeftRead|LeftWrite|RightAddr|RightRead|RightWrite)) {
		if(s != A) {
			if(copysub(&p->from, v, s, 1))
				return 1;
			return copysub(&p->to, v, s, 1);
		}
		if(copyau(&p->from, v) || copyau(&p->to, v))
			return 1;
	}
	return 0;
}
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; int i, z, nr; uint32 vreg; Bits bit; if(first == 0) { fmtinstall('Q', Qconv); } fixjmp(firstp); first++; if(debug['K']) { if(first != 13) return; // debug['R'] = 2; // debug['P'] = 2; print("optimizing %S\n", curfn->nname->sym); } // count instructions nr = 0; for(p=firstp; p!=P; p=p->link) nr++; // if too big dont bother if(nr >= 10000) { // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); return; } r1 = R; firstr = R; lastr = R; /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) var[i].node = newname(lookup(regname[i])); regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ nr = 0; for(p=firstp; p != P; p = p->link) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); nr++; if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; p->regp = r; r1 = r->p1; if(r1 != R) { switch(r1->prog->as) { case ARET: case AB: case ARFE: r->p1 = R; r1->s1 = R; } } /* * left side always read */ bit = mkvar(r, &p->from); for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; /* * middle always read when present */ if(p->reg != NREG) { if(p->from.type != D_FREG) r->use1.b[0] |= RtoB(p->reg); else r->use1.b[0] |= FtoB(p->reg); } /* * right side depends on opcode */ bit = mkvar(r, &p->to); if(bany(&bit)) switch(p->as) { default: yyerror("reg: unknown op: %A", p->as); break; /* * right side read */ case ATST: case ATEQ: case ACMP: case ACMN: case ACMPD: case ACMPF: rightread: 
for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; break; /* * right side read or read+write, depending on middle * ADD x, z => z += x * ADD x, y, z => z = x + y */ case AADD: case AAND: case AEOR: case ASUB: case ARSB: case AADC: case ASBC: case ARSC: case AORR: case ABIC: case ASLL: case ASRL: case ASRA: case AMUL: case AMULU: case ADIV: case AMOD: case AMODU: case ADIVU: if(p->reg != NREG) goto rightread; // fall through /* * right side read+write */ case AADDF: case AADDD: case ASUBF: case ASUBD: case AMULF: case AMULD: case ADIVF: case ADIVD: case AMULA: case AMULAL: case AMULALU: for(z=0; z<BITS; z++) { r->use2.b[z] |= bit.b[z]; r->set.b[z] |= bit.b[z]; } break; /* * right side write */ case ANOP: case AMOVB: case AMOVBU: case AMOVD: case AMOVDF: case AMOVDW: case AMOVF: case AMOVFW: case AMOVH: case AMOVHU: case AMOVW: case AMOVWD: case AMOVWF: case AMVN: case AMULL: case AMULLU: if((p->scond & C_SCOND) != C_SCOND_NONE) for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; break; /* * funny */ case ABL: setaddrs(bit); break; } if(p->as == AMOVM) { z = p->to.offset; if(p->from.type == D_CONST) z = p->from.offset; for(i=0; z; i++) { if(z&1) regbits |= RtoB(i); z >>= 1; } } }
/*
 * proginfo fills *info with the optimizer's description of instruction p.
 *
 * After initproginfo() has run, the description starts as a copy of the
 * progtable entry for p->as and is then refined from the operands:
 *   - an entry whose flags are zero is replaced by the AADD entry and
 *     reported through fatal();
 *   - RegRead with no middle register (p->reg == NREG) becomes RightRead;
 *   - a D_OREG or D_CONST operand with a register adds that register to
 *     regindex, and to regset as well when PostInc is flagged;
 *   - a D_CONST left operand that carries a symbol turns LeftRead into
 *     LeftAddr;
 *   - ADUFFZERO and ADUFFCOPY add fixed registers to reguse/regset.
 */
void
proginfo(ProgInfo *info, Prog *p)
{
	initproginfo();

	*info = progtable[p->as];
	if(!info->flags) {
		*info = progtable[AADD];
		fatal("proginfo: unknown instruction %P", p);
	}

	if(p->reg == NREG && (info->flags & RegRead)) {
		info->flags &= ~RegRead;
		info->flags |= /*CanRegRead |*/ RightRead;
	}

	/* register named by the left operand */
	switch(p->from.type) {
	case D_OREG:
	case D_CONST:
		if(p->from.reg == NREG)
			break;
		info->regindex |= RtoB(p->from.reg);
		if(info->flags & PostInc)
			info->regset |= RtoB(p->from.reg);
		break;
	}

	/* register named by the right operand */
	switch(p->to.type) {
	case D_OREG:
	case D_CONST:
		if(p->to.reg == NREG)
			break;
		info->regindex |= RtoB(p->to.reg);
		if(info->flags & PostInc)
			info->regset |= RtoB(p->to.reg);
		break;
	}

	if((info->flags & LeftRead) && p->from.type == D_CONST && p->from.sym != nil) {
		info->flags &= ~LeftRead;
		info->flags |= LeftAddr;
	}

	switch(p->as) {
	case ADUFFZERO:
		info->reguse |= (1<<D_R0) | RtoB(3);
		info->regset |= RtoB(3);
		break;
	case ADUFFCOPY:
		// TODO(austin) Revisit when duffcopy is implemented
		info->reguse |= RtoB(3) | RtoB(4) | RtoB(5);
		info->regset |= RtoB(3) | RtoB(4);
		break;
	}
}
/*
 * mkvar returns the variable bit for operand a, entering a new slot in
 * var[] when the (sym, name, offset) triple has not been seen before.
 *
 * Side effects visible in this function:
 *   - a D_REG / D_FREG operand with a register marks it in regbits;
 *   - the bit is added to externs (D_EXTERN, D_STATIC), params (D_PARAM),
 *     addrs (etype mismatch with the slot, or an etype accepted by
 *     neither typechlpfd nor typev) and, for D_CONST operands, to
 *     consts (no symbol) or to params and, unless TARRAY, addrs.
 *
 * zbits is returned when the operand is not tracked: no symbol unless
 * it is a register-less D_CONST with docon set and sval(offset) false,
 * a new symbol whose name starts with '.', a full table (NVAR), or an
 * operand type other than D_CONST and D_OREG.
 */
Bits
mkvar(Adr *a, int docon)
{
	Var *vp;
	Sym *sym;
	Bits bit;
	long off;
	int idx, kind, nm, ety, w;

	kind = a->type;
	if(a->reg != NREG) {
		if(kind == D_REG)
			regbits |= RtoB(a->reg);
		if(kind == D_FREG)
			regbits |= FtoB(a->reg);
	}

	sym = a->sym;
	off = a->offset;
	ety = a->etype;
	if(sym == S) {
		if(kind != D_CONST || !docon || a->reg != NREG)
			return zbits;
		ety = TLONG;
	}
	if(kind == D_CONST && sym == S && sval(off))
		return zbits;

	/* look for an existing slot with the same symbol, name and offset */
	nm = a->name;
	for(idx = 0; idx < nvar; idx++) {
		vp = &var[idx];
		if(vp->sym == sym && vp->name == nm && vp->offset == off)
			break;
	}

	if(idx >= nvar) {
		/* not found: create a slot unless the operand is excluded */
		if(sym && sym->name[0] == '.')
			return zbits;
		if(nvar >= NVAR) {
			if(debug['w'] > 1 && sym)
				warn(Z, "variable not optimized: %s", sym->name);
			return zbits;
		}
		idx = nvar++;
		vp = &var[idx];
		vp->sym = sym;
		vp->offset = off;
		vp->etype = ety;
		vp->name = nm;
		if(debug['R'])
			print("bit=%2d et=%2d %D\n", idx, ety, a);
	}

	bit = blsh(idx);
	if(nm == D_EXTERN || nm == D_STATIC)
		for(w = 0; w < BITS; w++)
			externs.b[w] |= bit.b[w];
	if(nm == D_PARAM)
		for(w = 0; w < BITS; w++)
			params.b[w] |= bit.b[w];

	/* funny punning */
	if(vp->etype != ety || (!typechlpfd[ety] && !typev[ety]))
		for(w = 0; w < BITS; w++)
			addrs.b[w] |= bit.b[w];

	if(kind == D_OREG)
		return bit;
	if(kind != D_CONST)
		return zbits;

	if(sym == S) {
		for(w = 0; w < BITS; w++)
			consts.b[w] |= bit.b[w];
		return bit;
	}
	for(w = 0; w < BITS; w++) {
		if(ety != TARRAY)
			addrs.b[w] |= bit.b[w];
		params.b[w] |= bit.b[w];
	}
	return bit;
}
/*
 * regopt drives register optimization over the instruction list that
 * starts at p.  The work follows the numbered passes marked in the body:
 *
 *   pass 1    allocate one Reg per instruction (ADATA/AGLOBL/ANAME/
 *             ASIGNAME are skipped), number it, and record which
 *             variable bits the instruction uses and sets;
 *   pass 2    resolve D_BRANCH operands to the target Reg;
 *   pass 2.5  call loopit;
 *   pass 3    call prop repeatedly until `change` stays zero;
 *   pass 4    call synch repeatedly until `change` stays zero, then
 *             call addsplits;
 *   pass 5    collect candidate regions with paint1, then sort them;
 *   pass 6    pick registers with paint2/allreg and apply with paint3;
 *   pass 7    call peep;
 *   pass 8    renumber pcs, rewrite branch offsets, unlink ANOPs and
 *             hand the Reg list back to the free list `freer`.
 *
 * Returns right after pass 1 when no Reg was created.
 */
void
regopt(Prog *p)
{
	Reg *r, *r1, *r2;
	Prog *p1;
	int i, z;
	long initpc, val, npc;
	ulong vreg;
	Bits bit;
	struct {
		long m;		/* reload value for c */
		long c;		/* countdown until the next Reg is recorded */
		Reg* p;		/* last Reg recorded at this level */
	} log5[6], *lp;

	/* reset the per-function optimizer state */
	firstr = R;
	lastr = R;
	nvar = 0;
	regbits = 0;
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
	}

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	/* levels 0..4 get reload values 5^5, 5^4, ..., 5 */
	val = 5L * 5L * 5L * 5L * 5L;
	lp = log5;
	for(i=0; i<5; i++) {
		lp->m = val;
		lp->c = 0;
		lp->p = R;
		val /= 5L;
		lp++;
	}
	val = 0;
	for(; p != P; p = p->link) {
		switch(p->as) {
		case ADATA:
		case AGLOBL:
		case ANAME:
		case ASIGNAME:
			continue;
		}
		/* append a new Reg to the firstr..lastr list */
		r = rega();
		if(firstr == R) {
			firstr = r;
			lastr = r;
		} else {
			lastr->link = r;
			r->p1 = lastr;
			lastr->s1 = r;
			lastr = r;
		}
		r->prog = p;
		r->pc = val;
		val++;

		/*
		 * The first level whose countdown has expired records r:
		 * the Reg previously recorded there gets its log5 field
		 * pointed at r.  Pass 2 follows these log5 links to skip
		 * ahead while searching for a branch target.
		 */
		lp = log5;
		for(i=0; i<5; i++) {
			lp->c--;
			if(lp->c <= 0) {
				lp->c = lp->m;
				if(lp->p != R)
					lp->p->log5 = r;
				lp->p = r;
				(lp+1)->c = 0;
				break;
			}
			lp++;
		}

		/* after ARET/AB/ARFE, clear the p1/s1 links to the previous Reg */
		r1 = r->p1;
		if(r1 != R)
		switch(r1->prog->as) {
		case ARET:
		case AB:
		case ARFE:
			r->p1 = R;
			r1->s1 = R;
		}

		/*
		 * left side always read
		 */
		bit = mkvar(&p->from, p->as==AMOVW);
		for(z=0; z<BITS; z++)
			r->use1.b[z] |= bit.b[z];

		/*
		 * right side depends on opcode
		 */
		bit = mkvar(&p->to, 0);
		if(bany(&bit))
		switch(p->as) {
		default:
			diag(Z, "reg: unknown asop: %A", p->as);
			break;

		/*
		 * right side write
		 */
		case ANOP:
		case AMOVB:
		case AMOVBU:
		case AMOVH:
		case AMOVHU:
		case AMOVW:
		case AMOVF:
		case AMOVD:
			for(z=0; z<BITS; z++)
				r->set.b[z] |= bit.b[z];
			break;

		/*
		 * funny
		 */
		case ABL:
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
			break;
		}

		/*
		 * AMOVM: the mask is taken from from.offset when the left
		 * operand is D_CONST, otherwise from to.offset; bit i set
		 * in the mask marks register i in regbits.
		 */
		if(p->as == AMOVM) {
			if(p->from.type == D_CONST)
				z = p->from.offset;
			else
				z = p->to.offset;
			for(i=0; z; i++) {
				if(z&1)
					regbits |= RtoB(i);
				z >>= 1;
			}
		}
	}
	if(firstr == R)
		return;
	/* val is now the number of Regs; initpc is the global pc minus that count */
	initpc = pc - val;
	npc = val;

	/*
	 * pass 2
	 * turn branch references to pointers
	 * build back pointers
	 */
	for(r = firstr; r != R; r = r->link) {
		p = r->prog;
		if(p->to.type == D_BRANCH) {
			/* target expressed as a Reg pc (0-based within this list) */
			val = p->to.offset - initpc;
			r1 = firstr;
			while(r1 != R) {
				/* take the log5 shortcut while it does not overshoot */
				r2 = r1->log5;
				if(r2 != R && val >= r2->pc) {
					r1 = r2;
					continue;
				}
				if(r1->pc == val)
					break;
				r1 = r1->link;
			}
			if(r1 == R) {
				nearln = p->lineno;
				diag(Z, "ref not found\n%P", p);
				continue;
			}
			if(r1 == r) {
				nearln = p->lineno;
				diag(Z, "ref to self\n%P", p);
				continue;
			}
			/* s2 is the branch target; push r on the target's p2 list */
			r->s2 = r1;
			r->p2link = r1->p2;
			r1->p2 = r;
		}
	}
	if(debug['R']) {
		p = firstr->prog;
		print("\n%L %D\n", p->lineno, &p->from);
	}

	/*
	 * pass 2.5
	 * find looping structure
	 */
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	change = 0;
	loopit(firstr, npc);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	/* start propagation from every ARET */
	for(r = firstr; r != R; r = r->link)
		if(r->prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = r->link;
		/* r not yet active but its list successor is: propagate from r too */
		if(r1 && r1->active && !r->active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	addsplits();

	if(debug['R'] && debug['v']) {
		print("\nprop structure:\n");
		for(r = firstr; r != R; r = r->link) {
			print("%ld:%P", r->loop, r->prog);
			for(z=0; z<BITS; z++)
				bit.b[z] = r->set.b[z] |
					r->refahead.b[z] | r->calahead.b[z] |
					r->refbehind.b[z] | r->calbehind.b[z] |
					r->use1.b[z] | r->use2.b[z];
			if(bany(&bit)) {
				print("\t");
				if(bany(&r->use1))
					print(" u1=%B", r->use1);
				if(bany(&r->use2))
					print(" u2=%B", r->use2);
				if(bany(&r->set))
					print(" st=%B", r->set);
				if(bany(&r->refahead))
					print(" ra=%B", r->refahead);
				if(bany(&r->calahead))
					print(" ca=%B", r->calahead);
				if(bany(&r->refbehind))
					print(" rb=%B", r->refbehind);
				if(bany(&r->calbehind))
					print(" cb=%B", r->calbehind);
			}
			print("\n");
		}
	}

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		/*
		 * bits in refahead/calahead of the first Reg that are not
		 * externs, params, addrs or consts
		 */
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit)) {
			nearln = r->prog->lineno;
			warn(Z, "used and not set: %B", bit);
			if(debug['R'] && !debug['w'])
				print("used and not set: %B\n", bit);
		}
	}

	for(r = firstr; r != R; r = r->link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = r->link) {
		/* bits set here that are in neither refahead, calahead nor addrs */
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit)) {
			nearln = r->prog->lineno;
			warn(Z, "set and not used: %B", bit);
			if(debug['R'])
				print("set and not used: %B\n", bit);
			excise(r);
		}
		/* LOAD(r) bits not already in r->act and not in addrs */
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			/* `change` is read back after paint1 as the region's cost */
			change = 0;
			if(debug['R'] && debug['v'])
				print("\n");
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			if(change <= 0) {
				/* not worth a region: rgp is reused for the next candidate */
				if(debug['R'])
					print("%L $%d: %B\n",
						r->prog->lineno, change, blsh(i));
				continue;
			}
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				warn(Z, "too many regions");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		if(debug['R']) {
			/* register numbers at or above NREG are printed as F registers */
			if(rgp->regno >= NREG)
				print("%L $%d F%d: %B\n",
					rgp->enter->prog->lineno,
					rgp->cost,
					rgp->regno-NREG,
					bit);
			else
				print("%L $%d R%d: %B\n",
					rgp->enter->prog->lineno,
					rgp->cost,
					rgp->regno,
					bit);
		}
		/* regno 0 means allreg found no register for this region */
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}
	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P'])
		peep();

	/*
	 * pass 8
	 * recalculate pc
	 */
	val = initpc;
	for(r = firstr; r != R; r = r1) {
		r->pc = val;
		p = r->prog;
		p1 = P;
		r1 = r->link;
		if(r1 != R)
			p1 = r1->prog;
		/* count every Prog up to the next Reg's Prog, except the listed pseudo-ops */
		for(; p != p1; p = p->link) {
			switch(p->as) {
			default:
				val++;
				break;

			case ANOP:
			case ADATA:
			case AGLOBL:
			case ANAME:
			case ASIGNAME:
				break;
			}
		}
	}
	pc = val;

	/*
	 * fix up branches
	 */
	if(debug['R'])
		if(bany(&addrs))
			print("addrs: %B\n", addrs);

	r1 = 0; /* set */
	for(r = firstr; r != R; r = r->link) {
		p = r->prog;
		/* NOTE(review): r->s2 is dereferenced unchecked; pass 2 leaves it unset after a diag — confirm */
		if(p->to.type == D_BRANCH)
			p->to.offset = r->s2->pc;
		r1 = r;
	}

	/*
	 * last pass
	 * eliminate nops
	 * free aux structures
	 */
	for(p = firstr->prog; p != P; p = p->link) {
		while(p->link && p->link->as == ANOP)
			p->link = p->link->link;
	}
	/* r1 is the last Reg: splice the whole list onto the front of freer */
	if(r1 != R) {
		r1->link = freer;
		freer = firstr;
	}
}
/*
 * mkvar returns the bit set describing operand a of the instruction
 * attached to r.
 *
 * Register operands are answered directly from the operand type:
 * D_REGREG/D_REGREG2 yield the bits of a->offset and a->reg, D_CONST/
 * D_REG/D_SHIFT yield RtoB(a->reg) and D_FREG yields FtoB(a->reg),
 * each only when the register is not NREG.  A D_OREG operand with a
 * register records it in r->use1 (left operand) or r->use2, and also
 * in r->set when the instruction's scond has C_PBIT or C_WBIT.
 *
 * Otherwise the operand must name a D_EXTERN, D_STATIC, D_AUTO or
 * D_PARAM whose node is an ONAME with an orig; it is looked up in
 * var[] by (node, name, offset, etype, width).  Entries of the same
 * node and name that overlap the operand get addr set, and the new
 * entry then inherits that flag.  A new entry's bit is also added to
 * externs or params according to the name class.
 *
 * zbits is returned for anything that is not tracked.
 */
Bits
mkvar(Reg *r, Adr *a)
{
	Var *vp;
	Node *node;
	Bits bit;
	int32 off;
	int idx, kind, nm, ety, k, width, clash;

	// mark registers used
	kind = a->type;
	clash = 0;

	switch(kind) {
	case D_NONE:
	case D_FCONST:
	case D_BRANCH:
		break;

	case D_REGREG:
	case D_REGREG2:
		bit = zbits;
		if(a->offset != NREG)
			bit.b[0] |= RtoB(a->offset);
		if(a->reg != NREG)
			bit.b[0] |= RtoB(a->reg);
		return bit;

	case D_CONST:
	case D_REG:
	case D_SHIFT:
		if(a->reg == NREG)
			break;
		bit = zbits;
		bit.b[0] = RtoB(a->reg);
		return bit;

	case D_FREG:
		if(a->reg == NREG)
			break;
		bit = zbits;
		bit.b[0] = FtoB(a->reg);
		return bit;

	case D_OREG:
		if(a->reg == NREG)
			break;
		if(a == &r->f.prog->from)
			r->use1.b[0] |= RtoB(a->reg);
		else
			r->use2.b[0] |= RtoB(a->reg);
		if(r->f.prog->scond & (C_PBIT|C_WBIT))
			r->set.b[0] |= RtoB(a->reg);
		break;

	default:
		print("type %d %d %D\n", kind, a->name, a);
		return zbits;
	}

	// only named storage classes are tracked as variables
	nm = a->name;
	if(nm != D_EXTERN && nm != D_STATIC && nm != D_AUTO && nm != D_PARAM)
		return zbits;

	node = a->node;
	if(node == N || node->op != ONAME || node->orig == N)
		return zbits;
	node = node->orig;
	if(node->orig != node)
		fatal("%D: bad node", a);
	if(node->sym == S || node->sym->name[0] == '.')
		return zbits;

	ety = a->etype;
	off = a->offset;
	width = a->width;
	if(width < 0)
		fatal("bad width %d for %D", width, a);

	for(idx = 0; idx < nvar; idx++) {
		vp = var+idx;
		if(vp->node != node || vp->name != nm)
			continue;

		// exact match, and no overlap seen so far: reuse this entry
		if(vp->offset == off && vp->etype == ety && vp->width == width && !clash)
			return blsh(idx);

		// if they overlap, disable both
		if(overlap(vp->offset, vp->width, off, width)) {
			vp->addr = 1;
			clash = 1;
		}
	}

	if(ety == 0 || ety == TFUNC)
		return zbits;

	if(nvar >= NVAR) {
		if(debug['w'] > 1 && node)
			fatal("variable not optimized: %D", a);
		return zbits;
	}

	// enter a new variable
	idx = nvar++;
	vp = var+idx;
	vp->node = node;
	vp->name = nm;
	vp->offset = off;
	vp->etype = ety;
	vp->width = width;
	vp->addr = clash;  // funny punning

	if(debug['R'])
		print("bit=%2d et=%2E w=%d+%d %#N %D flag=%d\n", idx, ety, off, width, node, a, vp->addr);

	bit = blsh(idx);
	if(nm == D_EXTERN || nm == D_STATIC)
		for(k = 0; k < BITS; k++)
			externs.b[k] |= bit.b[k];
	if(nm == D_PARAM)
		for(k = 0; k < BITS; k++)
			params.b[k] |= bit.b[k];

	return bit;
}
/*
 * regopt runs the register optimizer over the instruction list that
 * starts at firstp.
 *
 * The work is organised as numbered passes:
 *   pass 1   build the flow graph and record, per instruction, which
 *            variables/registers are used and set (via proginfo/mkvar)
 *   pass 2   find looping structure (flowrpo)
 *   pass 3   propagate usage backward until nothing changes (prop)
 *   pass 4   propagate register/variable synchrony forward (synch)
 *   pass 4.5 move the register pseudo-variable bits into r->regu
 *   pass 5   isolate regions and compute their cost (paint1)
 *   pass 6   pick registers (paint2/allreg) and rewrite code (paint3)
 *   pass 7   peephole optimisation (skipped when debug['R'] is set
 *            without debug['P'])
 *   last     unlink NOPs and fix up AUTO/PARAM offsets inside a stack
 *            adjust region, then release the flow graph
 *
 * Returns early, without touching the code, when flowstart yields no
 * graph or the graph is empty.
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	Graph *g;
	int i, z;
	uint32 vreg;
	Bits bit;
	ProgInfo info;

	if(first) {
		fmtinstall('Q', Qconv);
		first = 0;
	}

	fixjmp(firstp);
	mergetemp(firstp);

	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code.  define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	nvar = NREGVAR;
	memset(var, 0, NREGVAR*sizeof var[0]);
	for(i=0; i<NREGVAR; i++) {
		if(regnodes[i] == N)
			regnodes[i] = newname(lookup(regname[i]));
		var[i].node = regnodes[i];
	}

	regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
		ovar.b[z] = 0;
	}

	// build list of return variables
	setoutvar();

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	g = flowstart(firstp, sizeof(Reg));
	if(g == nil)
		return;
	firstr = (Reg*)g->start;

	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		p = r->f.prog;
		proginfo(&info, p);

		// Avoid making variables for direct-called functions.
		if(p->as == ABL && p->to.type == D_EXTERN)
			continue;

		bit = mkvar(r, &p->from);
		if(info.flags & LeftRead)
			for(z=0; z<BITS; z++)
				r->use1.b[z] |= bit.b[z];
		if(info.flags & LeftAddr)
			setaddrs(bit);

		// the middle register operand is a float register exactly
		// when the left operand is one
		if(info.flags & RegRead) {
			if(p->from.type != D_FREG)
				r->use1.b[0] |= RtoB(p->reg);
			else
				r->use1.b[0] |= FtoB(p->reg);
		}

		if(info.flags & (RightAddr | RightRead | RightWrite)) {
			bit = mkvar(r, &p->to);
			if(info.flags & RightAddr)
				setaddrs(bit);
			if(info.flags & RightRead)
				for(z=0; z<BITS; z++)
					r->use2.b[z] |= bit.b[z];
			if(info.flags & RightWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}
	}
	if(firstr == R)
		return;

	// collect every variable whose addr flag is set into addrs
	for(i=0; i<nvar; i++) {
		Var *v = var+i;
		if(v->addr) {
			bit = blsh(i);
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
		}

		if(debug['R'] && debug['v'])
			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
				i, v->addr, v->etype, v->width, v->node, v->offset);
	}

	if(debug['R'] && debug['v'])
		dumpit("pass1", &firstr->f, 1);

	/*
	 * pass 2
	 * find looping structure
	 */
	flowrpo(g);

	if(debug['R'] && debug['v'])
		dumpit("pass2", &firstr->f, 1);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		if(r->f.prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = (Reg*)r->f.link;
		if(r1 && r1->f.active && !r->f.active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	if(debug['R'] && debug['v'])
		dumpit("pass3", &firstr->f, 1);

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	addsplits();

	if(debug['R'] && debug['v'])
		dumpit("pass4", &firstr->f, 1);

	if(debug['R'] > 1) {
		print("\nprop structure:\n");
		for(r = firstr; r != R; r = (Reg*)r->f.link) {
			print("%d:%P", r->f.loop, r->f.prog);
			for(z=0; z<BITS; z++) {
				bit.b[z] = r->set.b[z] |
					r->refahead.b[z] | r->calahead.b[z] |
					r->refbehind.b[z] | r->calbehind.b[z] |
					r->use1.b[z] | r->use2.b[z];
				bit.b[z] &= ~addrs.b[z];
			}

			if(bany(&bit)) {
				print("\t");
				if(bany(&r->use1))
					print(" u1=%Q", r->use1);
				if(bany(&r->use2))
					print(" u2=%Q", r->use2);
				if(bany(&r->set))
					print(" st=%Q", r->set);
				if(bany(&r->refahead))
					print(" ra=%Q", r->refahead);
				if(bany(&r->calahead))
					print(" ca=%Q", r->calahead);
				if(bany(&r->refbehind))
					print(" rb=%Q", r->refbehind);
				if(bany(&r->calbehind))
					print(" cb=%Q", r->calbehind);
			}
			print("\n");
		}
	}

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;

		r->set.b[0] &= ~REGBITS;
		r->use1.b[0] &= ~REGBITS;
		r->use2.b[0] &= ~REGBITS;
		r->refbehind.b[0] &= ~REGBITS;
		r->refahead.b[0] &= ~REGBITS;
		r->calbehind.b[0] &= ~REGBITS;
		r->calahead.b[0] &= ~REGBITS;
		r->regdiff.b[0] &= ~REGBITS;
		r->act.b[0] &= ~REGBITS;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass4.5", &firstr->f, 1);

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		// logical AND: both conditions are truth values, matching the
		// "set and not used" test below
		if(bany(&bit) && !r->f.refset) {
			// should never happen - all variables are preset
			if(debug['w'])
				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
		}
	}

	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit) && !r->f.refset) {
			if(debug['w'])
				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
			excise(&r->f);
		}
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			change = 0;
			if(debug['R'] > 1)
				print("\n");
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			// paint1 accumulates the region's cost in change;
			// regions that do not pay off are dropped
			if(change <= 0) {
				if(debug['R'])
					print("%L $%d: %Q\n",
						r->f.prog->lineno, change, blsh(i));
				continue;
			}
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				if(debug['R'] > 1)
					print("too many regions\n");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	if(debug['R'] && debug['v'])
		dumpit("pass5", &firstr->f, 1);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		if(debug['R']) {
			if(rgp->regno >= NREG)
				print("%L $%d F%d: %Q\n",
					rgp->enter->f.prog->lineno,
					rgp->cost,
					rgp->regno-NREG,
					bit);
			else
				print("%L $%d R%d: %Q\n",
					rgp->enter->f.prog->lineno,
					rgp->cost,
					rgp->regno,
					bit);
		}
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass6", &firstr->f, 1);

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P']) {
		peep(firstp);
	}

	if(debug['R'] && debug['v'])
		dumpit("pass7", &firstr->f, 1);

	/*
	 * last pass
	 * eliminate nops
	 * free aux structures
	 * adjust the stack pointer
	 *	MOVW.W 	R1,-12(R13)			<<- start
	 *	MOVW   	R0,R1
	 *	MOVW   	R1,8(R13)
	 *	MOVW   	$0,R1
	 *	MOVW   	R1,4(R13)
	 *	BL     	,runtime.newproc+0(SB)
	 *	MOVW   	&ft+-32(SP),R7			<<- adjust
	 *	MOVW   	&j+-40(SP),R6			<<- adjust
	 *	MOVW   	autotmp_0003+-24(SP),R5		<<- adjust
	 *	MOVW   	$12(R13),R13			<<- finish
	 */
	vreg = 0;	// nonzero while inside an adjust region
	for(p = firstp; p != P; p = p->link) {
		while(p->link != P && p->link->as == ANOP)
			p->link = p->link->link;
		if(p->to.type == D_BRANCH)
			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
				p->to.u.branch = p->to.u.branch->link;
		if(p->as == AMOVW && p->to.reg == 13) {
			if(p->scond & C_WBIT) {
				vreg = -p->to.offset;		// in adjust region
				continue;
			}
			if(p->from.type == D_CONST && p->to.type == D_REG) {
				if(p->from.offset != vreg)
					print("in and out different\n");
				vreg = 0;	// done adjust region
				continue;
			}
		}

		if(p->as == AMOVW && vreg != 0) {
			if(p->from.sym != S)
			if(p->from.name == D_AUTO || p->from.name == D_PARAM)
				p->from.offset += vreg;
			if(p->to.sym != S)
			if(p->to.name == D_AUTO || p->to.name == D_PARAM)
				p->to.offset += vreg;
		}
	}

	flowend(g);
}
/*
 * configure -- compute the initial placement.
 *
 * The routine declares no locals and takes no arguments; every name it
 * reads or writes is defined outside this block.  In order it:
 *   1. calls config1();
 *   2. positions the pads (carray[numcells+1 .. numcells+numterms]) whose
 *      padside is B, T, L and R, shifting the rows to make room
 *      (presumably bottom/top/left/right -- confirm against the padside
 *      definitions);
 *   3. shifts macro blocks and the remaining pads by the same amounts;
 *   4. re-centres the core by zxshift/2 and zyshift/2;
 *   5. replaces barray with one block per row segment (two when a row has
 *      endx1 > 0) and frees the old array;
 *   6. places cells: first those with a preassigned block, then the rest
 *      into whichever block currently has the most free length.
 *
 * Note on the pad loops: the inner scans advance the same `pad` variable
 * as the enclosing loop, and the placement scan restarts at numcells + 1
 * and runs to the end of the pad range, so each outer loop finishes after
 * handling the first pad of its side.
 */
configure()
{
    config1() ;

    xshift = 0 ;
    yshift = 0 ;
    zxshift = 0 ;
    zyshift = 0 ;

    /*
     * B-side pads: count the consecutive run, take the tallest height
     * and the summed width.
     */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == B ) {
            count = 1 ;
            height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
            width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
            for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                if( carray[pad]->padside != B ) {
                    break ;
                }
                count++ ;
                if( carray[pad]->tileptr->top - carray[pad]->tileptr->bottom > height ) {
                    height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                }
                width += carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
            }
            maxHeight = height ;
            if( fixLRBT[2] == 0 ) {
                /* spread the pads evenly; a negative gap means the pads
                 * are wider than the core, so remember the overhang */
                space = coreWidth - width ;
                separation = space / (count + 1) ;
                if( separation < 0 ) {
                    separation = 0 ;
                    zxshift = ABS( space ) ;
                }
            } else {
                space = ( coreWidth >= width ) ? coreWidth : width ;
            }
            /*
             * Shift all rows up by maxHeight + extraSpace
             */
            yshift = maxHeight + extraSpace ;
            for( row = 1 ; row <= numRows ; row++ ) {
                rowArray[row].ypos += yshift ;
            }
            /* rescan from the first pad and assign B-side coordinates */
            for( pad = numcells + 1; pad <= numcells + numterms; pad++){
                if( carray[pad]->padside == B ) {
                    height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                    width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                    if( fixLRBT[2] == 0 ) {
                        carray[pad]->cxcenter = separation + width / 2 ;
                    } else {
                        carray[pad]->cxcenter = padspace[ pad - numcells ] * space ;
                    }
                    carray[pad]->cycenter = maxHeight - height / 2 - 1 ;
                    /* `last` is the running right edge; it is only
                     * consumed in the fixLRBT[2] == 0 branch */
                    last = separation + width ;
                    for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                        if( carray[pad]->padside != B ) {
                            break ;
                        }
                        height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                        width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                        if( fixLRBT[2] == 0 ) {
                            carray[pad]->cxcenter = last + separation + width / 2 ;
                        } else {
                            carray[pad]->cxcenter = padspace[ pad - numcells ] * space ;
                        }
                        carray[pad]->cycenter = maxHeight - height / 2 - 1 ;
                        last += separation + width ;
                    }
                }
            }
        }
    }

    /*
     * T-side pads: same scheme as the B side, but rows are not shifted
     * and zxshift keeps the larger of the two overhangs.
     */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == T ) {
            count = 1 ;
            width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
            for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                if( carray[pad]->padside != T ) {
                    break ;
                }
                count++ ;
                width += carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
            }
            if( fixLRBT[3] == 0 ) {
                space = coreWidth - width ;
                separation = space / (count + 1) ;
                if( separation < 0 ) {
                    separation = 0 ;
                    if( ABS( space ) > zxshift ) {
                        zxshift = ABS( space ) ;
                    }
                }
            } else {
                space = (coreWidth >= width ) ? coreWidth : width ;
            }
            /*
             * Keep all pads above rowArray[numRows].ypos +
             * rowHeight/2 + extraSpace + mttshift
             */
            coreTop = rowArray[numRows].ypos + rowHeight / 2 + extraSpace + mttshift ;
            for( pad = numcells + 1; pad <= numcells + numterms; pad++){
                if( carray[pad]->padside == T ) {
                    height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                    width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                    if( fixLRBT[3] == 0 ) {
                        carray[pad]->cxcenter = separation + width / 2 ;
                    } else {
                        carray[pad]->cxcenter = padspace[ pad - numcells ] * space ;
                    }
                    carray[pad]->cycenter = coreTop + height / 2 ;
                    last = separation + width ;
                    for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                        if( carray[pad]->padside != T ) {
                            break ;
                        }
                        height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                        width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                        if( fixLRBT[3] == 0 ) {
                            carray[pad]->cxcenter = last + separation + width / 2 ;
                        } else {
                            carray[pad]->cxcenter = padspace[ pad - numcells ] * space ;
                        }
                        carray[pad]->cycenter = coreTop + height / 2 ;
                        last += separation + width ;
                    }
                }
            }
        }
    }

    /*
     * L-side pads: the roles of width and height swap -- widest pad and
     * summed height -- and the rows are shifted in x.
     */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == L ) {
            count = 1 ;
            height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
            width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
            for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                if( carray[pad]->padside != L ) {
                    break ;
                }
                count++ ;
                if( carray[pad]->tileptr->right - carray[pad]->tileptr->left > width ) {
                    width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                }
                height += carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
            }
            maxWidth = width ;
            if( fixLRBT[0] == 0 ) {
                space = coreHeight - height ;
                separation = space / (count + 1) ;
                if( separation < 0 ) {
                    separation = 0 ;
                    zyshift = ABS( space ) ;
                }
            } else {
                space = (coreHeight >= height ) ? coreHeight : height ;
            }
            /*
             * Shift all rows right by maxWidth + extraSpace
             */
            xshift = maxWidth + extraSpace ;
            for( row = 1 ; row <= numRows ; row++ ) {
                rowArray[row].startx += xshift ;
                rowArray[row].endx += xshift ;
                /* endx1 > 0 marks a row split into two segments */
                if( rowArray[row].endx1 > 0 ) {
                    rowArray[row].endx1 += xshift ;
                    rowArray[row].startx2 += xshift ;
                }
            }
            for( pad = numcells + 1; pad <= numcells + numterms; pad++){
                if( carray[pad]->padside == L ) {
                    height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                    width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                    if( fixLRBT[0] == 0 ) {
                        carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 + separation + height / 2 - mbbshift ;
                    } else {
                        carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 - mbbshift + padspace[ pad - numcells ] * space ;
                    }
                    carray[pad]->cxcenter = maxWidth - width / 2 - 1 ;
                    last = rowArray[1].ypos - rowHeight / 2 + separation + height - mbbshift ;
                    for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                        if( carray[pad]->padside != L ) {
                            break ;
                        }
                        height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                        width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                        if( fixLRBT[0] == 0 ) {
                            carray[pad]->cycenter = last + separation + height / 2 ;
                        } else {
                            carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 - mbbshift + padspace[ pad - numcells ] * space ;
                        }
                        carray[pad]->cxcenter = maxWidth - width / 2 - 1 ;
                        last += separation + height ;
                    }
                }
            }
        }
    }

    /*
     * R-side pads: placed just beyond coreRite; zyshift keeps the larger
     * of the L and R overhangs.
     */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == R ) {
            count = 1 ;
            height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
            for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                if( carray[pad]->padside != R ) {
                    break ;
                }
                count++ ;
                height += carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
            }
            if( fixLRBT[1] == 0 ) {
                space = coreHeight - height ;
                separation = space / (count + 1) ;
                if( separation < 0 ) {
                    separation = 0 ;
                    if( ABS( space ) > zyshift ) {
                        zyshift = ABS( space ) ;
                    }
                }
            } else {
                space = (coreHeight >= height) ? coreHeight : height ;
            }
            coreRite = coreWidth + xshift + extraSpace ;
            for( pad = numcells + 1; pad <= numcells + numterms; pad++){
                if( carray[pad]->padside == R ) {
                    height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                    width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                    if( fixLRBT[1] == 0 ) {
                        carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 + separation + height / 2 - mbbshift ;
                    } else {
                        carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 - mbbshift + padspace[ pad - numcells ] * space ;
                    }
                    carray[pad]->cxcenter = coreRite + width / 2 ;
                    last = rowArray[1].ypos - rowHeight / 2 + separation + height - mbbshift ;
                    for( pad++ ; pad <= numcells + numterms ; pad++ ) {
                        if( carray[pad]->padside != R ) {
                            break ;
                        }
                        height = carray[pad]->tileptr->top - carray[pad]->tileptr->bottom ;
                        width = carray[pad]->tileptr->right - carray[pad]->tileptr->left ;
                        if( fixLRBT[1] == 0 ) {
                            carray[pad]->cycenter = last + separation + height / 2 ;
                        } else {
                            carray[pad]->cycenter = rowArray[1].ypos - rowHeight / 2 - mbbshift + padspace[ pad - numcells ] * space ;
                        }
                        carray[pad]->cxcenter = coreRite + width / 2 ;
                        last += separation + height ;
                    }
                }
            }
        }
    }

    /*
     * Now it's time to shift the macro blocks by the same amount
     * the rows were shifted above during pad placement
     */
    for( macro = 1 ; macro <= numMacro ; macro++ ) {
        pad = padArray[macro] ;
        carray[pad]->cxcenter = macroArray[macro].mx + xshift ;
        carray[pad]->cycenter = macroArray[macro].my + yshift + mbbshift ;
    }
    /* B/T pads move in x only; MTT/MBB pads move in both directions */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == B || carray[pad]->padside == T ) {
            carray[pad]->cxcenter += xshift ;
        } else if( carray[pad]->padside == MTT || carray[pad]->padside == MBB ) {
            carray[pad]->cxcenter += xshift ;
            carray[pad]->cycenter += yshift ;
        }
    }

    /*
     * Shift all rows right by zxshift / 2 ( center the core
     * for pad limited cases )
     */
    for( row = 1 ; row <= numRows ; row++ ) {
        rowArray[row].startx += zxshift / 2 ;
        rowArray[row].endx += zxshift / 2 ;
        if( rowArray[row].endx1 > 0 ) {
            rowArray[row].endx1 += zxshift / 2 ;
            rowArray[row].startx2 += zxshift / 2 ;
        }
    }
    for( macro = 1 ; macro <= numMacro ; macro++ ) {
        pad = padArray[macro] ;
        carray[pad]->cxcenter += zxshift / 2 ;
    }
    /* R pads take the full zxshift, MTT/MBB pads half of it */
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == R ) {
            carray[pad]->cxcenter += zxshift ;
        } else if( carray[pad]->padside == MTT || carray[pad]->padside == MBB ) {
            carray[pad]->cxcenter += zxshift / 2 ;
        }
    }

    /*
     * Shift all rows up by zyshift / 2 ( center the core for
     * pad limited cases )
     */
    /* NOTE(review): yshift is reassigned here but not read again in this
     * routine -- confirm whether other code depends on this value */
    yshift = maxHeight + rowHeight + extraSpace ;
    for( row = 1 ; row <= numRows ; row++ ) {
        rowArray[row].ypos += zyshift / 2 ;
    }
    for( macro = 1 ; macro <= numMacro ; macro++ ) {
        pad = padArray[macro] ;
        carray[pad]->cycenter += zyshift / 2 ;
    }
    for( pad = numcells + 1 ; pad <= numcells + numterms ; pad++ ) {
        if( carray[pad]->padside == T ) {
            carray[pad]->cycenter += zyshift ;
        } else if( carray[pad]->padside == MTT || carray[pad]->padside == MBB ) {
            carray[pad]->cycenter += zyshift / 2 ;
        }
    }

    /*
     * Now it's time to blast away the old barray and
     * generate the new one.
     */
    /*
     * Find the number of blocks
     */
    block = 0 ;
    for( row = 1 ; row <= numRows ; row++ ) {
        block++ ;
        if( rowArray[row].endx1 > 0 ) {
            block++ ;
        }
    }
    totalBlock = block ;
    oldbarray = barray ;
    /* arrays are indexed from 1, hence the extra slot */
    blkToRow = (int *) safe_malloc( (totalBlock + 1) * sizeof(int) ) ;
    barray = (BBOXPTR *) safe_malloc( (totalBlock + 1) * sizeof(BBOXPTR) ) ;
    for( block = 1 ; block <= totalBlock ; block++ ) {
        barray[block] = (BBOXPTR) safe_malloc( sizeof(BBOX) ) ;
    }
    block = 0 ;
    for( row = 1 ; row <= numRows ; row++ ) {
        boxptr = oldbarray[row] ;
        if( rowArray[row].endx1 > 0 ) {
            /* split row: first segment spans startx .. endx1 */
            block++ ;
            blkToRow[block] = row ;
            barray[block]->bxcenter = (rowArray[row].startx + rowArray[row].endx1 ) / 2 ;
            barray[block]->bycenter = rowArray[row].ypos ;
            barray[block]->bleft = rowArray[row].startx - barray[block]->bxcenter ;
            barray[block]->bright = rowArray[row].endx1 - barray[block]->bxcenter ;
            barray[block]->bbottom = boxptr->bbottom ;
            barray[block]->btop = boxptr->btop ;
            barray[block]->bheight = boxptr->btop - boxptr->bbottom ;
            barray[block]->blength = rowArray[row].endx1 - rowArray[row].startx ;
            barray[block]->bclass = boxptr->bclass ;
            barray[block]->borient = boxptr->borient ;
            barray[block]->oldsize = 0 ;
            /* second segment spans startx2 .. endx */
            block++ ;
            blkToRow[block] = row ;
            barray[block]->bxcenter = (rowArray[row].startx2 + rowArray[row].endx ) / 2 ;
            barray[block]->bycenter = rowArray[row].ypos ;
            barray[block]->bleft = rowArray[row].startx2 - barray[block]->bxcenter ;
            barray[block]->bright = rowArray[row].endx - barray[block]->bxcenter ;
            barray[block]->bbottom = boxptr->bbottom ;
            barray[block]->btop = boxptr->btop ;
            barray[block]->bheight = boxptr->btop - boxptr->bbottom ;
            barray[block]->blength = rowArray[row].endx - rowArray[row].startx2 ;
            barray[block]->bclass = boxptr->bclass ;
            barray[block]->borient = boxptr->borient ;
            /* divide the row's desired length between the two segments
             * in proportion to their lengths */
            ratio = (double) barray[block - 1]->blength / (double) (barray[block - 1]->blength + barray[block]->blength) ;
            barray[block]->desire = 1 + (int)((1.0 - ratio) * rowArray[row].desiredL) ;
            barray[block - 1]->desire = 1 + (int)(ratio * rowArray[row].desiredL) ;
            barray[block]->oldsize = 0 ;
        } else {
            /* unsplit row: one block spanning startx .. endx */
            block++ ;
            blkToRow[block] = row ;
            barray[block]->bxcenter = (rowArray[row].startx + rowArray[row].endx ) / 2 ;
            barray[block]->bycenter = rowArray[row].ypos ;
            barray[block]->bleft = rowArray[row].startx - barray[block]->bxcenter ;
            barray[block]->bright = rowArray[row].endx - barray[block]->bxcenter ;
            barray[block]->bbottom = boxptr->bbottom ;
            barray[block]->btop = boxptr->btop ;
            barray[block]->bheight = boxptr->btop - boxptr->bbottom ;
            barray[block]->blength = rowArray[row].endx - rowArray[row].startx ;
            barray[block]->bclass = boxptr->bclass ;
            barray[block]->borient = boxptr->borient ;
            barray[block]->desire = rowArray[row].desiredL ;
            barray[block]->oldsize = 0 ;
        }
    }

    /*
     * Now blow away the old block array, rowArray, macroArray
     */
    /* numblock still holds the old block count at this point */
    for( row = 1 ; row <= numblock ; row++ ) {
        safe_free( oldbarray[row] ) ;
    }
    safe_free( oldbarray ) ;
    numblock = totalBlock ;

    fprintf(fpo,"\n******************\nBLOCK DATA\n");
    desiredL = 0 ;
    for( block = 1 ; block <= numblock ; block++ ) {
        desiredL += barray[block]->desire ;
        fprintf(fpo,"block:%d desire:%d\n",block,barray[block]->desire);
        /*
        barray[block]->bright += 0.10 * barray[block]->blength ;
        barray[block]->blength += 0.10 * barray[block]->blength ;
        */
    }
    fprintf(fpo,"Total Desired Length: %d\n", desiredL ) ;
    ckt_size_factor = (double) desiredL ;

    /*
     * And now, folks, it's time for cell placement, that is,
     * real random cell placement
     */
    /* filledTo[b] / endRow[b]: absolute left and right x of block b */
    filledTo = (int *) safe_malloc( (numblock + 1) * sizeof( int ) ) ;
    endRow = (int *) safe_malloc( (numblock + 1) * sizeof( int ) ) ;
    for( row = 1 ; row <= numblock ; row++ ) {
        filledTo[row] = barray[row]->bxcenter + barray[row]->bleft ;
        endRow[row] = barray[row]->bxcenter + barray[row]->bright ;
    }

    /*
     * On first pass, place the cells with specific block assignments
     */
    for( cell = 1 ; cell <= numcells ; cell++ ) {
        cellptr = carray[cell] ;
        row = cellptr->cblock ;
        if( row == 0 ) {
            continue ;
        }
        /* cblock held a row number on entry; replace it with the block
         * index RtoB returns */
        blk = RtoB( row , cellptr->cycenter ) ;
        borient = barray[blk]->borient ;
        cellptr->cblock = blk ;
        width = cellptr->tileptr->right - cellptr->tileptr->left ;
        /* the sign of the incoming cycenter selects which block edge the
         * incoming cxcenter offset is measured from */
        if( cellptr->cycenter > 0 ) {
            carray[cell]->cxcenter = barray[blk]->bxcenter + barray[blk]->bleft + cellptr->cxcenter + width / 2 ;
            carray[cell]->cycenter = barray[blk]->bycenter ;
        } else {
            carray[cell]->cxcenter = barray[blk]->bxcenter + barray[blk]->bright + cellptr->cxcenter - (width - width / 2) ;
            carray[cell]->cycenter = barray[blk]->bycenter ;
        }
        if( borient == 2 ) {
            cellptr->corient = 1 ;
        } else { /* borient was 1 */
            cellptr->corient = 0 ;
        }
    }

    /*
     * Final pass, place all unassigned cells
     */
    for( cell = 1 ; cell <= numcells ; cell++ ) {
        if( carray[cell]->cblock != 0 ) {
            continue ;
        }
        /*
         * cute strategy: place cell in the most empty row
         */
        empty = -32000 ;
        for( row = 1 ; row <= numblock ; row++ ) {
            if( endRow[row] - filledTo[row] > empty ) {
                empty = endRow[row] - filledTo[row] ;
                block = row ;
            }
        }
        borient = barray[block]->borient ;
        width = carray[cell]->tileptr->right - carray[cell]->tileptr->left ;
        carray[cell]->cblock = block ;
        if( borient > 0 ) {
            carray[cell]->cxcenter = filledTo[block] + width / 2 ;
            carray[cell]->cycenter = barray[block]->bycenter ;
            if( borient == 2 ) {
                carray[cell]->corient = 1 ;
            } else { /* borient was 1 */
                carray[cell]->corient = 0 ;
            }
        } else {
            /* non-positive borient: the fill position is applied to
             * cycenter instead of cxcenter */
            carray[cell]->cycenter = filledTo[block] + width / 2 ;
            carray[cell]->cxcenter = barray[block]->bxcenter ;
            if( borient == -2 ) {
                carray[cell]->corient = 4 ;
            } else { /* borient was -1 */
                carray[cell]->corient = 7 ;
            }
        }
        filledTo[block] += width ;
    }
    /*
     * Ah ha, done
     */
    return ;
}
/*
 * regopt runs the register optimizer over the instruction list that
 * starts at firstp.
 *
 * Functions with 10000 or more instructions are left untouched.
 * Otherwise the work is organised as numbered passes:
 *   pass 1   allocate one Reg per instruction (rega), chain them from
 *            firstr to lastr, and record which variables and implicit
 *            registers each instruction uses and sets
 *   pass 2   turn branch targets into s2/p2 graph edges
 *   pass 2.5 find looping structure (loopit)
 *   pass 3   propagate usage backward until nothing changes (prop)
 *   pass 4   propagate register/variable synchrony forward (synch)
 *   pass 4.5 move the register pseudo-variable bits into r->regu
 *   pass 5   isolate regions and compute their cost (paint1)
 *   pass 6   pick registers (paint2/allreg) and rewrite code (paint3)
 *   pass 7   peephole optimisation (skipped when debug['R'] is set
 *            without debug['P'])
 *   finally  unlink NOPs, hand the Reg chain back to the free list and,
 *            under debug['R'], print and reset the ostats counters
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	int i, z, nr;
	uint32 vreg;
	Bits bit;

	if(first) {
		fmtinstall('Q', Qconv);
		exregoffset = D_DI;	// no externals
		first = 0;
	}

	fixjmp(firstp);

	// count instructions
	nr = 0;
	for(p=firstp; p!=P; p=p->link)
		nr++;
	// if too big dont bother
	if(nr >= 10000)
		return;

	r1 = R;
	firstr = R;
	lastr = R;

	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code.  define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	nvar = NREGVAR;
	memset(var, 0, NREGVAR*sizeof var[0]);
	for(i=0; i<NREGVAR; i++)
		var[i].node = newname(lookup(regname[i]));

	regbits = RtoB(D_SP);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
		ovar.b[z] = 0;
	}

	// build list of return variables
	setoutvar();

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	nr = 0;
	for(p=firstp; p!=P; p=p->link) {
		// pseudo-instructions get no Reg
		switch(p->as) {
		case ADATA:
		case AGLOBL:
		case ANAME:
		case ASIGNAME:
			continue;
		}
		r = rega();
		nr++;
		if(firstr == R) {
			firstr = r;
			lastr = r;
		} else {
			lastr->link = r;
			r->p1 = lastr;
			lastr->s1 = r;
			lastr = r;
		}
		r->prog = p;
		p->reg = r;

		// no fall-through edge out of an instruction that never
		// continues to its textual successor
		r1 = r->p1;
		if(r1 != R) {
			switch(r1->prog->as) {
			case ARET:
			case AJMP:
			case AIRETL:
				r->p1 = R;
				r1->s1 = R;
			}
		}

		bit = mkvar(r, &p->from);
		if(bany(&bit))
		switch(p->as) {
		/*
		 * funny
		 */
		case ALEAL:
		case AFMOVL:
		case AFMOVW:
		case AFMOVV:
			setaddrs(bit);
			break;

		/*
		 * left side read
		 */
		default:
			for(z=0; z<BITS; z++)
				r->use1.b[z] |= bit.b[z];
			break;

		/*
		 * left side read+write
		 */
		case AXCHGB:
		case AXCHGW:
		case AXCHGL:
			for(z=0; z<BITS; z++) {
				r->use1.b[z] |= bit.b[z];
				r->set.b[z] |= bit.b[z];
			}
			break;
		}

		bit = mkvar(r, &p->to);
		if(bany(&bit))
		switch(p->as) {
		default:
			yyerror("reg: unknown op: %A", p->as);
			break;

		/*
		 * right side read
		 */
		case ACMPB:
		case ACMPL:
		case ACMPW:
		case ATESTB:
		case ATESTL:
		case ATESTW:
			for(z=0; z<BITS; z++)
				r->use2.b[z] |= bit.b[z];
			break;

		/*
		 * right side write
		 */
		case AFSTSW:
		case ALEAL:
		case ANOP:
		case AMOVL:
		case AMOVB:
		case AMOVW:
		case AMOVBLSX:
		case AMOVBLZX:
		case AMOVBWSX:
		case AMOVBWZX:
		case AMOVWLSX:
		case AMOVWLZX:
		case APOPL:
			for(z=0; z<BITS; z++)
				r->set.b[z] |= bit.b[z];
			break;

		/*
		 * right side read+write
		 */
		case AINCB:
		case AINCL:
		case AINCW:
		case ADECB:
		case ADECL:
		case ADECW:

		case AADDB:
		case AADDL:
		case AADDW:
		case AANDB:
		case AANDL:
		case AANDW:
		case ASUBB:
		case ASUBL:
		case ASUBW:
		case AORB:
		case AORL:
		case AORW:
		case AXORB:
		case AXORL:
		case AXORW:
		case ASALB:
		case ASALL:
		case ASALW:
		case ASARB:
		case ASARL:
		case ASARW:
		case ARCLB:
		case ARCLL:
		case ARCLW:
		case ARCRB:
		case ARCRL:
		case ARCRW:
		case AROLB:
		case AROLL:
		case AROLW:
		case ARORB:
		case ARORL:
		case ARORW:
		case ASHLB:
		case ASHLL:
		case ASHLW:
		case ASHRB:
		case ASHRL:
		case ASHRW:
		case AIMULL:
		case AIMULW:
		case ANEGB:
		case ANEGL:
		case ANEGW:
		case ANOTB:
		case ANOTL:
		case ANOTW:
		case AADCL:
		case ASBBL:

		case ASETCC:
		case ASETCS:
		case ASETEQ:
		case ASETGE:
		case ASETGT:
		case ASETHI:
		case ASETLE:
		case ASETLS:
		case ASETLT:
		case ASETMI:
		case ASETNE:
		case ASETOC:
		case ASETOS:
		case ASETPC:
		case ASETPL:
		case ASETPS:

		case AXCHGB:
		case AXCHGW:
		case AXCHGL:
			for(z=0; z<BITS; z++) {
				r->set.b[z] |= bit.b[z];
				r->use2.b[z] |= bit.b[z];
			}
			break;

		/*
		 * funny
		 */
		case AFMOVDP:
		case AFMOVFP:
		case AFMOVLP:
		case AFMOVVP:
		case AFMOVWP:
		case ACALL:
			setaddrs(bit);
			break;
		}

		// registers that instructions use or set implicitly
		switch(p->as) {
		case AIMULL:
		case AIMULW:
			if(p->to.type != D_NONE)
				break;
			/* fallthrough: one-operand form behaves like MUL */

		case AIDIVL:
		case AIDIVW:
		case ADIVL:
		case ADIVW:
		case AMULL:
		case AMULW:
			r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
			r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX);
			break;

		case AIDIVB:
		case AIMULB:
		case ADIVB:
		case AMULB:
			r->set.b[0] |= RtoB(D_AX);
			r->use1.b[0] |= RtoB(D_AX);
			break;

		case ACWD:
			r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX);
			r->use1.b[0] |= RtoB(D_AX);
			break;

		case ACDQ:
			r->set.b[0] |= RtoB(D_DX);
			r->use1.b[0] |= RtoB(D_AX);
			break;

		case AREP:
		case AREPN:
		case ALOOP:
		case ALOOPEQ:
		case ALOOPNE:
			r->set.b[0] |= RtoB(D_CX);
			r->use1.b[0] |= RtoB(D_CX);
			break;

		case AMOVSB:
		case AMOVSL:
		case AMOVSW:
		case ACMPSB:
		case ACMPSL:
		case ACMPSW:
			r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI);
			r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI);
			break;

		case ASTOSB:
		case ASTOSL:
		case ASTOSW:
		case ASCASB:
		case ASCASL:
		case ASCASW:
			r->set.b[0] |= RtoB(D_DI);
			r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI);
			break;

		case AINSB:
		case AINSL:
		case AINSW:
			r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI);
			r->use1.b[0] |= RtoB(D_DI);
			break;

		case AOUTSB:
		case AOUTSL:
		case AOUTSW:
			r->set.b[0] |= RtoB(D_DI);
			r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI);
			break;
		}
	}
	if(firstr == R)
		return;

	// collect every variable whose addr flag is set into addrs
	for(i=0; i<nvar; i++) {
		Var *v = var+i;
		if(v->addr) {
			bit = blsh(i);
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
		}
	}

	if(debug['R'] && debug['v'])
		dumpit("pass1", firstr);

	/*
	 * pass 2
	 * turn branch references to pointers
	 * build back pointers
	 */
	for(r=firstr; r!=R; r=r->link) {
		p = r->prog;
		if(p->to.type == D_BRANCH) {
			if(p->to.branch == P)
				fatal("pnil %P", p);
			r1 = p->to.branch->reg;
			if(r1 == R)
				fatal("rnil %P", p);
			// a branch to itself adds no edge
			if(r1 == r)
				continue;
			r->s2 = r1;
			r->p2link = r1->p2;
			r1->p2 = r;
		}
	}

	if(debug['R'] && debug['v'])
		dumpit("pass2", firstr);

	/*
	 * pass 2.5
	 * find looping structure
	 */
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	change = 0;
	loopit(firstr, nr);

	if(debug['R'] && debug['v'])
		dumpit("pass2.5", firstr);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	for(r = firstr; r != R; r = r->link)
		if(r->prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = r->link;
		if(r1 && r1->active && !r->active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	if(debug['R'] && debug['v'])
		dumpit("pass3", firstr);

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = r->link)
		r->active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	if(debug['R'] && debug['v'])
		dumpit("pass4", firstr);

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	for(r = firstr; r != R; r = r->link) {
		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;

		r->set.b[0] &= ~REGBITS;
		r->use1.b[0] &= ~REGBITS;
		r->use2.b[0] &= ~REGBITS;
		r->refbehind.b[0] &= ~REGBITS;
		r->refahead.b[0] &= ~REGBITS;
		r->calbehind.b[0] &= ~REGBITS;
		r->calahead.b[0] &= ~REGBITS;
		r->regdiff.b[0] &= ~REGBITS;
		r->act.b[0] &= ~REGBITS;
	}

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit) && !r->refset) {
			// should never happen - all variables are preset
			if(debug['w'])
				print("%L: used and not set: %Q\n", r->prog->lineno, bit);
			r->refset = 1;
		}
	}
	for(r = firstr; r != R; r = r->link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = r->link) {
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit) && !r->refset) {
			if(debug['w'])
				print("%L: set and not used: %Q\n", r->prog->lineno, bit);
			r->refset = 1;
			excise(r);
		}
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			change = 0;
			paint1(r, i);
			bit.b[i/32] &= ~(1L<<(i%32));
			// paint1 accumulates the region's cost in change;
			// regions that do not pay off are dropped
			if(change <= 0)
				continue;
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				if(debug['R'] && debug['v'])
					print("too many regions\n");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		if(rgp->regno != 0)
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		rgp++;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass6", firstr);

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P']) {
		peep();
	}

	/*
	 * eliminate nops
	 * free aux structures
	 */
	for(p=firstp; p!=P; p=p->link) {
		while(p->link != P && p->link->as == ANOP)
			p->link = p->link->link;
		if(p->to.type == D_BRANCH)
			while(p->to.branch != P && p->to.branch->as == ANOP)
				p->to.branch = p->to.branch->link;
	}

	// Return the whole firstr..lastr chain to the free list.  The test
	// must use lastr: r1 is always R here, because the pass 3 scan
	// leaves it at the link of the final Reg.
	if(lastr != R) {
		lastr->link = freer;
		freer = firstr;
	}

	if(debug['R']) {
		if(ostats.ncvtreg ||
			ostats.nspill ||
			ostats.nreload ||
			ostats.ndelmov ||
			ostats.nvar ||
			ostats.naddr ||
			0)
			print("\nstats\n");

		if(ostats.ncvtreg)
			print(" %4d cvtreg\n", ostats.ncvtreg);
		if(ostats.nspill)
			print(" %4d spill\n", ostats.nspill);
		if(ostats.nreload)
			print(" %4d reload\n", ostats.nreload);
		if(ostats.ndelmov)
			print(" %4d delmov\n", ostats.ndelmov);
		if(ostats.nvar)
			print(" %4d var\n", ostats.nvar);
		if(ostats.naddr)
			print(" %4d addr\n", ostats.naddr);

		memset(&ostats, 0, sizeof(ostats));
	}
}
/*
 * regopt runs the register-optimization passes over the instruction
 * list that starts at firstp.
 *
 * The passes, in order:
 *	1   build the flow graph and record per-instruction use/set bits
 *	2   find looping structure
 *	3   propagate usage backward until nothing changes
 *	4   propagate register/variable synchrony forward until nothing changes
 *	4.5 move the register pseudo-variable bits into r->regu
 *	5   isolate regions and compute their costs (paint1)
 *	6   pick registers (paint2/allreg) and rewrite the code (paint3)
 *	7   peephole optimization
 * followed by unlinking ANOP instructions and printing statistics.
 *
 * If flowstart returns nil the function returns right after the
 * setup steps, before any of the numbered passes run.
 *
 * The function works through file-level state (var, nvar, regbits,
 * externs/params/consts/addrs/ovar, firstr, region, nregion, rgp,
 * change, ostats), so the statement order below matters.
 */
void
regopt(Prog *firstp)
{
	Reg *r, *r1;
	Prog *p;
	Graph *g;
	ProgInfo info;
	int i, z;
	uint32 vreg;
	Bits bit;

	// One-time setup, guarded by the file-level flag first.
	if(first) {
		fmtinstall('Q', Qconv);
		exregoffset = D_R15;
		first = 0;
	}

	mergetemp(firstp);

	/*
	 * control flow is more complicated in generated go code
	 * than in generated c code.  define pseudo-variables for
	 * registers, so we have complete register usage information.
	 */
	nvar = NREGVAR;
	memset(var, 0, NREGVAR*sizeof var[0]);
	for(i=0; i<NREGVAR; i++) {
		// regnodes[] entries are created on first use and kept afterwards.
		if(regnodes[i] == N)
			regnodes[i] = newname(lookup(regname[i]));
		var[i].node = regnodes[i];
	}

	// regbits starts with only SP's bit set.
	// NOTE(review): presumably this keeps SP out of allocation - confirm.
	regbits = RtoB(D_SP);
	for(z=0; z<BITS; z++) {
		externs.b[z] = 0;
		params.b[z] = 0;
		consts.b[z] = 0;
		addrs.b[z] = 0;
		ovar.b[z] = 0;
	}

	// build list of return variables
	setoutvar();

	/*
	 * pass 1
	 * build aux data structure
	 * allocate pcs
	 * find use and set of variables
	 */
	g = flowstart(firstp, sizeof(Reg));
	if(g == nil)
		return;
	firstr = (Reg*)g->start;

	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		p = r->f.prog;
		if(p->as == AVARDEF)
			continue;
		proginfo(&info, p);

		// Avoid making variables for direct-called functions.
		if(p->as == ACALL && p->to.type == D_EXTERN)
			continue;

		// Register bits live in word 0 of the bit vectors.
		r->use1.b[0] |= info.reguse | info.regindex;
		r->set.b[0] |= info.regset;

		// Left (from) operand: apply the address/read/write flags.
		bit = mkvar(r, &p->from);
		if(bany(&bit)) {
			if(info.flags & LeftAddr)
				setaddrs(bit);
			if(info.flags & LeftRead)
				for(z=0; z<BITS; z++)
					r->use1.b[z] |= bit.b[z];
			if(info.flags & LeftWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}

		// Right (to) operand: same, but reads go to use2.
		bit = mkvar(r, &p->to);
		if(bany(&bit)) {
			if(info.flags & RightAddr)
				setaddrs(bit);
			if(info.flags & RightRead)
				for(z=0; z<BITS; z++)
					r->use2.b[z] |= bit.b[z];
			if(info.flags & RightWrite)
				for(z=0; z<BITS; z++)
					r->set.b[z] |= bit.b[z];
		}
	}

	// Collect every variable marked addr into the addrs bit set.
	for(i=0; i<nvar; i++) {
		Var *v = var+i;
		if(v->addr) {
			bit = blsh(i);
			for(z=0; z<BITS; z++)
				addrs.b[z] |= bit.b[z];
		}

		if(debug['R'] && debug['v'])
			print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", i, v->addr, v->etype, v->width, v->node, v->offset);
	}

	if(debug['R'] && debug['v'])
		dumpit("pass1", &firstr->f, 1);

	/*
	 * pass 2
	 * find looping structure
	 */
	flowrpo(g);

	if(debug['R'] && debug['v'])
		dumpit("pass2", &firstr->f, 1);

	/*
	 * pass 3
	 * iterate propagating usage
	 * 	back until flow graph is complete
	 */
loop1:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		if(r->f.prog->as == ARET)
			prop(r, zbits, zbits);
loop11:
	/* pick up unreachable code */
	i = 0;
	for(r = firstr; r != R; r = r1) {
		r1 = (Reg*)r->f.link;
		// An inactive node directly before an active one was not
		// reached by the propagation above; start from it too.
		if(r1 && r1->f.active && !r->f.active) {
			prop(r, zbits, zbits);
			i = 1;
		}
	}
	if(i)
		goto loop11;
	if(change)
		goto loop1;

	if(debug['R'] && debug['v'])
		dumpit("pass3", &firstr->f, 1);

	/*
	 * pass 4
	 * iterate propagating register/variable synchrony
	 * 	forward until graph is complete
	 */
loop2:
	change = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->f.active = 0;
	synch(firstr, zbits);
	if(change)
		goto loop2;

	if(debug['R'] && debug['v'])
		dumpit("pass4", &firstr->f, 1);

	/*
	 * pass 4.5
	 * move register pseudo-variables into regu.
	 */
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;

		// Clear the register bits from every per-node bit vector.
		r->set.b[0] &= ~REGBITS;
		r->use1.b[0] &= ~REGBITS;
		r->use2.b[0] &= ~REGBITS;
		r->refbehind.b[0] &= ~REGBITS;
		r->refahead.b[0] &= ~REGBITS;
		r->calbehind.b[0] &= ~REGBITS;
		r->calahead.b[0] &= ~REGBITS;
		r->regdiff.b[0] &= ~REGBITS;
		r->act.b[0] &= ~REGBITS;
	}

	/*
	 * pass 5
	 * isolate regions
	 * calculate costs (paint1)
	 */
	r = firstr;
	if(r) {
		for(z=0; z<BITS; z++)
			bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
			  ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
		if(bany(&bit) && !r->f.refset) {
			// should never happen - all variables are preset
			if(debug['w'])
				print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
		}
	}
	for(r = firstr; r != R; r = (Reg*)r->f.link)
		r->act = zbits;
	rgp = region;
	nregion = 0;
	for(r = firstr; r != R; r = (Reg*)r->f.link) {
		// Variables set here but not referenced ahead (and not address-taken).
		for(z=0; z<BITS; z++)
			bit.b[z] = r->set.b[z] &
			  ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
		if(bany(&bit) && !r->f.refset) {
			if(debug['w'])
				print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
			r->f.refset = 1;
			excise(&r->f);
		}

		// Start a candidate region for each remaining LOAD(r) variable.
		for(z=0; z<BITS; z++)
			bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
		while(bany(&bit)) {
			i = bnum(bit);
			rgp->enter = r;
			rgp->varno = i;
			change = 0;
			paint1(r, i);
			// Unsigned constant: shifting a signed 1L by 31 is undefined
			// behaviour when long is 32 bits wide.
			bit.b[i/32] &= ~(1UL<<(i%32));
			// Keep the region only if paint1 left a positive cost in change.
			if(change <= 0)
				continue;
			rgp->cost = change;
			nregion++;
			if(nregion >= NRGN) {
				if(debug['R'] && debug['v'])
					print("too many regions\n");
				goto brk;
			}
			rgp++;
		}
	}
brk:
	qsort(region, nregion, sizeof(region[0]), rcmp);

	if(debug['R'] && debug['v'])
		dumpit("pass5", &firstr->f, 1);

	/*
	 * pass 6
	 * determine used registers (paint2)
	 * replace code (paint3)
	 */
	rgp = region;
	for(i=0; i<nregion; i++) {
		// NOTE(review): bit is not read again in this function after
		// this assignment; kept in case blsh has side effects.
		bit = blsh(rgp->varno);
		vreg = paint2(rgp->enter, rgp->varno);
		vreg = allreg(vreg, rgp);
		// allreg leaves rgp->regno at 0 when it assigns no register.
		if(rgp->regno != 0) {
			if(debug['R'] && debug['v']) {
				Var *v;

				v = var + rgp->varno;
				print("registerize %N+%lld (bit=%2d et=%2E) in %R\n",
						v->node, v->offset, rgp->varno, v->etype, rgp->regno);
			}
			paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
		}
		rgp++;
	}

	if(debug['R'] && debug['v'])
		dumpit("pass6", &firstr->f, 1);

	/*
	 * free aux structures. peep allocates new ones.
	 */
	flowend(g);
	firstr = R;

	/*
	 * pass 7
	 * peep-hole on basic block
	 */
	if(!debug['R'] || debug['P'])
		peep(firstp);

	/*
	 * eliminate nops
	 */
	for(p=firstp; p!=P; p=p->link) {
		// Unlink ANOPs that follow p, and retarget branches past ANOPs.
		while(p->link != P && p->link->as == ANOP)
			p->link = p->link->link;
		if(p->to.type == D_BRANCH)
			while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
				p->to.u.branch = p->to.u.branch->link;
	}

	// Print any non-zero counters, then reset them for the next call.
	if(debug['R']) {
		if(ostats.ncvtreg ||
		   ostats.nspill ||
		   ostats.nreload ||
		   ostats.ndelmov ||
		   ostats.nvar ||
		   ostats.naddr ||
		   0)
			print("\nstats\n");

		if(ostats.ncvtreg)
			print("	%4d cvtreg\n", ostats.ncvtreg);
		if(ostats.nspill)
			print("	%4d spill\n", ostats.nspill);
		if(ostats.nreload)
			print("	%4d reload\n", ostats.nreload);
		if(ostats.ndelmov)
			print("	%4d delmov\n", ostats.ndelmov);
		if(ostats.nvar)
			print("	%4d var\n", ostats.nvar);
		if(ostats.naddr)
			print("	%4d addr\n", ostats.naddr);

		memset(&ostats, 0, sizeof(ostats));
	}
}
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; int i, z, nr; uint32 vreg; Bits bit; if(first == 0) { fmtinstall('Q', Qconv); } first++; if(debug['K']) { if(first != 13) return; // debug['R'] = 2; // debug['P'] = 2; print("optimizing %S\n", curfn->nname->sym); } // count instructions nr = 0; for(p=firstp; p!=P; p=p->link) nr++; // if too big dont bother if(nr >= 10000) { // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); return; } r1 = R; firstr = R; lastr = R; nvar = 0; regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ nr = 0; for(p=firstp; p != P; p = p->link) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); nr++; if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; p->regp = r; r1 = r->p1; if(r1 != R) { switch(r1->prog->as) { case ARET: case AB: case ARFE: r->p1 = R; r1->s1 = R; } } /* * left side always read */ bit = mkvar(r, &p->from); for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; /* * right side depends on opcode */ bit = mkvar(r, &p->to); if(bany(&bit)) switch(p->as) { default: yyerror("reg: unknown op: %A", p->as); break; /* * right side write */ case ANOP: case AMOVB: case AMOVBU: case AMOVH: case AMOVHU: case AMOVW: case AMOVF: case AMOVD: for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; break; /* * funny */ case ABL: setaddrs(bit); break; } if(p->as == AMOVM) { z = p->to.offset; if(p->from.type == D_CONST) z = p->from.offset; for(i=0; z; i++) { if(z&1) regbits |= RtoB(i); z >>= 1; } } }