void nilopt(Prog *firstp) { NilFlow *r; Prog *p; Graph *g; int ncheck, nkill; g = flowstart(firstp, sizeof(NilFlow)); if(g == nil) return; if(debug_checknil > 1 /* || strcmp(curfn->nname->sym->name, "f1") == 0 */) dumpit("nilopt", g->start, 0); ncheck = 0; nkill = 0; for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { p = r->f.prog; if(p->as != ACHECKNIL || !regtyp(&p->from)) continue; ncheck++; if(stackaddr(&p->from)) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed nil check of SP address"); r->kill = 1; continue; } nilwalkfwd(r); if(r->kill) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed nil check before indirect"); continue; } nilwalkback(r); if(r->kill) { if(debug_checknil && p->lineno > 1) warnl(p->lineno, "removed repeated nil check"); continue; } } for(r = (NilFlow*)g->start; r != nil; r = (NilFlow*)r->f.link) { if(r->kill) { nkill++; excise(&r->f); } } flowend(g); if(debug_checknil > 1) print("%S: removed %d of %d nil checks\n", curfn->nname->sym, nkill, ncheck); }
static void conprop(Reg *r0) { Reg *r; Prog *p, *p0; int t; Adr *v0; p0 = r0->prog; v0 = &p0->to; r = r0; loop: r = uniqs(r); if(r == R || r == r0) return; if(uniqp(r) == R) return; p = r->prog; t = copyu(p, v0, A); switch(t) { case 0: // miss case 1: // use goto loop; case 2: // rar case 4: // use and set break; case 3: // set if(p->as == p0->as) if(p->from.type == p0->from.type) if(p->from.sym == p0->from.sym) if(p->from.offset == p0->from.offset) if(p->from.scale == p0->from.scale) if(p->from.dval == p0->from.dval) if(p->from.index == p0->from.index) { excise(r); t++; goto loop; } break; } }
static void conprop(Flow *r0) { Flow *r; Prog *p, *p0; int t; Adr *v0; p0 = r0->prog; v0 = &p0->to; r = r0; loop: r = uniqs(r); if(r == nil || r == r0) return; if(uniqp(r) == nil) return; p = r->prog; t = copyu(p, v0, nil); switch(t) { case 0: // miss case 1: // use goto loop; case 2: // rar case 4: // use and set break; case 3: // set if(p->as == p0->as) if(p->from.type == p0->from.type) if(p->from.node == p0->from.node) if(p->from.offset == p0->from.offset) if(p->from.scale == p0->from.scale) if(p->from.type == D_FCONST && p->from.u.dval == p0->from.u.dval) if(p->from.index == p0->from.index) { excise(r); goto loop; } break; } }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVW || p->as == AFMOVF || p->as == AFMOVD) if(regtyp(&p->to)) { if(regtyp(&p->from)) if(p->from.type == p->to.type) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } if(regzer(&p->from)) if(p->to.type == D_REG) { p->from.type = D_REG; p->from.reg = 0; if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } } } if(t) goto loop1; /* * look for MOVB x,R; MOVB R,R */ for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { default: continue; case AMOVH: case AMOVHU: case AMOVB: case AMOVBU: if(p->to.type != D_REG) continue; break; } r1 = r->link; if(r1 == R) continue; p1 = r1->prog; if(p1->as != p->as) continue; if(p1->from.type != D_REG || p1->from.reg != p->to.reg) continue; if(p1->to.type != D_REG || p1->to.reg != p->to.reg) continue; excise(r1); } }
int xtramodes(Reg *r, Adr *a) { Reg *r1, *r2, *r3; Prog *p, *p1; Adr v; p = r->prog; if(debug['h'] && p->as == AMOVB && p->from.type == D_OREG) /* byte load */ return 0; v = *a; v.type = D_REG; r1 = findpre(r, &v); if(r1 != R) { p1 = r1->prog; if(p1->to.type == D_REG && p1->to.reg == v.reg) switch(p1->as) { case AADD: if(p1->from.type == D_REG || (p1->from.type == D_SHIFT && (p1->from.offset&(1<<4)) == 0 && (p->as != AMOVB || (a == &p->from && (p1->from.offset&~0xf) == 0))) || (p1->from.type == D_CONST && p1->from.offset > -4096 && p1->from.offset < 4096)) if(nochange(uniqs(r1), r, p1)) { if(a != &p->from || v.reg != p->to.reg) if (finduse(r->s1, &v)) { if(p1->reg == NREG || p1->reg == v.reg) /* pre-indexing */ p->scond |= C_WBIT; else return 0; } switch (p1->from.type) { case D_REG: /* register offset */ a->type = D_SHIFT; a->offset = p1->from.reg; break; case D_SHIFT: /* scaled register offset */ a->type = D_SHIFT; case D_CONST: /* immediate offset */ a->offset = p1->from.offset; break; } if(p1->reg != NREG) a->reg = p1->reg; excise(r1); return 1; } break; case AMOVW: if(p1->from.type == D_REG) if((r2 = findinc(r1, r, &p1->from)) != R) { for(r3=uniqs(r2); r3->prog->as==ANOP; r3=uniqs(r3)) ; if(r3 == r) { /* post-indexing */ p1 = r2->prog; a->reg = p1->to.reg; a->offset = p1->from.offset; p->scond |= C_PBIT; if(!finduse(r, &r1->prog->to)) excise(r1); excise(r2); return 1; } } break; } } if(a != &p->from || a->reg != p->to.reg) if((r1 = findinc(r, R, &v)) != R) { /* post-indexing */ p1 = r1->prog; a->offset = p1->from.offset; p->scond |= C_PBIT; excise(r1); return 1; } return 0; }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == ASLL || p->as == ASRL || p->as == ASRA) { /* * elide shift into D_SHIFT operand of subsequent instruction */ if(shiftprop(r)) { excise(r); t++; } } if(p->as == AMOVW || p->as == AMOVF || p->as == AMOVD) if(regtyp(&p->to)) { if(p->from.type == D_CONST) constprop(&p->from, &p->to, r->s1); else if(regtyp(&p->from)) if(p->from.type == p->to.type) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } } } if(t) goto loop1; /* * look for MOVB x,R; MOVB R,R */ for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { default: continue; case AEOR: /* * EOR -1,x,y => MVN x,y */ if(p->from.type == D_CONST && p->from.offset == -1) { p->as = AMVN; p->from.type = D_REG; if(p->reg != NREG) p->from.reg = p->reg; else p->from.reg = p->to.reg; p->reg = NREG; } continue; case AMOVH: case AMOVHU: case AMOVB: case AMOVBU: if(p->to.type != D_REG) continue; break; } r1 = r->link; if(r1 == R) continue; p1 = r1->prog; if(p1->as != p->as) continue; if(p1->from.type != D_REG || p1->from.reg != p->to.reg) continue; if(p1->to.type != D_REG || p1->to.reg != p->to.reg) continue; excise(r1); } for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVW: case AMOVB: case AMOVBU: if(p->from.type == D_OREG && p->from.offset == 0) xtramodes(r, &p->from); else if(p->to.type == D_OREG && p->to.offset == 0) xtramodes(r, &p->to); else continue; break; case ACMP: /* * elide CMP $0,x if calculation of x can set condition codes */ if(p->from.type != D_CONST || p->from.offset != 0) continue; r2 = r->s1; if(r2 == R) continue; t = r2->prog->as; switch(t) { default: continue; case ABEQ: case ABNE: case ABMI: case ABPL: break; case ABGE: t = ABPL; break; case ABLT: t = ABMI; break; case ABHI: t = ABNE; break; case ABLS: t = ABEQ; break; } r1 = r; do r1 = uniqp(r1); while (r1 != R && r1->prog->as == ANOP); if(r1 == R) continue; p1 = r1->prog; if(p1->to.type != D_REG) continue; if(p1->to.reg != p->reg) if(!(p1->as == AMOVW && p1->from.type == D_REG && p1->from.reg == p->reg)) continue; switch(p1->as) { default: continue; case AMOVW: if(p1->from.type != D_REG) continue; case AAND: case AEOR: case AORR: case ABIC: case AMVN: case ASUB: case ARSB: case AADD: case AADC: case ASBC: case ARSC: break; } p1->scond |= C_SBIT; r2->prog->as = t; excise(r); continue; } } predicate(); }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } }
/* This function is called once for every file system object that fts encounters. fts performs a depth-first traversal. A directory is usually processed twice, first with fts_info == FTS_D, and later, after all of its entries have been processed, with FTS_DP. Return RM_ERROR upon error, RM_USER_DECLINED for a negative response to an interactive prompt, and otherwise, RM_OK. */ static enum RM_status rm_fts (FTS *fts, FTSENT *ent, struct rm_options const *x) { switch (ent->fts_info) { case FTS_D: /* preorder directory */ if (! x->recursive && !(x->remove_empty_directories && is_empty_dir (fts->fts_cwd_fd, ent->fts_accpath))) { /* This is the first (pre-order) encounter with a directory that we cannot delete. Not recursive, and it's not an empty directory (if we're removing them) so arrange to skip contents. */ int err = x->remove_empty_directories ? ENOTEMPTY : EISDIR; error (0, err, _("cannot remove %s"), quote (ent->fts_path)); mark_ancestor_dirs (ent); fts_skip_tree (fts, ent); return RM_ERROR; } /* Perform checks that can apply only for command-line arguments. */ if (ent->fts_level == FTS_ROOTLEVEL) { if (strip_trailing_slashes (ent->fts_path)) ent->fts_pathlen = strlen (ent->fts_path); /* If the basename of a command line argument is "." or "..", diagnose it and do nothing more with that argument. */ if (dot_or_dotdot (last_component (ent->fts_accpath))) { error (0, 0, _("cannot remove directory: %s"), quote (ent->fts_path)); fts_skip_tree (fts, ent); return RM_ERROR; } /* If a command line argument resolves to "/" (and --preserve-root is in effect -- default) diagnose and skip it. */ if (ROOT_DEV_INO_CHECK (x->root_dev_ino, ent->fts_statp)) { ROOT_DEV_INO_WARN (ent->fts_path); fts_skip_tree (fts, ent); return RM_ERROR; } } { Ternary is_empty_directory; enum RM_status s = prompt (fts, ent, true /*is_dir*/, x, PA_DESCEND_INTO_DIR, &is_empty_directory); if (s == RM_OK && is_empty_directory == T_YES) { /* When we know (from prompt when in interactive mode) that this is an empty directory, don't prompt twice. */ s = excise (fts, ent, x, true); fts_skip_tree (fts, ent); } if (s != RM_OK) { mark_ancestor_dirs (ent); fts_skip_tree (fts, ent); } return s; } case FTS_F: /* regular file */ case FTS_NS: /* stat(2) failed */ case FTS_SL: /* symbolic link */ case FTS_SLNONE: /* symbolic link without target */ case FTS_DP: /* postorder directory */ case FTS_DNR: /* unreadable directory */ case FTS_NSOK: /* e.g., dangling symlink */ case FTS_DEFAULT: /* none of the above */ { /* With --one-file-system, do not attempt to remove a mount point. fts' FTS_XDEV ensures that we don't process any entries under the mount point. */ if (ent->fts_info == FTS_DP && x->one_file_system && FTS_ROOTLEVEL < ent->fts_level && ent->fts_statp->st_dev != fts->fts_dev) { mark_ancestor_dirs (ent); error (0, 0, _("skipping %s, since it's on a different device"), quote (ent->fts_path)); return RM_ERROR; } bool is_dir = ent->fts_info == FTS_DP || ent->fts_info == FTS_DNR; enum RM_status s = prompt (fts, ent, is_dir, x, PA_REMOVE_DIR, NULL); if (s != RM_OK) return s; return excise (fts, ent, x, is_dir); } case FTS_DC: /* directory that causes cycles */ emit_cycle_warning (ent->fts_path); fts_skip_tree (fts, ent); return RM_ERROR; case FTS_ERR: /* Various failures, from opendir to ENOMEM, to failure to "return" to preceding directory, can provoke this. */ error (0, ent->fts_errno, _("traversal failed: %s"), quote (ent->fts_path)); fts_skip_tree (fts, ent); return RM_ERROR; default: error (0, 0, _("unexpected failure: fts_info=%d: %s\n" "please report to %s"), ent->fts_info, quote (ent->fts_path), PACKAGE_BUGREPORT); abort (); } }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: case ALOCALS: case ATYPE: p = p->link; } } // byte, word arithmetic elimination. elimshortmov(r); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } }
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; Graph *g; int i, z; uint32 vreg; Bits bit; ProgInfo info; if(first) { fmtinstall('Q', Qconv); first = 0; } fixjmp(firstp); mergetemp(firstp); /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) { if(regnodes[i] == N) regnodes[i] = newname(lookup(regname[i])); var[i].node = regnodes[i]; } regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ g = flowstart(firstp, sizeof(Reg)); if(g == nil) return; firstr = (Reg*)g->start; for(r = firstr; r != R; r = (Reg*)r->f.link) { p = r->f.prog; proginfo(&info, p); // Avoid making variables for direct-called functions. if(p->as == ABL && p->to.type == D_EXTERN) continue; bit = mkvar(r, &p->from); if(info.flags & LeftRead) for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; if(info.flags & LeftAddr) setaddrs(bit); if(info.flags & RegRead) { if(p->from.type != D_FREG) r->use1.b[0] |= RtoB(p->reg); else r->use1.b[0] |= FtoB(p->reg); } if(info.flags & (RightAddr | RightRead | RightWrite)) { bit = mkvar(r, &p->to); if(info.flags & RightAddr) setaddrs(bit); if(info.flags & RightRead) for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; if(info.flags & RightWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } } if(firstr == R) return; for(i=0; i<nvar; i++) { Var *v = var+i; if(v->addr) { bit = blsh(i); for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; } if(debug['R'] && debug['v']) print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", i, v->addr, v->etype, v->width, v->node, v->offset); } if(debug['R'] && debug['v']) dumpit("pass1", &firstr->f, 1); /* * pass 2 * find looping structure */ flowrpo(g); if(debug['R'] && debug['v']) dumpit("pass2", &firstr->f, 1); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) if(r->f.prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = (Reg*)r->f.link; if(r1 && r1->f.active && !r->f.active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; if(debug['R'] && debug['v']) dumpit("pass3", &firstr->f, 1); /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; synch(firstr, zbits); if(change) goto loop2; addsplits(); if(debug['R'] && debug['v']) dumpit("pass4", &firstr->f, 1); if(debug['R'] > 1) { print("\nprop structure:\n"); for(r = firstr; r != R; r = (Reg*)r->f.link) { print("%d:%P", r->f.loop, r->f.prog); for(z=0; z<BITS; z++) { bit.b[z] = r->set.b[z] | r->refahead.b[z] | r->calahead.b[z] | r->refbehind.b[z] | r->calbehind.b[z] | r->use1.b[z] | r->use2.b[z]; bit.b[z] &= ~addrs.b[z]; } if(bany(&bit)) { print("\t"); if(bany(&r->use1)) print(" u1=%Q", r->use1); if(bany(&r->use2)) print(" u2=%Q", r->use2); if(bany(&r->set)) print(" st=%Q", r->set); if(bany(&r->refahead)) print(" ra=%Q", r->refahead); if(bany(&r->calahead)) print(" ca=%Q", r->calahead); if(bany(&r->refbehind)) print(" rb=%Q", r->refbehind); if(bany(&r->calbehind)) print(" cb=%Q", r->calbehind); } print("\n"); } } /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = (Reg*)r->f.link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } if(debug['R'] && debug['v']) dumpit("pass4.5", &firstr->f, 1); /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) & !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; } } for(r = firstr; r != R; r = (Reg*)r->f.link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) { for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->f.refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; excise(&r->f); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; if(debug['R'] > 1) print("\n"); paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) { if(debug['R']) print("%L $%d: %Q\n", r->f.prog->lineno, change, blsh(i)); continue; } rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] > 1) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); if(debug['R'] && debug['v']) dumpit("pass5", &firstr->f, 1); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(debug['R']) { if(rgp->regno >= NREG) print("%L $%d F%d: %Q\n", rgp->enter->f.prog->lineno, rgp->cost, rgp->regno-NREG, bit); else print("%L $%d R%d: %Q\n", rgp->enter->f.prog->lineno, rgp->cost, rgp->regno, bit); } if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } if(debug['R'] && debug['v']) dumpit("pass6", &firstr->f, 1); /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) { peep(firstp); } if(debug['R'] && debug['v']) dumpit("pass7", &firstr->f, 1); /* * last pass * eliminate nops * free aux structures * adjust the stack pointer * MOVW.W R1,-12(R13) <<- start * MOVW R0,R1 * MOVW R1,8(R13) * MOVW $0,R1 * MOVW R1,4(R13) * BL ,runtime.newproc+0(SB) * MOVW &ft+-32(SP),R7 <<- adjust * MOVW &j+-40(SP),R6 <<- adjust * MOVW autotmp_0003+-24(SP),R5 <<- adjust * MOVW $12(R13),R13 <<- finish */ vreg = 0; for(p = firstp; p != P; p = p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.u.branch != P && p->to.u.branch->as == ANOP) p->to.u.branch = p->to.u.branch->link; if(p->as == AMOVW && p->to.reg == 13) { if(p->scond & C_WBIT) { vreg = -p->to.offset; // in adjust region // print("%P adjusting %d\n", p, vreg); continue; } if(p->from.type == D_CONST && p->to.type == D_REG) { if(p->from.offset != vreg) print("in and out different\n"); // print("%P finish %d\n", p, vreg); vreg = 0; // done adjust region continue; } // print("%P %d %d from type\n", p, p->from.type, D_CONST); // print("%P %d %d to type\n\n", p, p->to.type, D_REG); } if(p->as == AMOVW && vreg != 0) { if(p->from.sym != S) if(p->from.name == D_AUTO || p->from.name == D_PARAM) { p->from.offset += vreg; // print("%P adjusting from %d %d\n", p, vreg, p->from.type); } if(p->to.sym != S) if(p->to.name == D_AUTO || p->to.name == D_PARAM) { p->to.offset += vreg; // print("%P adjusting to %d %d\n", p, vreg, p->from.type); } } } flowend(g); }
void peep(Prog *firstp) { Flow *r, *r1; Graph *g; Prog *p, *p1; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; for(r=g->start, t=0; r!=nil; r=r->link, t++) r->active = t; // byte, word arithmetic elimination. elimshortmov(g); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(g, r)) { excise(r); t++; } else if(subprop(r) && copyprop(g, r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } flowend(g); }
void mergetemp(Prog *firstp) { int i, j, nvar, ninuse, nfree, nkill; TempVar *var, *v, *v1, **bystart, **inuse; TempFlow *r; NodeList *l, **lp; Node *n; Prog *p, *p1; Type *t; ProgInfo info, info1; int32 gen; Graph *g; enum { Debug = 0 }; g = flowstart(firstp, sizeof(TempFlow)); if(g == nil) return; // Build list of all mergeable variables. nvar = 0; for(l = curfn->dcl; l != nil; l = l->next) if(canmerge(l->n)) nvar++; var = calloc(nvar*sizeof var[0], 1); nvar = 0; for(l = curfn->dcl; l != nil; l = l->next) { n = l->n; if(canmerge(n)) { v = &var[nvar++]; n->opt = v; v->node = n; } } // Build list of uses. // We assume that the earliest reference to a temporary is its definition. // This is not true of variables in general but our temporaries are all // single-use (that's why we have so many!). for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { p = r->f.prog; proginfo(&info, p); if(p->from.node != N && p->from.node->opt && p->to.node != N && p->to.node->opt) fatal("double node %P", p); if((n = p->from.node) != N && (v = n->opt) != nil || (n = p->to.node) != N && (v = n->opt) != nil) { if(v->def == nil) v->def = r; r->uselink = v->use; v->use = r; if(n == p->from.node && (info.flags & LeftAddr)) v->addr = 1; } } if(Debug > 1) dumpit("before", g->start, 0); nkill = 0; // Special case. for(v = var; v < var+nvar; v++) { if(v->addr) continue; // Used in only one instruction, which had better be a write. if((r = v->use) != nil && r->uselink == nil) { p = r->f.prog; proginfo(&info, p); if(p->to.node == v->node && (info.flags & RightWrite) && !(info.flags & RightRead)) { p->as = ANOP; p->to = zprog.to; v->removed = 1; if(Debug) print("drop write-only %S\n", v->node->sym); } else fatal("temp used and not set: %P", p); nkill++; continue; } // Written in one instruction, read in the next, otherwise unused, // no jumps to the next instruction. Happens mainly in 386 compiler. if((r = v->use) != nil && r->f.link == &r->uselink->f && r->uselink->uselink == nil && uniqp(r->f.link) == &r->f) { p = r->f.prog; proginfo(&info, p); p1 = r->f.link->prog; proginfo(&info1, p1); enum { SizeAny = SizeB | SizeW | SizeL | SizeQ | SizeF | SizeD, }; if(p->from.node == v->node && p1->to.node == v->node && (info.flags & Move) && !((info.flags|info1.flags) & (LeftAddr|RightAddr)) && (info.flags & SizeAny) == (info1.flags & SizeAny)) { p1->from = p->from; excise(&r->f); v->removed = 1; if(Debug) print("drop immediate-use %S\n", v->node->sym); } nkill++; continue; } } // Traverse live range of each variable to set start, end. // Each flood uses a new value of gen so that we don't have // to clear all the r->f.active words after each variable. gen = 0; for(v = var; v < var+nvar; v++) { gen++; for(r = v->use; r != nil; r = r->uselink) mergewalk(v, r, gen); } // Sort variables by start. bystart = malloc(nvar*sizeof bystart[0]); for(i=0; i<nvar; i++) bystart[i] = &var[i]; qsort(bystart, nvar, sizeof bystart[0], startcmp); // List of in-use variables, sorted by end, so that the ones that // will last the longest are the earliest ones in the array. // The tail inuse[nfree:] holds no-longer-used variables. // In theory we should use a sorted tree so that insertions are // guaranteed O(log n) and then the loop is guaranteed O(n log n). // In practice, it doesn't really matter. inuse = malloc(nvar*sizeof inuse[0]); ninuse = 0; nfree = nvar; for(i=0; i<nvar; i++) { v = bystart[i]; if(v->addr || v->removed) continue; // Expire no longer in use. while(ninuse > 0 && inuse[ninuse-1]->end < v->start) { v1 = inuse[--ninuse]; inuse[--nfree] = v1; } // Find old temp to reuse if possible. t = v->node->type; for(j=nfree; j<nvar; j++) { v1 = inuse[j]; if(eqtype(t, v1->node->type)) { inuse[j] = inuse[nfree++]; if(v1->merge) v->merge = v1->merge; else v->merge = v1; nkill++; break; } } // Sort v into inuse. j = ninuse++; while(j > 0 && inuse[j-1]->end < v->end) { inuse[j] = inuse[j-1]; j--; } inuse[j] = v; } if(Debug) { print("%S [%d - %d]\n", curfn->nname->sym, nvar, nkill); for(v=var; v<var+nvar; v++) { print("var %#N %T %lld-%lld", v->node, v->node->type, v->start, v->end); if(v->addr) print(" addr=1"); if(v->removed) print(" dead=1"); if(v->merge) print(" merge %#N", v->merge->node); if(v->start == v->end) print(" %P", v->def->f.prog); print("\n"); } if(Debug > 1) dumpit("after", g->start, 0); } // Update node references to use merged temporaries. for(r = (TempFlow*)g->start; r != nil; r = (TempFlow*)r->f.link) { p = r->f.prog; if((n = p->from.node) != N && (v = n->opt) != nil && v->merge != nil) p->from.node = v->merge->node; if((n = p->to.node) != N && (v = n->opt) != nil && v->merge != nil) p->to.node = v->merge->node; } // Delete merged nodes from declaration list. for(lp = &curfn->dcl; (l = *lp); ) { curfn->dcl->end = l; n = l->n; v = n->opt; if(v && (v->merge || v->removed)) { *lp = l->next; continue; } lp = &l->next; } // Clear aux structures. for(v=var; v<var+nvar; v++) v->node->opt = nil; free(var); free(bystart); free(inuse); flowend(g); }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->as == AMOVW || p->as == AFMOVS || p->as == AFMOVD) if(regtyp(&p->to)) { if(regtyp(&p->from)) if(p->from.type == p->to.type) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } if(regzer(&p->from)) if(p->to.type == D_REG) { p->from.type = D_REG; p->from.reg = REGZERO; if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } } } if(t) goto loop1; /* * look for MOVB x,R; MOVB R,R */ for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { default: continue; case AMOVH: case AMOVHZ: case AMOVB: case AMOVBZ: if(p->to.type != D_REG) continue; break; } r1 = r->link; if(r1 == R) continue; p1 = r1->prog; if(p1->as != p->as) continue; if(p1->from.type != D_REG || p1->from.reg != p->to.reg) continue; if(p1->to.type != D_REG || p1->to.reg != p->to.reg) continue; excise(r1); } if(debug['Q'] > 1) return; /* allow following code improvement to be suppressed */ /* * look for OP x,y,R; CMP R, $0 -> OPCC x,y,R * when OP can set condition codes correctly */ for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ACMP: if(!regzer(&p->to)) continue; r1 = r->s1; if(r1 == R) continue; switch(r1->prog->as) { default: continue; case ABCL: case ABC: /* the conditions can be complex and these are currently little used */ continue; case ABEQ: case ABGE: case ABGT: case ABLE: case ABLT: case ABNE: case ABVC: case ABVS: break; } r1 = r; do r1 = uniqp(r1); while (r1 != R && r1->prog->as == ANOP); if(r1 == R) continue; p1 = r1->prog; if(p1->to.type != D_REG || p1->to.reg != p->from.reg) continue; switch(p1->as) { case ASUB: case AADD: case AXOR: case AOR: /* irregular instructions */ if(p1->from.type == D_CONST) continue; break; } switch(p1->as) { default: continue; case AMOVW: if(p1->from.type != D_REG) continue; continue; case AANDCC: case AANDNCC: case AORCC: case AORNCC: case AXORCC: case ASUBCC: case AADDCC: t = p1->as; break; /* don't deal with floating point instructions for now */ /* case AFABS: t = AFABSCC; break; case AFADD: t = AFADDCC; break; case AFADDS: t = AFADDSCC; break; case AFCTIW: t = AFCTIWCC; break; case AFCTIWZ: t = AFCTIWZCC; break; case AFDIV: t = AFDIVCC; break; case AFDIVS: t = AFDIVSCC; break; case AFMADD: t = AFMADDCC; break; case AFMADDS: t = AFMADDSCC; break; case AFMOVD: t = AFMOVDCC; break; case AFMSUB: t = AFMSUBCC; break; case AFMSUBS: t = AFMSUBSCC; break; case AFMUL: t = AFMULCC; break; case AFMULS: t = AFMULSCC; break; case AFNABS: t = AFNABSCC; break; case AFNEG: t = AFNEGCC; break; case AFNMADD: t = AFNMADDCC; break; case AFNMADDS: t = AFNMADDSCC; break; case AFNMSUB: t = AFNMSUBCC; break; case AFNMSUBS: t = AFNMSUBSCC; break; case AFRSP: t = AFRSPCC; break; case AFSUB: t = AFSUBCC; break; case AFSUBS: t = AFSUBSCC; break; case ACNTLZW: t = ACNTLZWCC; break; case AMTFSB0: t = AMTFSB0CC; break; case AMTFSB1: t = AMTFSB1CC; break; */ case AADD: t = AADDCC; break; case AADDV: t = AADDVCC; break; case AADDC: t = AADDCCC; break; case AADDCV: t = AADDCVCC; break; case AADDME: t = AADDMECC; break; case AADDMEV: t = AADDMEVCC; break; case AADDE: t = AADDECC; break; case AADDEV: t = AADDEVCC; break; case AADDZE: t = AADDZECC; break; case AADDZEV: t = AADDZEVCC; break; case AAND: t = AANDCC; break; case AANDN: t = AANDNCC; break; case ADIVW: t = ADIVWCC; break; case ADIVWV: t = ADIVWVCC; break; case ADIVWU: t = ADIVWUCC; break; case ADIVWUV: t = ADIVWUVCC; break; case AEQV: t = AEQVCC; break; case AEXTSB: t = AEXTSBCC; break; case AEXTSH: t = AEXTSHCC; break; case AMULHW: t = AMULHWCC; break; case AMULHWU: t = AMULHWUCC; break; case AMULLW: t = AMULLWCC; break; case AMULLWV: t = AMULLWVCC; break; case ANAND: t = ANANDCC; break; case ANEG: t = ANEGCC; break; case ANEGV: t = ANEGVCC; break; case ANOR: t = ANORCC; break; case AOR: t = AORCC; break; case AORN: t = AORNCC; break; case AREM: t = AREMCC; break; case AREMV: t = AREMVCC; break; case AREMU: t = AREMUCC; break; case AREMUV: t = AREMUVCC; break; case ARLWMI: t = ARLWMICC; break; case ARLWNM: t = ARLWNMCC; break; case ASLW: t = ASLWCC; break; case ASRAW: t = ASRAWCC; break; case ASRW: t = ASRWCC; break; case ASUB: t = ASUBCC; break; case ASUBV: t = ASUBVCC; break; case ASUBC: t = ASUBCCC; break; case ASUBCV: t = ASUBCVCC; break; case ASUBME: t = ASUBMECC; break; case ASUBMEV: t = ASUBMEVCC; break; case ASUBE: t = ASUBECC; break; case ASUBEV: t = ASUBEVCC; break; case ASUBZE: t = ASUBZECC; break; case ASUBZEV: t = ASUBZEVCC; break; case AXOR: t = AXORCC; break; break; } if(debug['Q']) print("cmp %P; %P -> ", p1, p); p1->as = t; if(debug['Q']) print("%P\n", p1); excise(r); continue; } } }
void peep(Prog *firstp) { Flow *r; Graph *g; Prog *p; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; for(r=g->start, t=0; r!=nil; r=r->link, t++) r->active = t; loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ASLL: case ASRL: case ASRA: /* * elide shift into D_SHIFT operand of subsequent instruction */ // if(shiftprop(r)) { // excise(r); // t++; // break; // } break; case AMOVB: case AMOVH: case AMOVW: case AMOVF: case AMOVD: if(regtyp(&p->from)) if(p->from.type == p->to.type) if(p->scond == C_SCOND_NONE) { if(copyprop(g, r)) { excise(r); t++; break; } if(subprop(r) && copyprop(g, r)) { excise(r); t++; break; } } break; case AMOVHS: case AMOVHU: case AMOVBS: case AMOVBU: if(p->from.type == D_REG) { if(shortprop(r)) t++; } break; #ifdef NOTDEF if(p->scond == C_SCOND_NONE) if(regtyp(&p->to)) if(isdconst(&p->from)) { constprop(&p->from, &p->to, r->s1); } break; #endif } } if(t) goto loop1; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AEOR: /* * EOR -1,x,y => MVN x,y */ if(isdconst(&p->from) && p->from.offset == -1) { p->as = AMVN; p->from.type = D_REG; if(p->reg != NREG) p->from.reg = p->reg; else p->from.reg = p->to.reg; p->reg = NREG; } break; } } for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVW: case AMOVB: case AMOVBS: case AMOVBU: if(p->from.type == D_OREG && p->from.offset == 0) xtramodes(g, r, &p->from); else if(p->to.type == D_OREG && p->to.offset == 0) xtramodes(g, r, &p->to); else continue; break; // case ACMP: // /* // * elide CMP $0,x if calculation of x can set condition codes // */ // if(isdconst(&p->from) || p->from.offset != 0) // continue; // r2 = r->s1; // if(r2 == nil) // continue; // t = r2->prog->as; // switch(t) { // default: // continue; // case ABEQ: // case ABNE: // case ABMI: // case ABPL: // break; // case ABGE: // t = ABPL; // break; // case ABLT: // t = ABMI; // break; // case ABHI: // t = ABNE; // break; // case ABLS: // t = ABEQ; // break; // } // r1 = r; // do // r1 = uniqp(r1); // while (r1 != nil && r1->prog->as == ANOP); // if(r1 == nil) // continue; // p1 = r1->prog; // if(p1->to.type != D_REG) // continue; // if(p1->to.reg != p->reg) // if(!(p1->as == AMOVW && p1->from.type == D_REG && p1->from.reg == p->reg)) // continue; // // switch(p1->as) { // default: // continue; // case AMOVW: // if(p1->from.type != D_REG) // continue; // case AAND: // case AEOR: // case AORR: // case ABIC: // case AMVN: // case ASUB: // case ARSB: // case AADD: // case AADC: // case ASBC: // case ARSC: // break; // } // p1->scond |= C_SBIT; // r2->prog->as = t; // excise(r); // continue; } } // predicate(g); flowend(g); }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; p1 = nil; USED(p1); // ... in unreachable code... /* * complete R structure */ for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } //dumpit("begin", firstr); loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ASLL: case ASRL: case ASRA: /* * elide shift into D_SHIFT operand of subsequent instruction */ // if(shiftprop(r)) { // excise(r); // t++; // break; // } break; case AMOVW: case AMOVF: case AMOVD: if(regtyp(&p->from)) if(p->from.type == p->to.type) if(p->scond == C_SCOND_NONE) { if(copyprop(r)) { excise(r); t++; break; } if(subprop(r) && copyprop(r)) { excise(r); t++; break; } } break; #ifdef NOTDEF if(p->scond == C_SCOND_NONE) if(regtyp(&p->to)) if(isdconst(&p->from)) { constprop(&p->from, &p->to, r->s1); } break; #endif } } if(t) goto loop1; return; #ifdef NOTDEF for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { // case AEOR: // /* // * EOR -1,x,y => MVN x,y // */ // if(isdconst(&p->from) && p->from.offset == -1) { // p->as = AMVN; // p->from.type = D_REG; // if(p->reg != NREG) // p->from.reg = p->reg; // else // p->from.reg = p->to.reg; // p->reg = NREG; // } // break; case AMOVH: case AMOVHU: case AMOVB: case AMOVBU: /* * look for MOVB x,R; MOVB R,R */ if(p->to.type != D_REG) break; if(r1 == R) break; p1 = r1->prog; if(p1->as != p->as) break; if(p1->from.type != D_REG || p1->from.reg != p->to.reg) break; if(p1->to.type != D_REG || p1->to.reg != p->to.reg) break; excise(r1); break; } r1 = r->link; } // for(r=firstr; r!=R; r=r->link) { // p = r->prog; // switch(p->as) { // case AMOVW: // case AMOVB: // case AMOVBU: // if(p->from.type == D_OREG && p->from.offset == 0) // xtramodes(r, &p->from); // else // if(p->to.type == D_OREG && p->to.offset == 0) // xtramodes(r, &p->to); // else // continue; // break; // case ACMP: // /* // * elide CMP $0,x if calculation of x can set condition codes // */ // if(isdconst(&p->from) || p->from.offset != 0) // continue; // r2 = r->s1; // if(r2 == R) // continue; // t = r2->prog->as; // switch(t) { // default: // continue; // case ABEQ: // case ABNE: // case ABMI: // case ABPL: // break; // case ABGE: // t = ABPL; // break; // case ABLT: // t = ABMI; // break; // case ABHI: // t = ABNE; // break; // case ABLS: // t = ABEQ; // break; // } // r1 = r; // do // r1 = uniqp(r1); // while (r1 != R && r1->prog->as == ANOP); // if(r1 == R) // continue; // p1 = r1->prog; // if(p1->to.type != D_REG) // continue; // if(p1->to.reg != p->reg) // if(!(p1->as == AMOVW && p1->from.type == D_REG && p1->from.reg == p->reg)) // continue; // // switch(p1->as) { // default: // continue; // case AMOVW: // if(p1->from.type != D_REG) // continue; // case AAND: // case AEOR: // case AORR: // case ABIC: // case AMVN: // case ASUB: // case ARSB: // case AADD: // case AADC: // case ASBC: // case ARSC: // break; // } // p1->scond |= C_SBIT; // r2->prog->as = t; // excise(r); // continue; // } // } predicate(); #endif }
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; int i, z, nr; uint32 vreg; Bits bit; if(first) { fmtinstall('Q', Qconv); exregoffset = D_DI; // no externals first = 0; } fixjmp(firstp); // count instructions nr = 0; for(p=firstp; p!=P; p=p->link) nr++; // if too big dont bother if(nr >= 10000) { // print("********** %S is too big (%d)\n", curfn->nname->sym, nr); return; } r1 = R; firstr = R; lastr = R; /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) var[i].node = newname(lookup(regname[i])); regbits = RtoB(D_SP); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ nr = 0; for(p=firstp; p!=P; p=p->link) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); nr++; if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; p->reg = r; r1 = r->p1; if(r1 != R) { switch(r1->prog->as) { case ARET: case AJMP: case AIRETL: r->p1 = R; r1->s1 = R; } } bit = mkvar(r, &p->from); if(bany(&bit)) switch(p->as) { /* * funny */ case ALEAL: case AFMOVL: case AFMOVW: case AFMOVV: setaddrs(bit); break; /* * left side read */ default: for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; break; /* * left side read+write */ case AXCHGB: case AXCHGW: case AXCHGL: for(z=0; z<BITS; z++) { r->use1.b[z] |= bit.b[z]; r->set.b[z] |= bit.b[z]; } break; } bit = mkvar(r, &p->to); if(bany(&bit)) switch(p->as) { default: yyerror("reg: unknown op: %A", p->as); break; /* * right side read */ case ACMPB: case ACMPL: case ACMPW: case ATESTB: case ATESTL: case ATESTW: for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; break; /* * right side write */ case AFSTSW: case ALEAL: case ANOP: case AMOVL: case AMOVB: case AMOVW: case AMOVBLSX: case AMOVBLZX: case AMOVBWSX: case AMOVBWZX: case AMOVWLSX: case AMOVWLZX: case APOPL: for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; break; /* * right side read+write */ case AINCB: case AINCL: case AINCW: case ADECB: case ADECL: case ADECW: case AADDB: case AADDL: case AADDW: case AANDB: case AANDL: case AANDW: case ASUBB: case ASUBL: case ASUBW: case AORB: case AORL: case AORW: case AXORB: case AXORL: case AXORW: case ASALB: case ASALL: case ASALW: case ASARB: case ASARL: case ASARW: case ARCLB: case ARCLL: case ARCLW: case ARCRB: case ARCRL: case ARCRW: case AROLB: case AROLL: case AROLW: case ARORB: case ARORL: case ARORW: case ASHLB: case ASHLL: case ASHLW: case ASHRB: case ASHRL: case ASHRW: case AIMULL: case AIMULW: case ANEGB: case ANEGL: case ANEGW: case ANOTB: case ANOTL: case ANOTW: case AADCL: case ASBBL: case ASETCC: case ASETCS: case ASETEQ: case ASETGE: case ASETGT: case ASETHI: case ASETLE: case ASETLS: case ASETLT: case ASETMI: case ASETNE: case ASETOC: case ASETOS: case ASETPC: case ASETPL: case ASETPS: case AXCHGB: case AXCHGW: case AXCHGL: for(z=0; z<BITS; z++) { r->set.b[z] |= bit.b[z]; r->use2.b[z] |= bit.b[z]; } break; /* * funny */ case AFMOVDP: case AFMOVFP: case AFMOVLP: case AFMOVVP: case AFMOVWP: case ACALL: setaddrs(bit); break; } switch(p->as) { case AIMULL: case AIMULW: if(p->to.type != D_NONE) break; case AIDIVL: case AIDIVW: case ADIVL: case ADIVW: case AMULL: case AMULW: r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DX); break; case AIDIVB: case AIMULB: case ADIVB: case AMULB: r->set.b[0] |= RtoB(D_AX); r->use1.b[0] |= RtoB(D_AX); break; case ACWD: r->set.b[0] |= RtoB(D_AX) | RtoB(D_DX); r->use1.b[0] |= RtoB(D_AX); break; case ACDQ: r->set.b[0] |= RtoB(D_DX); r->use1.b[0] |= RtoB(D_AX); break; case AREP: case AREPN: case ALOOP: case ALOOPEQ: case ALOOPNE: r->set.b[0] |= RtoB(D_CX); r->use1.b[0] |= RtoB(D_CX); break; case AMOVSB: case AMOVSL: case AMOVSW: case ACMPSB: case ACMPSL: case ACMPSW: r->set.b[0] |= RtoB(D_SI) | RtoB(D_DI); r->use1.b[0] |= RtoB(D_SI) | RtoB(D_DI); break; case ASTOSB: case ASTOSL: case ASTOSW: case ASCASB: case ASCASL: case ASCASW: r->set.b[0] |= RtoB(D_DI); r->use1.b[0] |= RtoB(D_AX) | RtoB(D_DI); break; case AINSB: case AINSL: case AINSW: r->set.b[0] |= RtoB(D_DX) | RtoB(D_DI); r->use1.b[0] |= RtoB(D_DI); break; case AOUTSB: case AOUTSL: case AOUTSW: r->set.b[0] |= RtoB(D_DI); r->use1.b[0] |= RtoB(D_DX) | RtoB(D_DI); break; } } if(firstr == R) return; for(i=0; i<nvar; i++) { Var *v = var+i; if(v->addr) { bit = blsh(i); for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; } // print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n", // i, v->addr, v->etype, v->width, v->sym, v->offset); } if(debug['R'] && debug['v']) dumpit("pass1", firstr); /* * pass 2 * turn branch references to pointers * build back pointers */ for(r=firstr; r!=R; r=r->link) { p = r->prog; if(p->to.type == D_BRANCH) { if(p->to.branch == P) fatal("pnil %P", p); r1 = p->to.branch->reg; if(r1 == R) fatal("rnil %P", p); if(r1 == r) { //fatal("ref to self %P", p); continue; } r->s2 = r1; r->p2link = r1->p2; r1->p2 = r; } } if(debug['R'] && debug['v']) dumpit("pass2", firstr); /* * pass 2.5 * find looping structure */ for(r = firstr; r != R; r = r->link) r->active = 0; change = 0; loopit(firstr, nr); if(debug['R'] && debug['v']) dumpit("pass2.5", firstr); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; for(r = firstr; r != R; r = r->link) if(r->prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = r->link; if(r1 && r1->active && !r->active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; if(debug['R'] && debug['v']) dumpit("pass3", firstr); /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; synch(firstr, zbits); if(change) goto loop2; if(debug['R'] && debug['v']) dumpit("pass4", firstr); /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = r->link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) && !r->refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->prog->lineno, bit); r->refset = 1; } } for(r = firstr; r != R; r = r->link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = r->link) { for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->prog->lineno, bit); r->refset = 1; excise(r); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) continue; rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] && debug['v']) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } if(debug['R'] && debug['v']) dumpit("pass6", firstr); /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) { peep(); } /* * eliminate nops * free aux structures */ for(p=firstp; p!=P; p=p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.branch != P && p->to.branch->as == ANOP) p->to.branch = p->to.branch->link; } if(r1 != R) { r1->link = freer; freer = firstr; } if(debug['R']) { if(ostats.ncvtreg || ostats.nspill || ostats.nreload || ostats.ndelmov || ostats.nvar || ostats.naddr || 0) print("\nstats\n"); if(ostats.ncvtreg) print(" %4d cvtreg\n", ostats.ncvtreg); if(ostats.nspill) print(" %4d spill\n", ostats.nspill); if(ostats.nreload) print(" %4d reload\n", ostats.nreload); if(ostats.ndelmov) print(" %4d delmov\n", ostats.ndelmov); if(ostats.nvar) print(" %4d var\n", ostats.nvar); if(ostats.naddr) print(" %4d addr\n", ostats.naddr); memset(&ostats, 0, sizeof(ostats)); } }
void peep(Prog *firstp) { Flow *r, *r1; Graph *g; Prog *p, *p1; int t; g = flowstart(firstp, sizeof(Flow)); if(g == nil) return; for(r=g->start, t=0; r!=nil; r=r->link, t++) r->active = t; // byte, word arithmetic elimination. elimshortmov(g); // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) if(p->from.index == D_NONE || p->from.index == D_CONST) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", g->start, 0); t = 0; for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(g, r)) { excise(r); t++; } else if(subprop(r) && copyprop(g, r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: case AMOVQL: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != nil) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; // MOVLQZX removal. // The MOVLQZX exists to avoid being confused for a // MOVL that is just copying 32-bit data around during // copyprop. Now that copyprop is done, remov MOVLQZX R1, R2 // if it is dominated by an earlier ADDL/MOVL/etc into R1 that // will have already cleared the high bits. // // MOVSD removal. // We never use packed registers, so a MOVSD between registers // can be replaced by MOVAPD, which moves the pair of float64s // instead of just the lower one. We only use the lower one, but // the processor can do better if we do moves using both. for(r=g->start; r!=nil; r=r->link) { p = r->prog; if(p->as == AMOVLQZX) if(regtyp(&p->from)) if(p->from.type == p->to.type) if(prevl(r, p->from.type)) excise(r); if(p->as == AMOVSD) if(regtyp(&p->from)) if(regtyp(&p->to)) p->as = AMOVAPD; } // load pipelining // push any load from memory as early as possible // to give it time to complete before use. for(r=g->start; r!=nil; r=r->link) { p = r->prog; switch(p->as) { case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVLQZX: if(regtyp(&p->to) && !regconsttyp(&p->from)) pushback(r); } } flowend(g); }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } pc = 0; /* speculating it won't kill */ loop1: t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLSX: case AMOVBLZX: case AMOVWLSX: case AMOVWLZX: if(regtyp(&p->to)) { r1 = uniqs(r); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type) p1->as = AMOVL; } } break; case AADDL: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } break; case ASUBL: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; } else if(p->from.offset == 1) { if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; } break; } } if(t) goto loop1; }
void regopt(Prog *p) { Reg *r, *r1, *r2; Prog *p1; int i, z; int32_t initpc, val, npc; uint32_t vreg; Bits bit; struct { int32_t m; int32_t c; Reg* p; } log5[6], *lp; firstr = R; lastr = R; nvar = 0; regbits = RtoB(D_SP) | RtoB(D_AX); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; } /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ val = 5L * 5L * 5L * 5L * 5L; lp = log5; for(i=0; i<5; i++) { lp->m = val; lp->c = 0; lp->p = R; val /= 5L; lp++; } val = 0; for(; p != P; p = p->link) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; r->pc = val; val++; lp = log5; for(i=0; i<5; i++) { lp->c--; if(lp->c <= 0) { lp->c = lp->m; if(lp->p != R) lp->p->log5 = r; lp->p = r; (lp+1)->c = 0; break; } lp++; } r1 = r->p1; if(r1 != R) switch(r1->prog->as) { case ARET: case AJMP: case AIRETL: r->p1 = R; r1->s1 = R; } bit = mkvar(r, &p->from, p->as==AMOVL); if(bany(&bit)) switch(p->as) { /* * funny */ case ALEAL: for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; break; /* * left side read */ default: for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; break; } bit = mkvar(r, &p->to, 0); if(bany(&bit)) switch(p->as) { default: diag(Z, "reg: unknown op: %A", p->as); break; /* * right side read */ case ACMPB: case ACMPL: case ACMPW: for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; break; /* * right side write */ case ANOP: case AMOVL: case AMOVB: case AMOVW: case AMOVBLSX: case AMOVBLZX: case AMOVWLSX: case AMOVWLZX: for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; break; /* * right side read+write */ case AADDB: case AADDL: case AADDW: case AANDB: case AANDL: case AANDW: case ASUBB: case ASUBL: case ASUBW: case AORB: case AORL: case AORW: case AXORB: case AXORL: case AXORW: case ASALB: case ASALL: case ASALW: case ASARB: case ASARL: case ASARW: case AROLB: case AROLL: case AROLW: case ARORB: case ARORL: case ARORW: case ASHLB: case ASHLL: case ASHLW: case ASHRB: case ASHRL: case ASHRW: case AIMULL: case AIMULW: case ANEGL: case ANOTL: case AADCL: case ASBBL: for(z=0; z<BITS; z++) { r->set.b[z] |= bit.b[z]; r->use2.b[z] |= bit.b[z]; } break; /* * funny */ case AFMOVDP: case AFMOVFP: case AFMOVLP: case AFMOVVP: case AFMOVWP: case ACALL: for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; break; } switch(p->as) { case AIMULL: case AIMULW: if(p->to.type != D_NONE) break; case AIDIVB: case AIDIVL: case AIDIVW: case AIMULB: case ADIVB: case ADIVL: case ADIVW: case AMULB: case AMULL: case AMULW: case ACWD: case ACDQ: r->regu |= RtoB(D_AX) | RtoB(D_DX); break; case AREP: case AREPN: case ALOOP: case ALOOPEQ: case ALOOPNE: r->regu |= RtoB(D_CX); break; case AMOVSB: case AMOVSL: case AMOVSW: case ACMPSB: case ACMPSL: case ACMPSW: r->regu |= RtoB(D_SI) | RtoB(D_DI); break; case ASTOSB: case ASTOSL: case ASTOSW: case ASCASB: case ASCASL: case ASCASW: r->regu |= RtoB(D_AX) | RtoB(D_DI); break; case AINSB: case AINSL: case AINSW: case AOUTSB: case AOUTSL: case AOUTSW: r->regu |= RtoB(D_DI) | RtoB(D_DX); break; case AFSTSW: case ASAHF: r->regu |= RtoB(D_AX); break; } } if(firstr == R) return; initpc = pc - val; npc = val; /* * pass 2 * turn branch references to pointers * build back pointers */ for(r = firstr; r != R; r = r->link) { p = r->prog; if(p->to.type == D_BRANCH) { val = p->to.offset - initpc; r1 = firstr; while(r1 != R) { r2 = r1->log5; if(r2 != R && val >= r2->pc) { r1 = r2; continue; } if(r1->pc == val) break; r1 = r1->link; } if(r1 == R) { nearln = p->lineno; diag(Z, "ref not found\n%P", p); continue; } if(r1 == r) { nearln = p->lineno; diag(Z, "ref to self\n%P", p); continue; } r->s2 = r1; r->p2link = r1->p2; r1->p2 = r; } } if(debug['R']) { p = firstr->prog; print("\n%L %D\n", p->lineno, &p->from); } /* * pass 2.5 * find looping structure */ for(r = firstr; r != R; r = r->link) r->active = 0; change = 0; loopit(firstr, npc); if(debug['R'] && debug['v']) { print("\nlooping structure:\n"); for(r = firstr; r != R; r = r->link) { print("%ld:%P", r->loop, r->prog); for(z=0; z<BITS; z++) bit.b[z] = r->use1.b[z] | r->use2.b[z] | r->set.b[z]; if(bany(&bit)) { print("\t"); if(bany(&r->use1)) print(" u1=%B", r->use1); if(bany(&r->use2)) print(" u2=%B", r->use2); if(bany(&r->set)) print(" st=%B", r->set); } print("\n"); } } /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; for(r = firstr; r != R; r = r->link) if(r->prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = r->link; if(r1 && r1->active && !r->active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; synch(firstr, zbits); if(change) goto loop2; /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit)) { nearln = r->prog->lineno; warn(Z, "used and not set: %B", bit); if(debug['R'] && !debug['w']) print("used and not set: %B\n", bit); } } if(debug['R'] && debug['v']) print("\nprop structure:\n"); for(r = firstr; r != R; r = r->link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = r->link) { if(debug['R'] && debug['v']) { print("%P\t", r->prog); if(bany(&r->set)) print("s:%B ", r->set); if(bany(&r->refahead)) print("ra:%B ", r->refahead); if(bany(&r->calahead)) print("ca:%B ", r->calahead); print("\n"); } for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit)) { nearln = r->prog->lineno; warn(Z, "set and not used: %B", bit); if(debug['R']) print("set and not used: %B\n", bit); excise(r); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; if(debug['R'] && debug['v']) print("\n"); paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) { if(debug['R']) print("%L$%d: %B\n", r->prog->lineno, change, blsh(i)); continue; } rgp->cost = change; nregion++; if(nregion >= NRGN) { warn(Z, "too many regions"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(debug['R']) { print("%L$%d %R: %B\n", rgp->enter->prog->lineno, rgp->cost, rgp->regno, bit); } if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) peep(); /* * pass 8 * recalculate pc */ val = initpc; for(r = firstr; r != R; r = r1) { r->pc = val; p = r->prog; p1 = P; r1 = r->link; if(r1 != R) p1 = r1->prog; for(; p != p1; p = p->link) { switch(p->as) { default: val++; break; case ANOP: case ADATA: case AGLOBL: case ANAME: case ASIGNAME: break; } } } pc = val; /* * fix up branches */ if(debug['R']) if(bany(&addrs)) print("addrs: %B\n", addrs); r1 = 0; /* set */ for(r = firstr; r != R; r = r->link) { p = r->prog; if(p->to.type == D_BRANCH) p->to.offset = r->s2->pc; r1 = r; } /* * last pass * eliminate nops * free aux structures */ for(p = firstr->prog; p != P; p = p->link){ while(p->link && p->link->as == ANOP) p->link = p->link->link; } if(r1 != R) { r1->link = freer; freer = firstr; } }
void regopt(Prog *p) { Reg *r, *r1, *r2; Prog *p1; int i, z; long initpc, val, npc; ulong vreg; Bits bit; struct { long m; long c; Reg* p; } log5[6], *lp; firstr = R; lastr = R; nvar = 0; regbits = 0; for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; } /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ val = 5L * 5L * 5L * 5L * 5L; lp = log5; for(i=0; i<5; i++) { lp->m = val; lp->c = 0; lp->p = R; val /= 5L; lp++; } val = 0; for(; p != P; p = p->link) { switch(p->as) { case ADATA: case AGLOBL: case ANAME: case ASIGNAME: continue; } r = rega(); if(firstr == R) { firstr = r; lastr = r; } else { lastr->link = r; r->p1 = lastr; lastr->s1 = r; lastr = r; } r->prog = p; r->pc = val; val++; lp = log5; for(i=0; i<5; i++) { lp->c--; if(lp->c <= 0) { lp->c = lp->m; if(lp->p != R) lp->p->log5 = r; lp->p = r; (lp+1)->c = 0; break; } lp++; } r1 = r->p1; if(r1 != R) switch(r1->prog->as) { case ARETURN: case ABR: case ARFI: case ARFCI: case ARFID: r->p1 = R; r1->s1 = R; } /* * left side always read */ bit = mkvar(&p->from, p->as==AMOVW || p->as == AMOVWZ || p->as == AMOVD); for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; /* * right side depends on opcode */ bit = mkvar(&p->to, 0); if(bany(&bit)) switch(p->as) { default: diag(Z, "reg: unknown asop: %A", p->as); break; /* * right side write */ case ANOP: case AMOVB: case AMOVBU: case AMOVBZ: case AMOVBZU: case AMOVH: case AMOVHBR: case AMOVWBR: case AMOVHU: case AMOVHZ: case AMOVHZU: case AMOVW: case AMOVWU: case AMOVWZ: case AMOVWZU: case AMOVD: case AMOVDU: case AFMOVD: case AFMOVDCC: case AFMOVDU: case AFMOVS: case AFMOVSU: case AFRSP: for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; break; /* * funny */ case ABL: for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; break; } } if(firstr == R) return; initpc = pc - val; npc = val; /* * pass 2 * turn branch references to pointers * build back pointers */ for(r = firstr; r != R; r = r->link) { p = r->prog; if(p->to.type == D_BRANCH) { val = p->to.offset - initpc; r1 = firstr; while(r1 != R) { r2 = r1->log5; if(r2 != R && val >= r2->pc) { r1 = r2; continue; } if(r1->pc == val) break; r1 = r1->link; } if(r1 == R) { nearln = p->lineno; diag(Z, "ref not found\n%P", p); continue; } if(r1 == r) { nearln = p->lineno; diag(Z, "ref to self\n%P", p); continue; } r->s2 = r1; r->p2link = r1->p2; r1->p2 = r; } } if(debug['R']) { p = firstr->prog; print("\n%L %D\n", p->lineno, &p->from); } /* * pass 2.5 * find looping structure */ for(r = firstr; r != R; r = r->link) r->active = 0; change = 0; loopit(firstr, npc); if(debug['R'] && debug['v']) { print("\nlooping structure:\n"); for(r = firstr; r != R; r = r->link) { print("%ld:%P", r->loop, r->prog); for(z=0; z<BITS; z++) bit.b[z] = r->use1.b[z] | r->use2.b[z] | r->set.b[z]; if(bany(&bit)) { print("\t"); if(bany(&r->use1)) print(" u1=%B", r->use1); if(bany(&r->use2)) print(" u2=%B", r->use2); if(bany(&r->set)) print(" st=%B", r->set); } print("\n"); } } /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; for(r = firstr; r != R; r = r->link) if(r->prog->as == ARETURN) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = r->link; if(r1 && r1->active && !r->active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = r->link) r->active = 0; synch(firstr, zbits); if(change) goto loop2; /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit)) { nearln = r->prog->lineno; warn(Z, "used and not set: %B", bit); if(debug['R'] && !debug['w']) print("used and not set: %B\n", bit); } } if(debug['R'] && debug['v']) print("\nprop structure:\n"); for(r = firstr; r != R; r = r->link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = r->link) { if(debug['R'] && debug['v']) print("%P\n set = %B; rah = %B; cal = %B\n", r->prog, r->set, r->refahead, r->calahead); for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit)) { nearln = r->prog->lineno; warn(Z, "set and not used: %B", bit); if(debug['R']) print("set an not used: %B\n", bit); excise(r); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; if(debug['R'] && debug['v']) print("\n"); paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) { if(debug['R']) print("%L$%d: %B\n", r->prog->lineno, change, blsh(i)); continue; } rgp->cost = change; nregion++; if(nregion >= NRGN) { warn(Z, "too many regions"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(debug['R']) { if(rgp->regno >= NREG) print("%L$%d F%d: %B\n", rgp->enter->prog->lineno, rgp->cost, rgp->regno-NREG, bit); else print("%L$%d R%d: %B\n", rgp->enter->prog->lineno, rgp->cost, rgp->regno, bit); } if(rgp->regno != 0) paint3(rgp->enter, rgp->varno, vreg, rgp->regno); rgp++; } /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) peep(); /* * pass 8 * recalculate pc */ val = initpc; for(r = firstr; r != R; r = r1) { r->pc = val; p = r->prog; p1 = P; r1 = r->link; if(r1 != R) p1 = r1->prog; for(; p != p1; p = p->link) { switch(p->as) { default: val++; break; case ANOP: case ADATA: case AGLOBL: case ANAME: case ASIGNAME: break; } } } pc = val; /* * fix up branches */ if(debug['R']) if(bany(&addrs)) print("addrs: %B\n", addrs); r1 = 0; /* set */ for(r = firstr; r != R; r = r->link) { p = r->prog; if(p->to.type == D_BRANCH) p->to.offset = r->s2->pc; r1 = r; } /* * last pass * eliminate nops * free aux structures */ for(p = firstr->prog; p != P; p = p->link){ while(p->link && p->link->as == ANOP) p->link = p->link->link; } if(r1 != R) { r1->link = freer; freer = firstr; } }
void regopt(Prog *firstp) { Reg *r, *r1; Prog *p; Graph *g; ProgInfo info; int i, z; uint32 vreg; Bits bit; if(first) { fmtinstall('Q', Qconv); exregoffset = D_R15; first = 0; } mergetemp(firstp); /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for * registers, so we have complete register usage information. */ nvar = NREGVAR; memset(var, 0, NREGVAR*sizeof var[0]); for(i=0; i<NREGVAR; i++) { if(regnodes[i] == N) regnodes[i] = newname(lookup(regname[i])); var[i].node = regnodes[i]; } regbits = RtoB(D_SP); for(z=0; z<BITS; z++) { externs.b[z] = 0; params.b[z] = 0; consts.b[z] = 0; addrs.b[z] = 0; ovar.b[z] = 0; } // build list of return variables setoutvar(); /* * pass 1 * build aux data structure * allocate pcs * find use and set of variables */ g = flowstart(firstp, sizeof(Reg)); if(g == nil) return; firstr = (Reg*)g->start; for(r = firstr; r != R; r = (Reg*)r->f.link) { p = r->f.prog; if(p->as == AVARDEF) continue; proginfo(&info, p); // Avoid making variables for direct-called functions. if(p->as == ACALL && p->to.type == D_EXTERN) continue; r->use1.b[0] |= info.reguse | info.regindex; r->set.b[0] |= info.regset; bit = mkvar(r, &p->from); if(bany(&bit)) { if(info.flags & LeftAddr) setaddrs(bit); if(info.flags & LeftRead) for(z=0; z<BITS; z++) r->use1.b[z] |= bit.b[z]; if(info.flags & LeftWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } bit = mkvar(r, &p->to); if(bany(&bit)) { if(info.flags & RightAddr) setaddrs(bit); if(info.flags & RightRead) for(z=0; z<BITS; z++) r->use2.b[z] |= bit.b[z]; if(info.flags & RightWrite) for(z=0; z<BITS; z++) r->set.b[z] |= bit.b[z]; } } for(i=0; i<nvar; i++) { Var *v = var+i; if(v->addr) { bit = blsh(i); for(z=0; z<BITS; z++) addrs.b[z] |= bit.b[z]; } if(debug['R'] && debug['v']) print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n", i, v->addr, v->etype, v->width, v->node, v->offset); } if(debug['R'] && debug['v']) dumpit("pass1", &firstr->f, 1); /* * pass 2 * find looping structure */ flowrpo(g); if(debug['R'] && debug['v']) dumpit("pass2", &firstr->f, 1); /* * pass 3 * iterate propagating usage * back until flow graph is complete */ loop1: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) if(r->f.prog->as == ARET) prop(r, zbits, zbits); loop11: /* pick up unreachable code */ i = 0; for(r = firstr; r != R; r = r1) { r1 = (Reg*)r->f.link; if(r1 && r1->f.active && !r->f.active) { prop(r, zbits, zbits); i = 1; } } if(i) goto loop11; if(change) goto loop1; if(debug['R'] && debug['v']) dumpit("pass3", &firstr->f, 1); /* * pass 4 * iterate propagating register/variable synchrony * forward until graph is complete */ loop2: change = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) r->f.active = 0; synch(firstr, zbits); if(change) goto loop2; if(debug['R'] && debug['v']) dumpit("pass4", &firstr->f, 1); /* * pass 4.5 * move register pseudo-variables into regu. */ for(r = firstr; r != R; r = (Reg*)r->f.link) { r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; r->set.b[0] &= ~REGBITS; r->use1.b[0] &= ~REGBITS; r->use2.b[0] &= ~REGBITS; r->refbehind.b[0] &= ~REGBITS; r->refahead.b[0] &= ~REGBITS; r->calbehind.b[0] &= ~REGBITS; r->calahead.b[0] &= ~REGBITS; r->regdiff.b[0] &= ~REGBITS; r->act.b[0] &= ~REGBITS; } /* * pass 5 * isolate regions * calculate costs (paint1) */ r = firstr; if(r) { for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); if(bany(&bit) && !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; } } for(r = firstr; r != R; r = (Reg*)r->f.link) r->act = zbits; rgp = region; nregion = 0; for(r = firstr; r != R; r = (Reg*)r->f.link) { for(z=0; z<BITS; z++) bit.b[z] = r->set.b[z] & ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); if(bany(&bit) && !r->f.refset) { if(debug['w']) print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); r->f.refset = 1; excise(&r->f); } for(z=0; z<BITS; z++) bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]); while(bany(&bit)) { i = bnum(bit); rgp->enter = r; rgp->varno = i; change = 0; paint1(r, i); bit.b[i/32] &= ~(1L<<(i%32)); if(change <= 0) continue; rgp->cost = change; nregion++; if(nregion >= NRGN) { if(debug['R'] && debug['v']) print("too many regions\n"); goto brk; } rgp++; } } brk: qsort(region, nregion, sizeof(region[0]), rcmp); if(debug['R'] && debug['v']) dumpit("pass5", &firstr->f, 1); /* * pass 6 * determine used registers (paint2) * replace code (paint3) */ rgp = region; for(i=0; i<nregion; i++) { bit = blsh(rgp->varno); vreg = paint2(rgp->enter, rgp->varno); vreg = allreg(vreg, rgp); if(rgp->regno != 0) { if(debug['R'] && debug['v']) { Var *v; v = var + rgp->varno; print("registerize %N+%lld (bit=%2d et=%2E) in %R\n", v->node, v->offset, rgp->varno, v->etype, rgp->regno); } paint3(rgp->enter, rgp->varno, vreg, rgp->regno); } rgp++; } if(debug['R'] && debug['v']) dumpit("pass6", &firstr->f, 1); /* * free aux structures. peep allocates new ones. */ flowend(g); firstr = R; /* * pass 7 * peep-hole on basic block */ if(!debug['R'] || debug['P']) peep(firstp); /* * eliminate nops */ for(p=firstp; p!=P; p=p->link) { while(p->link != P && p->link->as == ANOP) p->link = p->link->link; if(p->to.type == D_BRANCH) while(p->to.u.branch != P && p->to.u.branch->as == ANOP) p->to.u.branch = p->to.u.branch->link; } if(debug['R']) { if(ostats.ncvtreg || ostats.nspill || ostats.nreload || ostats.ndelmov || ostats.nvar || ostats.naddr || 0) print("\nstats\n"); if(ostats.ncvtreg) print(" %4d cvtreg\n", ostats.ncvtreg); if(ostats.nspill) print(" %4d spill\n", ostats.nspill); if(ostats.nreload) print(" %4d reload\n", ostats.nreload); if(ostats.ndelmov) print(" %4d delmov\n", ostats.ndelmov); if(ostats.nvar) print(" %4d var\n", ostats.nvar); if(ostats.naddr) print(" %4d addr\n", ostats.naddr); memset(&ostats, 0, sizeof(ostats)); } }
void peep(void) { Reg *r, *r1, *r2; Prog *p, *p1; int t; /* * complete R structure */ t = 0; for(r=firstr; r!=R; r=r1) { r1 = r->link; if(r1 == R) break; p = r->prog->link; while(p != r1->prog) switch(p->as) { default: r2 = rega(); r->link = r2; r2->link = r1; r2->prog = p; p->reg = r2; r2->p1 = r; r->s1 = r2; r2->s1 = r1; r1->p1 = r2; r = r2; t++; case ADATA: case AGLOBL: case ANAME: case ASIGNAME: p = p->link; } } // constant propagation // find MOV $con,R followed by // another MOV $con,R without // setting R in the interim for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case ALEAL: case ALEAQ: if(regtyp(&p->to)) if(p->from.sym != S) conprop(r); break; case AMOVB: case AMOVW: case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(p->from.type == D_CONST) conprop(r); break; } } loop1: if(debug['P'] && debug['v']) dumpit("loop1", firstr); t = 0; for(r=firstr; r!=R; r=r->link) { p = r->prog; switch(p->as) { case AMOVL: case AMOVQ: case AMOVSS: case AMOVSD: if(regtyp(&p->to)) if(regtyp(&p->from)) { if(copyprop(r)) { excise(r); t++; } else if(subprop(r) && copyprop(r)) { excise(r); t++; } } break; case AMOVBLZX: case AMOVWLZX: case AMOVBLSX: case AMOVWLSX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVL; t++; } } } break; case AMOVBQSX: case AMOVBQZX: case AMOVWQSX: case AMOVWQZX: case AMOVLQSX: case AMOVLQZX: if(regtyp(&p->to)) { r1 = rnops(uniqs(r)); if(r1 != R) { p1 = r1->prog; if(p->as == p1->as && p->to.type == p1->from.type){ p1->as = AMOVQ; t++; } } } break; case AADDL: case AADDQ: case AADDW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1){ if(p->as == AADDQ) p->as = ADECQ; else if(p->as == AADDL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == AADDQ) p->as = AINCQ; else if(p->as == AADDL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } break; case ASUBL: case ASUBQ: case ASUBW: if(p->from.type != D_CONST || needc(p->link)) break; if(p->from.offset == -1) { if(p->as == ASUBQ) p->as = AINCQ; else if(p->as == ASUBL) p->as = AINCL; else p->as = AINCW; p->from = zprog.from; break; } if(p->from.offset == 1){ if(p->as == ASUBQ) p->as = ADECQ; else if(p->as == ASUBL) p->as = ADECL; else p->as = ADECW; p->from = zprog.from; break; } break; } } if(t) goto loop1; }