// Report (when DebugPerformance is on) why scope s's context could not be
// eliminated: either no temp is actually uplevel-accessed (a fixable
// compiler restriction) or some temps are still uplevel-read/-written or
// still force a context via a non-inlined block.
// Fixes vs. original: the loop variable no longer shadows the RAII
// Reporter local, and the "(non-inlined block)" entry now prints a
// trailing separator space like the plain-index branch so successive
// entries don't run together.
void PerformanceDebugger::report_context(InlinedScope* s) {
  if (!DebugPerformance) return;
  Reporter r(this);                           // RAII reporter; must stay live for the whole method
  GrowableArray<Expr*>* temps = s->contextTemporaries();
  const int len = temps->length();
  // count temps that still require the context
  int nused = 0;
  for (int i = 0; i < len; i++) {
    PReg* pr = temps->at(i)->preg();          // renamed from 'r': shadowed the Reporter above
    if (pr->uplevelR() || pr->uplevelW() || (pr->isBlockPReg() && !pr->isUnused())) nused++;
  }
  if (nused == 0) {
    // nothing actually needs the context -- only a compiler restriction keeps it alive
    str->print(" could not eliminate context of scope %s (fixable compiler restriction; should be eliminated)\n", s->key()->print_string());
  } else {
    str->print(" could not eliminate context of scope %s; temp(s) still used: ", s->key()->print_string());
    for (int j = 0; j < len; j++) {
      PReg* pr = temps->at(j)->preg();
      if (pr->uplevelR() || pr->uplevelW()) {
        str->print("%d ", j);
      } else if (pr->isBlockPReg() && !pr->isUnused()) {
        str->print("%d (non-inlined block) ", j);  // trailing space added: separator was missing
      }
    }
    str->print("\n");
  }
}
// Fast-path temp register allocation for this BB: tries to hand out the
// BB-local temp registers directly; if the simple strategy runs out of
// registers it falls back to slowAllocateTempRegisters (which uses live
// ranges).  hardwired/tempRegs/lives are scratch structures passed
// through to the slow path.
void BB::allocateTempRegisters(BitVector** hardwired, PRegBList* tempRegs, BitVectorBList* lives) {
  if (!nnodes) return; // empty BB
  RegisterEqClassBList regClasses(nnodes + 1);
  regClasses.append(NULL); // first reg class has index 1
  // per-register use/def counts for this BB, filled in by the preferred-
  // candidate pass below
  fint use_count[NumRegisters], def_count[NumRegisters];
  for (fint i = 0; i < NumRegisters; i++) use_count[i] = def_count[i] = 0;
  allocate_to_preferred_candidates_if_possible(use_count, def_count);
  // allocate other temp regs (using the untouched temp regs of this BB)
  fint temp = 0;
  for (int i = 0; i < duInfo.info->length(); i++) {
    // collect temp regs
    PReg* r = duInfo.info->nth(i)->reg;
    // only unallocated, used PRegs that live entirely within this BB qualify
    if (r->loc == UnAllocated && !r->isUnused() && r->isLocalTo(this)) {
      assert(r->dus.first()->index == i, "should be the same");
      // skip temp regs already touched in this BB (any use or def recorded)
      for ( ; temp < NumTempRegs && use_count[TempRegs[temp]] + def_count[TempRegs[temp]] > 0; temp++) ;
      if (temp == NumTempRegs) break; // ran out of regs
      // ok, allocate TempRegs[temp] to the preg and equivalent pregs
      Location t = TempRegs[temp++];
      // if r belongs to a register equivalence class, allocate the entire
      // class to the same location; otherwise just r itself
      PReg* frst = r->regClass ? regClasses.nth(r->regClass)->first : r;
      for (PReg* pr = frst; pr; pr = pr->regClassLink) {
        doAlloc(pr, t);
        pr->regClass = 0; // class handled; clear so members aren't revisited
      }
    }
    // clear regClass unconditionally (even for regs that didn't qualify above)
    r->regClass = 0;
  }
  if (temp == NumTempRegs) {
    // ran out of temp regs with the simple strategy - try using slow
    // allocation algorithm
    slowAllocateTempRegisters(hardwired, tempRegs, lives);
  }
}
void Compiler::computeBlockInfo() { FlagSetting(EliminateUnneededNodes, true); // unused context nodes must be eliminated GrowableArray<InlinedScope*>* allContexts = new GrowableArray<InlinedScope*>(25); topScope->collectContextInfo(allContexts); // for now, just allocate all contexts as in interpreter // fix this later: collect all uplevel-accessed PRegs at same loop depth, form physical // contexts for these // also, if uplevel-read and single def --> could copy into context and keep // stack/register copy // remove all unused contexts // need to iterate because removing a nested context may enable removal of a parent context // (could avoid iteration with topo sort, but there are few contexts anyway) bool changed = EliminateContexts; while (changed) { changed = false; for (int i = allContexts->length() - 1; i >= 0; i--) { InlinedScope* s = allContexts->at(i); if (s == NULL) continue; PReg* contextPR = s->context(); assert(contextPR->isSinglyAssigned(), "should have exactly one def"); GrowableArray<Expr*>* temps = s->contextTemporaries(); bool noUplevelAccesses = true; // check if all context temps can be stack-allocated for (int j = temps->length() - 1; j >= 0; j--) { PReg* r = temps->at(j)->preg(); if (r->uplevelR() || r->uplevelW() // this temp is still uplevel-accessed, so can't eliminate context || (r->isBlockPReg() && !r->isUnused()) // this block still forces a context ) { noUplevelAccesses = false; break; } } // TO DO: check if context is needed for NLRs // (noUplevelAccesses alone does not allow elimination) if (/*noUplevelAccesses || */contextPR->isSinglyUsed()) { // can eliminate context -- no uplevel-accessed vars // (single use is context initializer) if (CompilerDebug) cout(PrintEliminateContexts)->print("%*s*eliminating context %s\n", s->depth, "", contextPR->safeName()); contextPR->scope()->gen()->removeContextCreation(); allContexts->at_put(i, NULL); // make code generator break if it tries to access this context changed = true; } } } // now 
collect all remaining contexts int i = allContexts->length(); contextList = new GrowableArray<InlinedScope*>(i, i, NULL); while (i-- > 0) { // should merge several contexts into one physical context if possible // fix this later InlinedScope* s = allContexts->at(i); if (s == NULL) continue; PReg* contextPR = s->context(); if (CompilerDebug) { cout(PrintEliminateContexts)->print("%*s*could not eliminate context %s in scope %s\n", s->depth, "", contextPR->safeName(), s->key()->print_string()); } reporter->report_context(s); contextList->at_put(i, s); ContextCreateNode* c = s->contextInitializer()->creator(); c->set_contextNo(i); GrowableArray<Expr*>* temps = s->contextTemporaries(); // allocate the temps in this context (but only if they're used) int ntemps = temps->length(); int size = 0; for (int j = 0; j < ntemps; j++) { PReg* p = temps->at(j)->preg(); // should be: // if (p->isUsed() && (p->uplevelR() || p->uplevelW())) { // but doesn't work yet (probably must fix set_self_via_context etc.) // -Urs 6/96 if (p->isUsed()) { // allocate p to context temp assert(p->scope() == s || p->isBlockPReg(), "oops"); Location loc = Mapping::contextTemporary(i, size, s->scopeID()); if (p->isBlockPReg()) { // Blocks aren't actually assigned (at the PReg level) so that the inlining info // isn't lost. Thus we need to create a fake destination here if the context exists. 
SAPReg* dest = new SAPReg(s, loc, true, true, PrologueBCI, EpilogueBCI); Expr* e = new UnknownExpr(dest, NULL); //contextPR->scope()->contextInitializer()->initialize(j, init); temps->at_put(j, e); } else { p->allocateTo(loc); } size++; } } c->set_sizeOfContext(size); if (size < ntemps && c->scope()->number_of_noninlined_blocks() > 0) { // this hasn't been exercised much compiler_warning("while compiling %s: eliminated some context temps", key->print_string()); } } // Compute the number of noninlined blocks for the nmethod and allocate const int nblocks = topScope->number_of_noninlined_blocks(); if (is_method_compile() || nblocks > 0) { // allocate nblocks+1 jumpTable entries const jumpTableID id = Universe::code->jump_table()->allocate(nblocks + 1); if (is_method_compile()) { main_jumpTable_id = id; } else { promoted_jumpTable_id = id; } // first is for nmethod itself int block_index = 1; for (int i = bbIterator->exposedBlks->length() - 1; i >= 0; i--) { BlockPReg* blk = bbIterator->exposedBlks->at(i); if (blk->isUsed()) { assert(block_index <= nblocks, "nblocks too small"); blk->closure()->set_id(id.sub(block_index++)); } } assert(nblocks + 1 == block_index, "just checking"); } }
// allocate PRegs that are used & defined solely within this BB void BB::slowAllocateTempRegisters(BitVector** hardwired, PRegBList* tempRegs, BitVectorBList* lives) { // clear temporary data structures tempRegs->clear(); lives->clear(); fint i; for (i = 0; i < NumTempRegs; i++) { hardwired[i]->setLength(nnodes); hardwired[i]->clear(); } for (i = 0; i < duInfo.info->length(); i++) { // collect temp regs and hardwired temp regs PReg* r = duInfo.info->nth(i)->reg; if (r->isLocalTo(this)) { assert(r->dus.first()->index == i, "should be the same"); if (r->isUnused()) { // unused register - ignore } else { DUInfo* info = duInfo.info->nth(r->dus.first()->index); tempRegs->append(r); BitVector* bv = new BitVector(nnodes); lives->append(bv); fint firstUse = 0, lastUse = nnodes - 1; duInfo.info->nth(i)->getLiveRange(firstUse, lastUse); bv->addFromTo(firstUse, lastUse); } } else if (isTempReg(r->loc)) { fint firstUse = 0, lastUse = nnodes - 1; if (!r->incorrectDU()) { duInfo.info->nth(i)->getLiveRange(firstUse, lastUse); } else { // can't really compute live range since the temp might be non-local // so assume it's live from first node til the end } hardwired[RegToTempNo[r->loc]]->addFromTo(firstUse, lastUse); } } // now, tempRegs holds all temp regs, and lives contains each register's // live range (one bit per node, 1 = reg is live); hardwired contains // the ranges where temp regs are already taken (e.g. for NLR, calls, etc) // cycle through the temp registers to (hopefully) allow more optimizations // later (e.g. scheduling) fint lastTemp = 0; # define nextTemp(n) (n == NumTempRegs - 1) ? 
0 : n + 1 for (i = 0; i < tempRegs->length(); i++) { // try to allocate tempRegs[i] to a temp register PReg* r = tempRegs->nth(i); if (r->loc != UnAllocated) { assert(r->regClass == 0, "should have been cleared"); continue; } BitVector* liveRange = lives->nth(i); for (fint tempNo = lastTemp, ntries = 0; ntries < NumTempRegs; tempNo = nextTemp(tempNo), ntries++) { if (liveRange->isDisjointFrom(hardwired[tempNo])) { Location temp = TempRegs[tempNo]; doAlloc(r, temp); hardwired[tempNo]->unionWith(liveRange); lastTemp = nextTemp(tempNo); break; } } if ( r->loc == UnAllocated && (PrintSICTempRegisterAllocation || WizardMode && TARGET_ARCH != I386_ARCH /* happens normally in I386; few regs */ )) { lprintf("*could NOT find temp assignment for local %s in BB%ld\n", r->name(), (void*)id()); } else if (r->loc == UnAllocated) { if (PrintSICTempRegisterAllocation) lprintf("out of temp regs"); } r->regClass = 0; } }