Example #1
void lowerStackArgs(Code& code)
{
    PhaseScope phaseScope(code, "lowerStackArgs");

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSizeInBytes(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(code.frameSize() + code.callArgAreaSizeInBytes());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        // FIXME: We can keep track of the last large offset which was materialized in this block,
        // and reuse the register if it hasn't been clobbered, instead of regenerating imm+add+addr
        // every time. https://bugs.webkit.org/show_bug.cgi?id=171387

        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);

            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Bank, Width width) {
                    auto stackAddr = [&] (Value::OffsetType offsetFromFP) -> Arg {
                        int32_t offsetFromSP = offsetFromFP + code.frameSize();

                        if (inst.admitsExtendedOffsetAddr(arg)) {
                            // Stackmaps and patchpoints expect addr inputs relative to SP or FP only. We might as well
                            // not even bother generating an addr with valid form for these opcodes since extended offset
                            // addr is always valid.
                            return Arg::extendedOffsetAddr(offsetFromFP);
                        }

                        Arg result = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP);
                        if (result.isValidForm(width))
                            return result;

                        result = Arg::addr(Air::Tmp(MacroAssembler::stackPointerRegister), offsetFromSP);
                        if (result.isValidForm(width))
                            return result;
#if CPU(ARM64)
                        ASSERT(pinnedExtendedOffsetAddrRegister());
                        Air::Tmp tmp = Air::Tmp(*pinnedExtendedOffsetAddrRegister());

                        Arg largeOffset = Arg::isValidImmForm(offsetFromSP) ? Arg::imm(offsetFromSP) : Arg::bigImm(offsetFromSP);
                        insertionSet.insert(instIndex, Move, inst.origin, largeOffset, tmp);
                        insertionSet.insert(instIndex, Add64, inst.origin, Air::Tmp(MacroAssembler::stackPointerRegister), tmp);
                        result = Arg::addr(tmp, 0);
                        return result;
#elif CPU(X86_64)
                        // Can't happen on x86: immediates are always big enough for frame size.
                        RELEASE_ASSERT_NOT_REACHED();
#else
#error Unhandled architecture.
#endif
                    };

                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Spill
                            && slot->byteSize() > bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                }
            );
        }
        insertionSet.execute(block);
    }
}
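The three-step fallback in stackAddr above (FP-relative, then SP-relative, then materializing the offset into the pinned register) exists because ARM64 load/store immediates cover only narrow ranges, while x86-64 accepts any 32-bit displacement, which is why the X86_64 branch is unreachable. FP-relative offsets of locals are negative and only fit the small signed form; SP-relative offsets are positive and can use the scaled unsigned form, which reaches much farther. The following standalone sketch models the rule that isValidForm(width) is checking on ARM64; isValidOffsetForm is a made-up name, and the assumed encodings are the usual 9-bit signed unscaled and 12-bit unsigned scaled immediate forms.

#include <cstdint>
#include <cstdio>

// Standalone model, not WebKit code: an ARM64 [base, #imm] load/store admits
// either a signed 9-bit unscaled immediate (LDUR/STUR) or an unsigned 12-bit
// immediate scaled by the access width (LDR/STR).
static bool isValidOffsetForm(int64_t offset, unsigned byteWidth)
{
    if (offset >= -256 && offset <= 255)
        return true; // 9-bit signed, unscaled
    if (offset >= 0 && !(offset % byteWidth) && offset / byteWidth <= 4095)
        return true; // 12-bit unsigned, scaled by the access width
    return false;
}

int main()
{
    // A slot near FP encodes directly; a slot in a huge frame does not, which
    // is when the code above falls back to imm+add through the pinned Tmp.
    std::printf("%d %d\n", isValidOffsetForm(-16, 8), isValidOffsetForm(1 << 20, 8));
    return 0;
}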
Example #2
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (Inst& inst) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (!Arg::isDef(role))
                        return;
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");
            Inst& inst = block->at(instIndex);
            interfere(inst);
            localCalc.execute(instIndex);
        }
        // One more pass at the head of the block: an empty Inst defines nothing, so this
        // only reports the final live set when verbose logging is on.
        Inst nop;
        interfere(nop);
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we want to find the offsetFromFP closest to zero that does not overlap any of
    // the StackSlots it interferes with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        // The first resize discards the interfering slots left over from the previous iteration;
        // the second makes room for this slot's interference set after the escaped slots.
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                switch (arg.kind()) {
                case Arg::Stack:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() + arg.stackSlot()->offsetFromFP());
                    break;
                case Arg::CallArg:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() - code.frameSize());
                    break;
                default:
                    break;
                }
            }
        }
    }
}
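Both allocateStack variants call an assign() helper that this page does not include. In WebKit it is a static function in the same file implementing a first-fit search; the sketch below is a simplified, self-contained rendering of that idea, with Slot standing in for StackSlot and the real helper's assert-on-exhaustion dropped. Offsets grow downward from FP, so an assigned offsetFromFP is always negative and 0 means "unassigned".

#include <cstdint>
#include <vector>

struct Slot {
    intptr_t offsetFromFP { 0 }; // 0 means "not assigned yet"
    unsigned byteSize { 0 };
    unsigned alignment { 1 };
};

static bool rangesOverlap(intptr_t aBegin, intptr_t aEnd, intptr_t bBegin, intptr_t bEnd)
{
    return aBegin < bEnd && bBegin < aEnd;
}

static bool attemptAssignment(Slot* slot, intptr_t offsetFromFP, const std::vector<Slot*>& otherSlots)
{
    // Round away from FP so the slot ends up aligned.
    intptr_t alignment = slot->alignment;
    offsetFromFP = -((-offsetFromFP + alignment - 1) / alignment * alignment);

    for (Slot* other : otherSlots) {
        if (!other->offsetFromFP)
            continue;
        if (rangesOverlap(
                offsetFromFP, offsetFromFP + static_cast<intptr_t>(slot->byteSize),
                other->offsetFromFP, other->offsetFromFP + static_cast<intptr_t>(other->byteSize)))
            return false;
    }
    slot->offsetFromFP = offsetFromFP;
    return true;
}

static void assign(Slot* slot, const std::vector<Slot*>& otherSlots)
{
    // First-fit: try the offset closest to zero, then just below each already
    // placed slot, and keep the first candidate that overlaps nothing.
    if (attemptAssignment(slot, -static_cast<intptr_t>(slot->byteSize), otherSlots))
        return;
    for (Slot* other : otherSlots) {
        if (!other->offsetFromFP)
            continue;
        if (attemptAssignment(slot, other->offsetFromFP - static_cast<intptr_t>(slot->byteSize), otherSlots))
            return;
    }
}

int main()
{
    Slot a { 0, 8, 8 }, b { 0, 8, 8 };
    std::vector<Slot*> placed;
    assign(&a, placed); // a lands at -8
    placed.push_back(&a);
    assign(&b, placed); // -8 overlaps a, so b lands at -16
    placed.push_back(&b);
    return a.offsetFromFP == -8 && b.offsetFromFP == -16 ? 0 : 1;
}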
Example #3
void handleCalleeSaves(Code& code)
{
    PhaseScope phaseScope(code, "handleCalleeSaves");

    RegisterSet usedCalleeSaves;

    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachTmpFast(
                [&] (Tmp& tmp) {
                    // At first we just record all used regs.
                    usedCalleeSaves.set(tmp.reg());
                });

            if (inst.hasSpecial())
                usedCalleeSaves.merge(inst.extraClobberedRegs());
        }
    }

    // Now we filter to really get the callee saves.
    usedCalleeSaves.filter(RegisterSet::calleeSaveRegisters());
    usedCalleeSaves.exclude(RegisterSet::stackRegisters()); // We don't need to save FP here.

    if (!usedCalleeSaves.numberOfSetRegisters())
        return;

    code.calleeSaveRegisters() = RegisterAtOffsetList(usedCalleeSaves);

    size_t byteSize = 0;
    for (const RegisterAtOffset& entry : code.calleeSaveRegisters())
        byteSize = std::max(static_cast<size_t>(-entry.offset()), byteSize);

    StackSlot* savesArea = code.addStackSlot(byteSize, StackSlotKind::Locked);
    // This is a bit weird since we could have already pinned a different stack slot to this
    // area. Also, our runtime does not require us to pin the saves area. Maybe we shouldn't pin it?
    savesArea->setOffsetFromFP(-byteSize);

    auto argFor = [&] (const RegisterAtOffset& entry) -> Arg {
        return Arg::stack(savesArea, entry.offset() + byteSize);
    };

    InsertionSet insertionSet(code);
    
    // First insert saving code in the prologue.
    for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) {
        insertionSet.insert(
            0, entry.reg().isGPR() ? Move : MoveDouble, code[0]->at(0).origin,
            Tmp(entry.reg()), argFor(entry));
    }
    insertionSet.execute(code[0]);

    // Now insert restore code at epilogues.
    for (BasicBlock* block : code) {
        Inst& last = block->last();
        if (!isReturn(last.opcode))
            continue;

        for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) {
            insertionSet.insert(
                block->size() - 1, entry.reg().isGPR() ? Move : MoveDouble, last.origin,
                argFor(entry), Tmp(entry.reg()));
        }
        insertionSet.execute(block);
    }
}
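RegisterAtOffset entries store negative offsets, measured down from the top of the callee-save area, so byteSize above ends up as max(-offset) over all entries and argFor's entry.offset() + byteSize maps every entry into [0, byteSize) inside the locked slot. A tiny self-contained check of that arithmetic, using hypothetical offsets:

#include <algorithm>
#include <cassert>
#include <cstddef>

int main()
{
    // Hypothetical layout: two 8-byte callee saves at offsets -8 and -16.
    const std::ptrdiff_t offsets[] = { -8, -16 };
    std::size_t byteSize = 0;
    for (std::ptrdiff_t offset : offsets)
        byteSize = std::max<std::size_t>(byteSize, -offset); // as in the loop above
    assert(byteSize == 16);

    for (std::ptrdiff_t offset : offsets) {
        // argFor's entry.offset() + byteSize: each save lands inside the slot.
        std::ptrdiff_t offsetInSlot = offset + static_cast<std::ptrdiff_t>(byteSize);
        assert(offsetInSlot >= 0 && offsetInSlot + 8 <= static_cast<std::ptrdiff_t>(byteSize));
    }
    return 0;
}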
Example #4
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (unsigned instIndex) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            Inst::forEachDef<Arg>(
                block->get(instIndex), block->get(instIndex + 1),
                [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) {
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");

            // Kill dead stores. For simplicity we say that a store is killable if it has only late
            // defs and those late defs are to things that are dead right now. We only do that
            // because that's the only kind of dead stack store we will see here.
            Inst& inst = block->at(instIndex);
            if (!inst.hasNonArgEffects()) {
                bool ok = true;
                inst.forEachArg(
                    [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                        if (Arg::isEarlyDef(role)) {
                            ok = false;
                            return;
                        }
                        if (!Arg::isLateDef(role))
                            return;
                        if (!arg.isStack()) {
                            ok = false;
                            return;
                        }
                        StackSlot* slot = arg.stackSlot();
                        if (slot->kind() != StackSlotKind::Anonymous) {
                            ok = false;
                            return;
                        }

                        if (localCalc.isLive(slot)) {
                            ok = false;
                            return;
                        }
                    });
                if (ok)
                    inst = Inst();
            }
            
            interfere(instIndex);
            localCalc.execute(instIndex);
        }
        // instIndex is unsigned, so interfere(-1) wraps: get(-1) is null and get(-1 + 1) is the
        // first instruction, meaning this final call only sees early defs at the head of the block.
        interfere(-1);
        
        block->insts().removeAllMatching(
            [&] (const Inst& inst) -> bool {
                return !inst;
            });
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we want to find the offsetFromFP closest to zero that does not overlap any of
    // the StackSlots it interferes with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        // The first resize discards the interfering slots left over from the previous iteration;
        // the second makes room for this slot's interference set after the escaped slots.
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame.
    // We would have to scavenge for temporaries if this happened. Fortunately, this case will be
    // extremely rare so we can do crazy things when it arises.
    // https://bugs.webkit.org/show_bug.cgi?id=152530

    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) {
                    auto stackAddr = [&] (int32_t offset) -> Arg {
                        return Arg::stackAddr(offset, code.frameSize(), width);
                    };
                    
                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Anonymous
                            && slot->byteSize() > Arg::bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Arg::Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                }
            );
        }
        insertionSet.execute(block);
    }
}
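One more piece worth spelling out: the StoreZero32 insertions in both allocateStack variants and in lowerStackArgs handle a 32-bit ZDef into an 8-byte slot by zeroing the upper four bytes, so a later 64-bit read of the slot sees a zero-extended value rather than stale garbage. A minimal byte-level model of that invariant in plain C++, assuming little-endian layout to match the +4 in stackAddr(arg.offset() + 4 + slot->offsetFromFP()):

#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    uint8_t slot[8];
    std::memset(slot, 0xff, sizeof(slot)); // stale garbage in the slot

    uint32_t value = 42;
    std::memcpy(slot, &value, 4);          // the 32-bit ZDef itself
    uint32_t zero = 0;
    std::memcpy(slot + 4, &zero, 4);       // the inserted StoreZero32
                                           // (little-endian: low half at 0, zero fill at 4)
    uint64_t loaded;
    std::memcpy(&loaded, slot, 8);
    assert(loaded == 42);                  // a 64-bit load sees zero-extension
    return 0;
}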