Code example #1
void fixPartialRegisterStalls(Code& code)
{
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls");

    Vector<BasicBlock*> candidates;

    for (BasicBlock* block : code) {
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                candidates.append(block);
                break;
            }
        }
    }

    // Fortunately, instructions with partial register updates are rare. Return
    // early if no block contains one.
    if (candidates.isEmpty())
        return;

    // For each block, this provides the distance to the last instruction setting each register
    // on block *entry*.
    IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size());

    // Blocks with dirty distance at head.
    IndexSet<BasicBlock> dirty;

    // First, we compute the local distance for each block and push it to the successors.
    for (BasicBlock* block : code) {
        FPDefDistance localDistance;

        unsigned distanceToBlockEnd = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, localDistance, distanceToBlockEnd);

        for (BasicBlock* successor : block->successorBlocks()) {
            if (lastDefDistance[successor].updateFromPrecessor(localDistance))
                dirty.add(successor);
        }
    }

    // Now we propagate the minimums across blocks.
    bool changed;
    do {
        changed = false;

        for (BasicBlock* block : code) {
            if (!dirty.remove(block))
                continue;

            // Little shortcut: if the block is big enough, propagating it won't add any information.
            if (block->size() >= minimumSafeDistance)
                continue;

            unsigned blockSize = block->size();
            FPDefDistance& blockDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
                    dirty.add(successor);
                    changed = true;
                }
            }
        }
    } while (changed);

    // Finally, update each block as needed.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned distanceToBlockEnd = block->size();
        FPDefDistance& localDistance = lastDefDistance[block];

        for (unsigned i = 0; i < block->size(); ++i) {
            Inst& inst = block->at(i);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSet defs;
                RegisterSet uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type) {
                    if (tmp.isFPR()) {
                        if (Arg::isDef(role))
                            defs.set(tmp.fpr());
                        if (Arg::isAnyUse(role))
                            uses.set(tmp.fpr());
                    }
                });
                // We only care about values we define but not use. Otherwise we have to wait
                // for the value to be resolved anyway.
                defs.exclude(uses);

                defs.forEach([&] (Reg reg) {
                    if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
                        insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            updateDistances(inst, localDistance, distanceToBlockEnd);
        }
        insertionSet.execute(block);
    }
}
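
The FPDefDistance type and updateDistances() used above are defined elsewhere in the same file and are not part of this snippet. As a minimal sketch of the merge step the dataflow relies on, assuming the distance array is indexed like the fpRegisterIndex() lookup above and that a register-count constant is available (both are assumptions, not the actual WebKit definition):

struct FPDefDistance {
    // One saturating counter per FP register: instructions since the last
    // full definition, clamped at minimumSafeDistance since anything past
    // that carries no extra information.
    static const unsigned numFPRegisters = 16; // assumed constant
    unsigned distance[numFPRegisters];

    FPDefDistance()
    {
        for (unsigned i = 0; i < numFPRegisters; ++i)
            distance[i] = minimumSafeDistance;
    }

    // Merge a predecessor's distances into this block's entry state, offset
    // by the predecessor's length when propagating across whole blocks.
    // Returns true if any distance shrank, so the fixpoint must revisit us.
    // (Method name spelled as at the call sites above.)
    bool updateFromPrecessor(FPDefDistance& predecessorDistance, unsigned constantOffset = 0)
    {
        bool changed = false;
        for (unsigned i = 0; i < numFPRegisters; ++i) {
            unsigned newDistance = std::min<unsigned>(
                predecessorDistance.distance[i] + constantOffset, minimumSafeDistance);
            if (newDistance < distance[i]) {
                distance[i] = newDistance;
                changed = true;
            }
        }
        return changed;
    }
};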
Code example #2
CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> const& prev_rows,
                                                           std::vector<size_t> const& next_rows,
                                                           std::function<bool (size_t)> row_did_change,
                                                           util::Optional<IndexSet> const& move_candidates)
{
    REALM_ASSERT_DEBUG(!move_candidates || std::is_sorted(begin(next_rows), end(next_rows)));

    CollectionChangeBuilder ret;

    size_t deleted = 0;
    std::vector<RowInfo> old_rows;
    old_rows.reserve(prev_rows.size());
    for (size_t i = 0; i < prev_rows.size(); ++i) {
        if (prev_rows[i] == IndexSet::npos) {
            ++deleted;
            ret.deletions.add(i);
        }
        else
            old_rows.push_back({prev_rows[i], IndexSet::npos, i, i - deleted});
    }
    std::sort(begin(old_rows), end(old_rows), [](auto& lft, auto& rgt) {
        return lft.row_index < rgt.row_index;
    });

    std::vector<RowInfo> new_rows;
    new_rows.reserve(next_rows.size());
    for (size_t i = 0; i < next_rows.size(); ++i) {
        new_rows.push_back({next_rows[i], IndexSet::npos, i, 0});
    }
    std::sort(begin(new_rows), end(new_rows), [](auto& lft, auto& rgt) {
        return lft.row_index < rgt.row_index;
    });

    // Don't add rows which were modified to not match the query to `deletions`
    // immediately because the unsorted move logic needs to be able to
    // distinguish them from rows which were outright deleted
    IndexSet removed;

    // Now that our old and new sets of rows are sorted by row index, we can
    // iterate over them and either record old+new TV indices for rows present
    // in both, or mark them as inserted/deleted if they appear only in one
    size_t i = 0, j = 0;
    while (i < old_rows.size() && j < new_rows.size()) {
        auto old_index = old_rows[i];
        auto new_index = new_rows[j];
        if (old_index.row_index == new_index.row_index) {
            new_rows[j].prev_tv_index = old_rows[i].tv_index;
            new_rows[j].shifted_tv_index = old_rows[i].shifted_tv_index;
            ++i;
            ++j;
        }
        else if (old_index.row_index < new_index.row_index) {
            removed.add(old_index.tv_index);
            ++i;
        }
        else {
            ret.insertions.add(new_index.tv_index);
            ++j;
        }
    }

    for (; i < old_rows.size(); ++i)
        removed.add(old_rows[i].tv_index);
    for (; j < new_rows.size(); ++j)
        ret.insertions.add(new_rows[j].tv_index);

    // Filter out the new insertions since we don't need them for any of the
    // further calculations
    new_rows.erase(std::remove_if(begin(new_rows), end(new_rows),
                                  [](auto& row) { return row.prev_tv_index == IndexSet::npos; }),
                   end(new_rows));
    std::sort(begin(new_rows), end(new_rows),
              [](auto& lft, auto& rgt) { return lft.tv_index < rgt.tv_index; });

    for (auto& row : new_rows) {
        if (row_did_change(row.row_index)) {
            ret.modifications.add(row.tv_index);
        }
    }

    if (move_candidates) {
        calculate_moves_unsorted(new_rows, removed, *move_candidates, ret);
    }
    else {
        calculate_moves_sorted(new_rows, ret);
    }
    ret.deletions.add(removed);
    ret.verify();

#ifdef REALM_DEBUG
    { // Verify that applying the calculated change to prev_rows actually produces next_rows
        auto rows = prev_rows;
        auto it = util::make_reverse_iterator(ret.deletions.end());
        auto end = util::make_reverse_iterator(ret.deletions.begin());
        for (; it != end; ++it) {
            rows.erase(rows.begin() + it->first, rows.begin() + it->second);
        }

        for (auto i : ret.insertions.as_indexes()) {
            rows.insert(rows.begin() + i, next_rows[i]);
        }

        REALM_ASSERT(rows == next_rows);
    }
#endif

    return ret;
}
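
A hedged walk-through of the index bookkeeping on a tiny invented input (ignoring move detection):

// prev_rows maps old TV indices to row indices; IndexSet::npos marks rows
// deleted from the table. Suppose:
//   prev_rows = { 10, npos, 12, 13 }
//   next_rows = { 10, 13, 14 }
// The merge loop matches rows 10 and 13, sees that row 12 left the view, and
// that row 14 is new, so calculate() reports:
//   deletions     = { 1, 2 }   // TV index 1 (the npos entry) and TV index 2 (row 12)
//   insertions    = { 2 }      // row 14 surfaces at new TV index 2
//   modifications = the new TV indices (0 and/or 1) of the surviving rows 10
//                   and 13 for which row_did_change() returns true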
Code example #3
bool eliminateDeadCode(Code& code)
{
    PhaseScope phaseScope(code, "eliminateDeadCode");

    HashSet<Tmp> liveTmps;
    IndexSet<StackSlot> liveStackSlots;
    bool changed;

    auto isArgLive = [&] (const Arg& arg) -> bool {
        switch (arg.kind()) {
        case Arg::Tmp:
            if (arg.isReg())
                return true;
            return liveTmps.contains(arg.tmp());
        case Arg::Stack:
            if (arg.stackSlot()->isLocked())
                return true;
            return liveStackSlots.contains(arg.stackSlot());
        default:
            return true;
        }
    };

    auto addLiveArg = [&] (const Arg& arg) -> bool {
        switch (arg.kind()) {
        case Arg::Tmp:
            if (arg.isReg())
                return false;
            return liveTmps.add(arg.tmp()).isNewEntry;
        case Arg::Stack:
            if (arg.stackSlot()->isLocked())
                return false;
            return liveStackSlots.add(arg.stackSlot());
        default:
            return false;
        }
    };

    auto isInstLive = [&] (Inst& inst) -> bool {
        if (inst.hasNonArgEffects())
            return true;

        // This instruction should be presumed dead if its Args are all dead.
        bool storesToLive = false;
        inst.forEachArg(
            [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                if (!Arg::isDef(role))
                    return;
                storesToLive |= isArgLive(arg);
            });
        return storesToLive;
    };

    auto handleInst = [&] (Inst& inst) {
        if (!isInstLive(inst))
            return;

        // We get here if the Inst is live. For simplicity we say that a live instruction forces
        // liveness upon everything it mentions.
        for (Arg& arg : inst.args) {
            changed |= addLiveArg(arg);
            arg.forEachTmpFast(
                [&] (Tmp& tmp) {
                    changed |= addLiveArg(tmp);
                });
        }
    };

    auto runForward = [&] () -> bool {
        changed = false;
        for (BasicBlock* block : code) {
            for (Inst& inst : *block)
                handleInst(inst);
        }
        return changed;
    };

    auto runBackward = [&] () -> bool {
        changed = false;
        for (unsigned blockIndex = code.size(); blockIndex--;) {
            BasicBlock* block = code[blockIndex];
            for (unsigned instIndex = block->size(); instIndex--;)
                handleInst(block->at(instIndex));
        }
        return changed;
    };

    for (;;) {
        // Propagating backward is most likely to be profitable.
        if (!runBackward())
            break;
        if (!runBackward())
            break;

        // Occasionally propagating forward greatly reduces the likelihood of pathologies.
        if (!runForward())
            break;
    }

    unsigned removedInstCount = 0;
    for (BasicBlock* block : code) {
        removedInstCount += block->insts().removeAllMatching(
            [&] (Inst& inst) -> bool {
                return !isInstLive(inst);
            });
    }

    return !!removedInstCount;
}
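
To see why the driver loop favors backward passes, consider a copy chain feeding an instruction with non-arg effects (opcode spelling assumed):

// Move %t1, %t2
// Move %t2, %t3
// Ret  %t3        <- has non-arg effects, so unconditionally live
// runBackward() visits Ret first, marking %t3 live; each Move is then seen
// after the tmp it defines is already live, so the entire chain becomes live
// in a single pass. Scanned forward, the same chain needs one pass per link.
// Loop back edges can orient a chain the unlucky way for either direction,
// which is the pathology the occasional runForward() pass is there to damp.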
Code example #4
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (Inst& inst) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (!Arg::isDef(role))
                        return;
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");
            Inst& inst = block->at(instIndex);
            interfere(inst);
            localCalc.execute(instIndex);
        }
        Inst nop;
        interfere(nop);
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no
    // overlap with other StackSlots that this overlaps with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
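        // Reset otherSlots to just the assigned escaped slots, then grow it to
        // hold this slot's interference set, filled in below.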
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                switch (arg.kind()) {
                case Arg::Stack:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() + arg.stackSlot()->offsetFromFP());
                    break;
                case Arg::CallArg:
                    arg = Arg::addr(
                        Tmp(GPRInfo::callFrameRegister),
                        arg.offset() - code.frameSize());
                    break;
                default:
                    break;
                }
            }
        }
    }
}
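
The assign() helper that performs the actual placement is not part of this snippet. A first-fit sketch consistent with its call sites, using the setOffsetFromFP()/byteSize() accessors seen above but an assumed placement policy:

static void assign(StackSlot* slot, const Vector<StackSlot*>& otherSlots)
{
    // Walk candidate offsets downward from FP in pointer-sized steps and take
    // the first position that does not overlap any already-placed slot. Slots
    // not yet assigned report offsetFromFP() == 0 and can never conflict with
    // a negative candidate range, so they are naturally skipped.
    for (intptr_t offset = -static_cast<intptr_t>(slot->byteSize()); ; offset -= sizeof(void*)) {
        bool fits = true;
        for (StackSlot* other : otherSlots) {
            intptr_t otherBegin = other->offsetFromFP();
            intptr_t otherEnd = otherBegin + static_cast<intptr_t>(other->byteSize());
            if (offset < otherEnd && otherBegin < offset + static_cast<intptr_t>(slot->byteSize())) {
                fits = false;
                break;
            }
        }
        if (fits) {
            slot->setOffsetFromFP(offset);
            return;
        }
    }
}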
Code example #5
File: AirAllocateStack.cpp Project: TigerWFH/webkit
void allocateStack(Code& code)
{
    PhaseScope phaseScope(code, "allocateStack");

    // Perform an escape analysis over stack slots. An escaping stack slot is one that is locked or
    // is explicitly escaped in the code.
    IndexSet<StackSlot> escapingStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->isLocked())
            escapingStackSlots.add(slot);
    }
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                    if (role == Arg::UseAddr && arg.isStack())
                        escapingStackSlots.add(arg.stackSlot());
                });
        }
    }

    // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for
    // the possibility of stack slots being assigned frame offsets before we even get here.
    ASSERT(!code.frameSize());
    Vector<StackSlot*> assignedEscapedStackSlots;
    Vector<StackSlot*> escapedStackSlotsWorklist;
    for (StackSlot* slot : code.stackSlots()) {
        if (escapingStackSlots.contains(slot)) {
            if (slot->offsetFromFP())
                assignedEscapedStackSlots.append(slot);
            else
                escapedStackSlotsWorklist.append(slot);
        } else {
            // It would be super strange to have an unlocked stack slot that has an offset already.
            ASSERT(!slot->offsetFromFP());
        }
    }
    // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of
    // escaped stack slots.
    while (!escapedStackSlotsWorklist.isEmpty()) {
        StackSlot* slot = escapedStackSlotsWorklist.takeLast();
        assign(slot, assignedEscapedStackSlots);
        assignedEscapedStackSlots.append(slot);
    }

    // Now we handle the anonymous slots.
    StackSlotLiveness liveness(code);
    IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size());
    Vector<StackSlot*> slots;

    for (BasicBlock* block : code) {
        StackSlotLiveness::LocalCalc localCalc(liveness, block);

        auto interfere = [&] (unsigned instIndex) {
            if (verbose)
                dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n");

            Inst::forEachDef<Arg>(
                block->get(instIndex), block->get(instIndex + 1),
                [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) {
                    if (!arg.isStack())
                        return;
                    StackSlot* slot = arg.stackSlot();
                    if (slot->kind() != StackSlotKind::Anonymous)
                        return;

                    for (StackSlot* otherSlot : localCalc.live()) {
                        interference[slot].add(otherSlot);
                        interference[otherSlot].add(slot);
                    }
                });
        };

        for (unsigned instIndex = block->size(); instIndex--;) {
            if (verbose)
                dataLog("Analyzing: ", block->at(instIndex), "\n");

            // Kill dead stores. For simplicity we say that a store is killable if it has only late
            // defs and those late defs are to things that are dead right now. We only do that
            // because that's the only kind of dead stack store we will see here.
            Inst& inst = block->at(instIndex);
            if (!inst.hasNonArgEffects()) {
                bool ok = true;
                inst.forEachArg(
                    [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) {
                        if (Arg::isEarlyDef(role)) {
                            ok = false;
                            return;
                        }
                        if (!Arg::isLateDef(role))
                            return;
                        if (!arg.isStack()) {
                            ok = false;
                            return;
                        }
                        StackSlot* slot = arg.stackSlot();
                        if (slot->kind() != StackSlotKind::Anonymous) {
                            ok = false;
                            return;
                        }

                        if (localCalc.isLive(slot)) {
                            ok = false;
                            return;
                        }
                    });
                if (ok)
                    inst = Inst();
            }
            
            interfere(instIndex);
            localCalc.execute(instIndex);
        }
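        // One more boundary: passing -1 makes forEachDef look at the early defs
        // of the block's first instruction, which interfere with everything live
        // at the block head (localCalc.live() after the loop above).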
        interfere(-1);
        
        block->insts().removeAllMatching(
            [&] (const Inst& inst) -> bool {
                return !inst;
            });
    }

    if (verbose) {
        for (StackSlot* slot : code.stackSlots())
            dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n");
    }

    // Now we assign stack locations. At its heart this algorithm is just first-fit. For each
    // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no
    // overlap with other StackSlots that this overlaps with.
    Vector<StackSlot*> otherSlots = assignedEscapedStackSlots;
    for (StackSlot* slot : code.stackSlots()) {
        if (slot->offsetFromFP()) {
            // Already assigned an offset.
            continue;
        }

        HashSet<StackSlot*>& interferingSlots = interference[slot];
        otherSlots.resize(assignedEscapedStackSlots.size());
        otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size());
        unsigned nextIndex = assignedEscapedStackSlots.size();
        for (StackSlot* otherSlot : interferingSlots)
            otherSlots[nextIndex++] = otherSlot;

        assign(slot, otherSlots);
    }

    // Figure out how much stack we're using for stack slots.
    unsigned frameSizeForStackSlots = 0;
    for (StackSlot* slot : code.stackSlots()) {
        frameSizeForStackSlots = std::max(
            frameSizeForStackSlots,
            static_cast<unsigned>(-slot->offsetFromFP()));
    }

    frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots);

    // Now we need to deduce how much argument area we need.
    for (BasicBlock* block : code) {
        for (Inst& inst : *block) {
            for (Arg& arg : inst.args) {
                if (arg.isCallArg()) {
                    // For now, we assume that we use 8 bytes of the call arg. But that's not
                    // such an awesome assumption.
                    // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454
                    ASSERT(arg.offset() >= 0);
                    code.requestCallArgAreaSize(arg.offset() + 8);
                }
            }
        }
    }

    code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSize());

    // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless
    // transformation since we can search the StackSlots array to figure out which StackSlot any
    // offset-from-FP refers to.

    // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame.
    // We would have to scavenge for temporaries if this happened. Fortunately, this case will be
    // extremely rare so we can do crazy things when it arises.
    // https://bugs.webkit.org/show_bug.cgi?id=152530

    InsertionSet insertionSet(code);
    for (BasicBlock* block : code) {
        for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) {
            Inst& inst = block->at(instIndex);
            inst.forEachArg(
                [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) {
                    auto stackAddr = [&] (int32_t offset) -> Arg {
                        return Arg::stackAddr(offset, code.frameSize(), width);
                    };
                    
                    switch (arg.kind()) {
                    case Arg::Stack: {
                        StackSlot* slot = arg.stackSlot();
                        if (Arg::isZDef(role)
                            && slot->kind() == StackSlotKind::Anonymous
                            && slot->byteSize() > Arg::bytes(width)) {
                            // Currently we only handle this simple case because it's the only one
                            // that arises: ZDef's are only 32-bit right now. So, when we hit these
                            // assertions it means that we need to implement those other kinds of
                            // zero fills.
                            RELEASE_ASSERT(slot->byteSize() == 8);
                            RELEASE_ASSERT(width == Arg::Width32);

                            RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack));
                            insertionSet.insert(
                                instIndex + 1, StoreZero32, inst.origin,
                                stackAddr(arg.offset() + 4 + slot->offsetFromFP()));
                        }
                        arg = stackAddr(arg.offset() + slot->offsetFromFP());
                        break;
                    }
                    case Arg::CallArg:
                        arg = stackAddr(arg.offset() - code.frameSize());
                        break;
                    default:
                        break;
                    }
                }
            );
        }
        insertionSet.execute(block);
    }
}
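
This later revision of allocateStack (compare code example #4) also kills dead stack stores during the liveness walk and zero-fills the untouched upper part of a partially written anonymous slot. The ZDef case, with invented offsets:

// Suppose an anonymous 8-byte slot lands at offsetFromFP() == -16 and an
// instruction ZDefs its low 32 bits through an Arg::Stack with offset 0.
// The rewrite turns the arg into stackAddr(0 + -16) == stackAddr(-16), and
// right after the instruction inserts:
//   StoreZero32 stackAddr(0 + 4 + -16)  ==  StoreZero32 stackAddr(-12)
// zeroing the high 4 bytes so that a full 8-byte read of the slot sees the
// zero-extended value a ZDef promises.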