void BlockState::processWrite(RLEContext &Ctx, SILInstruction *I, SILValue Mem, SILValue Val, RLEKind Kind) { // Initialize the LSLocation. LSLocation L(Mem); // If we cant figure out the Base or Projection Path for the write, // process it as an unknown memory instruction. if (!L.isValid()) { // we can ignore unknown store instructions if we are computing the // AvailSetMax. if (!isComputeAvailSetMax(Kind)) { processUnknownWriteInst(Ctx, I, Kind); } return; } // Expand the given location and val into individual fields and process // them as separate writes. LSLocationList Locs; LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE()); if (isComputeAvailSetMax(Kind)) { for (unsigned i = 0; i < Locs.size(); ++i) { updateMaxAvailForwardSetForWrite(Ctx, Ctx.getLocationBit(Locs[i])); } return; } // Are we computing the genset and killset ? if (isComputeAvailGenKillSet(Kind)) { for (unsigned i = 0; i < Locs.size(); ++i) { updateGenKillSetForWrite(Ctx, Ctx.getLocationBit(Locs[i])); } return; } // Are we computing available set ? if (isComputeAvailSet(Kind)) { for (unsigned i = 0; i < Locs.size(); ++i) { updateForwardSetForWrite(Ctx, Ctx.getLocationBit(Locs[i])); } return; } // Are we computing available value or performing RLE? if (isComputeAvailValue(Kind) || isPerformingRLE(Kind)) { LSValueList Vals; LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE()); for (unsigned i = 0; i < Locs.size(); ++i) { updateForwardSetAndValForWrite(Ctx, Ctx.getLocationBit(Locs[i]), Ctx.getValueBit(Vals[i])); } return; } llvm_unreachable("Unknown RLE compute kind"); }
BlockState::ValueState BlockState::getValueStateAtEndOfBlock(RLEContext &Ctx, LSLocation &L) { LSLocationList Locs; LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE()); // Find number of covering value and concrete values for the locations // expanded from the given location. unsigned CSCount = 0, CTCount = 0; ValueTableMap &OTM = getForwardValOut(); for (auto &X : Locs) { LSValue &V = Ctx.getLSValue(OTM[Ctx.getLSLocationBit(X)]); if (V.isCoveringValue()) { ++CSCount; continue; } ++CTCount; } if (CSCount == Locs.size()) return ValueState::CoverValues; if (CTCount == Locs.size()) return ValueState::ConcreteValues; return ValueState::CoverAndConcreteValues; }
void BlockState::processRead(RLEContext &Ctx, SILInstruction *I, SILValue Mem, SILValue Val, RLEKind Kind) { // Initialize the LSLocation. LSLocation L(Mem); // If we cant figure out the Base or Projection Path for the read, simply // ignore it for now. if (!L.isValid()) return; // Expand the given LSLocation and Val into individual fields and process // them as separate reads. LSLocationList Locs; LSLocation::expand(L, &I->getModule(), Locs, Ctx.getTE()); // Are we computing available set ?. if (isComputeAvailSet(Kind)) { for (auto &X : Locs) { if (isTrackingLSLocation(Ctx.getLSLocationBit(X))) continue; updateForwardSetForRead(Ctx, Ctx.getLSLocationBit(X)); } return; } // Are we computing available values ?. bool CanForward = true; if (isComputeAvailValue(Kind) || isPerformRLE(Kind)) { LSValueList Vals; LSValue::expand(Val, &I->getModule(), Vals, Ctx.getTE()); for (unsigned i = 0; i < Locs.size(); ++i) { if (isTrackingLSLocation(Ctx.getLSLocationBit(Locs[i]))) continue; updateForwardValForRead(Ctx, Ctx.getLSLocationBit(Locs[i]), Ctx.getLSValueBit(Vals[i])); CanForward = false; } } // Simply return if we are not performing RLE or we do not have all the // values available to perform RLE. if (!isPerformRLE(Kind) || !CanForward) return; // Lastly, forward value to the load. setupRLE(Ctx, I, Mem); }
bool RLEContext::gatherLocationValues(SILBasicBlock *BB, LSLocation &L, LSLocationValueMap &Values, ValueTableMap &VM) { LSLocationSet CSLocs; LSLocationList Locs; LSLocation::expand(L, &BB->getModule(), Locs, TE); auto *Mod = &BB->getModule(); // Find the locations that this basic block defines and the locations which // we do not have a concrete value in the current basic block. for (auto &X : Locs) { Values[X] = getLSValue(VM[getLSLocationBit(X)]); if (!Values[X].isCoveringValue()) continue; CSLocs.insert(X); } // For locations which we do not have concrete values for in this basic // block, try to reduce it to the minimum # of locations possible, this // will help us to generate as few SILArguments as possible. LSLocation::reduce(L, Mod, CSLocs, TE); // To handle covering value, we need to go to the predecessors and // materialize them there. for (auto &X : CSLocs) { SILValue V = computePredecessorLocationValue(BB, X); if (!V) return false; // We've constructed a concrete value for the covering value. Expand and // collect the newly created forwardable values. LSLocationList Locs; LSValueList Vals; LSLocation::expand(X, Mod, Locs, TE); LSValue::expand(V, Mod, Vals, TE); for (unsigned i = 0; i < Locs.size(); ++i) { Values[Locs[i]] = Vals[i]; assert(Values[Locs[i]].isValid() && "Invalid load store value"); } } return true; }
SILValue BlockState::reduceValuesAtEndOfBlock(RLEContext &Ctx, LSLocation &L) { // First, collect current available locations and their corresponding values // into a map. LSLocationValueMap Values; LSLocationList Locs; LSLocation::expand(L, &BB->getModule(), Locs, Ctx.getTE()); // Find the values that this basic block defines and the locations which // we do not have a concrete value in the current basic block. ValueTableMap &OTM = getForwardValOut(); for (unsigned i = 0; i < Locs.size(); ++i) { Values[Locs[i]] = Ctx.getLSValue(OTM[Ctx.getLSLocationBit(Locs[i])]); } // Second, reduce the available values into a single SILValue we can use to // forward. SILValue TheForwardingValue; TheForwardingValue = LSValue::reduce(L, &BB->getModule(), Values, BB->getTerminator(), Ctx.getTE()); /// Return the forwarding value. return TheForwardingValue; }
SILValue LSValue::reduce(LSLocation &Base, SILModule *M, LSLocationValueMap &Values, SILInstruction *InsertPt, TypeExpansionAnalysis *TE) { // Walk bottom up the projection tree, try to reason about how to construct // a single SILValue out of all the available values for all the memory // locations. // // First, get a list of all the leaf nodes and intermediate nodes for the // Base memory location. LSLocationList ALocs; ProjectionPath &BasePath = Base.getPath().getValue(); for (const auto &P : TE->getTypeExpansionProjectionPaths(Base.getType(), M, TEKind::TENode)) { ALocs.push_back(LSLocation(Base.getBase(), P.getValue(), BasePath)); } // Second, go from leaf nodes to their parents. This guarantees that at the // point the parent is processed, its children have been processed already. for (auto I = ALocs.rbegin(), E = ALocs.rend(); I != E; ++I) { // This is a leaf node, we have a value for it. // // Reached the end of the projection tree, this is a leaf node. LSLocationList FirstLevel; I->getFirstLevelLSLocations(FirstLevel, M); if (FirstLevel.empty()) continue; // If this is a class reference type, we have reached end of the type tree. if (I->getType().getClassOrBoundGenericClass()) continue; // This is NOT a leaf node, we need to construct a value for it. // There is only 1 children node and its value's projection path is not // empty, keep stripping it. auto Iter = FirstLevel.begin(); LSValue &FirstVal = Values[*Iter]; if (FirstLevel.size() == 1 && !FirstVal.hasEmptyProjectionPath()) { Values[*I] = FirstVal.stripLastLevelProjection(); // We have a value for the parent, remove all the values for children. removeLSLocations(Values, FirstLevel); continue; } // If there are more than 1 children and all the children nodes have // LSValues with the same base and non-empty projection path. we can get // away by not extracting value for every single field. // // Simply create a new node with all the aggregated base value, i.e. // stripping off the last level projection. bool HasIdenticalValueBase = true; SILValue FirstBase = FirstVal.getBase(); Iter = std::next(Iter); for (auto EndIter = FirstLevel.end(); Iter != EndIter; ++Iter) { LSValue &V = Values[*Iter]; HasIdenticalValueBase &= (FirstBase == V.getBase()); } if (FirstLevel.size() > 1 && HasIdenticalValueBase && !FirstVal.hasEmptyProjectionPath()) { Values[*I] = FirstVal.stripLastLevelProjection(); // We have a value for the parent, remove all the values for children. removeLSLocations(Values, FirstLevel); continue; } // In 3 cases do we need aggregation. // // 1. If there is only 1 child and we cannot strip off any projections, // that means we need to create an aggregation. // // 2. There are multiple children and they have the same base, but empty // projection paths. // // 3. Children have values from different bases, We need to create // extractions and aggregation in this case. // llvm::SmallVector<SILValue, 8> Vals; for (auto &X : FirstLevel) { Vals.push_back(Values[X].materialize(InsertPt)); } SILBuilder Builder(InsertPt); // We use an auto-generated SILLocation for now. // TODO: make the sil location more precise. NullablePtr<swift::SILInstruction> AI = Projection::createAggFromFirstLevelProjections( Builder, RegularLocation::getAutoGeneratedLocation(), I->getType(), Vals); // This is the Value for the current node. ProjectionPath P; Values[*I] = LSValue(SILValue(AI.get()), P); removeLSLocations(Values, FirstLevel); // Keep iterating until we have reach the top-most level of the projection // tree. // i.e. the memory location represented by the Base. } assert(Values.size() == 1 && "Should have a single location this point"); // Finally materialize and return the forwarding SILValue. return Values.begin()->second.materialize(InsertPt); }
void LSValue::reduceInner(LSLocation &Base, SILModule *M, LSLocationValueMap &Values, SILInstruction *InsertPt) { // If this is a class reference type, we have reached end of the type tree. if (Base.getType(M).getClassOrBoundGenericClass()) return; // This is a leaf node, we must have a value for it. LSLocationList NextLevel; Base.getNextLevelLSLocations(NextLevel, M); if (NextLevel.empty()) return; // This is not a leaf node, reduce the next level node one by one. for (auto &X : NextLevel) { LSValue::reduceInner(X, M, Values, InsertPt); } // This is NOT a leaf node, we need to construct a value for it. auto Iter = NextLevel.begin(); LSValue &FirstVal = Values[*Iter]; // There is only 1 children node and its value's projection path is not // empty, keep stripping it. if (NextLevel.size() == 1 && !FirstVal.hasEmptyProjectionPath()) { Values[Base] = FirstVal.stripLastLevelProjection(); // We have a value for the parent, remove all the values for children. removeLSLocations(Values, NextLevel); return; } bool HasIdenticalBase = true; SILValue FirstBase = FirstVal.getBase(); for (auto &X : NextLevel) { HasIdenticalBase &= (FirstBase == Values[X].getBase()); } // This is NOT a leaf node and it has multiple children, but they have the // same value base. if (NextLevel.size() > 1 && HasIdenticalBase) { if (!FirstVal.hasEmptyProjectionPath()) { Values[Base] = FirstVal.stripLastLevelProjection(); // We have a value for the parent, remove all the values for children. removeLSLocations(Values, NextLevel); return; } } // In 3 cases do we need aggregation. // // 1. If there is only 1 child and we cannot strip off any projections, // that means we need to create an aggregation. // // 2. There are multiple children and they have the same base, but empty // projection paths. // // 3. Children have values from different bases, We need to create // extractions and aggregation in this case. // llvm::SmallVector<SILValue, 8> Vals; for (auto &X : NextLevel) { Vals.push_back(Values[X].materialize(InsertPt)); } SILBuilder Builder(InsertPt); Builder.setCurrentDebugScope(InsertPt->getFunction()->getDebugScope()); // We use an auto-generated SILLocation for now. NullablePtr<swift::SILInstruction> AI = Projection::createAggFromFirstLevelProjections( Builder, RegularLocation::getAutoGeneratedLocation(), Base.getType(M).getObjectType(), Vals); // This is the Value for the current base. ProjectionPath P(Base.getType(M)); Values[Base] = LSValue(SILValue(AI.get()), P); removeLSLocations(Values, NextLevel); }
void DSEContext::processWrite(SILInstruction *I, SILValue Val, SILValue Mem, DSEKind Kind) { auto *S = getBlockState(I); // Construct a LSLocation to represent the memory read by this instruction. // NOTE: The base will point to the actual object this inst is accessing, // not this particular field. // // e.g. %1 = alloc_stack $S // %2 = struct_element_addr %1, #a // store %3 to %2 : $*Int // // Base will point to %1, but not %2. Projection path will indicate which // field is accessed. // // This will make comparison between locations easier. This eases the // implementation of intersection operator in the data flow. LSLocation L; if (BaseToLocIndex.find(Mem) != BaseToLocIndex.end()) { L = BaseToLocIndex[Mem]; } else { SILValue UO = getUnderlyingObject(Mem); L = LSLocation(UO, ProjectionPath::getProjectionPath(UO, Mem)); } // If we can't figure out the Base or Projection Path for the store // instruction, simply ignore it. if (!L.isValid()) return; // Expand the given Mem into individual fields and process them as separate // writes. bool Dead = true; LSLocationList Locs; LSLocation::expand(L, Mod, Locs, TE); llvm::SmallBitVector V(Locs.size()); // Are we computing max store set. if (isComputeMaxStoreSet(Kind)) { for (auto &E : Locs) { // Update the max store set for the basic block. processWriteForMaxStoreSet(S, getLocationBit(E)); } return; } // Are we computing genset and killset. if (isBuildingGenKillSet(Kind)) { for (auto &E : Locs) { // Only building the gen and kill sets here. processWriteForGenKillSet(S, getLocationBit(E)); } // Data flow has not stabilized, do not perform the DSE just yet. return; } // We are doing the actual DSE. assert(isPerformingDSE(Kind) && "Invalid computation kind"); unsigned idx = 0; for (auto &E : Locs) { // This is the last iteration, compute BBWriteSetOut and perform the dead // store elimination. if (processWriteForDSE(S, getLocationBit(E))) V.set(idx); Dead &= V.test(idx); ++idx; } // Fully dead store - stores to all the components are dead, therefore this // instruction is dead. if (Dead) { DEBUG(llvm::dbgs() << "Instruction Dead: " << *I << "\n"); S->DeadStores.insert(I); ++NumDeadStores; return; } // Partial dead store - stores to some locations are dead, but not all. This // is a partially dead store. Also at this point we know what locations are // dead. llvm::DenseSet<LSLocation> Alives; if (V.any()) { // Take out locations that are dead. for (unsigned i = 0; i < V.size(); ++i) { if (V.test(i)) continue; // This location is alive. Alives.insert(Locs[i]); } // Try to create as few aggregated stores as possible out of the locations. LSLocation::reduce(L, Mod, Alives); // Oops, we have too many smaller stores generated, bail out. if (Alives.size() > MaxPartialStoreCount) return; // At this point, we are performing a partial dead store elimination. // // Locations here have a projection path from their Base, but this // particular instruction may not be accessing the base, so we need to // *rebase* the locations w.r.t. to the current instruction. SILValue B = Locs[0].getBase(); Optional<ProjectionPath> BP = ProjectionPath::getProjectionPath(B, Mem); // Strip off the projection path from base to the accessed field. for (auto &X : Alives) { X.removePathPrefix(BP); } // We merely setup the remaining live stores, but do not materialize in IR // yet, These stores will be materialized before the algorithm exits. for (auto &X : Alives) { SILValue Value = X.getPath()->createExtract(Val, I, true); SILValue Addr = X.getPath()->createExtract(Mem, I, false); S->LiveAddr.insert(Addr); S->LiveStores[Addr] = Value; } // Lastly, mark the old store as dead. DEBUG(llvm::dbgs() << "Instruction Partially Dead: " << *I << "\n"); S->DeadStores.insert(I); ++NumPartialDeadStores; } }