Example No. 1
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
                                           ArrayRef<InvokeInst *> Invokes) {
  // Scan the code looking for instructions with bad live ranges.
  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
    for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE;
         ++II) {
      // Ignore obvious cases we don't have to handle. In particular, most
      // instructions either have no uses or only have a single use inside the
      // current block. Ignore them quickly.
      Instruction *Inst = II;
      if (Inst->use_empty())
        continue;
      if (Inst->hasOneUse() &&
          cast<Instruction>(Inst->user_back())->getParent() == BB &&
          !isa<PHINode>(Inst->user_back()))
        continue;

      // If this is an alloca in the entry block, it's not a real register
      // value.
      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
        if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin())
          continue;

      // Avoid iterator invalidation by copying users to a temporary vector.
      SmallVector<Instruction *, 16> Users;
      for (User *U : Inst->users()) {
        Instruction *UI = cast<Instruction>(U);
        if (UI->getParent() != BB || isa<PHINode>(UI))
          Users.push_back(UI);
      }

      // Find all of the blocks that this value is live in.
      SmallPtrSet<BasicBlock *, 64> LiveBBs;
      LiveBBs.insert(Inst->getParent());
      while (!Users.empty()) {
        Instruction *U = Users.back();
        Users.pop_back();

        if (!isa<PHINode>(U)) {
          MarkBlocksLiveIn(U->getParent(), LiveBBs);
        } else {
          // Uses of a PHI node occur in their corresponding predecessor blocks.
          PHINode *PN = cast<PHINode>(U);
          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
            if (PN->getIncomingValue(i) == Inst)
              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
        }
      }

      // Now that we know all of the blocks that this thing is live in, see if
      // it includes any of the unwind locations.
      bool NeedsSpill = false;
      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
          DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
                       << UnwindBlock->getName() << "\n");
          NeedsSpill = true;
          break;
        }
      }

      // If we decided we need a spill, do it.
      // FIXME: Spilling this way is overkill, as it forces all uses of
      // the value to be reloaded from the stack slot, even those that aren't
      // in the unwind blocks. We should be more selective.
      if (NeedsSpill) {
        DemoteRegToStack(*Inst, true);
        ++NumSpilled;
      }
    }
  }

  // Go through the landing pads and remove any PHIs there.
  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
    BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
    LandingPadInst *LPI = UnwindBlock->getLandingPadInst();

    // Place PHIs into a set to avoid invalidating the iterator.
    SmallPtrSet<PHINode *, 8> PHIsToDemote;
    for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
      PHIsToDemote.insert(cast<PHINode>(PN));
    if (PHIsToDemote.empty())
      continue;

    // Demote the PHIs to the stack.
    for (SmallPtrSet<PHINode *, 8>::iterator I = PHIsToDemote.begin(),
                                             E = PHIsToDemote.end();
         I != E; ++I)
      DemotePHIToStack(*I);

    // Move the landingpad instruction back to the top of the landing pad block.
    LPI->moveBefore(UnwindBlock->begin());
  }
}
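
For reference, the MarkBlocksLiveIn helper used above is not shown in this example. A minimal sketch of it, assuming the usual recursive liveness walk (the real helper in SjLjEHPrepare.cpp may differ in detail):

static void MarkBlocksLiveIn(BasicBlock *BB,
                             SmallPtrSet<BasicBlock *, 64> &LiveBBs) {
  // insert() tells us whether BB was newly added; stop on a revisit. The
  // defining block is pre-inserted by the caller, which bounds the walk.
  if (!LiveBBs.insert(BB).second)
    return;

  // A value live into BB is live through all of BB's predecessors as well.
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
    MarkBlocksLiveIn(*PI, LiveBBs);
}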
Example No. 2
/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
/// this function inserts the new computation that produces the shifted value.
static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                              InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (Constant *C = dyn_cast<Constant>(V)) {
    if (isLeftShift)
      V = IC.Builder->CreateShl(C, NumBits);
    else
      V = IC.Builder->CreateLShr(C, NumBits);
    // If we got a constantexpr back, try to simplify it with DataLayout info.
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      V = ConstantFoldConstantExpression(CE, IC.getDataLayout(),
                                         IC.getTargetLibraryInfo());
    return V;
  }

  Instruction *I = cast<Instruction>(V);
  IC.Worklist.Add(I);

  switch (I->getOpcode()) {
  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
    I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    return I;

  case Instruction::Shl: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();

    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) {
      // If this is an oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(I->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setHasNoUnsignedWrap(false);
      BO->setHasNoSignedWrap(false);
      return I;
    }

    // We turn shl(c)+lshr(c) -> and(c2); the mask is needed in case the input
    // doesn't already have zeros in the high NumBits bits.
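    // For example (illustrative, i32, NumBits == 8):
    //   lshr (shl %x, 8), 8  ==>  and %x, 0x00FFFFFF
    // i.e. the shl/lshr round trip just clears the high 8 bits.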
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(BO->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(BO);
        VI->takeName(BO);
      }
      return V;
    }

    // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setHasNoUnsignedWrap(false);
    BO->setHasNoSignedWrap(false);
    return BO;
  }
  case Instruction::LShr: {
    BinaryOperator *BO = cast<BinaryOperator>(I);
    unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
    // We only accept shifts-by-a-constant in CanEvaluateShifted.
    ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) {
      // If this is an oversized composite shift, then unsigned shifts get 0.
      unsigned NewShAmt = NumBits+CI->getZExtValue();
      if (NewShAmt >= TypeWidth)
        return Constant::getNullValue(BO->getType());

      BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
      BO->setIsExact(false);
      return I;
    }

    // We turn lshr(c)+shl(c) -> and(c2); the mask is needed in case the input
    // doesn't already have zeros in the low NumBits bits.
    if (CI->getValue() == NumBits) {
      APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
      V = IC.Builder->CreateAnd(I->getOperand(0),
                                ConstantInt::get(BO->getContext(), Mask));
      if (Instruction *VI = dyn_cast<Instruction>(V)) {
        VI->moveBefore(I);
        VI->takeName(I);
      }
      return V;
    }

    // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
    // the and won't be needed.
    assert(CI->getZExtValue() > NumBits);
    BO->setOperand(1, ConstantInt::get(BO->getType(),
                                       CI->getZExtValue() - NumBits));
    BO->setIsExact(false);
    return BO;
  }

  case Instruction::Select:
    I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
    I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
    return I;
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
                                              NumBits, isLeftShift, IC));
    return PN;
  }
  }
}
Example No. 3
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
/// logically to the left or right by some number of bits.  This should return
/// true if the expression can be computed for the same cost as the current
/// expression tree.  This is used to eliminate extraneous shifting from things
/// like:
///      %C = shl i128 %A, 64
///      %D = shl i128 %B, 96
///      %E = or i128 %C, %D
///      %F = lshr i128 %E, 64
/// where the client will ask if E can be computed shifted right by 64 bits.  If
/// this succeeds, the GetShiftedValue function will be called to produce the
/// value.
static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                               InstCombiner &IC) {
  // We can always evaluate constants shifted.
  if (isa<Constant>(V))
    return true;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // If this is the opposite shift, we can directly reuse the input of the shift
  // if the needed bits are already zero in the input.  This allows us to reuse
  // the value which means that we don't care if the shift has multiple uses.
  //  TODO:  Handle opposite shift by exact value.
  ConstantInt *CI = 0;
  if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
      (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
    if (CI->getZExtValue() == NumBits) {
      // TODO: Check that the input bits are already zero with MaskedValueIsZero
#if 0
      // If this is a truncate of a logical shr, we can truncate it to a smaller
      // lshr iff we know that the bits we would otherwise be shifting in are
      // already zeros.
      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
      uint32_t BitWidth = Ty->getScalarSizeInBits();
      if (MaskedValueIsZero(I->getOperand(0),
            APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
          CI->getLimitedValue(BitWidth) < BitWidth) {
        return CanEvaluateTruncated(I->getOperand(0), Ty);
      }
#endif

    }
  }

  // We can't mutate something that has multiple uses: doing so would
  // require duplicating the instruction in general, which isn't profitable.
  if (!I->hasOneUse()) return false;

  switch (I->getOpcode()) {
  default: return false;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // Bitwise operators can all be arbitrarily evaluated shifted.
    return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);

  case Instruction::Shl: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
    if (isLeftShift) return true;

    // We can always turn shl(c)+shr(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getZExtValue() > NumBits) {
      unsigned LowBits = TypeWidth - CI->getZExtValue();
      if (MaskedValueIsZero(I->getOperand(0),
                       APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::LShr: {
    // We can often fold the shift into shifts-by-a-constant.
    CI = dyn_cast<ConstantInt>(I->getOperand(1));
    if (CI == 0) return false;

    // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
    if (!isLeftShift) return true;

    // We can always turn lshr(c)+shl(c) -> and(c2).
    if (CI->getValue() == NumBits) return true;

    unsigned TypeWidth = I->getType()->getScalarSizeInBits();

    // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
    // profitable unless we know the and'd out bits are already zero.
    if (CI->getValue().ult(TypeWidth) && CI->getZExtValue() > NumBits) {
      unsigned LowBits = CI->getZExtValue() - NumBits;
      if (MaskedValueIsZero(I->getOperand(0),
                          APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
        return true;
    }

    return false;
  }
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
           CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
        return false;
    return true;
  }
  }
}
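
For context, a sketch of how the two functions above pair up in InstCombine's shift-by-constant folding. The caller shown is illustrative, not taken from the source; Op0 is assumed to be the shift's first operand, COp1 its constant shift amount, and the code is assumed to run inside an InstCombiner member:

// If the entire operand tree of this shift can be computed already-shifted
// for free, rewrite it in place and replace the shift with the result.
if (CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this))
  return ReplaceInstUsesWith(I,
      GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));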
Example No. 4
// If we have a PHI node with a vector type that has only 2 uses: feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
  // Verify that the PHI node has exactly 2 uses. Otherwise return nullptr.
  if (!PN->hasNUses(2))
    return nullptr;

  // If so, it's known at this point that one of the two users is the
  // extractelement node. Find the PHI user that is not the extractelement
  // node.
  auto iu = PN->user_begin();
  Instruction *PHIUser = dyn_cast<Instruction>(*iu);
  if (PHIUser == cast<Instruction>(&EI))
    PHIUser = cast<Instruction>(*(++iu));

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          *B0);
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }
  return replaceInstUsesWith(EI, scalarPHI);
}
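
An illustrative before/after of the rewrite (hypothetical IR, not taken from the source), for a loop that accumulates in a vector PHI whose only uses are the feedback binop and one constant-index extractelement:

; before: %v has exactly two users, the feedback add and the extract.
loop:
  %v      = phi <4 x i32> [ %init, %entry ], [ %v.next, %loop ]
  %v.next = add <4 x i32> %v, %step
  ...
exit:
  %r = extractelement <4 x i32> %v, i32 0

; after: scalar PHI, per-incoming-value extracts, scalar add; %r becomes %s.
entry:
  %init.elt = extractelement <4 x i32> %init, i32 0
  ...
loop:
  %s        = phi i32 [ %init.elt, %entry ], [ %s.next, %loop ]
  %step.elt = extractelement <4 x i32> %step, i32 0
  %s.next   = add i32 %s, %step.elt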
Example No. 5
void
DebriefDlg::Show()
{
    FormWindow::Show();
    Game::SetTimeCompression(1);

    mission  = 0;
    campaign = Campaign::GetCampaign();
    sim      = Sim::GetSim();

    if (sim)
        ship = sim->GetPlayerShip();

    if (campaign)
        mission = campaign->GetMission();

    if (mission_name) {
        if (mission)
            mission_name->SetText(mission->Name());
        else
            mission_name->SetText(Game::GetText("DebriefDlg.mission-name"));
    }

    if (mission_system) {
        mission_system->SetText("");

        if (mission) {
            StarSystem* sys = mission->GetStarSystem();

            if (sys)
                mission_system->SetText(sys->Name());
        }
    }

    if (mission_sector) {
        mission_sector->SetText("");

        if (mission) {
            MissionElement* elem = mission->GetElements()[0];

            if (elem)
                mission_sector->SetText(elem->Region());
        }
    }

    if (mission_time_start) {
        if (mission) {
            char txt[32];
            FormatDayTime(txt, mission->Start());
            mission_time_start->SetText(txt);
        }
    }

    if (objectives) {
        bool found_objectives = false;

        if (sim && sim->GetPlayerElement()) {
            Text     text;
            Element* elem = sim->GetPlayerElement();

            for (int i = 0; i < elem->NumObjectives(); i++) {
                Instruction* obj = elem->GetObjective(i);
                text += Text("* ") + obj->GetDescription() + Text("\n");

                found_objectives = true;
            }

            objectives->SetText(text);
        }

        if (!found_objectives) {
            if (mission)
                objectives->SetText(mission->Objective());
            else
                objectives->SetText(Game::GetText("DebriefDlg.unspecified"));
        }
    }

    if (situation) {
        if (mission)
            situation->SetText(mission->Situation());
        else
            situation->SetText(Game::GetText("DebriefDlg.unknown"));
    }

    if (mission_score) {
        mission_score->SetText(Game::GetText("DebriefDlg.no-stats"));

        if (ship) {
            for (int i = 0; i < ShipStats::NumStats(); i++) {
                ShipStats* stats = ShipStats::GetStats(i);
                if (stats && !strcmp(ship->Name(), stats->GetName())) {
                    stats->Summarize();

                    Player* player = Player::GetCurrentPlayer();
                    int     points = stats->GetPoints() + stats->GetCommandPoints();

                    if (player && sim)
                        points = player->GetMissionPoints(stats, sim->StartTime()) + stats->GetCommandPoints();

                    char score[32];
                    sprintf_s(score, "%d %s", points, Game::GetText("DebriefDlg.points").data());
                    mission_score->SetText(score);
                    break;
                }
            }
        }
    }

    DrawUnits();
}
Example No. 6
/// \brief Assign DWARF discriminators.
///
/// To assign discriminators, we examine the boundaries of every
/// basic block and its successors. Suppose there is a basic block B1
/// with successor B2. The last instruction I1 in B1 and the first
/// instruction I2 in B2 are located at the same file and line number.
/// This situation is illustrated in the following code snippet:
///
///       if (i < 10) x = i;
///
///     entry:
///       br i1 %cmp, label %if.then, label %if.end, !dbg !10
///     if.then:
///       %1 = load i32* %i.addr, align 4, !dbg !10
///       store i32 %1, i32* %x, align 4, !dbg !10
///       br label %if.end, !dbg !10
///     if.end:
///       ret void, !dbg !12
///
/// Notice how the branch instruction in block 'entry' and all the
/// instructions in block 'if.then' have the exact same debug location
/// information (!dbg !10).
///
/// To distinguish instructions in block 'entry' from instructions in
/// block 'if.then', we generate a new lexical block for all the
/// instructions in block 'if.then' that share the same file and line
/// location with the last instruction of block 'entry'.
///
/// This new lexical block will have the same location information as
/// the previous one, but with a new DWARF discriminator value.
///
/// One of the main uses of this discriminator value is in runtime
/// sample profilers. It allows the profiler to distinguish instructions
/// at location !dbg !10 that execute on different basic blocks. This is
/// important because while the predicate 'if (i < 10)' may have been
/// executed millions of times, the assignment 'x = i' may have only
/// executed a handful of times (meaning that the entry->if.then edge is
/// seldom taken).
///
/// If we did not have discriminator information, the profiler would
/// assign the same weight to both blocks 'entry' and 'if.then', which
/// in turn will make it conclude that the entry->if.then edge is very
/// hot.
///
/// To decide where to create new discriminator values, this function
/// traverses the CFG and examines instructions at basic block boundaries.
/// If the last instruction I1 of a block B1 is at the same file and line
/// location as instruction I2 of successor B2, then it creates a new
/// lexical block for I2 and all the instructions in B2 that share the same
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
  // If the function has debug information, but the user has disabled
  // discriminators, do nothing.
  // Similarly, if the function has no debug info, do nothing.
  // Finally, if this module is built with DWARF versions earlier than 4,
  // do nothing (discriminator support is a DWARF 4 feature).
  if (NoDiscriminators ||
      !hasDebugInfo(F) ||
      F.getParent()->getDwarfVersion() < 4)
    return false;

  bool Changed = false;
  Module *M = F.getParent();
  LLVMContext &Ctx = M->getContext();
  DIBuilder Builder(*M, /*AllowUnresolved*/ false);

  // Traverse all the blocks looking for instructions in different
  // blocks that are at the same file:line location.
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    BasicBlock *B = I;
    TerminatorInst *Last = B->getTerminator();
    DILocation LastDIL = Last->getDebugLoc().get();
    if (!LastDIL)
      continue;

    for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) {
      BasicBlock *Succ = Last->getSuccessor(I);
      Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime();
      DILocation FirstDIL = First->getDebugLoc().get();
      if (!FirstDIL)
        continue;

      // If the first instruction (First) of Succ is at the same file
      // location as B's last instruction (Last), add a new
      // discriminator for First's location and all the instructions
      // in Succ that share the same location with First.
      if (!FirstDIL->canDiscriminate(*LastDIL)) {
        // Create a new lexical scope and compute a new discriminator
        // number for it.
        StringRef Filename = FirstDIL->getFilename();
        auto *Scope = FirstDIL->getScope();
        auto *File = Builder.createFile(Filename, Scope->getDirectory());

        // FIXME: Calculate the discriminator here, based on local information,
        // and delete MDLocation::computeNewDiscriminator().  The current
        // solution gives different results depending on other modules in the
        // same context.  All we really need is to discriminate between
        // FirstDIL and LastDIL -- a local map would suffice.
        unsigned Discriminator = FirstDIL->computeNewDiscriminator();
        auto *NewScope =
            Builder.createLexicalBlockFile(Scope, File, Discriminator);
        auto *NewDIL =
            MDLocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(),
                            NewScope, FirstDIL->getInlinedAt());
        DebugLoc newDebugLoc = NewDIL;

        // Attach this new debug location to First and every
        // instruction following First that shares the same location.
        for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1;
             ++I1) {
          if (I1->getDebugLoc().get() != FirstDIL)
            break;
          I1->setDebugLoc(newDebugLoc);
          DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine()
                       << ":" << NewDIL->getColumn() << ":"
                       << NewDIL->getDiscriminator() << *I1 << "\n");
        }
        DEBUG(dbgs() << "\n");
        Changed = true;
      }
    }
  }
  return Changed;
}
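
Following the header comment's example, a hedged illustration of the output (the metadata numbering here is invented for the illustration): instructions in 'if.then' now reference a location whose scope is a lexical block file carrying a new discriminator:

  if.then:
    %1 = load i32* %i.addr, align 4, !dbg !14
    store i32 %1, i32* %x, align 4, !dbg !14
    br label %if.end, !dbg !14
  ...
  !10 = !MDLocation(line: 1, scope: !4)
  !13 = !MDLexicalBlockFile(scope: !4, file: !1, discriminator: 1)
  !14 = !MDLocation(line: 1, scope: !13)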
Example No. 7
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
                                       BasicBlock::const_iterator StartingInst,
                                       std::vector<const BasicBlock*> &ToClone){
  WeakVH &BBEntry = VMap[BB];

  // Have we already cloned this block?
  if (BBEntry) return;
  
  // Nope, clone it now.
  BasicBlock *NewBB;
  BBEntry = NewBB = BasicBlock::Create(BB->getContext());
  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);

  // It is only legal to clone a function if a block address within that
  // function is never referenced outside of the function.  Given that, we
  // want to map block addresses from the old function to block addresses in
  // the clone. (This is different from the generic ValueMapper
  // implementation, which generates an invalid blockaddress when
  // cloning a function.)
  //
  // Note that we don't need to fix the mapping for unreachable blocks;
  // the default mapping there is safe.
  if (BB->hasAddressTaken()) {
    Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                            const_cast<BasicBlock*>(BB));
    VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
  }

  bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;

  // Loop over all instructions, and copy them over, DCE'ing as we go.  This
  // loop doesn't include the terminator.
  for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
       II != IE; ++II) {

    Instruction *NewInst = II->clone();

    // Eagerly remap operands to the newly cloned instruction, except for PHI
    // nodes for which we defer processing until we update the CFG.
    if (!isa<PHINode>(NewInst)) {
      RemapInstruction(NewInst, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);

      // If we can simplify this instruction to some other value, simply add
      // a mapping to that value rather than inserting a new instruction into
      // the basic block.
      if (Value *V =
              SimplifyInstruction(NewInst, BB->getModule()->getDataLayout())) {
        // On the off-chance that this simplifies to an instruction in the old
        // function, map it back into the new function.
        if (Value *MappedV = VMap.lookup(V))
          V = MappedV;

        VMap[&*II] = V;
        delete NewInst;
        continue;
      }
    }

    if (II->hasName())
      NewInst->setName(II->getName()+NameSuffix);
    VMap[&*II] = NewInst; // Add instruction map to value.
    NewBB->getInstList().push_back(NewInst);
    hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));

    if (CodeInfo)
      if (auto CS = ImmutableCallSite(&*II))
        if (CS.hasOperandBundles())
          CodeInfo->OperandBundleCallSites.push_back(NewInst);

    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
      if (isa<ConstantInt>(AI->getArraySize()))
        hasStaticAllocas = true;
      else
        hasDynamicAllocas = true;
    }
  }
  
  // Finally, clone over the terminator.
  const TerminatorInst *OldTI = BB->getTerminator();
  bool TerminatorDone = false;
  if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
    if (BI->isConditional()) {
      // If the condition was a known constant in the callee...
      ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
      // Or is a known constant in the caller...
      if (!Cond) {
        Value *V = VMap.lookup(BI->getCondition());
        Cond = dyn_cast_or_null<ConstantInt>(V);
      }

      // Constant fold to uncond branch!
      if (Cond) {
        BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
        VMap[OldTI] = BranchInst::Create(Dest, NewBB);
        ToClone.push_back(Dest);
        TerminatorDone = true;
      }
    }
  } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
    // If switching on a value known constant in the caller.
    ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
    if (!Cond) { // Or known constant after constant prop in the callee...
      Value *V = VMap.lookup(SI->getCondition());
      Cond = dyn_cast_or_null<ConstantInt>(V);
    }
    if (Cond) {     // Constant fold to uncond branch!
      SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
      BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
      VMap[OldTI] = BranchInst::Create(Dest, NewBB);
      ToClone.push_back(Dest);
      TerminatorDone = true;
    }
  }
  
  if (!TerminatorDone) {
    Instruction *NewInst = OldTI->clone();
    if (OldTI->hasName())
      NewInst->setName(OldTI->getName()+NameSuffix);
    NewBB->getInstList().push_back(NewInst);
    VMap[OldTI] = NewInst;             // Add instruction map to value.

    if (CodeInfo)
      if (auto CS = ImmutableCallSite(OldTI))
        if (CS.hasOperandBundles())
          CodeInfo->OperandBundleCallSites.push_back(NewInst);

    // Recursively clone any reachable successor blocks.
    const TerminatorInst *TI = BB->getTerminator();
    for (const BasicBlock *Succ : TI->successors())
      ToClone.push_back(Succ);
  }
  
  if (CodeInfo) {
    CodeInfo->ContainsCalls          |= hasCalls;
    CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && 
      BB != &BB->getParent()->front();
  }
}
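
For context, a minimal sketch of the worklist driver that consumes ToClone, following the usual pruning-cloner shape (PFC and StartingBB are assumptions for the illustration, not shown in the source):

std::vector<const BasicBlock *> CloneWorklist;
CloneWorklist.push_back(StartingBB);
while (!CloneWorklist.empty()) {
  const BasicBlock *BB = CloneWorklist.back();
  CloneWorklist.pop_back();
  // CloneBlock clones BB at most once (VMap doubles as the visited set) and
  // appends any newly reachable successors to the worklist.
  PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
}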
Example No. 8
void StatsTracker::computeReachableUncovered() {
  KModule *km = executor.kmodule;
  Module *m = km->module;
  static bool init = true;
  const InstructionInfoTable &infos = *km->infos;
  StatisticManager &sm = *theStatisticManager;
  
  if (init) {
    init = false;

    // Compute call targets. It would be nice to use alias information
    // instead of assuming all indirect calls hit all escaping
    // functions, eh?
    for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); 
         fnIt != fn_ie; ++fnIt) {
      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); 
           bbIt != bb_ie; ++bbIt) {
        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); 
             it != ie; ++it) {
          if (isa<CallInst>(it) || isa<InvokeInst>(it)) {
            CallSite cs(it);
            if (isa<InlineAsm>(cs.getCalledValue())) {
              // We can never call through here so assume no targets
              // (which should be correct anyhow).
              callTargets.insert(std::make_pair(it,
                                                std::vector<Function*>()));
            } else if (Function *target = getDirectCallTarget(cs)) {
              callTargets[it].push_back(target);
            } else {
              callTargets[it] = 
                std::vector<Function*>(km->escapingFunctions.begin(),
                                       km->escapingFunctions.end());
            }
          }
        }
      }
    }

    // Compute function callers as reflection of callTargets.
    for (calltargets_ty::iterator it = callTargets.begin(), 
           ie = callTargets.end(); it != ie; ++it)
      for (std::vector<Function*>::iterator fit = it->second.begin(), 
             fie = it->second.end(); fit != fie; ++fit) 
        functionCallers[*fit].push_back(it->first);

    // Initialize minDistToReturn to shortest paths through
    // functions. 0 is unreachable.
    std::vector<Instruction *> instructions;
    for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); 
         fnIt != fn_ie; ++fnIt) {
      if (fnIt->isDeclaration()) {
        if (fnIt->doesNotReturn()) {
          functionShortestPath[fnIt] = 0;
        } else {
          functionShortestPath[fnIt] = 1; // whatever
        }
      } else {
        functionShortestPath[fnIt] = 0;
      }

      // Not sure if I should bother to preorder here. XXX I should.
      for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); 
           bbIt != bb_ie; ++bbIt) {
        for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); 
             it != ie; ++it) {
          instructions.push_back(it);
          unsigned id = infos.getInfo(it).id;
          sm.setIndexedValue(stats::minDistToReturn, 
                             id, 
                             isa<ReturnInst>(it)
#if LLVM_VERSION_CODE < LLVM_VERSION(3, 1)
                             || isa<UnwindInst>(it)
#endif
                             );
        }
      }
    }
  
    std::reverse(instructions.begin(), instructions.end());
    
    // I'm so lazy it's not even worklisted.
    bool changed;
    do {
      changed = false;
      for (std::vector<Instruction*>::iterator it = instructions.begin(),
             ie = instructions.end(); it != ie; ++it) {
        Instruction *inst = *it;
        unsigned bestThrough = 0;

        if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
          std::vector<Function*> &targets = callTargets[inst];
          for (std::vector<Function*>::iterator fnIt = targets.begin(),
                 ie = targets.end(); fnIt != ie; ++fnIt) {
            uint64_t dist = functionShortestPath[*fnIt];
            if (dist) {
              dist = 1+dist; // count instruction itself
              if (bestThrough==0 || dist<bestThrough)
                bestThrough = dist;
            }
          }
        } else {
          bestThrough = 1;
        }
       
        if (bestThrough) {
          unsigned id = infos.getInfo(*it).id;
          uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToReturn, id);
          std::vector<Instruction*> succs = getSuccs(*it);
          for (std::vector<Instruction*>::iterator it2 = succs.begin(),
                 ie = succs.end(); it2 != ie; ++it2) {
            uint64_t dist = sm.getIndexedValue(stats::minDistToReturn,
                                               infos.getInfo(*it2).id);
            if (dist) {
              uint64_t val = bestThrough + dist;
              if (best==0 || val<best)
                best = val;
            }
          }
          // There's a corner case here when a function only includes a single
          // instruction (a ret). In that case, we MUST update
          // functionShortestPath, or it will remain 0 (erroneously indicating
          // that no return instructions are reachable).
          Function *f = inst->getParent()->getParent();
          if (best != cur
              || (inst == f->begin()->begin()
                  && functionShortestPath[f] != best)) {
            sm.setIndexedValue(stats::minDistToReturn, id, best);
            changed = true;

            // Update shortest path if this is the entry point.
            if (inst==f->begin()->begin())
              functionShortestPath[f] = best;
          }
        }
      }
    } while (changed);
  }

  // Compute minDistToUncovered. 0 is unreachable.
  std::vector<Instruction *> instructions;
  for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); 
       fnIt != fn_ie; ++fnIt) {
    // Not sure if I should bother to preorder here.
    for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); 
         bbIt != bb_ie; ++bbIt) {
      for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); 
           it != ie; ++it) {
        unsigned id = infos.getInfo(it).id;
        instructions.push_back(&*it);
        sm.setIndexedValue(stats::minDistToUncovered, 
                           id, 
                           sm.getIndexedValue(stats::uncoveredInstructions, id));
      }
    }
  }
  
  std::reverse(instructions.begin(), instructions.end());
  
  // I'm so lazy it's not even worklisted.
  bool changed;
  do {
    changed = false;
    for (std::vector<Instruction*>::iterator it = instructions.begin(),
           ie = instructions.end(); it != ie; ++it) {
      Instruction *inst = *it;
      uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToUncovered, 
                                                     infos.getInfo(inst).id);
      unsigned bestThrough = 0;
      
      if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
        std::vector<Function*> &targets = callTargets[inst];
        for (std::vector<Function*>::iterator fnIt = targets.begin(),
               ie = targets.end(); fnIt != ie; ++fnIt) {
          uint64_t dist = functionShortestPath[*fnIt];
          if (dist) {
            dist = 1+dist; // count instruction itself
            if (bestThrough==0 || dist<bestThrough)
              bestThrough = dist;
          }

          if (!(*fnIt)->isDeclaration()) {
            uint64_t calleeDist = sm.getIndexedValue(stats::minDistToUncovered,
                                                     infos.getFunctionInfo(*fnIt).id);
            if (calleeDist) {
              calleeDist = 1+calleeDist; // count instruction itself
              if (best==0 || calleeDist<best)
                best = calleeDist;
            }
          }
        }
      } else {
        bestThrough = 1;
      }
      
      if (bestThrough) {
        std::vector<Instruction*> succs = getSuccs(inst);
        for (std::vector<Instruction*>::iterator it2 = succs.begin(),
               ie = succs.end(); it2 != ie; ++it2) {
          uint64_t dist = sm.getIndexedValue(stats::minDistToUncovered,
                                             infos.getInfo(*it2).id);
          if (dist) {
            uint64_t val = bestThrough + dist;
            if (best==0 || val<best)
              best = val;
          }
        }
      }

      if (best != cur) {
        sm.setIndexedValue(stats::minDistToUncovered, 
                           infos.getInfo(inst).id, 
                           best);
        changed = true;
      }
    }
  } while (changed);

  for (std::set<ExecutionState*>::iterator it = executor.states.begin(),
         ie = executor.states.end(); it != ie; ++it) {
    ExecutionState *es = *it;
    uint64_t currentFrameMinDist = 0;
#if MULTITHREAD
    for (Thread::stack_ty::iterator sfIt = es->stack().begin(),
           sf_ie = es->stack().end(); sfIt != sf_ie; ++sfIt) {
      Thread::stack_ty::iterator next = sfIt + 1;
      KInstIterator kii;

      if (next==es->stack().end()) {
        kii = es->pc();
#else
    for (ExecutionState::stack_ty::iterator sfIt = es->stack.begin(),
           sf_ie = es->stack.end(); sfIt != sf_ie; ++sfIt) {
      ExecutionState::stack_ty::iterator next = sfIt + 1;
      KInstIterator kii;

      if (next==es->stack.end()) {
        kii = es->pc;
#endif
      } else {
        kii = next->caller;
        ++kii;
      }
      
      sfIt->minDistToUncoveredOnReturn = currentFrameMinDist;
      
      currentFrameMinDist = computeMinDistToUncovered(kii, currentFrameMinDist);
    }
  }
}
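
Both fixed-point loops above apply the same relaxation step until nothing changes. A minimal sketch of that step in isolation (a hypothetical helper, not from the source; 0 encodes "unreachable" throughout):

static uint64_t relax(uint64_t best, uint64_t bestThrough, uint64_t succDist) {
  if (succDist == 0)
    return best;                          // successor cannot reach the target
  uint64_t val = bestThrough + succDist;  // pay for this step, then continue
  return (best == 0 || val < best) ? val : best;
}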
Example No. 9
/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.
bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
  SmallPtrSet<Function*, 8> SCCNodes;

  // Fill SCCNodes with the elements of the SCC.  Used for quickly
  // looking up whether a given CallGraphNode is in this SCC.
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
    SCCNodes.insert((*I)->getFunction());

  // Check if any of the functions in the SCC read or write memory.  If they
  // write memory then they can't be marked readnone or readonly.
  bool ReadsMemory = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();

    if (F == 0)
      // External node - may write memory.  Just give up.
      return false;

    AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F);
    if (MRB == AliasAnalysis::DoesNotAccessMemory)
      // Already perfect!
      continue;

    // Definitions with weak linkage may be overridden at linktime with
    // something that writes memory, so treat them like declarations.
    if (F->isDeclaration() || F->mayBeOverridden()) {
      if (!AliasAnalysis::onlyReadsMemory(MRB))
        // May write memory.  Just give up.
        return false;

      ReadsMemory = true;
      continue;
    }

    // Scan the function body for instructions that may read or write memory.
    for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
      Instruction *I = &*II;

      // Some instructions can be ignored even if they read or write memory.
      // Detect these now, skipping to the next instruction if one is found.
      CallSite CS(cast<Value>(I));
      if (CS) {
        // Ignore calls to functions in the same SCC.
        if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
          continue;
        AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS);
        // If the call doesn't access arbitrary memory, we may be able to
        // figure out something.
        if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
          // If the call does access argument pointees, check each argument.
          if (AliasAnalysis::doesAccessArgPointees(MRB))
            // Check whether all pointer arguments point to local memory, and
            // ignore calls that only access local memory.
            for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
                 CI != CE; ++CI) {
              Value *Arg = *CI;
              if (Arg->getType()->isPointerTy()) {
                AliasAnalysis::Location Loc(Arg,
                                            AliasAnalysis::UnknownSize,
                                            I->getMetadata(LLVMContext::MD_tbaa));
                if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) {
                  if (MRB & AliasAnalysis::Mod)
                    // Writes non-local memory.  Give up.
                    return false;
                  if (MRB & AliasAnalysis::Ref)
                    // Ok, it reads non-local memory.
                    ReadsMemory = true;
                }
              }
            }
          continue;
        }
        // The call could access any memory. If that includes writes, give up.
        if (MRB & AliasAnalysis::Mod)
          return false;
        // If it reads, note it.
        if (MRB & AliasAnalysis::Ref)
          ReadsMemory = true;
        continue;
      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        // Ignore non-volatile loads from local memory. (Atomic is okay here.)
        if (!LI->isVolatile()) {
          AliasAnalysis::Location Loc = AA->getLocation(LI);
          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
            continue;
        }
      } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        // Ignore non-volatile stores to local memory. (Atomic is okay here.)
        if (!SI->isVolatile()) {
          AliasAnalysis::Location Loc = AA->getLocation(SI);
          if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
            continue;
        }
      } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
        // Ignore vaargs on local memory.
        AliasAnalysis::Location Loc = AA->getLocation(VI);
        if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }

      // Any remaining instructions need to be taken seriously!  Check if they
      // read or write memory.
      if (I->mayWriteToMemory())
        // Writes memory.  Just give up.
        return false;

      // If this instruction may read memory, remember that.
      ReadsMemory |= I->mayReadFromMemory();
    }
  }

  // Success!  Functions in this SCC do not access memory, or only read memory.
  // Give them the appropriate attribute.
  bool MadeChange = false;
  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
    Function *F = (*I)->getFunction();

    if (F->doesNotAccessMemory())
      // Already perfect!
      continue;

    if (F->onlyReadsMemory() && ReadsMemory)
      // No change.
      continue;

    MadeChange = true;

    // Clear out any existing attributes.
    AttrBuilder B;
    B.addAttribute(Attributes::ReadOnly)
      .addAttribute(Attributes::ReadNone);
    F->removeAttribute(AttributeSet::FunctionIndex,
                       Attributes::get(F->getContext(), B));

    // Add in the new attribute.
    B.clear();
    B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
    F->addAttribute(AttributeSet::FunctionIndex,
                    Attributes::get(F->getContext(), B));

    if (ReadsMemory)
      ++NumReadOnly;
    else
      ++NumReadNone;
  }

  return MadeChange;
}
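
An illustrative input for the deduction above (hypothetical IR in the old typed-pointer syntax, not from the source): a function that only loads a global ends up readonly, while one that touches no memory at all ends up readnone:

@g = global i32 0

define i32 @get() {           ; reads non-local memory -> marked readonly
  %v = load i32* @g
  ret i32 %v
}

define i32 @inc(i32 %x) {     ; accesses no memory -> marked readnone
  %r = add i32 %x, 1
  ret i32 %r
}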
Example No. 10
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
                               const TargetTransformInfo *TTI) {

  // DemandedBits will give us every value's live-out bits. But we want
  // to ensure no extra casts would need to be inserted, so every DAG
  // of connected values must have the same minimum bitwidth.
  EquivalenceClasses<Value *> ECs;
  SmallVector<Value *, 16> Worklist;
  SmallPtrSet<Value *, 4> Roots;
  SmallPtrSet<Value *, 16> Visited;
  DenseMap<Value *, uint64_t> DBits;
  SmallPtrSet<Instruction *, 4> InstructionSet;
  MapVector<Instruction *, uint64_t> MinBWs;

  // Determine the roots. We work bottom-up, from truncs or icmps.
  bool SeenExtFromIllegalType = false;
  for (auto *BB : Blocks)
    for (auto &I : *BB) {
      InstructionSet.insert(&I);

      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
          !TTI->isTypeLegal(I.getOperand(0)->getType()))
        SeenExtFromIllegalType = true;

      // Only deal with non-vector integers up to 64-bits wide.
      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
          !I.getType()->isVectorTy() &&
          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
        // Don't make work for ourselves. If we know the truncated-to type is
        // legal, don't add it to the worklist.
        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
          continue;

        Worklist.push_back(&I);
        Roots.insert(&I);
      }
    }
  // Early exit.
  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
    return MinBWs;

  // Now proceed breadth-first, unioning values together.
  while (!Worklist.empty()) {
    Value *Val = Worklist.pop_back_val();
    Value *Leader = ECs.getOrInsertLeaderValue(Val);

    if (Visited.count(Val))
      continue;
    Visited.insert(Val);

    // Non-instructions terminate a chain successfully.
    if (!isa<Instruction>(Val))
      continue;
    Instruction *I = cast<Instruction>(Val);

    // If we encounter a type that is larger than 64 bits, we can't represent
    // it so bail out.
    if (DB.getDemandedBits(I).getBitWidth() > 64)
      return MapVector<Instruction *, uint64_t>();

    uint64_t V = DB.getDemandedBits(I).getZExtValue();
    DBits[Leader] |= V;
    DBits[I] = V;

    // Casts, loads and instructions outside of our range terminate a chain
    // successfully.
    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
        !InstructionSet.count(I))
      continue;

    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
    // transform anything that relies on them.
    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
        !I->getType()->isIntegerTy()) {
      DBits[Leader] |= ~0ULL;
      continue;
    }

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    if (isa<PHINode>(I))
      continue;

    if (DBits[Leader] == ~0ULL)
      // All bits demanded, no point continuing.
      continue;

    for (Value *O : cast<User>(I)->operands()) {
      ECs.unionSets(Leader, O);
      Worklist.push_back(O);
    }
  }

  // Now we've discovered all values, walk them to see if there are
  // any users we didn't see. If there are, we can't optimize that
  // chain.
  for (auto &I : DBits)
    for (auto *U : I.first->users())
      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;

  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
    uint64_t LeaderDemandedBits = 0;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      LeaderDemandedBits |= DBits[*MI];

    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
                     llvm::countLeadingZeros(LeaderDemandedBits);
    // Round up to a power of 2
    if (!isPowerOf2_64((uint64_t)MinBW))
      MinBW = NextPowerOf2(MinBW);
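    // Worked example (illustrative): if the union of demanded bits is 0x1FFF,
    // countLeadingZeros() returns 51, so MinBW = 64 - 51 = 13, which rounds
    // up to 16.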

    // We don't modify the types of PHIs. Reductions will already have been
    // truncated if possible, and inductions' sizes will have been chosen by
    // indvars.
    // If we are required to shrink a PHI, abandon this entire equivalence class.
    bool Abort = false;
    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
      if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
        Abort = true;
        break;
      }
    if (Abort)
      continue;

    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
      if (!isa<Instruction>(*MI))
        continue;
      Type *Ty = (*MI)->getType();
      if (Roots.count(*MI))
        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
      if (MinBW < Ty->getScalarSizeInBits())
        MinBWs[cast<Instruction>(*MI)] = MinBW;
    }
  }

  return MinBWs;
}
Example No. 11
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence:  a = A[i];      // (1)
//                                A[i] = b;      // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence:  A[i] = a;      // (1)
//                                A[i] = b;      // (2)
//                                A[i + 1] = c;  // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
                                 bool EnablePredicatedInterleavedMemAccesses) {
  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
  const ValueToValueMap &Strides = LAI->getSymbolicStrides();

  // Holds all accesses with a constant stride.
  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
  collectConstStrideAccesses(AccessStrideInfo, Strides);

  if (AccessStrideInfo.empty())
    return;

  // Collect the dependences in the loop.
  collectDependences();

  // Holds all interleaved store groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> StoreGroups;
  // Holds all interleaved load groups temporarily.
  SmallSetVector<InterleaveGroup *, 4> LoadGroups;

  // Search in bottom-up program order for pairs of accesses (A and B) that can
  // form interleaved load or store groups. In the algorithm below, access A
  // precedes access B in program order. We initialize a group for B in the
  // outer loop of the algorithm, and then in the inner loop, we attempt to
  // insert each A into B's group if:
  //
  //  1. A and B have the same stride,
  //  2. A and B have the same memory object size, and
  //  3. A belongs in B's group according to its distance from B.
  //
  // Special care is taken to ensure group formation will not break any
  // dependences.
  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
       BI != E; ++BI) {
    Instruction *B = BI->first;
    StrideDescriptor DesB = BI->second;

    // Initialize a group for B if it has an allowable stride. Even if we don't
    // create a group for B, we continue with the bottom-up algorithm to ensure
    // we don't break any of B's dependences.
    InterleaveGroup *Group = nullptr;
    if (isStrided(DesB.Stride) &&
        (!isPredicated(B->getParent()) ||
         EnablePredicatedInterleavedMemAccesses)) {
      Group = getInterleaveGroup(B);
      if (!Group) {
        LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
                          << '\n');
        Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
      }
      if (B->mayWriteToMemory())
        StoreGroups.insert(Group);
      else
        LoadGroups.insert(Group);
    }

    for (auto AI = std::next(BI); AI != E; ++AI) {
      Instruction *A = AI->first;
      StrideDescriptor DesA = AI->second;

      // Our code motion strategy implies that we can't have dependences
      // between accesses in an interleaved group and other accesses located
      // between the first and last member of the group. Note that this also
      // means that a group can't have more than one member at a given offset.
      // The accesses in a group can have dependences with other accesses, but
      // we must ensure we don't extend the boundaries of the group such that
      // we encompass those dependent accesses.
      //
      // For example, assume we have the sequence of accesses shown below in a
      // stride-2 loop:
      //
      //  (1, 2) is a group | A[i]   = a;  // (1)
      //                    | A[i-1] = b;  // (2) |
      //                      A[i-3] = c;  // (3)
      //                      A[i]   = d;  // (4) | (2, 4) is not a group
      //
      // Because accesses (2) and (3) are dependent, we can group (2) with (1)
      // but not with (4). If we did, the dependent access (3) would be within
      // the boundaries of the (2, 4) group.
      if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
        // If a dependence exists and A is already in a group, we know that A
        // must be a store since A precedes B and WAR dependences are allowed.
        // Thus, A would be sunk below B. We release A's group to prevent this
        // illegal code motion. A will then be free to form another group with
        // instructions that precede it.
        if (isInterleaved(A)) {
          InterleaveGroup *StoreGroup = getInterleaveGroup(A);
          StoreGroups.remove(StoreGroup);
          releaseGroup(StoreGroup);
        }

        // If a dependence exists and A is not already in a group (or it was
        // and we just released it), B might be hoisted above A (if B is a
        // load) or another store might be sunk below A (if B is a store). In
        // either case, we can't add additional instructions to B's group. B
        // will only form a group with instructions that it precedes.
        break;
      }

      // At this point, we've checked for illegal code motion. If either A or B
      // isn't strided, there's nothing left to do.
      if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
        continue;

      // Ignore A if it's already in a group or isn't the same kind of memory
      // operation as B.
      // Note that mayReadFromMemory() and mayWriteToMemory() aren't mutually
      // exclusive in the case of atomic loads. We shouldn't see those here:
      // canVectorizeMemory() should have returned false for them, except when
      // we were asked for optimization remarks.
      if (isInterleaved(A) ||
          (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
          (A->mayWriteToMemory() != B->mayWriteToMemory()))
        continue;

      // Check rules 1 and 2. Ignore A if its stride or size is different from
      // that of B.
      if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
        continue;

      // Ignore A if the memory objects of A and B don't belong to the same
      // address space.
      if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
        continue;

      // Calculate the distance from A to B.
      const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
          PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
      if (!DistToB)
        continue;
      int64_t DistanceToB = DistToB->getAPInt().getSExtValue();

      // Check rule 3. Ignore A if its distance to B is not a multiple of the
      // size.
      if (DistanceToB % static_cast<int64_t>(DesB.Size))
        continue;

      // All members of a predicated interleave-group must have the same
      // predicate, and currently must reside in the same BB.
      BasicBlock *BlockA = A->getParent();
      BasicBlock *BlockB = B->getParent();
      if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
          (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
        continue;

      // The index of A is the index of B plus A's distance to B in multiples
      // of the size.
      int IndexA =
          Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);

      // Try to insert A into B's group.
      if (Group->insertMember(A, IndexA, DesA.Align)) {
        LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
                          << "    into the interleave group with" << *B
                          << '\n');
        InterleaveGroupMap[A] = Group;

        // Set the first load in program order as the insert position.
        if (A->mayReadFromMemory())
          Group->setInsertPos(A);
      }
    } // Iteration over A accesses.
  }   // Iteration over B accesses.

  // Remove interleaved store groups with gaps.
  for (InterleaveGroup *Group : StoreGroups)
    if (Group->getNumMembers() != Group->getFactor()) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved store group due "
                    "to gaps.\n");
      releaseGroup(Group);
    }
  // Remove interleaved groups with gaps (currently only loads) whose memory
  // accesses may wrap around. We have to revisit the getPtrStride analysis,
  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
  // not check wrapping (see documentation there).
  // FORNOW we use Assume=false;
  // TODO: Change to Assume=true but making sure we don't exceed the threshold
  // of runtime SCEV assumptions checks (thereby potentially failing to
  // vectorize altogether).
  // Additional optional optimizations:
  // TODO: If we are peeling the loop and we know that the first pointer doesn't
  // wrap then we can deduce that all pointers in the group don't wrap.
  // This means that we can forcefully peel the loop in order to only have to
  // check the first pointer for no-wrap. When we'll change to use Assume=true
  // we'll only need at most one runtime check per interleaved group.
  for (InterleaveGroup *Group : LoadGroups) {
    // Case 1: A full group. We can skip the checks; for full groups, if the
    // wide load would wrap around the address space we would do a memory
    // access at nullptr even without the transformation.
    if (Group->getNumMembers() == Group->getFactor())
      continue;

    // Case 2: If the first and last members of the group don't wrap, this
    // implies that none of the pointers in the group wrap. So we check only
    // group member 0 (which is always guaranteed to exist) and group member
    // Factor - 1; if the latter doesn't exist we rely on peeling (if it is a
    // non-reversed access -- see Case 3).
    Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
    if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
                      /*ShouldCheckWrap=*/true)) {
      LLVM_DEBUG(
          dbgs() << "LV: Invalidate candidate interleaved group due to "
                    "first group member potentially pointer-wrapping.\n");
      releaseGroup(Group);
      continue;
    }
    Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
    if (LastMember) {
      Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
      if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
                        /*ShouldCheckWrap=*/true)) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "last group member potentially pointer-wrapping.\n");
        releaseGroup(Group);
      }
    } else {
      // Case 3: A non-reversed interleaved load group with gaps: We need
      // to execute at least one scalar epilogue iteration. This will ensure
      // we don't speculatively access memory out-of-bounds. We only need
      // to look for a member at index factor - 1, since every group must have
      // a member at index zero.
      if (Group->isReverse()) {
        LLVM_DEBUG(
            dbgs() << "LV: Invalidate candidate interleaved group due to "
                      "a reverse access with gaps.\n");
        releaseGroup(Group);
        continue;
      }
      LLVM_DEBUG(
          dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
      RequiresScalarEpilogue = true;
    }
  }
}
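The membership test above boils down to integer arithmetic on the constant distance between two accesses. A minimal standalone sketch of rules 1-3 and the index computation follows; the struct here is a simplified stand-in (a plain byte offset in place of the SCEV), not the pass's actual StrideDescriptor.

#include <cstdint>
#include <optional>

// Simplified stand-in for the pass's StrideDescriptor: Start replaces the
// SCEV pointer expression with a plain byte offset.
struct AccessDesc {
  int64_t Stride; // stride between consecutive accesses
  uint64_t Size;  // memory object size in bytes
  int64_t Start;  // byte offset of the first access
};

// Returns the index A would occupy in B's group, or std::nullopt if A
// cannot join (rules 1-3 from the comment above).
std::optional<int64_t> memberIndex(const AccessDesc &DesA,
                                   const AccessDesc &DesB, int64_t IndexB) {
  // Rules 1 and 2: same stride and same memory object size.
  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
    return std::nullopt;
  // Rule 3: the distance from A to B must be a multiple of the size.
  int64_t DistanceToB = DesA.Start - DesB.Start;
  if (DistanceToB % static_cast<int64_t>(DesB.Size))
    return std::nullopt;
  return IndexB + DistanceToB / static_cast<int64_t>(DesB.Size);
}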
Example #12
bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) {
  Value *AAPtr;
  const SCEV *AlignSCEV, *OffSCEV;
  if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
    return false;

  const SCEV *AASCEV = SE->getSCEV(AAPtr);

  // Apply the assumption to all other users of the specified pointer.
  SmallPtrSet<Instruction *, 32> Visited;
  SmallVector<Instruction*, 16> WorkList;
  for (User *J : AAPtr->users()) {
    if (J == ACall)
      continue;

    if (Instruction *K = dyn_cast<Instruction>(J))
      if (isValidAssumeForContext(ACall, K, DL, DT))
        WorkList.push_back(K);
  }

  while (!WorkList.empty()) {
    Instruction *J = WorkList.pop_back_val();

    if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
      unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        LI->getPointerOperand(), SE);

      if (NewAlignment > LI->getAlignment()) {
        LI->setAlignment(NewAlignment);
        ++NumLoadAlignChanged;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
      unsigned NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        SI->getPointerOperand(), SE);

      if (NewAlignment > SI->getAlignment()) {
        SI->setAlignment(NewAlignment);
        ++NumStoreAlignChanged;
      }
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(J)) {
      unsigned NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
        MI->getDest(), SE);

      // For memory transfers, we need a common alignment for both the
      // source and destination. If we have a new alignment for this
      // instruction, but only for one operand, save it. If we reach the
      // other operand through another assumption later, then we may
      // change the alignment at that point.
      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
        unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
          MTI->getSource(), SE);

        DenseMap<MemTransferInst *, unsigned>::iterator DI =
          NewDestAlignments.find(MTI);
        unsigned AltDestAlignment = (DI == NewDestAlignments.end()) ?
                                    0 : DI->second;

        DenseMap<MemTransferInst *, unsigned>::iterator SI =
          NewSrcAlignments.find(MTI);
        unsigned AltSrcAlignment = (SI == NewSrcAlignments.end()) ?
                                   0 : SI->second;

        DEBUG(dbgs() << "\tmem trans: " << NewDestAlignment << " " <<
                        AltDestAlignment << " " << NewSrcAlignment <<
                        " " << AltSrcAlignment << "\n");

        // Of these four alignments, pick the largest possible...
        unsigned NewAlignment = 0;
        if (NewDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
          NewAlignment = std::max(NewAlignment, NewDestAlignment);
        if (AltDestAlignment <= std::max(NewSrcAlignment, AltSrcAlignment))
          NewAlignment = std::max(NewAlignment, AltDestAlignment);
        if (NewSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
          NewAlignment = std::max(NewAlignment, NewSrcAlignment);
        if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment))
          NewAlignment = std::max(NewAlignment, AltSrcAlignment);

        if (NewAlignment > MI->getAlignment()) {
          MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
            MI->getParent()->getContext()), NewAlignment));
          ++NumMemIntAlignChanged;
        }

        NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment));
        NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment));
      } else if (NewDestAlignment > MI->getAlignment()) {
        assert((!isa<MemIntrinsic>(MI) || isa<MemSetInst>(MI)) &&
               "Unknown memory intrinsic");

        MI->setAlignment(ConstantInt::get(Type::getInt32Ty(
          MI->getParent()->getContext()), NewDestAlignment));
        ++NumMemIntAlignChanged;
      }
    }

    // Now that we've updated that use of the pointer, look for other uses of
    // the pointer to update.
    Visited.insert(J);
    for (User *UJ : J->users()) {
      Instruction *K = cast<Instruction>(UJ);
      if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DL, DT))
        WorkList.push_back(K);
    }
  }

  return true;
}
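The four guarded max updates above reduce to a simpler statement: take the best alignment known for each operand, then use the smaller of the two, because a transfer's common alignment must hold for both pointers. A hedged standalone sketch of that reduction (plain C++, not the pass's API):

#include <algorithm>

// For a memory transfer we may know two candidate alignments per operand:
// one from the current assumption and one remembered from an earlier one.
// The common alignment must be honored by both source and destination, so
// it is the minimum of the two per-operand maxima.
unsigned commonTransferAlignment(unsigned NewDest, unsigned AltDest,
                                 unsigned NewSrc, unsigned AltSrc) {
  unsigned BestDest = std::max(NewDest, AltDest);
  unsigned BestSrc = std::max(NewSrc, AltSrc);
  return std::min(BestDest, BestSrc);
}

For example, NewDest=16, AltDest=0, NewSrc=8, AltSrc=0 yields 8 under both formulations.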
Example #13
	virtual bool runOnModule(Module &M) {

		LLVMContext &C = M.getContext();
		Function *printError_func = (Function*)M.getOrInsertFunction("printErrorMessage", Type::getVoidTy(C), NULL);

		BasicBlock* entryBlock = BasicBlock::Create(C, "", printError_func);
		IRBuilder<> builder(entryBlock);

		Constant *msg = ConstantArray::get(C, "ERROR!  Array Index Out of Bounds", true);

		Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C));
		Constant *gep_params[] = {zero_32, zero_32};


		GlobalVariable *errorMsg = new GlobalVariable(M, msg->getType(), true, GlobalValue::InternalLinkage, msg, "errorMsg");
		Function *puts_func = (Function*)(M.getOrInsertFunction("puts", IntegerType::getInt32Ty(C), PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL));
		Constant *msgptr = ConstantExpr::getGetElementPtr(errorMsg, gep_params);

		Value *puts_params[] = {msgptr};

		CallInst *puts_call = builder.CreateCall(puts_func, puts_params);
		puts_call->setTailCall(false);

		Function *exit_func = cast<Function>(M.getOrInsertFunction("exit", IntegerType::getVoidTy(C), Type::getInt32Ty(C),NULL));

		Value *exit_val = ConstantInt::get(IntegerType::getInt32Ty(C), 1);
		
		//create exit block.  This block prints the error and calls exit system function
		BasicBlock* exitBlock = BasicBlock::Create(C, "exitBlock", printError_func);
		builder.CreateBr(exitBlock);
		builder.SetInsertPoint(exitBlock);


		builder.CreateCall(exit_func,exit_val);
		builder.CreateBr(exitBlock);

		int checksInserted = 0;

		for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
		{
			//skip function declarations; only instrument definitions
			if (!MI->isDeclaration()) 
			{
				for (inst_iterator I = inst_begin(*MI), E = inst_end(*MI); I != E; ++I)
				{
					Instruction *inst = &*I;
					
					if(GetElementPtrInst *gep = dyn_cast<GetElementPtrInst>(inst))
					{
						if (const ArrayType *ar = dyn_cast<ArrayType>(gep->getPointerOperandType()->getElementType()))
						{           
							//increment checks inserted counter
							checksInserted++;
	
							//create split in basic block for function call insertion (branch)
							Instruction* next = inst->getNextNode();
							BasicBlock* oldBlock = inst->getParent();
							BasicBlock* newBlock = SplitBlock(oldBlock, next, this);

							//get upper limit and index used
							unsigned upperLim = ar->getNumElements();
							int index = gep->getNumOperands() - 1;
							Value *vIndexUsed = gep->getOperand(index);
							Value *vUpperLimit = ConstantInt::get(vIndexUsed->getType(), upperLim);

							BasicBlock* checkUpperBlock = BasicBlock::Create(C, "checkUpperBlock", MI, newBlock);
							BasicBlock* checkLowerBlock = BasicBlock::Create(C, "checkLowerBlock", MI, checkUpperBlock);
							
							builder.SetInsertPoint(oldBlock);
							
							//remove old terminator
							TerminatorInst* term = oldBlock->getTerminator();
							term->eraseFromParent();
							//insert new one
							builder.CreateBr(checkUpperBlock);
							
							//configure upper bound test
							builder.SetInsertPoint(checkUpperBlock);
							Value* condUpperInst = builder.CreateICmpSLT(vIndexUsed, vUpperLimit, "checkUpperBounds");
							BasicBlock* errorBlock = BasicBlock::Create(C, "errorBlock", MI, newBlock);
							builder.CreateCondBr(condUpperInst, checkLowerBlock, errorBlock);

							//configure lower bound test
							builder.SetInsertPoint(checkLowerBlock);
							Value *vLowerLimit = ConstantInt::get(vIndexUsed->getType(), -1);
							Value *condLowerInst = builder.CreateICmpSGT(vIndexUsed, vLowerLimit, "checkLowerBounds");
							builder.CreateCondBr(condLowerInst, newBlock, errorBlock);

							//setup error block.  All this block does is call func to print error and exit
							builder.SetInsertPoint(errorBlock);
							builder.CreateCall(printError_func);
							builder.CreateBr(errorBlock);
						}
					}				
				}
			}
		}
		errs() << "This pass has inserted " << checksInserted << " checks\n";
      return true;
	}
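In source-level terms, the control flow the pass stitches around each constant-sized array GEP looks like the sketch below (hypothetical names; the real error path is the shared printErrorMessage function built above, which prints the message and exits):

// Sketch of the inserted guard for an N-element array access; names are
// illustrative, not taken from the pass.
extern void printErrorMessage(); // prints the error and exits

void checkedAccess(long Idx, long N, int *Array) {
  if (!(Idx < N))           // checkUpperBlock: signed upper-bound test
    printErrorMessage();    // errorBlock
  else if (!(Idx > -1))     // checkLowerBlock: signed lower-bound test
    printErrorMessage();    // errorBlock
  else
    (void)Array[Idx];       // newBlock: the original access continues
}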
Example #14
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
  SmallVector<ReturnInst *, 16> Returns;
  SmallVector<InvokeInst *, 16> Invokes;
  SmallSetVector<LandingPadInst *, 16> LPads;

  // Look through the terminators of the basic blocks to find invokes.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
      if (Function *Callee = II->getCalledFunction())
        if (Callee->isIntrinsic() &&
            Callee->getIntrinsicID() == Intrinsic::donothing) {
          // Remove the NOP invoke.
          BranchInst::Create(II->getNormalDest(), II);
          II->eraseFromParent();
          continue;
        }

      Invokes.push_back(II);
      LPads.insert(II->getUnwindDest()->getLandingPadInst());
    } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
      Returns.push_back(RI);
    }

  if (Invokes.empty())
    return false;

  NumInvokes += Invokes.size();

  lowerIncomingArguments(F);
  lowerAcrossUnwindEdges(F, Invokes);

  Value *FuncCtx =
      setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
  BasicBlock *EntryBB = F.begin();
  IRBuilder<> Builder(EntryBB->getTerminator());

  // Get a reference to the jump buffer.
  Value *JBufPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 5, "jbuf_gep");

  // Save the frame pointer.
  Value *FramePtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 0, "jbuf_fp_gep");

  Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp");
  Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true);

  // Save the stack pointer.
  Value *StackPtr = Builder.CreateConstGEP2_32(JBufPtr, 0, 2, "jbuf_sp_gep");

  Val = Builder.CreateCall(StackAddrFn, "sp");
  Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true);

  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
  Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy());
  Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg);

  // Store a pointer to the function context so that the back-end will know
  // where to look for it.
  Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy());
  Builder.CreateCall(FuncCtxFn, FuncCtxArg);

  // At this point, we are all set up, update the invoke instructions to mark
  // their call_site values.
  for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
    insertCallSiteStore(Invokes[I], I + 1);

    ConstantInt *CallSiteNum =
        ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);

    // Record the call site value for the back end so it stays associated with
    // the invoke.
    CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]);
  }

  // Mark call instructions that aren't nounwind as no-action (call_site ==
  // -1). Skip the entry block: before it completes, no function context has
  // been created for this function, so any unexpected exceptions thrown will
  // go directly to the caller's context, which is what we want anyway. There
  // is no need to do anything here.
  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;)
    for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
      if (CallInst *CI = dyn_cast<CallInst>(I)) {
        if (!CI->doesNotThrow())
          insertCallSiteStore(CI, -1);
      } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
        insertCallSiteStore(RI, -1);
      }

  // Register the function context and make sure it's known to not throw
  CallInst *Register =
      CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator());
  Register->setDoesNotThrow();

  // Following any allocas not in the entry block, update the saved SP in the
  // jmpbuf to the new value.
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    if (BB == F.begin())
      continue;
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      if (CallInst *CI = dyn_cast<CallInst>(I)) {
        if (CI->getCalledFunction() != StackRestoreFn)
          continue;
      } else if (!isa<AllocaInst>(I)) {
        continue;
      }
      Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
      StackAddr->insertAfter(I);
      Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
      StoreStackAddr->insertAfter(StackAddr);
    }
  }

  // Finally, for any returns from this function, if this function contains an
  // invoke, add a call to unregister the function context.
  for (unsigned I = 0, E = Returns.size(); I != E; ++I)
    CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]);

  return true;
}
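Example #15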
static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
                    const SimplifyQuery &SQ) {
  bool FnChanged = false;
  // Visiting in a pre-order depth-first traversal causes us to simplify early
  // blocks before querying later blocks (which require us to analyze early
  // blocks).  Eagerly simplifying shallow blocks means there is strictly less
  // work to do for deep blocks.  This also means we don't visit unreachable
  // blocks.
  for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
    bool BBChanged = false;
    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
      Instruction *II = &*BI++;
      switch (II->getOpcode()) {
      case Instruction::Select:
        BBChanged |= processSelect(cast<SelectInst>(II), LVI);
        break;
      case Instruction::PHI:
        BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
        break;
      case Instruction::ICmp:
      case Instruction::FCmp:
        BBChanged |= processCmp(cast<CmpInst>(II), LVI);
        break;
      case Instruction::Load:
      case Instruction::Store:
        BBChanged |= processMemAccess(II, LVI);
        break;
      case Instruction::Call:
      case Instruction::Invoke:
        BBChanged |= processCallSite(CallSite(II), LVI);
        break;
      case Instruction::SRem:
        BBChanged |= processSRem(cast<BinaryOperator>(II), LVI);
        break;
      case Instruction::SDiv:
        BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
        break;
      case Instruction::UDiv:
      case Instruction::URem:
        BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
        break;
      case Instruction::AShr:
        BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
        break;
      case Instruction::Add:
        BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
        break;
      }
    }

    Instruction *Term = BB->getTerminator();
    switch (Term->getOpcode()) {
    case Instruction::Switch:
      BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI, DT);
      break;
    case Instruction::Ret: {
      auto *RI = cast<ReturnInst>(Term);
      // Try to determine the return value if we can.  This is mainly here to
      // simplify the writing of unit tests, but also helps to enable IPO by
      // constant folding the return values of callees.
      auto *RetVal = RI->getReturnValue();
      if (!RetVal) break; // handle "ret void"
      if (isa<Constant>(RetVal)) break; // nothing to do
      if (auto *C = getConstantAt(RetVal, RI, LVI)) {
        ++NumReturns;
        RI->replaceUsesOfWith(RetVal, C);
        BBChanged = true;
      }
    }
    }

    FnChanged |= BBChanged;
  }

  return FnChanged;
}
Example #16
Value *llvm::FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy,
                                       bool AtLeastAtomic, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
                                       AliasAnalysis *AA, bool *IsLoadCSE,
                                       unsigned *NumScanedInst) {
  if (MaxInstsToScan == 0)
    MaxInstsToScan = ~0U;

  const DataLayout &DL = ScanBB->getModule()->getDataLayout();

  // Try to get the store size for the type.
  auto AccessSize = LocationSize::precise(DL.getTypeStoreSize(AccessTy));

  Value *StrippedPtr = Ptr->stripPointerCasts();

  while (ScanFrom != ScanBB->begin()) {
    // We must ignore debug info directives when counting (otherwise they
    // would affect codegen).
    Instruction *Inst = &*--ScanFrom;
    if (isa<DbgInfoIntrinsic>(Inst))
      continue;

    // Restore ScanFrom to expected value in case next test succeeds
    ScanFrom++;

    if (NumScanedInst)
      ++(*NumScanedInst);

    // Don't scan huge blocks.
    if (MaxInstsToScan-- == 0)
      return nullptr;

    --ScanFrom;
    // If this is a load of Ptr, the loaded value is available.
    // (This is true even if the load is volatile or atomic, although
    // those cases are unlikely.)
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      if (AreEquivalentAddressValues(
              LI->getPointerOperand()->stripPointerCasts(), StrippedPtr) &&
          CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {

        // We can value forward from an atomic to a non-atomic, but not the
        // other way around.
        if (LI->isAtomic() < AtLeastAtomic)
          return nullptr;

        if (IsLoadCSE)
          *IsLoadCSE = true;
        return LI;
      }

    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
      // If this is a store through Ptr, the value is available!
      // (This is true even if the store is volatile or atomic, although
      // those cases are unlikely.)
      if (AreEquivalentAddressValues(StorePtr, StrippedPtr) &&
          CastInst::isBitOrNoopPointerCastable(SI->getValueOperand()->getType(),
                                               AccessTy, DL)) {

        // We can value forward from an atomic to a non-atomic, but not the
        // other way around.
        if (SI->isAtomic() < AtLeastAtomic)
          return nullptr;

        if (IsLoadCSE)
          *IsLoadCSE = false;
        return SI->getOperand(0);
      }

      // If both StrippedPtr and StorePtr reach all the way to an alloca or
      // global and they are different, ignore the store. This is a trivial form
      // of alias analysis that is important for reg2mem'd code.
      if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
          (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) &&
          StrippedPtr != StorePtr)
        continue;

      // If we have alias analysis and it says the store won't modify the loaded
      // value, ignore the store.
      if (AA && !isModSet(AA->getModRefInfo(SI, StrippedPtr, AccessSize)))
        continue;

      // Otherwise the store may or may not alias the pointer; bail out.
      ++ScanFrom;
      return nullptr;
    }

    // If this is some other instruction that may clobber Ptr, bail out.
    if (Inst->mayWriteToMemory()) {
      // If alias analysis claims that it really won't modify the load,
      // ignore it.
      if (AA && !isModSet(AA->getModRefInfo(Inst, StrippedPtr, AccessSize)))
        continue;

      // May modify the pointer, bail out.
      ++ScanFrom;
      return nullptr;
    }
  }

  // Got to the start of the block, we didn't find it, but are done for this
  // block.
  return nullptr;
}
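The isAtomic() < AtLeastAtomic comparisons above lean on bool ordering: with bool operands, Found < Required is true exactly when an atomic result was required but a non-atomic access was found, the one illegal combination. A tiny sketch (hypothetical name) of the same predicate:

// Forwarding is legal from an atomic access to a non-atomic use, but not
// the other way around; Found < Required flags the sole illegal case.
bool canForwardAtomicity(bool FoundIsAtomic, bool AtLeastAtomic) {
  return !(FoundIsAtomic < AtLeastAtomic);
}
Example #17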
/// tryMergingIntoMemset - When scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consecutive ones, it
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
        Value *StartPtr, Value *ByteVal) {
    if (TD == 0) return 0;

    // Okay, so we now have a single store that can be splatable.  Scan to find
    // all subsequent stores of the same value to offset from the same pointer.
    // Join these together into ranges, so we can decide whether contiguous blocks
    // are stored.
    MemsetRanges Ranges(*TD);

    BasicBlock::iterator BI = StartInst;
    for (++BI; !isa<TerminatorInst>(BI); ++BI) {
        if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
            // If the instruction is readnone, ignore it, otherwise bail out.  We
            // don't even allow readonly here because we don't want something like:
            // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
            if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
                break;
            continue;
        }

        if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
            // If this is a store, see if we can merge it in.
            if (!NextStore->isSimple()) break;

            // Check to see if this stored value is of the same byte-splattable value.
            if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
                break;

            // Check to see if this store is to a constant offset from the start ptr.
            int64_t Offset;
            if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(),
                                 Offset, *TD))
                break;

            Ranges.addStore(Offset, NextStore);
        } else {
            MemSetInst *MSI = cast<MemSetInst>(BI);

            if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
                    !isa<ConstantInt>(MSI->getLength()))
                break;

            // Check to see if this store is to a constant offset from the start ptr.
            int64_t Offset;
            if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD))
                break;

            Ranges.addMemSet(Offset, MSI);
        }
    }

    // If we have no ranges, then we just had a single store with nothing that
    // could be merged in.  This is a very common case of course.
    if (Ranges.empty())
        return 0;

    // If we had at least one store that could be merged in, add the starting
    // store as well.  We try to avoid this unless there is at least something
    // interesting as a small compile-time optimization.
    Ranges.addInst(0, StartInst);

    // If we create any memsets, we put them right before the first instruction
    // that isn't part of the memset block.  This ensures that the memset is
    // dominated by any addressing instruction needed by the start of the block.
    IRBuilder<> Builder(BI);

    // Now that we have full information about ranges, loop over the ranges and
    // emit memsets for anything big enough to be worthwhile.
    Instruction *AMemSet = 0;
    for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
            I != E; ++I) {
        const MemsetRange &Range = *I;

        if (Range.TheStores.size() == 1) continue;

        // If it is profitable to lower this range to memset, do so now.
        if (!Range.isProfitableToUseMemset(*TD))
            continue;

        // Otherwise, we do want to transform this!  Create a new memset.
        // Get the starting pointer of the block.
        StartPtr = Range.StartPtr;

        // Determine alignment
        unsigned Alignment = Range.Alignment;
        if (Alignment == 0) {
            Type *EltType =
                cast<PointerType>(StartPtr->getType())->getElementType();
            Alignment = TD->getABITypeAlignment(EltType);
        }

        AMemSet =
            Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);

        DEBUG(dbgs() << "Replace stores:\n";
              for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
              dbgs() << *Range.TheStores[i] << '\n';
              dbgs() << "With: " << *AMemSet << '\n');

        if (!Range.TheStores.empty())
            AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

        // Zap all the stores.
        for (SmallVector<Instruction*, 16>::const_iterator
                SI = Range.TheStores.begin(),
                SE = Range.TheStores.end(); SI != SE; ++SI) {
            MD->removeInstruction(*SI);
            (*SI)->eraseFromParent();
        }
        ++NumMemSetInfer;
    }

    return AMemSet;
}
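MemsetRanges does the real work of joining stores at constant offsets into contiguous intervals. A self-contained sketch of that interval merge, under simplified assumptions (byte offsets only; no alignment, value, or profitability tracking):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Range { int64_t Start, End; }; // half-open byte interval [Start, End)

// Record a store of Size bytes at Offset, folding in every existing range
// it overlaps or abuts, then keep the list sorted by start offset.
void addStore(std::vector<Range> &Ranges, int64_t Offset, int64_t Size) {
  Range R{Offset, Offset + Size};
  std::vector<Range> Out;
  for (const Range &Existing : Ranges) {
    if (Existing.End < R.Start || R.End < Existing.Start) {
      Out.push_back(Existing); // disjoint and non-adjacent: keep as-is
    } else {
      R.Start = std::min(R.Start, Existing.Start); // overlapping or adjacent:
      R.End = std::max(R.End, Existing.End);       // fold into R
    }
  }
  Out.push_back(R);
  std::sort(Out.begin(), Out.end(),
            [](const Range &A, const Range &B) { return A.Start < B.Start; });
  Ranges = Out;
}

A run that would then be handed to the profitability check: three 4-byte stores at offsets 0, 4, and 8 collapse into the single range [0, 12).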
Example #18
// Sinks \p I from the loop \p L's preheader to its uses. Returns true if
// sinking is successful.
// \p LoopBlockNumber is used to sort the insertion blocks to ensure
// determinism.
static bool sinkInstruction(Loop &L, Instruction &I,
                            const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
                            const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
                            LoopInfo &LI, DominatorTree &DT,
                            BlockFrequencyInfo &BFI) {
  // Compute the set of blocks in loop L which contain a use of I.
  SmallPtrSet<BasicBlock *, 2> BBs;
  for (auto &U : I.uses()) {
    Instruction *UI = cast<Instruction>(U.getUser());
    // We cannot sink I to PHI-uses.
    if (isa<PHINode>(UI))
      return false;
    // We cannot sink I if it has uses outside of the loop.
    if (!L.contains(LI.getLoopFor(UI->getParent())))
      return false;
    BBs.insert(UI->getParent());
  }

  // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
  // BBs.size() to avoid expensive computation.
  // FIXME: Handle code size growth for min_size and opt_size.
  if (BBs.size() > MaxNumberOfUseBBsForSinking)
    return false;

  // Find the set of BBs that we should insert a copy of I.
  SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
      findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
  if (BBsToSinkInto.empty())
    return false;

  // Copy the final BBs into a vector and sort them using the total ordering
  // of the loop block numbers as iterating the set doesn't give a useful
  // order. No need to stable sort as the block numbers are a total ordering.
  SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
  SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
                             BBsToSinkInto.end());
  llvm::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
             [&](BasicBlock *A, BasicBlock *B) {
               return LoopBlockNumber.find(A)->second <
                      LoopBlockNumber.find(B)->second;
             });

  BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
  // FIXME: Optimize the efficiency for cloned value replacement. The current
  //        implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
  for (BasicBlock *N : makeArrayRef(SortedBBsToSinkInto).drop_front(1)) {
    assert(LoopBlockNumber.find(N)->second >
               LoopBlockNumber.find(MoveBB)->second &&
           "BBs not sorted!");
    // Clone I and replace its uses.
    Instruction *IC = I.clone();
    IC->setName(I.getName());
    IC->insertBefore(&*N->getFirstInsertionPt());
    // Replaces uses of I with IC in N
    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
      Use &U = *UI++;
      auto *I = cast<Instruction>(U.getUser());
      if (I->getParent() == N)
        U.set(IC);
    }
    // Replaces uses of I with IC in blocks dominated by N
    replaceDominatedUsesWith(&I, IC, DT, N);
    LLVM_DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
                      << '\n');
    NumLoopSunkCloned++;
  }
  LLVM_DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
  NumLoopSunk++;
  I.moveBefore(&*MoveBB->getFirstInsertionPt());

  return true;
}
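Sorting by the precomputed LoopBlockNumber, rather than iterating the pointer set directly, is what makes the clone placement deterministic: pointer-keyed containers iterate in address order, which varies from run to run. A sketch of the idea with standard containers (Block is a stand-in for llvm::BasicBlock):

#include <algorithm>
#include <unordered_map>
#include <vector>

struct Block {}; // stand-in for llvm::BasicBlock

// Restore a stable order by sorting on a number assigned once, up front.
std::vector<Block *>
orderBlocks(std::vector<Block *> Blocks,
            const std::unordered_map<Block *, int> &BlockNumber) {
  std::sort(Blocks.begin(), Blocks.end(), [&](Block *A, Block *B) {
    return BlockNumber.at(A) < BlockNumber.at(B);
  });
  return Blocks;
}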
Example #19
void WorklessInstrument::CloneInnerLoop(Loop * pLoop, vector<BasicBlock *> & vecAdd, ValueToValueMapTy & VMap, set<BasicBlock *> & setCloned)
{
	Function * pFunction = pLoop->getHeader()->getParent();
	BasicBlock * pPreHeader = vecAdd[0];

	SmallVector<BasicBlock *, 4> ExitBlocks;
	pLoop->getExitBlocks(ExitBlocks);

	set<BasicBlock *> setExitBlocks;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++)
	{
		setExitBlocks.insert(ExitBlocks[i]);
	}

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		VMap[ExitBlocks[i]] = ExitBlocks[i];
	}

	vector<BasicBlock *> ToClone;
	vector<BasicBlock *> BeenCloned;

	
	//clone loop
	ToClone.push_back(pLoop->getHeader());

	while(ToClone.size()>0)
	{
		BasicBlock * pCurrent = ToClone.back();
		ToClone.pop_back();

		WeakVH & BBEntry = VMap[pCurrent];
		if (BBEntry)
		{
			continue;
		}

		BasicBlock * NewBB;
		BBEntry = NewBB = BasicBlock::Create(pCurrent->getContext(), "", pFunction);

		if(pCurrent->hasName())
		{
			NewBB->setName(pCurrent->getName() + ".CPI");
		}

		if(pCurrent->hasAddressTaken())
		{
			errs() << "hasAddressTaken branch\n" ;
			exit(0);
		}

		for(BasicBlock::const_iterator II = pCurrent->begin(); II != pCurrent->end(); ++II )
		{
			Instruction * NewInst = II->clone();
			if(II->hasName())
			{
				NewInst->setName(II->getName() + ".CPI");
			}
			VMap[II] = NewInst;
			NewBB->getInstList().push_back(NewInst);
		}

		const TerminatorInst *TI = pCurrent->getTerminator();
		for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
		{
			ToClone.push_back(TI->getSuccessor(i));
		}

		setCloned.insert(NewBB);
		BeenCloned.push_back(NewBB);
	}

	//remap value used inside loop
	vector<BasicBlock *>::iterator itVecBegin = BeenCloned.begin();
	vector<BasicBlock *>::iterator itVecEnd = BeenCloned.end();

	for(; itVecBegin != itVecEnd; itVecBegin ++)
	{
		for(BasicBlock::iterator II = (*itVecBegin)->begin(); II != (*itVecBegin)->end(); II ++ )
		{
			//II->dump();
			RemapInstruction(II, VMap);
		}
	}

	//add to the else if body
	BasicBlock * pElseBody = vecAdd[1];

	BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pLoop->getHeader()]);

	BranchInst::Create(pClonedHeader, pElseBody);

	//errs() << pPreHeader->getName() << "\n";
	for(BasicBlock::iterator II = pClonedHeader->begin(); II != pClonedHeader->end(); II ++ )
	{
		if(PHINode * pPHI = dyn_cast<PHINode>(II))
		{
			for (unsigned i = 0, e = pPHI->getNumIncomingValues(); i != e; ++i)
			{
				if(pPHI->getIncomingBlock(i) == pPreHeader)
				{
					pPHI->setIncomingBlock(i, pElseBody);
				}
			}
		}
	}

	set<BasicBlock *> setProcessedBlock;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		if(setProcessedBlock.find(ExitBlocks[i]) != setProcessedBlock.end() )
		{
			continue;
		}
		else
		{
			setProcessedBlock.insert(ExitBlocks[i]);
		}

		for(BasicBlock::iterator II = ExitBlocks[i]->begin(); II != ExitBlocks[i]->end(); II ++ )
		{
			if(PHINode * pPHI = dyn_cast<PHINode>(II))
			{
				unsigned numIncomming = pPHI->getNumIncomingValues();
				for(unsigned i = 0; i<numIncomming; i++)
				{
					BasicBlock * incommingBlock = pPHI->getIncomingBlock(i);
					if(VMap.find(incommingBlock) != VMap.end() )
					{
						Value * incommingValue = pPHI->getIncomingValue(i);

						if(VMap.find(incommingValue) != VMap.end() )
						{
							incommingValue = VMap[incommingValue];
						}

						pPHI->addIncoming(incommingValue, cast<BasicBlock>(VMap[incommingBlock]));

					}
				} 

			}
		}
	}
}
Example #20
    Value* ModuloSchedulerDriverPass::copyLoopBodyToHeader(Instruction* inst,
            Instruction* induction, BasicBlock* header, int offset){

        // Holds the body of the interesting loop
        BasicBlock *body = inst->getParent();

        assert(header && "Header is null");
        assert(header->getTerminator() && "Header has no terminator");

        // Maps the old instructions to the new Instructions
        DenseMap<const Value *, Value *>  ValueMap;
        // Do the actual clone
        stringstream iname;
        iname<<"___"<<offset<<"___";
        BasicBlock* newBB = CloneBasicBlock(body, ValueMap, iname.str().c_str());

        // Fixing the dependencies for each of the instructions in the cloned BB
        // They now depend on themselves rather on the old cloned BB.
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            for (Instruction::op_iterator ops = (it)->op_begin(); ops != (it)->op_end(); ++ops) {
                if (ValueMap.end() != ValueMap.find(*ops)) {
                    //*ops = ValueMap[*ops];
                    it->replaceUsesOfWith(*ops, ValueMap[*ops]);
                }
            }
        }

        // Fixing the PHI nodes since they are no longer needed
        for (BasicBlock::iterator it = newBB->begin(); it != newBB->end(); ++it) {
            if (PHINode *phi = dyn_cast<PHINode>(it)) {
                // Taking the preheader entry from the PHI node

                Value* prevalue = phi->getIncomingValue(phi->getBasicBlockIndex(header));
                assert(prevalue && "no prevalue. Don't know what to do");

                // Are we handling a PHI node which is the induction index,
                // e.g. A[PHI(i,0)]? If so, turn it into A[i + offset].
                if (ValueMap[induction] == phi) {
                    Instruction *add = subscripts::incrementValue(prevalue, offset);
                    //add->insertBefore(phi); This is the same as next line (compiles on LLVM2.1)
                    phi->getParent()->getInstList().insert(phi, add);
                    phi->replaceAllUsesWith(add);
                }  else {
                    // eliminating the PHI node all together
                    // This is just a regular variable or constant. No need to increment
                    // the index.
                    phi->replaceAllUsesWith(prevalue);
                }
            } 
        }

        // Move all non PHI and non terminator instructions into the header.
        while (!newBB->getFirstNonPHI()->isTerminator()) {
            Instruction* inst = newBB->getFirstNonPHI();
            if (dyn_cast<StoreInst>(inst)) {
                inst->eraseFromParent();
            } else {
                inst->moveBefore(header->getTerminator());
            }
        }
        newBB->dropAllReferences();
        return ValueMap[inst];
    }
Example #21
/// Return true if we can evaluate the specified expression tree if the vector
/// elements were shuffled in a different order.
static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
                                unsigned Depth = 5) {
  // We can always reorder the elements of a constant.
  if (isa<Constant>(V))
    return true;

  // We won't reorder vector arguments. No IPO here.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return false;

  // Two users may expect different orders of the elements. Don't try it.
  if (!I->hasOneUse())
    return false;

  if (Depth == 0) return false;

  switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::GetElementPtr: {
      for (Value *Operand : I->operands()) {
        if (!CanEvaluateShuffled(Operand, Mask, Depth-1))
          return false;
      }
      return true;
    }
    case Instruction::InsertElement: {
      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
      if (!CI) return false;
      int ElementNumber = CI->getLimitedValue();

      // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
      // can't put an element into multiple indices.
      bool SeenOnce = false;
      for (int i = 0, e = Mask.size(); i != e; ++i) {
        if (Mask[i] == ElementNumber) {
          if (SeenOnce)
            return false;
          SeenOnce = true;
        }
      }
      return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
    }
  }
  return false;
}
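The SeenOnce scan above is just an "appears at most once" test on the mask. In standalone form (plain C++, hypothetical name):

#include <vector>

// A single insertelement writes one lane, so its element number may appear
// at most once in the shuffle mask for the reorder to be exact.
bool appearsAtMostOnce(const std::vector<int> &Mask, int ElementNumber) {
  bool SeenOnce = false;
  for (int M : Mask) {
    if (M != ElementNumber)
      continue;
    if (SeenOnce)
      return false;
    SeenOnce = true;
  }
  return true;
}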
Example #22
bool ObjCARCContract::runOnFunction(Function &F) {
  if (!EnableARCOpts)
    return false;

  // If nothing in the Module uses ARC, don't do anything.
  if (!Run)
    return false;

  Changed = false;
  AA = &getAnalysis<AliasAnalysis>();
  DT = &getAnalysis<DominatorTree>();

  PA.setAA(&getAnalysis<AliasAnalysis>());

  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
  // keyword. Be conservative if the function has variadic arguments.
  // Functions that "return twice" are also unsafe for the "tail" keyword,
  // because they are setjmp-like functions, which could need to return to
  // an earlier stack state.
  bool TailOkForStoreStrongs = !F.isVarArg() &&
                               !F.callsFunctionThatReturnsTwice();

  // For ObjC library calls which return their argument, replace uses of the
  // argument with uses of the call return value, if it dominates the use. This
  // reduces register pressure.
  SmallPtrSet<Instruction *, 4> DependingInstructions;
  SmallPtrSet<const BasicBlock *, 4> Visited;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");

    // Only these library routines return their argument. In particular,
    // objc_retainBlock does not necessarily return its argument.
    InstructionClass Class = GetBasicInstructionClass(Inst);
    switch (Class) {
    case IC_FusedRetainAutorelease:
    case IC_FusedRetainAutoreleaseRV:
      break;
    case IC_Autorelease:
    case IC_AutoreleaseRV:
      if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
        continue;
      break;
    case IC_Retain:
      // Attempt to convert retains to retainrvs if they are next to function
      // calls.
      if (!OptimizeRetainCall(F, Inst))
        break;
      // If we succeed in our optimization, fall through.
      // FALLTHROUGH
    case IC_RetainRV: {
      // If we're compiling for a target which needs a special inline-asm
      // marker to do the retainAutoreleasedReturnValue optimization,
      // insert it now.
      if (!RetainRVMarker)
        break;
      BasicBlock::iterator BBI = Inst;
      BasicBlock *InstParent = Inst->getParent();

      // Step up to see if the call immediately precedes the RetainRV call.
      // If it's an invoke, we have to cross a block boundary. And we have
      // to carefully dodge no-op instructions.
      do {
        if (&*BBI == InstParent->begin()) {
          BasicBlock *Pred = InstParent->getSinglePredecessor();
          if (!Pred)
            goto decline_rv_optimization;
          BBI = Pred->getTerminator();
          break;
        }
        --BBI;
      } while (IsNoopInstruction(BBI));

      if (&*BBI == GetObjCArg(Inst)) {
        DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
                        "retainAutoreleasedReturnValue optimization.\n");
        Changed = true;
        InlineAsm *IA =
          InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                           /*isVarArg=*/false),
                         RetainRVMarker->getString(),
                         /*Constraints=*/"", /*hasSideEffects=*/true);
        CallInst::Create(IA, "", Inst);
      }
    decline_rv_optimization:
      break;
    }
    case IC_InitWeak: {
      // objc_initWeak(p, null) => *p = null
      CallInst *CI = cast<CallInst>(Inst);
      if (IsNullOrUndef(CI->getArgOperand(1))) {
        Value *Null =
          ConstantPointerNull::get(cast<PointerType>(CI->getType()));
        Changed = true;
        new StoreInst(Null, CI->getArgOperand(0), CI);

        DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
                     << "                 New = " << *Null << "\n");

        CI->replaceAllUsesWith(Null);
        CI->eraseFromParent();
      }
      continue;
    }
    case IC_Release:
      ContractRelease(Inst, I);
      continue;
    case IC_User:
      // Be conservative if the function has any alloca instructions.
      // Technically we only care about escaping alloca instructions,
      // but this is sufficient to handle some interesting cases.
      if (isa<AllocaInst>(Inst))
        TailOkForStoreStrongs = false;
      continue;
    case IC_IntrinsicUser:
      // Remove calls to @clang.arc.use(...).
      Inst->eraseFromParent();
      continue;
    default:
      continue;
    }

    DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");

    // Don't use GetObjCArg because we don't want to look through bitcasts
    // and such; to do the replacement, the argument must have type i8*.
    const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
    for (;;) {
      // If we're compiling bugpointed code, don't get in trouble.
      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
        break;
      // Look through the uses of the pointer.
      for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ) {
        Use &U = UI.getUse();
        unsigned OperandNo = UI.getOperandNo();
        ++UI; // Increment UI now, because we may unlink its element.

        // If the call's return value dominates a use of the call's argument
        // value, rewrite the use to use the return value. We check for
        // reachability here because an unreachable call is considered to
        // trivially dominate itself, which would lead us to rewriting its
        // argument in terms of its return value, which would lead to
        // infinite loops in GetObjCArg.
        if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
          Changed = true;
          Instruction *Replacement = Inst;
          Type *UseTy = U.get()->getType();
          if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
            // For PHI nodes, insert the bitcast in the predecessor block.
            unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
            BasicBlock *BB = PHI->getIncomingBlock(ValNo);
            if (Replacement->getType() != UseTy)
              Replacement = new BitCastInst(Replacement, UseTy, "",
                                            &BB->back());
            // While we're here, rewrite all edges for this PHI, rather
            // than just one use at a time, to minimize the number of
            // bitcasts we emit.
            for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
              if (PHI->getIncomingBlock(i) == BB) {
                // Keep the UI iterator valid.
                if (&PHI->getOperandUse(
                      PHINode::getOperandNumForIncomingValue(i)) ==
                    &UI.getUse())
                  ++UI;
                PHI->setIncomingValue(i, Replacement);
              }
          } else {
            if (Replacement->getType() != UseTy)
              Replacement = new BitCastInst(Replacement, UseTy, "",
                                            cast<Instruction>(U.getUser()));
            U.set(Replacement);
          }
        }
      }

      // If Arg is a no-op casted pointer, strip one level of casts and iterate.
      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
        Arg = BI->getOperand(0);
      else if (isa<GEPOperator>(Arg) &&
               cast<GEPOperator>(Arg)->hasAllZeroIndices())
        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
      else if (isa<GlobalAlias>(Arg) &&
               !cast<GlobalAlias>(Arg)->mayBeOverridden())
        Arg = cast<GlobalAlias>(Arg)->getAliasee();
      else
        break;
    }
  }

  // If this function has no escaping allocas or suspicious vararg usage,
  // objc_storeStrong calls can be marked with the "tail" keyword.
  if (TailOkForStoreStrongs)
    for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
         E = StoreStrongCalls.end(); I != E; ++I)
      (*I)->setTailCall();
  StoreStrongCalls.clear();

  return Changed;
}
Example #23
Value *
InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
  // Mask.size() does not need to be equal to the number of vector elements.

  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  if (isa<UndefValue>(V)) {
    return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
                                           Mask.size()));
  }
  if (isa<ConstantAggregateZero>(V)) {
    return ConstantAggregateZero::get(
               VectorType::get(V->getType()->getScalarType(),
                               Mask.size()));
  }
  if (Constant *C = dyn_cast<Constant>(V)) {
    SmallVector<Constant *, 16> MaskValues;
    for (int i = 0, e = Mask.size(); i != e; ++i) {
      if (Mask[i] == -1)
        MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
      else
        MaskValues.push_back(Builder->getInt32(Mask[i]));
    }
    return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
                                          ConstantVector::get(MaskValues));
  }

  Instruction *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::Select:
    case Instruction::GetElementPtr: {
      SmallVector<Value*, 8> NewOps;
      bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
        NewOps.push_back(V);
        NeedsRebuild |= (V != I->getOperand(i));
      }
      if (NeedsRebuild) {
        return buildNew(I, NewOps);
      }
      return I;
    }
    case Instruction::InsertElement: {
      int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();

      // The insertelement was inserting at Element. Figure out which element
      // that becomes after shuffling. The answer is guaranteed to be unique
      // by CanEvaluateShuffled.
      bool Found = false;
      int Index = 0;
      for (int e = Mask.size(); Index != e; ++Index) {
        if (Mask[Index] == Element) {
          Found = true;
          break;
        }
      }

      // If element is not in Mask, no need to handle the operand 1 (element to
      // be inserted). Just evaluate values in operand 0 according to Mask.
      if (!Found)
        return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);

      Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
      return InsertElementInst::Create(V, I->getOperand(1),
                                       Builder->getInt32(Index), "", I);
    }
  }
  llvm_unreachable("failed to reorder elements of vector instruction!");
}
Example #24
// Add the remainder of the reduction-variable chain to the instruction vector
// (the initial PHINode has already been added). If successful, the object is
// marked as valid.
void LoopReroll::SimpleLoopReduction::add(Loop *L) {
  assert(!Valid && "Cannot add to an already-valid chain");

  // The reduction variable must be a chain of single-use instructions
  // (including the PHI), except for the last value (which is used by the PHI
  // and also outside the loop).
  Instruction *C = Instructions.front();

  do {
    C = cast<Instruction>(*C->use_begin());
    if (C->hasOneUse()) {
      if (!C->isBinaryOp())
        return;

      if (!(isa<PHINode>(Instructions.back()) ||
            C->isSameOperationAs(Instructions.back())))
        return;

      Instructions.push_back(C);
    }
  } while (C->hasOneUse());

  if (Instructions.size() < 2 ||
      !C->isSameOperationAs(Instructions.back()) ||
      C->use_begin() == C->use_end())
    return;

  // C is now the (potential) last instruction in the reduction chain.
  for (Value::use_iterator UI = C->use_begin(), UIE = C->use_end();
       UI != UIE; ++UI) {
    // The only in-loop user can be the initial PHI.
    if (L->contains(cast<Instruction>(*UI)))
      if (cast<Instruction>(*UI) != Instructions.front())
        return;
  }

  Instructions.push_back(C);
  Valid = true;
}
Example #25
bool PPCLoopDataPrefetch::runOnLoop(Loop *L) {
  bool MadeChange = false;

  // Only prefetch in the inner-most loop
  if (!L->empty())
    return MadeChange;

  SmallPtrSet<const Value *, 32> EphValues;
  CodeMetrics::collectEphemeralValues(L, AC, EphValues);

  // Calculate the number of iterations ahead to prefetch
  CodeMetrics Metrics;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {

    // If the loop already has prefetches, then assume that the user knows
    // what he or she is doing and don't add any more.
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
         J != JE; ++J)
      if (CallInst *CI = dyn_cast<CallInst>(J))
        if (Function *F = CI->getCalledFunction())
          if (F->getIntrinsicID() == Intrinsic::prefetch)
            return MadeChange;

    Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
  }
  unsigned LoopSize = Metrics.NumInsts;
  if (!LoopSize)
    LoopSize = 1;

  unsigned ItersAhead = PrefDist/LoopSize;
  if (!ItersAhead)
    ItersAhead = 1;

  SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
        J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        if (!PrefetchWrites) continue;
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEV(PtrValue);
      const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
      if (!LSCEVAddRec)
        continue;

      // We don't want to double prefetch individual cache lines. If this load
      // is known to be within one cache line of some other load that has
      // already been prefetched, then don't prefetch this one as well.
      bool DupPref = false;
      for (SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>,
             16>::iterator K = PrefLoads.begin(), KE = PrefLoads.end();
           K != KE; ++K) {
        const SCEV *PtrDiff = SE->getMinusSCEV(LSCEVAddRec, K->second);
        if (const SCEVConstant *ConstPtrDiff =
            dyn_cast<SCEVConstant>(PtrDiff)) {
          int64_t PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
          if (PD < (int64_t) CacheLineSize) {
            DupPref = true;
            break;
          }
        }
      }
      if (DupPref)
        continue;

      const SCEV *NextLSCEV = SE->getAddExpr(LSCEVAddRec, SE->getMulExpr(
        SE->getConstant(LSCEVAddRec->getType(), ItersAhead),
        LSCEVAddRec->getStepRecurrence(*SE)));
      if (!isSafeToExpand(NextLSCEV, *SE))
        continue;

      PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));

      Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
      SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
      Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);

      IRBuilder<> Builder(MemI);
      Module *M = (*I)->getParent()->getParent();
      Type *I32 = Type::getInt32Ty((*I)->getContext());
      Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
      Builder.CreateCall(
          PrefetchFunc,
          {PrefPtrValue,
           ConstantInt::get(I32, MemI->mayReadFromMemory() ? 0 : 1),
           ConstantInt::get(I32, 3), ConstantInt::get(I32, 1)});

      MadeChange = true;
    }
  }

  return MadeChange;
}
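
The prefetch distance computed above is simply PrefDist divided by the loop size in instructions, clamped to at least one iteration ahead. A toy restatement of that arithmetic (in the real pass PrefDist and CacheLineSize are tunable options; the numbers in the comment are made up):

#include <algorithm>

// Sketch of the iterations-ahead computation used above.
unsigned itersAhead(unsigned PrefDist, unsigned LoopSizeInInsts) {
  unsigned LoopSize = std::max(LoopSizeInInsts, 1u); // guard against empty metrics
  return std::max(PrefDist / LoopSize, 1u);          // always >= 1 iteration ahead
}
// e.g. a hypothetical PrefDist of 300 with a 12-instruction body yields 25,
// so each access is prefetched 25 iterations before it is needed.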
Example No. 26
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1                <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2                <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups of
// instructions. In other words, the instructions in f(%iv), f(%iv.1), etc. can
// be intermixed with each other. The restriction imposed by this algorithm is
// that the relative order of the isomorphic instructions in f(%iv), f(%iv.1),
// etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times, having collected the use set of f(%iv.(i+1)), during which we:
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
//     the next unmatched instruction in f(%iv.(i+1)).
//   - Ensure that both matched instructions don't have any external users
//     (with the exception of last-in-chain reduction instructions).
//   - Track the (aliasing) write set, and other side effects, of all
//     instructions that belong to future iterations that come before the matched
//     instructions. If the matched instructions read from that write set, then
//     f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
//     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
//     if any of these future instructions had side effects (could not be
//     speculatively executed), and so do the matched instructions, then we
//     cannot reorder those side-effect-producing instructions, and rerolling
//     fails.
//
// Finally, we make sure that all loop instructions are either loop increment
// roots, part of simple latch code, part of validated reductions, part of
// f(%iv), or part of some f(%iv.i). If all of that is true (and all reductions
// have been validated), then we reroll the loop.
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                        const SCEV *IterCount,
                        ReductionTracker &Reductions) {
  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
  uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
                   getValue()->getZExtValue();
  // The collection of loop increment instructions.
  SmallInstructionVector LoopIncs;
  uint64_t Scale = Inc;

  // The effective induction variable, IV, is normally also the real induction
  // variable. When we're dealing with a loop like:
  //   for (int i = 0; i < 500; ++i)
  //     x[3*i] = ...;
  //     x[3*i+1] = ...;
  //     x[3*i+2] = ...;
  // then the real IV is still i, but the effective IV is (3*i).
  Instruction *RealIV = IV;
  if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
    return false;

  assert(Scale <= MaxInc && "Scale is too large");
  assert(Scale > 1 && "Scale must be at least 2");

  // The set of increment instructions for each increment value.
  SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
  SmallInstructionSet AllRoots;
  if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
    return false;

  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
                  *RealIV << "\n");

  // An array of just the possible reductions for this scale factor. When we
  // collect the set of all users of some root instructions, these reduction
  // instructions are treated as 'final' (their uses are not considered).
  // This is important because we don't want the root use set to search down
  // the reduction chain.
  SmallInstructionSet PossibleRedSet;
  SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
  Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
                             PossibleRedLastSet);

  // We now need to check for equivalence of the use graph of each root with
  // that of the primary induction variable (excluding the roots). Our goal
  // here is not to solve the full graph isomorphism problem, but rather to
  // catch common cases without a lot of work. As a result, we will assume
  // that the relative order of the instructions in each unrolled iteration
  // is the same (although we will not make an assumption about how the
  // different iterations are intermixed). Note that while the order must be
  // the same, the instructions may not be in the same basic block.
  SmallInstructionSet Exclude(AllRoots);
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());

  DenseSet<Instruction *> BaseUseSet;
  collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);

  DenseSet<Instruction *> AllRootUses;
  std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);

  bool MatchFailed = false;
  for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
    DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
    collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
                         PossibleRedSet, RootUseSet);

    DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
                    " vs. iteration increment " << (i+1) <<
                    " use set size: " << RootUseSet.size() << "\n");

    if (BaseUseSet.size() != RootUseSet.size()) {
      MatchFailed = true;
      break;
    }

    // In addition to regular aliasing information, we need to look for
    // instructions from later (future) iterations that have side effects
    // preventing us from reordering them past other instructions with side
    // effects.
    bool FutureSideEffects = false;
    AliasSetTracker AST(*AA);

    // The map between instructions in f(%iv.(i+1)) and f(%iv).
    DenseMap<Value *, Value *> BaseMap;

    assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
    for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
         JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
      if (cast<Instruction>(J1) == RealIV)
        continue;
      if (cast<Instruction>(J1) == IV)
        continue;
      if (!BaseUseSet.count(J1))
        continue;
      if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
        continue;

      while (J2 != JE && (!RootUseSet.count(J2) ||
             std::find(Roots[i].begin(), Roots[i].end(), J2) !=
               Roots[i].end())) {
        // As we iterate through the instructions, instructions that don't
        // belong to previous iterations (or the base case) must belong to
        // future iterations. We want to track the alias set of writes from
        // previous iterations.
        if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
            !AllRootUses.count(J2)) {
          if (J2->mayWriteToMemory())
            AST.add(J2);

          // Note: This is specifically guarded by a check on isa<PHINode>,
          // which while a valid (somewhat arbitrary) micro-optimization, is
          // needed because otherwise isSafeToSpeculativelyExecute returns
          // false on PHI nodes.
          if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
            FutureSideEffects = true; 
        }

        ++J2;
      }

      if (!J1->isSameOperationAs(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << "\n");
        MatchFailed = true;
        break;
      }

      // Make sure that this instruction, which is in the use set of this
      // root instruction, does not also belong to the base set or the set of
      // some previous root instruction.
      if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (prev. case overlap)\n");
        MatchFailed = true;
        break;
      }

      // Make sure that we don't alias with any instruction in the alias set
      // tracker. If we do, then we depend on a future iteration, and we
      // can't reroll.
      if (J2->mayReadFromMemory()) {
        for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
             K != KE && !MatchFailed; ++K) {
          if (K->aliasesUnknownInst(J2, *AA)) {
            DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                            " vs. " << *J2 << " (depends on future store)\n");
            MatchFailed = true;
            break;
          }
        }
      }

      // If we've passed an instruction from a future iteration that may have
      // side effects, and this instruction might also, then we can't reorder
      // them, and this matching fails. As an exception, we allow the alias
      // set tracker to handle regular (simple) load/store dependencies.
      if (FutureSideEffects &&
            ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
             (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 <<
                        " (side effects prevent reordering)\n");
        MatchFailed = true;
        break;
      }

      // For instructions that are part of a reduction, if the operation is
      // associative, then don't bother matching the operands (because we
      // already know that the instructions are isomorphic, and the order
      // within the iteration does not matter). For non-associative reductions,
      // we do need to match the operands, because we need to reject
      // out-of-order instructions within an iteration!
      // For example (assume floating-point addition), we need to reject this:
      //   x += a[i]; x += b[i];
      //   x += a[i+1]; x += b[i+1];
      //   x += b[i+2]; x += a[i+2];
      bool InReduction = Reductions.isPairInSame(J1, J2);

      if (!(InReduction && J1->isAssociative())) {
        bool Swapped = false, SomeOpMatched = false;
        for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
          Value *Op2 = J2->getOperand(j);

          // If this is part of a reduction (and the operation is not
          // associative), then we match all operands, but not those that are
          // part of the reduction.
          if (InReduction)
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
              if (Reductions.isPairInSame(J2, Op2I))
                continue;

          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
          if (BMI != BaseMap.end())
            Op2 = BMI->second;
          else if (std::find(Roots[i].begin(), Roots[i].end(),
                             (Instruction*) Op2) != Roots[i].end())
            Op2 = IV;

          if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
            // If we've not already decided to swap the matched operands, and
            // we've not already matched our first operand (note that we could
            // have skipped matching the first operand because it is part of a
            // reduction above), and the instruction is commutative, then try
            // the swapped match.
            if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
                J1->getOperand(!j) == Op2) {
              Swapped = true;
            } else {
              DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                              " vs. " << *J2 << " (operand " << j << ")\n");
              MatchFailed = true;
              break;
            }
          }

          SomeOpMatched = true;
        }
      }

      if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
          (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (uses outside loop)\n");
        MatchFailed = true;
        break;
      }

      if (!MatchFailed)
        BaseMap.insert(std::pair<Value *, Value *>(J2, J1));

      AllRootUses.insert(J2);
      Reductions.recordPair(J1, J2, i+1);

      ++J2;
    }
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
                  *RealIV << "\n");

  DenseSet<Instruction *> LoopIncUseSet;
  collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
                       SmallInstructionSet(), LoopIncUseSet);
  DEBUG(dbgs() << "LRR: Loop increment set size: " <<
                  LoopIncUseSet.size() << "\n");

  // Make sure that all instructions in the loop have been included in some
  // use set.
  for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
       J != JE; ++J) {
    if (isa<DbgInfoIntrinsic>(J))
      continue;
    if (cast<Instruction>(J) == RealIV)
      continue;
    if (cast<Instruction>(J) == IV)
      continue;
    if (BaseUseSet.count(J) || AllRootUses.count(J) ||
        (LoopIncUseSet.count(J) && (J->isTerminator() ||
                                    isSafeToSpeculativelyExecute(J, DL))))
      continue;

    if (AllRoots.count(J))
      continue;

    if (Reductions.isSelectedPHI(J))
      continue;

    DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
                    " unprocessed instruction found: " << *J << "\n");
    MatchFailed = true;
    break;
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: all instructions processed from " <<
                  *RealIV << "\n");

  if (!Reductions.validateSelected())
    return false;

  // At this point, we've validated the rerolling, and we're committed to
  // making changes!

  Reductions.replaceSelected();

  // Remove instructions associated with non-base iterations.
  for (BasicBlock::reverse_iterator J = Header->rbegin();
       J != Header->rend();) {
    if (AllRootUses.count(&*J)) {
      Instruction *D = &*J;
      DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
      D->eraseFromParent();
      continue;
    }

    ++J; 
  }

  // Insert the new induction variable.
  const SCEV *Start = RealIVSCEV->getStart();
  if (Inc == 1)
    Start = SE->getMulExpr(Start,
                           SE->getConstant(Start->getType(), Scale));
  const SCEVAddRecExpr *H =
    cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
                           SE->getConstant(RealIVSCEV->getType(), 1),
                           L, SCEV::FlagAnyWrap));
  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());

    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);

    if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
      if (LoopIncUseSet.count(BI)) {
        const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
        if (Inc == 1)
          ICSCEV =
            SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));
        // Iteration count SCEV minus 1
        const SCEV *ICMinus1SCEV =
          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));

        Value *ICMinus1; // Iteration count minus 1
        if (isa<SCEVConstant>(ICMinus1SCEV)) {
          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
                                            Preheader->getTerminator());
        }
 
        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

        if (BI->getSuccessor(1) != Header)
          BI->swapSuccessors();
      }
    }
  }

  SimplifyInstructionsInBlock(Header, DL, TLI);
  DeleteDeadPHIs(Header, TLI);
  ++NumRerolledLoops;
  return true;
}
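
To make the transformation concrete, here is a hedged source-level before/after for the scale-3 case sketched in the comment block (illustrative only; the pass operates on IR, not C):

// Before: a manually unrolled loop with scale == 3 and two extra root
// increments per iteration.
void scaleBefore(int *X) {
  for (int I = 0; I < 1500; I += 3) {
    X[I]     *= 2;
    X[I + 1] *= 2;
    X[I + 2] *= 2;
  }
}

// After rerolling: one iteration's worth of work, with the induction
// variable now stepping by 1 over the same range.
void scaleAfter(int *X) {
  for (int I = 0; I < 1500; ++I)
    X[I] *= 2;
}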
Example No. 27
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                               BinaryOperator &I) {
  bool isLeftShift = I.getOpcode() == Instruction::Shl;

  // See if we can propagate this shift into the input, this covers the trivial
  // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
              " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");

    return ReplaceInstUsesWith(I,
                 GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
  }

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();

  // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
  // a signed shift.
  //
  if (Op1->uge(TypeBits)) {
    if (I.getOpcode() != Instruction::AShr)
      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
    // ashr i32 X, 32 --> ashr i32 X, 31
    I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
    return &I;
  }

  // ((X*C1) << C2) == (X * (C1 << C2))
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
        return BinaryOperator::CreateMul(BO->getOperand(0),
                                        ConstantExpr::getShl(BOOp, Op1));

  // Try to fold constant and into select arguments.
  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = FoldOpIntoSelect(I, SI))
      return R;
  if (isa<PHINode>(Op0))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
    // place.  Don't try to do this transformation in this case.  Also, we
    // require that the input operand is a shift-by-constant so that we have
    // confidence that the shifts will get folded together.  We could do this
    // xform in more cases, but it is unlikely to be profitable.
    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
        isa<ConstantInt>(TrOp->getOperand(1))) {
      // Okay, we'll do this xform.  Make the shift of shift.
      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
      // (shift2 (shift1 & 0x00FF), c2)
      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());

      // For logical shifts, the truncation has the effect of making the high
      // part of the register be zeros.  Emulate this by inserting an AND to
      // clear the top bits as needed.  This 'and' will usually be zapped by
      // other xforms later if dead.
      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
      unsigned DstSize = TI->getType()->getScalarSizeInBits();
      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));

      // The mask we constructed says what the trunc would do if occurring
      // between the shifts.  We want to know the effect *after* the second
      // shift.  We know that it is a logical shift by a constant, so adjust the
      // mask as appropriate.
      if (I.getOpcode() == Instruction::Shl)
        MaskV <<= Op1->getZExtValue();
      else {
        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
        MaskV = MaskV.lshr(Op1->getZExtValue());
      }

      // shift1 & 0x00FF
      Value *And = Builder->CreateAnd(NSh,
                                      ConstantInt::get(I.getContext(), MaskV),
                                      TI->getName());

      // Return the value truncated to the interesting size.
      return new TruncInst(And, I.getType());
    }
  }

  if (Op0->hasOneUse()) {
    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
      Value *V1, *V2;
      ConstantInt *CC;
      switch (Op0BO->getOpcode()) {
      default: break;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor: {
        // These operators commute.
        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =         // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
                                          Op0BO->getOperand(1)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
        Value *Op0BOOp1 = Op0BO->getOperand(1);
        if (isLeftShift && Op0BOOp1->hasOneUse() &&
            match(Op0BOOp1,
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
                        m_ConstantInt(CC)))) {
          Value *YS =   // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1,
                                         Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
        }
      }

      // FALL THROUGH.
      case Instruction::Sub: {
        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =  // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
                                          Op0BO->getOperand(0)->getName());
          uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
          return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                     APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
        }

        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0),
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
                        m_ConstantInt(CC))) && V2 == Op1) {
          Value *YS = // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");

          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
        }

        break;
      }
      }

      // If the operand is a bitwise operator with a constant RHS, and the
      // shift is the only use, we can pull it out of the shift.
      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
        bool isValid = true;     // Valid only for And, Or, Xor
        bool highBitSet = false; // Transform if high bit of constant set?

        switch (Op0BO->getOpcode()) {
        default: isValid = false; break;   // Do not perform transform!
        case Instruction::Add:
          isValid = isLeftShift;
          break;
        case Instruction::Or:
        case Instruction::Xor:
          highBitSet = false;
          break;
        case Instruction::And:
          highBitSet = true;
          break;
        }

        // If this is a signed shift right, and the high bit is modified
        // by the logical operation, do not perform the transformation.
        // The highBitSet boolean indicates the value of the high bit of
        // the constant which would cause it to be modified for this
        // operation.
        //
        if (isValid && I.getOpcode() == Instruction::AShr)
          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;

        if (isValid) {
          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);

          Value *NewShift =
            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
          NewShift->takeName(Op0BO);

          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                        NewRHS);
        }
      }
    }
  }

  // Find out if this is a shift of a shift by a constant.
  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
  if (ShiftOp && !ShiftOp->isShift())
    ShiftOp = 0;

  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {

    // This is a constant shift of a constant shift. Be careful about hiding
    // shl instructions behind bit masks. They are used to represent multiplies
    // by a constant, and it is important that simple arithmetic expressions
    // are still recognizable by scalar evolution.
    //
    // The transforms applied to shl are very similar to the transforms applied
    // to mul by constant. We can be more aggressive about optimizing right
    // shifts.
    //
    // Combinations of right and left shifts will still be optimized in
    // DAGCombine where scalar evolution no longer applies.

    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
    uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
    if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
    Value *X = ShiftOp->getOperand(0);

    IntegerType *Ty = cast<IntegerType>(I.getType());

    // Check for (X << c1) << c2  and  (X >> c1) >> c2
    if (I.getOpcode() == ShiftOp->getOpcode()) {
      uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
      // If this is oversized composite shift, then unsigned shifts get 0, ashr
      // saturates.
      if (AmtSum >= TypeBits) {
        if (I.getOpcode() != Instruction::AShr)
          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
      }

      return BinaryOperator::Create(I.getOpcode(), X,
                                    ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftAmt1 == ShiftAmt2) {
      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X,
                                        ConstantInt::get(I.getContext(), Mask));
      }
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>?,exact C1) << C2 --> X << (C2-C1)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                        X, ShiftDiffCst);
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
        if (ShiftOp->hasNoUnsignedWrap()) {
          BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
                                                           X, ShiftDiffCst);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
                                                           X, ShiftDiffCst);
          NewAShr->setIsExact(I.isExact());
          return NewAShr;
        }
      }
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
                                                        X, ShiftDiffCst);
        NewShr->setIsExact(true);
        return NewShr;
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        if (ShiftOp->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoSignedWrap(true);
          return NewShl;
        }
      }
    }
  }
  return 0;
}
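
Two of the identities exercised above, spot-checked in plain C++ (standalone sanity checks under the assumption that the shift amounts stay in range; this is not InstCombine code):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  // (X << C1) << C2 == X << (C1 + C2), provided C1 + C2 < 32.
  assert(((X << 3) << 5) == (X << 8));
  // ((X << C) >> C) == X & (~0u >> C) for logical shifts.
  assert(((X << 4) >> 4) == (X & (~0u >> 4)));
  return 0;
}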
Example No. 28
Formula* EncoderPass::makeTraceFormula() {

    MSTimer timer1;
    if(options->printDuration()) {
        timer1.start();
    }
    // Create an empty trace formula
    Formula *formula = new Formula();
    // Prepare the CFG variables
    encoder->prepareControlFlow(targetFun);
    // Pre-process the global variables
    initGlobalVariables();
    // Save the line numbers of the calls to assert
    initAssertCalls();
    
    bool isWeigted = false;
    unsigned oldLine = 0;
    Instruction *lastInstruction = NULL;
    std::vector<ExprPtr> currentConstraits;
    
    // Iterate through the function in topological order and
    // encode each instruction as SMT constraints.
    // @See: eli.thegreenplace.net/2013/09/16/analyzing-function-cfgs-with-llvm/
    ReversePostOrderTraversal<Function*> RPOT(this->targetFun);
    ReversePostOrderTraversal<Function*>::rpo_iterator itb;
    for (itb=RPOT.begin(); itb!=RPOT.end(); ++itb) {
        BasicBlock *bb = *itb;
        // Propagate pointers when we enter a new basic block
        ExprPtr e = ctx->propagatePointers(bb);
        if (e) {
            e->setHard();
            formula->add(e);
        }
        // HFTF: Encode bug-free blocks as hard
        bool doEncodeBB = true;
        if (options->htfUsed()) {
            if(profile->isBugFreeBlock(bb)) {
                doEncodeBB = false;
            }
        }
        // Iterate through the instructions of the basic block
        for (BasicBlock::iterator iti=bb->begin(), eti=bb->end();
             iti!=eti; ++iti) {
            Instruction *i = iti;
            bool doEncodeInst = doEncodeBB;
            // Check the line number
            unsigned line = 0;
            if (MDNode *N = i->getMetadata("dbg")) {
                DILocation Loc(N); 
                line = Loc.getLineNumber();
                // Instruction related to an assert
                if (ctx->isAssertCall(line)) {
                    isWeigted = false;
                    doEncodeInst = true;
                } else {
                    // Instruction with line number
                    // (not related to an assert)
                    isWeigted = true;
                }
            } else {
                // Instruction with no line number
                isWeigted = false;
            }
            // Hardened Trace Formula
            if (options->htfUsed() && !doEncodeInst) {
                isWeigted = false;
            }
            // Encode the instruction in a SMT formula
            ExprPtr expr = NULL;
            switch (i->getOpcode()) {
                case Instruction::Add:
                case Instruction::FAdd:
                case Instruction::Sub:
                case Instruction::FSub:
                case Instruction::Mul:
                case Instruction::FMul:
                case Instruction::UDiv:
                case Instruction::SDiv:
                case Instruction::FDiv:
                case Instruction::URem:
                case Instruction::SRem:
                case Instruction::FRem:
                case Instruction::And:
                case Instruction::Or:
                case Instruction::Xor:
                case Instruction::Shl:
                case Instruction::LShr:
                case Instruction::AShr:
                    expr = encoder->encode(cast<BinaryOperator>(i));
                    break;
                case Instruction::Select:
                    expr = encoder->encode(cast<SelectInst>(i));
                    break;
                case Instruction::PHI:
                    expr = encoder->encode(cast<PHINode>(i));
                    isWeigted = false;
                    break;
                case Instruction::Br:
                    expr = encoder->encode(cast<BranchInst>(i), loops);
                    isWeigted = false;
                    break;
                case Instruction::Switch:
                    expr = encoder->encode(cast<SwitchInst>(i));
                    break;
                case Instruction::ICmp:
                    expr = encoder->encode(cast<ICmpInst>(i));
                    break;
                case Instruction::Call:
                    expr = encoder->encode(cast<CallInst>(i), preCond, postCond);
                    if (!expr) {
                        // Do not encode sniper_x functions.
                        continue;
                    }
                    isWeigted = false;
                    break;
                case Instruction::Alloca:
                    expr = encoder->encode(cast<AllocaInst>(i));
                    if (!expr) {
                        continue;
                    }
                    isWeigted = false;
                    break;
                case Instruction::Store:
                    expr = encoder->encode(cast<StoreInst>(i));
                    break;
                case Instruction::Load:
                    expr = encoder->encode(cast<LoadInst>(i), postCond);
                    break;
                case Instruction::GetElementPtr:
                    expr = encoder->encode(cast<GetElementPtrInst>(i));
                    isWeigted = false;
                    break;
                case Instruction::SExt:
                    expr = encoder->encode(cast<SExtInst>(i));
                    isWeigted = false;
                    break;
                case Instruction::ZExt:
                    expr = encoder->encode(cast<ZExtInst>(i));
                    isWeigted = false;
                    break;
                case Instruction::Ret:
                    expr = encoder->encode(cast<ReturnInst>(i));
                    isWeigted = false;
                    break;
                case Instruction::Unreachable:
                    // Do not encode.
                    continue;
                case Instruction::PtrToInt: {
                    // Instruction added by SNIPER
                    std::string instName = i->getName().str();
                    std::string prefix("sniper_ptrVal");
                    if (!instName.compare(0, prefix.size(), prefix)) {
                        continue;
                    }
                    // NO BREAK!
                }
                case Instruction::VAArg:
                case Instruction::Invoke:
                case Instruction::Trunc:
                case Instruction::FPTrunc:
                case Instruction::FPExt:
                case Instruction::UIToFP:
                case Instruction::SIToFP:
                case Instruction::FPToUI:
                case Instruction::FPToSI:
                case Instruction::IntToPtr:
                case Instruction::BitCast:
                case Instruction::FCmp:
                case Instruction::ExtractElement:
                case Instruction::InsertElement:
                case Instruction::ShuffleVector:
                case Instruction::ExtractValue:
                case Instruction::InsertValue:
                    i->dump();
                    assert("unsupported LLVM instruction!\n");
                    break;
                default:
                    llvm_unreachable("Illegal opcode!");
            }
            assert(expr && "Expression is null!");
            // Atoi checking
            if (isAtoiFunction(i)) {
                isWeigted = false;
            }
            // Instruction with line number
            if (isWeigted) {
                // Add each instruction separately
                if (options->instructionGranularityLevel()) {
                    expr->setInstruction(i);
                    expr->setSoft();
                    formula->add(expr);
                }
                // Pack and add all instructions from
                // the same line number
                else if (options->lineGranularityLevel()) {
                    assert(line>0 && "Illegal line number!");
                    // New line: add the collected constraints to the formula
                    if (line!=oldLine && oldLine!=0) {
                        assert(!currentConstraits.empty() && "No constraints!");
                        assert(lastInstruction && "Instruction is null!");
                        ExprPtr e = Expression::mkAnd(currentConstraits);
                        e->setInstruction(lastInstruction);
                        e->setSoft();
                        formula->add(e);
                        currentConstraits.clear();
                    }
                    currentConstraits.push_back(expr);
                    oldLine = line;
                    lastInstruction = i;
                }
                // Block level
                else if (options->blockGranularityLevel()) {
                    currentConstraits.push_back(expr);
                } else {
                    assert("Unknow granularity level!");
                }
            }
            // Instruction with no line number
            else {
                expr->setHard();
                formula->add(expr);
            }
        }
        // End of basic block iteration
        if (options->blockGranularityLevel()) {
            if (!currentConstraits.empty()) {
                ExprPtr e = Expression::mkAnd(currentConstraits);
                e->setInstruction(lastInstruction);
                e->setSoft();
                formula->add(e);
                currentConstraits.clear();
            }
        }
    }
    // Add the remaining soft constraints to the formula
    if (!currentConstraits.empty()) {
        assert(options->lineGranularityLevel()
               && "Some instructions could not be encoded!");
        assert(lastInstruction && "Instruction is null!");
        ExprPtr e = Expression::mkAnd(currentConstraits);
        e->setInstruction(lastInstruction);
        e->setSoft();
        formula->add(e);
        currentConstraits.clear();
    }
    if(options->printDuration()) {
        timer1.stop("Trace Formula Encoding Time");
    }
    return formula;
}
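
The line-granularity path above batches every soft constraint that shares a source line into one conjunct before it reaches the formula. A minimal sketch of that grouping in isolation (the line/constraint pair representation is a stand-in, not the SNIPER API):

#include <map>
#include <string>
#include <utility>
#include <vector>

// Group per-instruction constraints by source line, as the
// lineGranularityLevel branch does with ExprPtr conjunctions.
std::map<unsigned, std::vector<std::string>>
groupByLine(const std::vector<std::pair<unsigned, std::string>> &Constraints) {
  std::map<unsigned, std::vector<std::string>> ByLine;
  for (const auto &C : Constraints)
    ByLine[C.first].push_back(C.second); // one soft bucket per line number
  return ByLine;
}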
Example No. 29
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
                                     const DataLayout *TD,
                                     const TargetLibraryInfo *TLI) {
  // Trivial replacement.
  if (V == Op)
    return RepOp;

  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return nullptr;

  // If this is a binary operator, try to simplify it with the replaced op.
  if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
    if (B->getOperand(0) == Op)
      return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
    if (B->getOperand(1) == Op)
      return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
  }

  // Same for CmpInsts.
  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
    if (C->getOperand(0) == Op)
      return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
                             TLI);
    if (C->getOperand(1) == Op)
      return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
                             TLI);
  }

  // TODO: We could hand off more cases to instsimplify here.

  // If all operands are constant after substituting Op for RepOp then we can
  // constant fold the instruction.
  if (Constant *CRepOp = dyn_cast<Constant>(RepOp)) {
    // Build a list of all constant operands.
    SmallVector<Constant*, 8> ConstOps;
    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
      if (I->getOperand(i) == Op)
        ConstOps.push_back(CRepOp);
      else if (Constant *COp = dyn_cast<Constant>(I->getOperand(i)))
        ConstOps.push_back(COp);
      else
        break;
    }

    // All operands were constants, fold it.
    if (ConstOps.size() == I->getNumOperands()) {
      if (CmpInst *C = dyn_cast<CmpInst>(I))
        return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
                                               ConstOps[1], TD, TLI);

      if (LoadInst *LI = dyn_cast<LoadInst>(I))
        if (!LI->isVolatile())
          return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);

      return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
                                      ConstOps, TD, TLI);
    }
  }

  return nullptr;
}
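
The constant-substitution path at the end is the interesting one: if replacing Op with a constant makes every operand constant, the whole instruction folds. A toy analogue over a two-operand add (Val and the helper are hypothetical stand-ins, not LLVM types):

#include <optional>

// Toy expression node: either a constant or an opaque value.
struct Val { bool IsConst; int Konst; };

// What does L + R simplify to when occurrences of Op are replaced by Rep?
std::optional<int> simplifyAddWithOpReplaced(const Val *L, const Val *R,
                                             const Val *Op, int Rep) {
  auto asConst = [&](const Val *V) -> std::optional<int> {
    if (V == Op) return Rep;          // the substituted operand becomes Rep
    if (V->IsConst) return V->Konst;  // already constant
    return std::nullopt;              // non-constant: cannot fold
  };
  auto LC = asConst(L), RC = asConst(R);
  if (LC && RC)
    return *LC + *RC;                 // all operands constant: fold the add
  return std::nullopt;
}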
Example No. 30
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
                                                  const SCCNodeSet &SCCNodes) {
  FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
  if (MRB == FMRB_DoesNotAccessMemory)
    // Already perfect!
    return MAK_ReadNone;

  // Definitions with weak linkage may be overridden at linktime with
  // something that writes memory, so treat them like declarations.
  if (F.isDeclaration() || F.mayBeOverridden()) {
    if (AliasAnalysis::onlyReadsMemory(MRB))
      return MAK_ReadOnly;

    // Conservatively assume it writes to memory.
    return MAK_MayWrite;
  }

  // Scan the function body for instructions that may read or write memory.
  bool ReadsMemory = false;
  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
    Instruction *I = &*II;

    // Some instructions can be ignored even if they read or write memory.
    // Detect these now, skipping to the next instruction if one is found.
    CallSite CS(cast<Value>(I));
    if (CS) {
      // Ignore calls to functions in the same SCC, as long as the call sites
      // don't have operand bundles.  Calls with operand bundles are allowed to
      // have memory effects not described by the memory effects of the call
      // target.
      if (!CS.hasOperandBundles() && CS.getCalledFunction() &&
          SCCNodes.count(CS.getCalledFunction()))
        continue;
      FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);

      // If the call doesn't access memory, we're done.
      if (!(MRB & MRI_ModRef))
        continue;

      if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
        // The call could access any memory. If that includes writes, give up.
        if (MRB & MRI_Mod)
          return MAK_MayWrite;
        // If it reads, note it.
        if (MRB & MRI_Ref)
          ReadsMemory = true;
        continue;
      }

      // Check whether all pointer arguments point to local memory, and
      // ignore calls that only access local memory.
      for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
           CI != CE; ++CI) {
        Value *Arg = *CI;
        if (!Arg->getType()->isPtrOrPtrVectorTy())
          continue;

        AAMDNodes AAInfo;
        I->getAAMetadata(AAInfo);
        MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);

        // Skip accesses to local or constant memory as they don't impact the
        // externally visible mod/ref behavior.
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;

        if (MRB & MRI_Mod)
          // Writes non-local memory.  Give up.
          return MAK_MayWrite;
        if (MRB & MRI_Ref)
          // Ok, it reads non-local memory.
          ReadsMemory = true;
      }
      continue;
    } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      // Ignore non-volatile loads from local memory. (Atomic is okay here.)
      if (!LI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(LI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Ignore non-volatile stores to local memory. (Atomic is okay here.)
      if (!SI->isVolatile()) {
        MemoryLocation Loc = MemoryLocation::get(SI);
        if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
          continue;
      }
    } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
      // Ignore vaargs on local memory.
      MemoryLocation Loc = MemoryLocation::get(VI);
      if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
        continue;
    }

    // Any remaining instructions need to be taken seriously!  Check if they
    // read or write memory.
    if (I->mayWriteToMemory())
      // Writes memory.  Just give up.
      return MAK_MayWrite;

    // If this instruction may read memory, remember that.
    ReadsMemory |= I->mayReadFromMemory();
  }

  return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
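
For reference, hedged source-level functions that this classification would plausibly bucket into its three answers (assuming external, non-overridable definitions):

// Touches no memory at all: a candidate for MAK_ReadNone.
int pureAdd(int A, int B) { return A + B; }

// Only reads non-local memory: a candidate for MAK_ReadOnly.
int sumArray(const int *P, int N) {
  int S = 0;
  for (int I = 0; I < N; ++I)
    S += P[I]; // sets ReadsMemory
  return S;
}

// Writes through its argument: MAK_MayWrite, so no attribute is inferred.
void bump(int *P) { *P += 1; }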