bool ConstantInsertExtractElementIndex::runOnBasicBlock(BasicBlock &BB) {
  bool Changed = false;
  if (!DL)
    DL = &getAnalysis<DataLayoutPass>().getDataLayout();
  Instructions OutOfRangeConstantIndices;
  Instructions NonConstantVectorIndices;

  findNonConstantInsertExtractElements(BB, OutOfRangeConstantIndices,
                                       NonConstantVectorIndices);
  if (!OutOfRangeConstantIndices.empty()) {
    Changed = true;
    fixOutOfRangeConstantIndices(BB, OutOfRangeConstantIndices);
  }
  if (!NonConstantVectorIndices.empty()) {
    Changed = true;
    fixNonConstantVectorIndices(BB, NonConstantVectorIndices);
  }
  return Changed;
}
Beispiel #2
0
void constructBlocks(Blocks &blocks, Instructions &instructions) {
	/* Create the first block containing the very first instruction in this script.
	 * Then follow the complete code flow from this instruction onwards. */

	assert(blocks.empty());
	if (instructions.empty())
		return;

	blocks.push_back(Block(instructions.front().address));
	constructBlocks(blocks, blocks.back(), instructions.front());
}
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
  assert(!AsyncCalls.empty());

  // Pass 0
  // collect all the return instructions from the original function
  // will use later
  std::vector<ReturnInst*> OrigReturns;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
      OrigReturns.push_back(RI);
    }
  }

  // Pass 1
  // Scan each async call and make the basic structure:
  // All these will be cloned into the callback functions
  // - allocate the async context before calling an async function
  // - check async right after calling an async function, save context & return if async, continue if not
  // - retrieve the async return value and free the async context if the called function turns out to be sync
  std::vector<AsyncCallEntry> AsyncCallEntries;
  AsyncCallEntries.reserve(AsyncCalls.size());
  for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
    // prepare blocks
    Instruction *CurAsyncCall = *I;

    // The block containing the async call
    BasicBlock *CurBlock = CurAsyncCall->getParent();
    // The block should run after the async call
    BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
    // The block where we store the context and return
    BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
    // return a dummy value at the end, to make the block valid
    new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);

    // allocate the context before making the call
    // we don't know the size yet, will fix it later
    // we cannot insert the instruction later because,
    // we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables
    // In CallHandler.h `sp` will be put as the second parameter
    // such that we can take a note of the original sp 
    CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);

    // Right after the call
    // check async and return if so
    // TODO: we can define truly async functions and partial async functions
    {
      // remove old terminator, which came from SplitBlock
      CurBlock->getTerminator()->eraseFromParent();
      // go to SaveAsyncCtxBlock if the previous call is async
      // otherwise just continue to AfterCallBlock
      CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
      BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
    }

    // take a note of this async call
    AsyncCallEntry CurAsyncCallEntry;
    CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
    CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
    CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
    CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
    // create an empty function for the callback, which will be constructed later
    CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
    AsyncCallEntries.push_back(CurAsyncCallEntry);
  }


  // Pass 2
  // analyze the context variables and construct SaveAsyncCtxBlock for each async call
  // also calculate the size of the context and allocate the async context accordingly
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    // Collect everything to be saved
    FindContextVariables(CurEntry);

    // Pack the variables as a struct
    {
      // TODO: sort them from large memeber to small ones, in order to make the struct compact even when aligned
      SmallVector<Type*, 8> Types;
      Types.push_back(CallbackFunctionType->getPointerTo());
      for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
        Types.push_back((*VI)->getType());
      }
      CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
    }

    // fix the size of allocation
    CurEntry.AllocAsyncCtxInst->setOperand(0, 
        ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));

    // construct SaveAsyncCtxBlock
    {
      // fill in SaveAsyncCtxBlock
      // temporarily remove the terminator for convenience
      CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
      assert(CurEntry.SaveAsyncCtxBlock->empty());

      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
      SmallVector<Value*, 2> Indices;
      // store the callback
      {
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, 0));
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // store the context variables
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // to exit the block, we want to return without unwinding the stack frame
      CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
      ReturnInst::Create(TheModule->getContext(), 
          (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
          CurEntry.SaveAsyncCtxBlock);
    }
  }

  // Pass 3
  // now all the SaveAsyncCtxBlock's have been constructed
  // we can clone F and construct callback functions 
  // we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    Function *CurCallbackFunc = CurEntry.CallbackFunc;
    ValueToValueMapTy VMap;

    // Add the entry block
    // load variables from the context
    // also update VMap for CloneFunction
    BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
    std::vector<LoadInst *> LoadedAsyncVars;
    {
      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
      SmallVector<Value*, 2> Indices;
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", EntryBlock);
        LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
        // we want the argument to be replaced by the loaded value
        if (isa<Argument>(CurEntry.ContextVariables[i]))
          VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
      }
    }

    // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto
    for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
      if (VMap.count(AI) == 0)
        VMap[AI] = Constant::getNullValue(AI->getType());
    }

    // Clone the function
    {
      SmallVector<ReturnInst*, 8> Returns;
      CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);
      
      // return type of the callback functions is always void
      // need to fix the return type
      if (!F.getReturnType()->isVoidTy()) {
        // for those return instructions that are from the original function
        // it means we are 'truly' leaving this function
        // need to store the return value right before ruturn
        for (size_t i = 0; i < OrigReturns.size(); ++i) {
          ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
          // Need to store the return value into the global area
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
          new StoreInst(RI->getOperand(0), RetValAddr, RI);
        }
        // we want to unwind the stack back to where it was before the original function as called
        // but we don't actually need to do this here
        // at this point it must be true that no callback is pended
        // so the scheduler will correct the stack pointer and pop the frame
        // here we just fix the return type
        for (size_t i = 0; i < Returns.size(); ++i) {
          ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
        }
      }
    }

    // the callback function does not have any return value
    // so clear all the attributes for return
    {
      AttributeSet Attrs = CurCallbackFunc->getAttributes();
      CurCallbackFunc->setAttributes(
        Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
      );
    }

    // in the callback function, we never allocate a new async frame
    // instead we reuse the existing one
    for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
      Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
      ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
    }

    // mapped entry point & async call
    BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
    Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
   
    // To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler
    // TODO need an option for this
    {
      for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
        AsyncCallEntry & CurEntry = *EI;
        Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
        BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
        BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);

        // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock)
        BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
        MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
        // store the return value
        if (!MappedAsyncCallInst->use_empty()) {
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
          new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
        }
        // tell the scheduler that we want to keep the current async stack frame
        CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
        // finally we go to the SaveAsyncCtxBlock, to register the callbac, save the local variables and leave
        BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
        BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
      }
    }

    std::vector<AllocaInst*> ToPromote;
    // applying loaded variables in the entry block
    {
      BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Value *OrigVar = CurEntry.ContextVariables[i];
        if (isa<Argument>(OrigVar)) continue; // already processed
        Value *CurVar = VMap[OrigVar];
        assert(CurVar != MappedAsyncCall);
        if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
          if (ReachableBlocks.count(Inst->getParent())) {
            // Inst could be either defined or loaded from the async context
            // Do the dirty works in memory
            // TODO: might need to check the safety first
            // TODO: can we create phi directly?
            AllocaInst *Addr = DemoteRegToStack(*Inst, false);
            new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
            ToPromote.push_back(Addr);
          } else {
            // The parent block is not reachable, which means there is no confliction
            // it's safe to replace Inst with the loaded value
            assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
            Inst->replaceAllUsesWith(LoadedAsyncVars[i]); 
          }
        }
      }
    }

    // resolve the return value of the previous async function
    // it could be the value just loaded from the global area
    // or directly returned by the function (in its sync case)
    if (!CurEntry.AsyncCallInst->use_empty()) {
      // load the async return value
      CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
      BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
      LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);

      AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
      new StoreInst(RetVal, Addr, EntryBlock);
      ToPromote.push_back(Addr);
    }

    // TODO remove unreachable blocks before creating phi
   
    // We go right to ResumeBlock from the EntryBlock
    BranchInst::Create(ResumeBlock, EntryBlock);
   
    /*
     * Creating phi's
     * Normal stack frames and async stack frames are interleaving with each other.
     * In a callback function, if we call an async function, we might need to realloc the async ctx.
     * at this point we don't want anything stored after the ctx, 
     * such that we can free and extend the ctx by simply update STACKTOP.
     * Therefore we don't want any alloca's in callback functions.
     *
     */
    if (!ToPromote.empty()) {
      DominatorTreeWrapperPass DTW;
      DTW.runOnFunction(*CurCallbackFunc);
      PromoteMemToReg(ToPromote, DTW.getDomTree());
    }

    removeUnreachableBlocks(*CurCallbackFunc);
  }

  // Pass 4
  // Here are modifications to the original function, which we won't want to be cloned into the callback functions
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;
    // remove the frame if no async functinon has been called
    CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
  }
}
// Input:
// - SExtInsts contains all the sext instructions that are used directly in
//   GetElementPtrInst, i.e., access to memory.
// Algorithm:
// - For each sext operation in SExtInsts:
//   Let var be the operand of sext.
//   while it is profitable (see shouldGetThrough), legal, and safe
//   (see canGetThrough) to move sext through var's definition:
//   * promote the type of var's definition.
//   * fold var into sext uses.
//   * move sext above var's definition.
//   * update sext operand to use the operand of var that should be sign
//     extended (by construction there is only one).
//
//   E.g.,
//   a = ... i32 c, 3
//   b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a'
//   ...
//   = b
// => Yes, update the code
//   b = sext i32 c to i64
//   a = ... i64 b, 3
//   ...
//   = a
// Iterate on 'c'.
bool
AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
  DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");

  bool LocalChange = false;
  SetOfInstructions ToRemove;
  ValueToInsts ValToSExtendedUses;
  while (!SExtInsts.empty()) {
    // Get through simple chain.
    Instruction *SExt = SExtInsts.pop_back_val();

    DEBUG(dbgs() << "Consider:\n" << *SExt << '\n');

    // If this SExt has already been merged continue.
    if (SExt->use_empty() && ToRemove.count(SExt)) {
      DEBUG(dbgs() << "No uses => marked as delete\n");
      continue;
    }

    // Now try to get through the chain of definitions.
    while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) {
      DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
      if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
        // We cannot get through something that is not an Instruction
        // or not safe to SExt.
        DEBUG(dbgs() << "Cannot get through\n");
        break;
      }

      LocalChange = true;
      // If this is a sign extend, it becomes useless.
      if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) {
        DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n");
        // We cannot use replaceAllUsesWith here because we may trigger some
        // assertion on the type as all involved sext operation may have not
        // been moved yet.
        while (!Inst->use_empty()) {
          Use &U = *Inst->use_begin();
          Instruction *User = dyn_cast<Instruction>(U.getUser());
          assert(User && "User of sext is not an Instruction!");
          User->setOperand(U.getOperandNo(), SExt);
        }
        ToRemove.insert(Inst);
        SExt->setOperand(0, Inst->getOperand(0));
        SExt->moveBefore(Inst);
        continue;
      }

      // Get through the Instruction:
      // 1. Update its type.
      // 2. Replace the uses of SExt by Inst.
      // 3. Sign extend each operand that needs to be sign extended.

      // Step #1.
      Inst->mutateType(SExt->getType());
      // Step #2.
      SExt->replaceAllUsesWith(Inst);
      // Step #3.
      Instruction *SExtForOpnd = SExt;

      DEBUG(dbgs() << "Propagate SExt to operands\n");
      for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx;
           ++OpIdx) {
        DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n');
        if (Inst->getOperand(OpIdx)->getType() == SExt->getType() ||
            !shouldSExtOperand(Inst, OpIdx)) {
          DEBUG(dbgs() << "No need to propagate\n");
          continue;
        }
        // Check if we can statically sign extend the operand.
        Value *Opnd = Inst->getOperand(OpIdx);
        if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
          DEBUG(dbgs() << "Statically sign extend\n");
          Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(),
                                                         Cst->getSExtValue()));
          continue;
        }
        // UndefValue are typed, so we have to statically sign extend them.
        if (isa<UndefValue>(Opnd)) {
          DEBUG(dbgs() << "Statically sign extend\n");
          Inst->setOperand(OpIdx, UndefValue::get(SExt->getType()));
          continue;
        }

        // Otherwise we have to explicity sign extend it.
        assert(SExtForOpnd &&
               "Only one operand should have been sign extended");

        SExtForOpnd->setOperand(0, Opnd);

        DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n");
        // Move the sign extension before the insertion point.
        SExtForOpnd->moveBefore(Inst);
        Inst->setOperand(OpIdx, SExtForOpnd);
        // If more sext are required, new instructions will have to be created.
        SExtForOpnd = nullptr;
      }
      if (SExtForOpnd == SExt) {
        DEBUG(dbgs() << "Sign extension is useless now\n");
        ToRemove.insert(SExt);
        break;
      }
    }

    // If the use is already of the right type, connect its uses to its argument
    // and delete it.
    // This can happen for an Instruction all uses of which are sign extended.
    if (!ToRemove.count(SExt) &&
        SExt->getType() == SExt->getOperand(0)->getType()) {
      DEBUG(dbgs() << "Sign extension is useless, attach its use to "
                      "its argument\n");
      SExt->replaceAllUsesWith(SExt->getOperand(0));
      ToRemove.insert(SExt);
    } else
      ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt);
  }

  if (EnableMerge)
    mergeSExts(ValToSExtendedUses, ToRemove);

  // Remove all instructions marked as ToRemove.
  for (Instruction *I: ToRemove)
    I->eraseFromParent();
  return LocalChange;
}