/// Rewrites the constant index of every instruction in \p Instrs to the
/// original index taken modulo the number of elements of the vector the
/// instruction operates on (unsigned remainder, same bit width as the
/// original index).
///
/// \param BB      Unused by this function.
/// \param Instrs  Insert/extract-element instructions whose index operand is
///                a ConstantInt (enforced by the cast<> below).
void ConstantInsertExtractElementIndex::fixOutOfRangeConstantIndices(
    BasicBlock &BB, const Instructions &Instrs) const {
  Instructions::const_iterator Cur = Instrs.begin();
  const Instructions::const_iterator End = Instrs.end();
  while (Cur != End) {
    Instruction *Inst = *Cur;
    ++Cur;

    const ConstantInt *OldIdx =
        cast<ConstantInt>(getInsertExtractElementIdx(Inst));
    const APInt &OldVal = OldIdx->getValue();

    // Build the element count with the index's own bit width so that both
    // urem operands have matching widths.
    const APInt ElemCount(OldVal.getBitWidth(), vectorNumElements(Inst));
    const APInt Wrapped = OldVal.urem(ElemCount);

    setInsertExtractElementIdx(Inst,
                               ConstantInt::get(M->getContext(), Wrapped));
  }
}
/// Replaces every insert/extract-element instruction in \p Instrs by an
/// equivalent sequence that goes through memory:
///   - allocate one element-typed slot per vector element,
///   - store the whole source vector into it,
///   - address the requested element with a GEP on the index operand,
///   - for an insert: store the new element and reload the whole vector;
///     for an extract: load just that element.
/// The original instruction's uses are redirected to the result and the
/// instruction is erased.
///
/// \param BB      Unused by this function.
/// \param Instrs  Instructions to rewrite; each must be an InsertElement or
///                an ExtractElement.
void ConstantInsertExtractElementIndex::fixNonConstantVectorIndices(
    BasicBlock &BB, const Instructions &Instrs) const {
  Instructions::const_iterator Cur = Instrs.begin();
  const Instructions::const_iterator End = Instrs.end();
  while (Cur != End) {
    Instruction *Inst = *Cur;
    ++Cur;

    Value *SrcVec = Inst->getOperand(0);
    VectorType *SrcVecTy = cast<VectorType>(SrcVec->getType());
    Type *ScalarTy = SrcVecTy->getElementType();
    Value *DynIdx = getInsertExtractElementIdx(Inst);

    // The slot is accessed both as a whole vector and element-wise, so it
    // gets the larger of the two preferred alignments.
    unsigned ScalarAlign = DL->getPrefTypeAlignment(ScalarTy);
    unsigned VecPrefAlign = DL->getPrefTypeAlignment(SrcVecTy);
    unsigned SlotAlign = ScalarAlign < VecPrefAlign ? VecPrefAlign : ScalarAlign;

    // All new instructions are emitted right before the one being replaced.
    IRBuilder<> Builder(Inst);
    Value *SlotCount = ConstantInt::get(Type::getInt32Ty(M->getContext()),
                                        vectorNumElements(Inst));
    AllocaInst *Slot = Builder.CreateAlloca(ScalarTy, SlotCount);
    Slot->setAlignment(SlotAlign);
    Value *SlotAsVec = Builder.CreateBitCast(Slot, SrcVecTy->getPointerTo());
    Builder.CreateAlignedStore(SrcVec, SlotAsVec, Slot->getAlignment());
    Value *ElemPtr = Builder.CreateGEP(Slot, DynIdx);

    Value *Replacement;
    const unsigned Opcode = Inst->getOpcode();
    if (Opcode == Instruction::InsertElement) {
      // Overwrite one element in memory, then read the full vector back.
      Builder.CreateAlignedStore(Inst->getOperand(1), ElemPtr, ScalarAlign);
      Replacement = Builder.CreateAlignedLoad(SlotAsVec, Slot->getAlignment());
    } else if (Opcode == Instruction::ExtractElement) {
      Replacement = Builder.CreateAlignedLoad(ElemPtr, ScalarAlign);
    } else {
      llvm_unreachable("expected InsertElement or ExtractElement");
    }

    Inst->replaceAllUsesWith(Replacement);
    Inst->eraseFromParent();
  }
}
// Example no. 3
// 0
// Rewrites F so that it can suspend at each call listed in AsyncCalls and be
// resumed later through a callback function created per call site.
//
// The transformation runs in five passes:
//   Pass 0: record every ReturnInst currently present in F.
//   Pass 1: for each async call, split its block right after the call, insert
//           a call to AllocAsyncCtxFunction before it and a CheckAsyncFunction
//           test after it, and create a placeholder "SaveAsyncCtx" block and
//           an empty callback function.
//   Pass 2: for each async call, find the context variables, build the
//           context struct type, patch the allocation size and fill in the
//           SaveAsyncCtx block.
//   Pass 3: for each async call, clone F into its callback function and
//           rewire the clone so that it starts at the block after the call.
//   Pass 4: insert FreeAsyncCtxFunction calls into the original F only.
//
// F          - the function to transform; it is modified in place.
// AsyncCalls - the async call instructions to handle; must not be empty.
//              Presumably every entry is a call located inside F; this is
//              not checked here.
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
  assert(!AsyncCalls.empty());

  // Pass 0
  // Collect all the return instructions of the original function now, before
  // Pass 2 adds extra returns in the SaveAsyncCtx blocks. Pass 3 uses this
  // list to find the clones of the "real" returns.
  std::vector<ReturnInst*> OrigReturns;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
      OrigReturns.push_back(RI);
    }
  }

  // Pass 1
  // Scan each async call and make the basic structure.
  // All of this will be cloned into the callback functions:
  // - allocate the async context before calling an async function
  // - check async right after calling an async function; save context & return if async, continue if not
  // - retrieve the async return value and free the async context if the called function turns out to be sync
  std::vector<AsyncCallEntry> AsyncCallEntries;
  AsyncCallEntries.reserve(AsyncCalls.size());
  for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
    // prepare blocks
    Instruction *CurAsyncCall = *I;

    // The block containing the async call
    BasicBlock *CurBlock = CurAsyncCall->getParent();
    // The block that should run after the async call
    BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
    // The block where we store the context and return
    BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
    // Terminate the block with a placeholder `unreachable` so that it is valid;
    // Pass 2 removes it again and fills in the real contents.
    new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);

    // Allocate the context before making the call.
    // We don't know the size yet (a null constant is passed); Pass 2 fixes it.
    // We cannot insert the instruction later because
    // all the instructions and blocks must be fixed before we can generate DT and find context variables.
    // In CallHandler.h `sp` will be put as the second parameter
    // such that we can take a note of the original sp
    CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);

    // Right after the call
    // check async and return if so
    // TODO: we can define truly async functions and partial async functions
    {
      // remove old terminator, which came from SplitBlock
      CurBlock->getTerminator()->eraseFromParent();
      // go to SaveAsyncCtxBlock if the previous call is async (successor 0),
      // otherwise just continue to AfterCallBlock (successor 1)
      CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
      BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
    }

    // take a note of this async call
    AsyncCallEntry CurAsyncCallEntry;
    CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
    CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
    CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
    CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
    // create an empty function for the callback, which will be constructed later (Pass 3)
    CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
    AsyncCallEntries.push_back(CurAsyncCallEntry);
  }


  // Pass 2
  // Analyze the context variables and construct SaveAsyncCtxBlock for each async call.
  // Also calculate the size of the context and allocate the async context accordingly.
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    // Collect everything to be saved (fills CurEntry.ContextVariables)
    FindContextVariables(CurEntry);

    // Pack the variables as a struct: field 0 is a pointer to the callback
    // function type, fields 1..N are the context variables in order.
    {
      // TODO: sort them from large members to small ones, in order to make the struct compact even when aligned
      SmallVector<Type*, 8> Types;
      Types.push_back(CallbackFunctionType->getPointerTo());
      for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
        Types.push_back((*VI)->getType());
      }
      CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
    }

    // fix the size of allocation: replace the placeholder operand from Pass 1
    // with the store size of the context struct
    CurEntry.AllocAsyncCtxInst->setOperand(0, 
        ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));

    // construct SaveAsyncCtxBlock
    {
      // fill in SaveAsyncCtxBlock
      // temporarily remove the terminator for convenience
      // (this is the placeholder `unreachable` from Pass 1)
      CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
      assert(CurEntry.SaveAsyncCtxBlock->empty());

      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
      SmallVector<Value*, 2> Indices;
      // store the callback into field 0
      // NOTE(review): the first argument passed to GetElementPtrInst::Create
      // here (and in the two similar calls further down) is the *pointer*
      // type AsyncCtxAddrTy. If this overload expects the pointee type, it
      // should probably be CurEntry.ContextStructType -- confirm against the
      // LLVM version in use.
      {
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, 0));
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // store the context variables into fields 1..N
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }
      // to exit the block, we want to return without unwinding the stack frame;
      // a non-void F returns a null value of its return type here
      CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
      ReturnInst::Create(TheModule->getContext(), 
          (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
          CurEntry.SaveAsyncCtxBlock);
    }
  }

  // Pass 3
  // Now all the SaveAsyncCtxBlocks have been constructed,
  // so we can clone F and construct the callback functions.
  // We could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlocks to appear in _each_ callback.
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    Function *CurCallbackFunc = CurEntry.CallbackFunc;
    ValueToValueMapTy VMap;

    // Add the entry block:
    // load variables from the context
    // and update VMap for CloneFunction.
    // LoadedAsyncVars[i] is the load of ContextVariables[i].
    BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
    std::vector<LoadInst *> LoadedAsyncVars;
    {
      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      // the callback's first argument is treated as the address of the saved context
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
      SmallVector<Value*, 2> Indices;
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", EntryBlock);
        LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
        // we want the argument to be replaced by the loaded value
        if (isa<Argument>(CurEntry.ContextVariables[i]))
          VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
      }
    }

    // Arguments of F that were not saved in the context are not needed;
    // map them to null values just to satisfy CloneFunctionInto.
    for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
      if (VMap.count(AI) == 0)
        VMap[AI] = Constant::getNullValue(AI->getType());
    }

    // Clone the function
    {
      SmallVector<ReturnInst*, 8> Returns;
      CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);
      
      // the return type of the callback functions is always void,
      // so the cloned returns need to be fixed
      if (!F.getReturnType()->isVoidTy()) {
        // Return instructions that come from the original function (Pass 0)
        // mean we are 'truly' leaving this function,
        // so the return value must be stored right before the return.
        for (size_t i = 0; i < OrigReturns.size(); ++i) {
          ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
          // Need to store the return value into the global area
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
          new StoreInst(RI->getOperand(0), RetValAddr, RI);
        }
        // We want to unwind the stack back to where it was before the original function was called,
        // but we don't actually need to do this here:
        // at this point it must be true that no callback is pending,
        // so the scheduler will correct the stack pointer and pop the frame.
        // Here we just fix the return type: every cloned return becomes `ret void`.
        for (size_t i = 0; i < Returns.size(); ++i) {
          ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
        }
      }
    }

    // the callback function does not have any return value,
    // so clear all the return attributes
    {
      AttributeSet Attrs = CurCallbackFunc->getAttributes();
      CurCallbackFunc->setAttributes(
        Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
      );
    }

    // In the callback function we never allocate a new async frame;
    // instead we reuse the existing one: every cloned AllocAsyncCtx call is
    // replaced by a ReallocAsyncCtxFunction call with the same size operand.
    // NOTE: this loop's EI/EE shadow the enclosing Pass 3 loop variables.
    for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
      Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
      ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
    }

    // mapped entry point & async call (the clones of this entry's AfterCallBlock and call)
    BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
    Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
   
    // To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler
    // TODO need an option for this
    {
      // NOTE: EI/EE and CurEntry below shadow the enclosing Pass 3 loop's
      // variables; inside this loop CurEntry is the entry being iterated,
      // not the entry whose callback is being built.
      for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
        AsyncCallEntry & CurEntry = *EI;
        Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
        BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
        BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);

        // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock);
        // successor 1 is the "not async" edge of the branch created in Pass 1
        BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
        MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
        // store the return value (only if the call result is used at all)
        if (!MappedAsyncCallInst->use_empty()) {
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
          new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
        }
        // tell the scheduler that we want to keep the current async stack frame
        CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
        // finally we go to the SaveAsyncCtxBlock, to register the callback, save the local variables and leave
        BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
        BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
      }
    }

    // allocas created by DemoteRegToStack below; promoted back to registers at the end
    std::vector<AllocaInst*> ToPromote;
    // applying loaded variables in the entry block
    {
      BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Value *OrigVar = CurEntry.ContextVariables[i];
        if (isa<Argument>(OrigVar)) continue; // already processed (mapped through VMap before cloning)
        Value *CurVar = VMap[OrigVar];
        assert(CurVar != MappedAsyncCall);
        if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
          if (ReachableBlocks.count(Inst->getParent())) {
            // Inst could be either defined or loaded from the async context.
            // Do the dirty work in memory: demote it to a stack slot and
            // store the loaded value into that slot from the entry block.
            // TODO: might need to check the safety first
            // TODO: can we create phi directly?
            AllocaInst *Addr = DemoteRegToStack(*Inst, false);
            new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
            ToPromote.push_back(Addr);
          } else {
            // The parent block is not reachable, which means there is no conflict;
            // it's safe to replace Inst with the loaded value
            assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
            Inst->replaceAllUsesWith(LoadedAsyncVars[i]); 
          }
        }
      }
    }

    // Resolve the return value of the previous async function:
    // it could be the value just loaded from the global area
    // or directly returned by the function (in its sync case)
    if (!CurEntry.AsyncCallInst->use_empty()) {
      // load the async return value
      CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
      BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
      LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);

      // same demote-and-store trick as for the context variables above
      AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
      new StoreInst(RetVal, Addr, EntryBlock);
      ToPromote.push_back(Addr);
    }

    // TODO remove unreachable blocks before creating phi
   
    // We go right to ResumeBlock from the EntryBlock
    BranchInst::Create(ResumeBlock, EntryBlock);
   
    // Creating phis
    // Normal stack frames and async stack frames are interleaved with each other.
    // In a callback function, if we call an async function, we might need to realloc the async ctx.
    // At that point we don't want anything stored after the ctx,
    // so that we can free and extend the ctx by simply updating STACKTOP.
    // Therefore we don't want any allocas in callback functions:
    // the slots created above are promoted back to SSA values here.
    if (!ToPromote.empty()) {
      DominatorTreeWrapperPass DTW;
      DTW.runOnFunction(*CurCallbackFunc);
      PromoteMemToReg(ToPromote, DTW.getDomTree());
    }

    removeUnreachableBlocks(*CurCallbackFunc);
  }

  // Pass 4
  // Here are modifications to the original function, which we don't want to be cloned into the callback functions
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end();  EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;
    // remove the frame if no async function has been called:
    // free the context at the start of the block that follows the call
    CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
  }
}
// Example no. 4
// 0
void linkInstructionBranches(Instructions &instructions) {
	/* Walk over every instruction and connect it to its neighbours in the
	 * control flow graph.
	 *
	 * For each instruction this
	 *   - sets its follower, i.e. the instruction executed next when no
	 *     branch is taken (and registers itself as that one's predecessor),
	 *   - fills its branches array with every destination it may branch to,
	 *   - tags the destinations (and the instruction after a branch) with an
	 *     address type: subroutine start, store-state target, jump label or
	 *     tail of a jump.
	 *
	 * Throws a Common::Exception if a branch destination cannot be found or
	 * a conditional branch has no follower.
	 */

	for (Instructions::iterator it = instructions.begin(); it != instructions.end(); ++it) {
		Instruction &cur = *it;

		const bool isJump       = cur.opcode == kOpcodeJMP;
		const bool isCall       = cur.opcode == kOpcodeJSR;
		const bool isStoreState = cur.opcode == kOpcodeSTORESTATE;
		const bool isCondJump   = (cur.opcode == kOpcodeJZ) || (cur.opcode == kOpcodeJNZ);

		// Everything except JMP and RETN falls through to the next instruction
		if (!isJump && (cur.opcode != kOpcodeRETN)) {
			Instructions::iterator next = it;
			++next;

			if (next != instructions.end()) {
				cur.follower = &*next;
				next->predecessors.push_back(&cur);
			} else
				cur.follower = 0;
		}

		// Unconditional branches: JMP, JSR and STORESTATE
		if (isJump || isCall || isStoreState) {
			assert((isStoreState && (cur.argCount == 3)) || (cur.argCount == 1));

			// The first argument is an offset relative to this instruction's address
			Instruction *target = findInstruction(instructions, cur.address + cur.args[0]);
			if (!target)
				throw Common::Exception("Can't find destination of unconditional branch");

			cur.branches.push_back(target);

			if (isCall) {
				setAddressType(target, kAddressTypeSubRoutine);
			} else if (isStoreState) {
				setAddressType(target, kAddressTypeStoreState);
			} else {
				// Only a plain JMP makes this instruction a predecessor of its target
				setAddressType(target, kAddressTypeJumpLabel);
				target->predecessors.push_back(&cur);
			}

			setAddressType(const_cast<Instruction *>(cur.follower), kAddressTypeTail);
		}

		// Conditional branches: JZ and JNZ
		if (isCondJump) {
			assert(cur.argCount == 1);

			if (!cur.follower)
				throw Common::Exception("Conditional branch has no false destination");

			Instruction *target = findInstruction(instructions, cur.address + cur.args[0]);
			if (!target)
				throw Common::Exception("Can't find destination of conditional branch");

			setAddressType(target, kAddressTypeJumpLabel);

			setAddressType(const_cast<Instruction *>(cur.follower), kAddressTypeTail);

			// Index 0 is the taken (true) branch, index 1 the fall-through (false) one
			cur.branches.push_back(target);
			cur.branches.push_back(cur.follower);

			target->predecessors.push_back(&cur);
		}
	}
}