Exemplo n.º 1
0
    void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc)
    {
        // get list of active SO buffers
        std::unordered_set<uint32_t> activeSOBuffers;
        for (uint32_t d = 0; d < streamState.numDecls; ++d)
        {
            const STREAMOUT_DECL& decl = streamState.decl[d];
            activeSOBuffers.insert(decl.bufferIndex);
        }

        // always increment numPrimStorageNeeded
        Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
        numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
        STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });

        // check OOB on active SO buffers.  If any buffer is out of bound, don't write
        // the primitive to any buffer
        Value* oobMask = C(false);
        for (uint32_t buffer : activeSOBuffers)
        {
            oobMask = OR(oobMask, oob(state, pSoCtx, buffer));
        }

        BasicBlock* validBB = BasicBlock::Create(JM()->mContext, "valid", soFunc);

        // early out if OOB
        COND_BR(oobMask, returnBB, validBB);

        IRB()->SetInsertPoint(validBB);

        Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
        numPrimsWritten = ADD(numPrimsWritten, C(1));
        STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });

        // compute start pointer for each output buffer
        Value* pOutBuffer[4];
        Value* pOutBufferStartVertex[4];
        Value* outBufferPitch[4];
        for (uint32_t b: activeSOBuffers)
        {
            Value* pBuf = getSOBuffer(pSoCtx, b);
            Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer });
            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
            pOutBuffer[b] = GEP(pData, streamOffset);
            pOutBufferStartVertex[b] = pOutBuffer[b];

            outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
        }

        // loop over the vertices of the prim
        Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData });
        for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
        {
            buildVertex(streamState, pStreamData, pOutBuffer);

            // increment stream and output buffer pointers
            // stream verts are always 32*4 dwords apart
            pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4));

            // output buffers offset using pitch in buffer state
            for (uint32_t b : activeSOBuffers)
            {
                pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
                pOutBuffer[b] = pOutBufferStartVertex[b];
            }
        }

        // update each active buffer's streamOffset
        for (uint32_t b : activeSOBuffers)
        {
            Value* pBuf = getSOBuffer(pSoCtx, b);
            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
            streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
            STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
        }
    }
Exemplo n.º 2
0
  /// Replaces every `br` instruction in \p Func with a load from a table of
  /// block addresses followed by an `indirectbr`.
  ///
  /// - Conditional branch: a private two-entry table {false target, true
  ///   target} is created and indexed by the zero-extended condition.
  /// - Unconditional branch whose target is present in `indexmap` and for
  ///   which the module has an "IndirectBranchingGlobalTable" global: the
  ///   address is loaded from that global at the recorded index.
  /// - Any other unconditional branch: a private one-entry table is created
  ///   and indexed with 0.
  ///
  /// `indexmap` and the global table are populated outside this method; this
  /// method only reads them (NOTE(review): presumably in the pass's
  /// initialization step -- confirm).
  ///
  /// \returns true if at least one branch was rewritten, false otherwise.
  bool runOnFunction(Function &Func) override {
    if (Func.isDeclaration()) {
      return false;
    }
    auto *M = Func.getParent();
    auto &Ctx = M->getContext();
    auto *I32Ty = Type::getInt32Ty(Ctx);

    // Collect the branches up front: ReplaceInstWithInst below removes each
    // branch, so the function must not be mutated while it is being walked.
    vector<BranchInst *> BIs;
    for (inst_iterator I = inst_begin(Func), E = inst_end(Func); I != E; ++I) {
      if (BranchInst *BI = dyn_cast<BranchInst>(&*I)) {
        BIs.push_back(BI);
      }
    }
    if (BIs.empty()) {
      return false; // Nothing to rewrite, so the IR is unchanged.
    }

    Value *zero = ConstantInt::get(I32Ty, 0);
    for (BranchInst *BI : BIs) {
      IRBuilder<> IRB(BI);
      const bool IsConditional = BI->isConditional();

      // The table index is the condition's value: false == 0, true == 1.
      // Hence the false successor (1) is placed before the true successor (0).
      vector<BasicBlock *> BBs;
      if (IsConditional) {
        BBs.push_back(BI->getSuccessor(1));
      }
      BBs.push_back(BI->getSuccessor(0));

      GlobalVariable *LoadFrom = nullptr;
      Value *index = nullptr;
      if (IsConditional) {
        index = IRB.CreateZExt(BI->getCondition(), I32Ty);
      } else {
        // Look the target up exactly once and never through operator[]:
        // operator[] would insert a zero entry for an unknown block, making a
        // later branch to the same block wrongly read slot 0 of the global
        // table.
        auto Known = indexmap.find(BI->getSuccessor(0));
        if (Known != indexmap.end()) {
          LoadFrom = M->getGlobalVariable("IndirectBranchingGlobalTable", true);
          if (LoadFrom != nullptr) {
            index = ConstantInt::get(I32Ty, Known->second);
          }
        }
        if (LoadFrom == nullptr) {
          // Unknown target or missing global table: use the private
          // one-entry table created below.
          index = zero;
        }
      }

      if (LoadFrom == nullptr) {
        // Build a private constant array holding this branch's targets.
        vector<Constant *> BlockAddresses;
        for (BasicBlock *BB : BBs) {
          BlockAddresses.push_back(BlockAddress::get(BB));
        }
        ArrayType *AT = ArrayType::get(Type::getInt8PtrTy(Ctx), BBs.size());
        Constant *BlockAddressArray =
            ConstantArray::get(AT, ArrayRef<Constant *>(BlockAddresses));
        LoadFrom = new GlobalVariable(*M, AT, false,
                                      GlobalValue::LinkageTypes::PrivateLinkage,
                                      BlockAddressArray);
      }

      Value *GEP = IRB.CreateGEP(LoadFrom, {zero, index});
      LoadInst *LI = IRB.CreateLoad(GEP, "IndirectBranchingTargetAddress");
      IndirectBrInst *indirBr = IndirectBrInst::Create(LI, BBs.size());
      for (BasicBlock *BB : BBs) {
        indirBr->addDestination(BB);
      }
      ReplaceInstWithInst(BI, indirBr);
    }
    return true;
  }