void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc) { // get list of active SO buffers std::unordered_set<uint32_t> activeSOBuffers; for (uint32_t d = 0; d < streamState.numDecls; ++d) { const STREAMOUT_DECL& decl = streamState.decl[d]; activeSOBuffers.insert(decl.bufferIndex); } // always increment numPrimStorageNeeded Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded }); numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1)); STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded }); // check OOB on active SO buffers. If any buffer is out of bound, don't write // the primitive to any buffer Value* oobMask = C(false); for (uint32_t buffer : activeSOBuffers) { oobMask = OR(oobMask, oob(state, pSoCtx, buffer)); } BasicBlock* validBB = BasicBlock::Create(JM()->mContext, "valid", soFunc); // early out if OOB COND_BR(oobMask, returnBB, validBB); IRB()->SetInsertPoint(validBB); Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten }); numPrimsWritten = ADD(numPrimsWritten, C(1)); STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten }); // compute start pointer for each output buffer Value* pOutBuffer[4]; Value* pOutBufferStartVertex[4]; Value* outBufferPitch[4]; for (uint32_t b: activeSOBuffers) { Value* pBuf = getSOBuffer(pSoCtx, b); Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer }); Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset }); pOutBuffer[b] = GEP(pData, streamOffset); pOutBufferStartVertex[b] = pOutBuffer[b]; outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch }); } // loop over the vertices of the prim Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData }); for (uint32_t v = 0; v < state.numVertsPerPrim; ++v) { buildVertex(streamState, pStreamData, pOutBuffer); // increment stream and 
output buffer pointers // stream verts are always 32*4 dwords apart pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4)); // output buffers offset using pitch in buffer state for (uint32_t b : activeSOBuffers) { pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]); pOutBuffer[b] = pOutBufferStartVertex[b]; } } // update each active buffer's streamOffset for (uint32_t b : activeSOBuffers) { Value* pBuf = getSOBuffer(pSoCtx, b); Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset }); streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b])); STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset }); } }
/// Replaces every BranchInst in @p Func with an `indirectbr` whose target
/// address is loaded from a table of block addresses.
///
/// - Conditional branches get a private two-entry table {false-target,
///   true-target}, indexed by the zero-extended branch condition.
/// - Unconditional branches whose successor has an entry in `indexmap` load
///   from the module global "IndirectBranchingGlobalTable" at that index
///   (NOTE(review): the table is presumably emitted elsewhere in this pass and
///   laid out to match `indexmap` — confirm).
/// - Any other unconditional branch gets a private one-entry table.
///
/// @param Func  Function to transform; declarations are left untouched.
/// @return true if at least one branch was rewritten, false otherwise.
bool runOnFunction(Function &Func) override {
  if (Func.isDeclaration()) {
    return false;
  }

  // Snapshot the branches first: they are replaced below, which must not
  // happen while the instruction list is being iterated.
  vector<BranchInst *> BIs;
  for (inst_iterator I = inst_begin(Func), E = inst_end(Func); I != E; ++I) {
    if (BranchInst *BI = dyn_cast<BranchInst>(&*I)) {
      BIs.push_back(BI);
    }
  }
  if (BIs.empty()) {
    // Nothing was modified, so do not report a change to the pass manager.
    return false;
  }

  auto *M = Func.getParent();
  auto &Ctx = M->getContext();
  auto *Int32Ty = Type::getInt32Ty(Ctx);
  Value *zero = ConstantInt::get(Int32Ty, 0);

  for (BranchInst *BI : BIs) {
    IRBuilder<> IRB(BI);

    // The condition's value is used as the table index: false evaluates to 0
    // and true to 1, so the false successor has to come first.
    vector<BasicBlock *> BBs;
    if (BI->isConditional()) {
      BBs.push_back(BI->getSuccessor(1));
    }
    BBs.push_back(BI->getSuccessor(0));

    GlobalVariable *LoadFrom = nullptr;
    Value *index = nullptr;
    if (BI->isConditional()) {
      index = IRB.CreateZExt(BI->getCondition(), Int32Ty);
    } else {
      // Look the successor up without operator[]: on a std::map-like
      // container operator[] would default-insert slot 0 for an unknown
      // block, and a later branch to that block would then read an unrelated
      // entry of the shared table.
      auto Slot = indexmap.find(BI->getSuccessor(0));
      if (Slot != indexmap.end()) {
        // May be null when the module has no such global; handled below.
        LoadFrom = M->getGlobalVariable("IndirectBranchingGlobalTable", true);
      }
      if (LoadFrom != nullptr) {
        index = ConstantInt::get(Int32Ty, Slot->second);
      } else {
        // Private one-entry table: its only element is at index 0.
        index = zero;
      }
    }

    if (LoadFrom == nullptr) {
      // Build a private table holding the address of each destination. The
      // GlobalVariable is inserted into (and owned by) the module.
      vector<Constant *> BlockAddresses;
      for (BasicBlock *BB : BBs) {
        BlockAddresses.push_back(BlockAddress::get(BB));
      }
      ArrayType *AT = ArrayType::get(Type::getInt8PtrTy(Ctx), BBs.size());
      Constant *BlockAddressArray =
          ConstantArray::get(AT, ArrayRef<Constant *>(BlockAddresses));
      LoadFrom = new GlobalVariable(*M, AT, false,
                                    GlobalValue::LinkageTypes::PrivateLinkage,
                                    BlockAddressArray);
    }

    Value *GEP = IRB.CreateGEP(LoadFrom, {zero, index});
    LoadInst *LI = IRB.CreateLoad(GEP, "IndirectBranchingTargetAddress");

    // indirectbr must list every block it can possibly jump to.
    IndirectBrInst *indirBr = IndirectBrInst::Create(LI, BBs.size());
    for (BasicBlock *BB : BBs) {
      indirBr->addDestination(BB);
    }
    ReplaceInstWithInst(BI, indirBr);
  }
  return true;
}