예제 #1
0
    //////////////////////////////////////////////////////////////////////////
    // @brief processes a single decl from the streamout stream. Reads 4 components from the input
    //        stream and writes N components to the output buffer given the componentMask or if
    //        a hole, just increments the buffer pointer
    // @param pStream - pointer to current attribute
    // @param pOutBuffers - pointers to the current location of each output buffer
    // @param decl - input decl
    void buildDecl(Value* pStream, Value* pOutBuffers[4], const STREAMOUT_DECL& decl)
    {
        // @todo add this to x86 macros
        Function* maskStore = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx_maskstore_ps);

        uint32_t numComponents = _mm_popcnt_u32(decl.componentMask);
        uint32_t packedMask = (1 << numComponents) - 1;
        if (!decl.hole)
        {
            // increment stream pointer to correct slot
            Value* pAttrib = GEP(pStream, C(4 * decl.attribSlot));

            // load 4 components from stream
            Type* simd4Ty = VectorType::get(IRB()->getFloatTy(), 4);
            Type* simd4PtrTy = PointerType::get(simd4Ty, 0);
            pAttrib = BITCAST(pAttrib, simd4PtrTy);
            Value *vattrib = LOAD(pAttrib);

            // shuffle/pack enabled components
            Value* vpackedAttrib = VSHUFFLE(vattrib, vattrib, PackMask(decl.componentMask));

            // store to output buffer
            // cast SO buffer to i8*, needed by maskstore
            Value* pOut = BITCAST(pOutBuffers[decl.bufferIndex], PointerType::get(mInt8Ty, 0));

            // cast input to <4xfloat>
            Value* src = BITCAST(vpackedAttrib, simd4Ty);
            CALL(maskStore, {pOut, ToMask(packedMask), src});
        }

        // increment SO buffer
        pOutBuffers[decl.bufferIndex] = GEP(pOutBuffers[decl.bufferIndex], C(numComponents));
    }
예제 #2
0
static Value *
arch_6502_get_operand_lvalue(cpu_t *cpu, addr_t pc, BasicBlock* bb) {
	int am = get_addmode(cpu->RAM[pc]);
	Value *index_register_before;
	Value *index_register_after;
	bool is_indirect;
	bool is_8bit_base;

	switch (am) {
		case ADDMODE_ACC:
			return ptr_A;
		case ADDMODE_BRA:
		case ADDMODE_IMPL:
			return NULL;
		case ADDMODE_IMM:
			{
			Value *ptr_temp = new AllocaInst(getIntegerType(8), "temp", bb);
			new StoreInst(CONST8(OPERAND_8), ptr_temp, bb);
			return ptr_temp;
			}
	}

	is_indirect = ((am == ADDMODE_IND) || (am == ADDMODE_INDX) || (am == ADDMODE_INDY));
	is_8bit_base = !((am == ADDMODE_ABS) || (am == ADDMODE_ABSX) || (am == ADDMODE_ABSY));
	index_register_before = NULL;
	if ((am == ADDMODE_ABSX) || (am == ADDMODE_INDX) || (am == ADDMODE_ZPX))
		index_register_before = ptr_X;
	if ((am == ADDMODE_ABSY) || (am == ADDMODE_ZPY))
		index_register_before = ptr_Y;
	index_register_after = (am == ADDMODE_INDY)? ptr_Y : NULL;

#if 0
	LOG("pc = %x\n", pc);
	LOG("index_register_before = %x\n", index_register_before);
	LOG("index_register_after = %x\n", index_register_after);
	LOG("is_indirect = %x\n", is_indirect);
	LOG("is_8bit_base = %x\n", is_8bit_base);
#endif

	/* create base constant */
	uint16_t base = is_8bit_base? (OPERAND_8):(OPERAND_16);
	Value *ea = CONST32(base);

	if (index_register_before)
		ea = ADD(ZEXT32(LOAD(index_register_before)), ea);

	/* wrap around in zero page */
	if (is_8bit_base)
		ea = AND(ea, CONST32(0x00FF));
	else if (base >= 0xFF00) /* wrap around in memory */
		ea = AND(ea, CONST32(0xFFFF));

	if (is_indirect)
		ea = ZEXT32(LOAD_RAM16(ea));

	if (index_register_after)
		ea = ADD(ZEXT32(LOAD(index_register_after)), ea);

	return GEP(ea);
}
예제 #3
0
    void buildStream(const STREAMOUT_COMPILE_STATE& state, const STREAMOUT_STREAM& streamState, Value* pSoCtx, BasicBlock* returnBB, Function* soFunc)
    {
        // get list of active SO buffers
        std::unordered_set<uint32_t> activeSOBuffers;
        for (uint32_t d = 0; d < streamState.numDecls; ++d)
        {
            const STREAMOUT_DECL& decl = streamState.decl[d];
            activeSOBuffers.insert(decl.bufferIndex);
        }

        // always increment numPrimStorageNeeded
        Value *numPrimStorageNeeded = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });
        numPrimStorageNeeded = ADD(numPrimStorageNeeded, C(1));
        STORE(numPrimStorageNeeded, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimStorageNeeded });

        // check OOB on active SO buffers.  If any buffer is out of bound, don't write
        // the primitive to any buffer
        Value* oobMask = C(false);
        for (uint32_t buffer : activeSOBuffers)
        {
            oobMask = OR(oobMask, oob(state, pSoCtx, buffer));
        }

        BasicBlock* validBB = BasicBlock::Create(JM()->mContext, "valid", soFunc);

        // early out if OOB
        COND_BR(oobMask, returnBB, validBB);

        IRB()->SetInsertPoint(validBB);

        Value* numPrimsWritten = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });
        numPrimsWritten = ADD(numPrimsWritten, C(1));
        STORE(numPrimsWritten, pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_numPrimsWritten });

        // compute start pointer for each output buffer
        Value* pOutBuffer[4];
        Value* pOutBufferStartVertex[4];
        Value* outBufferPitch[4];
        for (uint32_t b: activeSOBuffers)
        {
            Value* pBuf = getSOBuffer(pSoCtx, b);
            Value* pData = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pBuffer });
            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
            pOutBuffer[b] = GEP(pData, streamOffset);
            pOutBufferStartVertex[b] = pOutBuffer[b];

            outBufferPitch[b] = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_pitch });
        }

        // loop over the vertices of the prim
        Value* pStreamData = LOAD(pSoCtx, { 0, SWR_STREAMOUT_CONTEXT_pPrimData });
        for (uint32_t v = 0; v < state.numVertsPerPrim; ++v)
        {
            buildVertex(streamState, pStreamData, pOutBuffer);

            // increment stream and output buffer pointers
            // stream verts are always 32*4 dwords apart
            pStreamData = GEP(pStreamData, C(KNOB_NUM_ATTRIBUTES * 4));

            // output buffers offset using pitch in buffer state
            for (uint32_t b : activeSOBuffers)
            {
                pOutBufferStartVertex[b] = GEP(pOutBufferStartVertex[b], outBufferPitch[b]);
                pOutBuffer[b] = pOutBufferStartVertex[b];
            }
        }

        // update each active buffer's streamOffset
        for (uint32_t b : activeSOBuffers)
        {
            Value* pBuf = getSOBuffer(pSoCtx, b);
            Value* streamOffset = LOAD(pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
            streamOffset = ADD(streamOffset, MUL(C(state.numVertsPerPrim), outBufferPitch[b]));
            STORE(streamOffset, pBuf, { 0, SWR_STREAMOUT_BUFFER_streamOffset });
        }
    }