void
GX2InitFetchShaderEx(GX2FetchShader *fetchShader,
                     uint8_t *buffer,
                     uint32_t attribCount,
                     GX2AttribStream *attribs,
                     GX2FetchShaderType type,
                     GX2TessellationMode tessMode)
{
   if (type != GX2FetchShaderType::NoTessellation) {
      decaf_abort(fmt::format("Invalid GX2FetchShaderType {}", enumAsString(type)));
   }

   if (tessMode != GX2TessellationMode::Discrete) {
      decaf_abort(fmt::format("Invalid GX2TessellationMode {}", enumAsString(tessMode)));
   }

   auto someTessVar1 = 128u;
   auto someTessVar2 = 128u;
   auto numGPRs = 0u;
   auto barrier = false;

   // Calculate instruction pointers
   auto fetchCount = GX2FSCalcNumFetchInsts(attribCount, type);
   auto aluCount = 0; // GX2FSCalcNumAluInsts(type, tessMode);
   auto cfCount = GX2FSCalcNumCFInsts(fetchCount, type);

   auto fetchSize = fetchCount * sizeof(latte::VertexFetchInst);
   auto cfSize = cfCount * sizeof(latte::ControlFlowInst);
   auto aluSize = aluCount * sizeof(latte::AluInst);

   auto cfOffset = 0u;
   auto aluOffset = cfSize;
   auto fetchOffset = align_up(cfSize + aluSize, 0x10u);

   auto cfPtr = reinterpret_cast<latte::ControlFlowInst *>(buffer + cfOffset);
   auto aluPtr = reinterpret_cast<latte::AluInst *>(buffer + aluOffset);
   auto fetchPtr = reinterpret_cast<latte::VertexFetchInst *>(buffer + fetchOffset);

   // Setup fetch shader
   fetchShader->type = type;
   fetchShader->attribCount = attribCount;
   fetchShader->data = buffer;
   fetchShader->size = GX2CalcFetchShaderSizeEx(attribCount, type, tessMode);

   // Generate fetch instructions
   auto indexMap = GX2FSGetIndexGprMap(type, tessMode);

   for (auto i = 0u; i < attribCount; ++i) {
      latte::VertexFetchInst vfetch;
      auto &attrib = attribs[i];
      std::memset(&vfetch, 0, sizeof(vfetch));

      if (attrib.buffer == 16) {
         // TODO: Figure out what these vars are for
         if (attrib.offset) {
            if (attrib.offset == 1) {
               someTessVar1 = attrib.location;
            }
         } else {
            someTessVar2 = attrib.location;
         }
      } else {
         // Semantic vertex fetch
         vfetch.word0 = vfetch.word0
            .VTX_INST(latte::SQ_VTX_INST_SEMANTIC)
            .BUFFER_ID(latte::SQ_RES_OFFSET::VS_ATTRIB_RESOURCE_0 + attrib.buffer - latte::SQ_RES_OFFSET::VS_TEX_RESOURCE_0);

         vfetch.word2 = vfetch.word2
            .OFFSET(attribs[i].offset);

         if (attrib.type) {
            auto selX = latte::SQ_SEL::SEL_X;
            auto fetchType = latte::SQ_VTX_FETCH_TYPE::VERTEX_DATA;

            if (attrib.type == GX2AttribIndexType::PerInstance) {
               if (attrib.aluDivisor == 1) {
                  fetchType = latte::SQ_VTX_FETCH_TYPE::INSTANCE_DATA;
                  selX = latte::SQ_SEL::SEL_W;
               } else if (attrib.aluDivisor == fetchShader->divisors[0]) {
                  fetchType = latte::SQ_VTX_FETCH_TYPE::INSTANCE_DATA;
                  selX = latte::SQ_SEL::SEL_Y;
               } else if (attrib.aluDivisor == fetchShader->divisors[1]) {
                  fetchType = latte::SQ_VTX_FETCH_TYPE::INSTANCE_DATA;
                  selX = latte::SQ_SEL::SEL_Z;
               } else {
                  fetchShader->divisors[fetchShader->numDivisors] = attrib.aluDivisor;

                  if (fetchShader->numDivisors == 0) {
                     selX = latte::SQ_SEL::SEL_Y;
                  } else if (fetchShader->numDivisors == 1) {
                     selX = latte::SQ_SEL::SEL_Z;
                  }

                  fetchShader->numDivisors++;
               }
            }

            vfetch.word0 = vfetch.word0
               .FETCH_TYPE(fetchType)
               .SRC_SEL_X(selX);
         } else {
            vfetch.word0 = vfetch.word0
               .SRC_GPR(indexMap[0].gpr)
               .SRC_SEL_X(static_cast<latte::SQ_SEL>(indexMap[0].chan));
         }

         // Setup dest
         vfetch.gpr = vfetch.gpr
            .DST_GPR(attrib.location);

         vfetch.word1 = vfetch.word1
            .DST_SEL_W(static_cast<latte::SQ_SEL>(attrib.mask & 0x7))
            .DST_SEL_Z(static_cast<latte::SQ_SEL>((attrib.mask >> 8) & 0x7))
            .DST_SEL_Y(static_cast<latte::SQ_SEL>((attrib.mask >> 16) & 0x7))
            .DST_SEL_X(static_cast<latte::SQ_SEL>((attrib.mask >> 24) & 0x7));

         // Setup mega fetch
         vfetch.word2 = vfetch.word2
            .MEGA_FETCH(1);

         vfetch.word0 = vfetch.word0
            .MEGA_FETCH_COUNT(internal::getAttribFormatBytes(attrib.format) - 1);

         // Setup format
         auto dataFormat = internal::getAttribFormatDataFormat(attrib.format);
         auto numFormat = latte::SQ_NUM_FORMAT::NORM;
         auto formatComp = latte::SQ_FORMAT_COMP::UNSIGNED;

         if (attribs[i].format & GX2AttribFormatFlags::SCALED) {
            numFormat = latte::SQ_NUM_FORMAT::SCALED;
         } else if (attribs[i].format & GX2AttribFormatFlags::INTEGER) {
            numFormat = latte::SQ_NUM_FORMAT::INT;
         }

         if (attribs[i].format & GX2AttribFormatFlags::SIGNED) {
            formatComp = latte::SQ_FORMAT_COMP::SIGNED;
         }

         vfetch.word1 = vfetch.word1
            .DATA_FORMAT(dataFormat)
            .NUM_FORMAT_ALL(numFormat)
            .FORMAT_COMP_ALL(formatComp);

         auto swapMode = internal::getSwapModeEndian(attribs[i].endianSwap & 3);

         if (attribs[i].endianSwap == latte::SQ_ENDIAN::AUTO) {
            swapMode = internal::getAttribFormatEndian(attribs[i].format);
         }

         vfetch.word2 = vfetch.word2
            .ENDIAN_SWAP(swapMode);

         // Append to program
         *(fetchPtr++) = vfetch;

         // Add extra tesselation vertex fetches
         if (type != GX2FetchShaderType::NoTessellation && attrib.type != GX2AttribIndexType::PerInstance) {
            auto perAttrib = GX2FetchInstsPerAttrib(type);

            for (auto j = 1u; j < perAttrib; ++j) {
               latte::VertexFetchInst vfetch2 = vfetch;

               // Update src/dst
               vfetch2.word0 = vfetch2.word0
                  .SRC_GPR(indexMap[j].gpr)
                  .SRC_SEL_X(static_cast<latte::SQ_SEL>(indexMap[j].chan));

               vfetch2.gpr = vfetch2.gpr
                  .DST_GPR(j + attrib.location);

               // Append to program
               *(fetchPtr++) = vfetch;
            }
         }
      }
   }

   // Generate tessellation ALU ops
   if (type != GX2FetchShaderType::NoTessellation) {
      numGPRs = 2;

      if (tessMode == GX2TessellationMode::Adaptive) {
         switch (type) {
         case GX2FetchShaderType::LineTessellation:
            numGPRs = 3;
            break;
         case GX2FetchShaderType::TriangleTessellation:
            numGPRs = 7;
            break;
         case GX2FetchShaderType::QuadTessellation:
            numGPRs = 7;
            break;
         }
      }

      // TODO: GX2FSGenTessAluOps
      barrier = true;
   }

   // Generate a VTX CF per 16 VFETCH
   if (fetchCount) {
      for (auto i = 0u; i < cfCount - 1; ++i) {
         auto fetches = FetchesPerControlFlow;

         if (fetchCount < (i + 1) * FetchesPerControlFlow) {
            // Don't overrun our fetches!
            fetches = fetchCount % FetchesPerControlFlow;
         }

         latte::ControlFlowInst inst;
         std::memset(&inst, 0, sizeof(inst));
         inst.word0.ADDR = static_cast<uint32_t>((fetchOffset + sizeof(latte::VertexFetchInst) * i * FetchesPerControlFlow) / 8);
         inst.word1 = inst.word1
            .COUNT((fetches - 1) & 0x7)
            .COUNT_3(((fetches - 1) >> 3) & 0x1)
            .CF_INST(latte::SQ_CF_INST_VTX_TC)
            .BARRIER(barrier ? 1 : 0);
         *(cfPtr++) = inst;
      }
   }

   // Generate tessellation "post" ALU ops
   if (numGPRs) {
      // TODO: GX2FSGenPostAluOps
   }

   // Generate an EOP
   latte::ControlFlowInst eop;
   std::memset(&eop, 0, sizeof(eop));
   eop.word1 = eop.word1
      .BARRIER(1)
      .CF_INST(latte::SQ_CF_INST_RETURN);
   *(cfPtr++) = eop;

   // Set sq_pgm_resources_fs
   auto sq_pgm_resources_fs = fetchShader->regs.sq_pgm_resources_fs.value();
   sq_pgm_resources_fs = sq_pgm_resources_fs
      .NUM_GPRS(numGPRs);
   fetchShader->regs.sq_pgm_resources_fs = sq_pgm_resources_fs;

   GX2Invalidate(GX2InvalidateMode::CPU, fetchShader->data, fetchShader->size);
}
Esempio n. 2
0
EXT_DONE PARAM0, R0.ZW01
*/

uint32_t R600_video_vs[] =
{

    /* 0 */
    CF_DWORD0(ADDR(4)),
    CF_DWORD1(POP_COUNT(0),
      CF_CONST(0),
      COND(SQ_CF_COND_ACTIVE),
     /* I_COUNT(1),*/ 0,
      CALL_COUNT(0),
      END_OF_PROGRAM(0),
      VALID_PIXEL_MODE(0),
      CF_INST(SQ_CF_INST_VTX),
      WHOLE_QUAD_MODE(0),
      BARRIER(1)),
    /* 1 */
    CF_ALLOC_IMP_EXP_DWORD0(
      ARRAY_BASE(CF_POS0),
      TYPE(SQ_EXPORT_POS),
      RW_GPR(0),
      RW_REL(ABSOLUTE),
      INDEX_GPR(0),
      ELEM_SIZE(0)),
    CF_ALLOC_IMP_EXP_DWORD1_SWIZ(
      SRC_SEL_X(SQ_SEL_X),
      SRC_SEL_Y(SQ_SEL_Y),
      SRC_SEL_Z(SQ_SEL_0),
      SRC_SEL_W(SQ_SEL_1),