Example #1
static bool
repair_ssa_def(nir_ssa_def *def, void *void_state)
{
   struct repair_ssa_state *state = void_state;

   bool is_valid = true;
   nir_foreach_use(def, src) {
      if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) {
         is_valid = false;
         break;
      }
   }

   if (is_valid)
      return true;

   struct nir_phi_builder *pb = prep_build_phi(state);

   BITSET_SET(state->def_set, def->parent_instr->block->index);

   struct nir_phi_builder_value *val =
      nir_phi_builder_add_value(pb, def->num_components, def->bit_size,
                                state->def_set);

   nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);

   nir_foreach_use_safe(def, src) {
      nir_block *src_block = get_src_block(src);
      if (!nir_block_dominates(def->parent_instr->block, src_block)) {
         nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(
            nir_phi_builder_value_get_block_def(val, src_block)));
      }
   }
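
The excerpt above stops before the phi builder is finalized. For reference throughout these Mesa examples, here is a minimal sketch of the bitset helpers, approximating util/bitset.h rather than quoting it verbatim:

/* Sketch of Mesa-style bitset helpers (approximation, not the verbatim
 * util/bitset.h definitions): an array of words, one bit per element. */
typedef unsigned int BITSET_WORD;
#define BITSET_WORDBITS        (sizeof(BITSET_WORD) * 8)
#define BITSET_WORDS(bits)     (((bits) + BITSET_WORDBITS - 1) / BITSET_WORDBITS)
#define BITSET_DECLARE(name, bits) BITSET_WORD name[BITSET_WORDS(bits)]
#define BITSET_BITWORD(b)      ((b) / BITSET_WORDBITS)
#define BITSET_BIT(b)          (1u << ((b) % BITSET_WORDBITS))
#define BITSET_SET(x, b)       ((x)[BITSET_BITWORD(b)] |= BITSET_BIT(b))
#define BITSET_CLEAR(x, b)     ((x)[BITSET_BITWORD(b)] &= ~BITSET_BIT(b))
#define BITSET_TEST(x, b)      (((x)[BITSET_BITWORD(b)] & BITSET_BIT(b)) != 0)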
Example #2
static physaddr_t _pmm_set_addr(physaddr_t addr, size_t pages, struct MMAPRegion* reg,
    int force_addr) {
    uint32_t startpage = (addr - reg->start) / PMM_PAGE_SIZE;
    //The region holds (len / PMM_PAGE_SIZE) pages in total; don't scan
    //past that, or we would read beyond the end of the bitset.
    uint32_t limit = reg->len / PMM_PAGE_SIZE;

    while (startpage < limit) {
        if (BITSET_ISSET(reg->region_bitset, startpage/8, startpage%8)) {
            //Occupied.
            if (force_addr)
                return 0; //Can't allocate at the exact address.

            startpage++;
        } else {
            //Not occupied. Check that the next 'pages' pages are free
            //as well, and that the run still fits inside the region.
            if (startpage + pages > limit)
                return 0; //No free run big enough in this region.

            size_t page;
            for (page = 0; page < pages; page++) {
                if (BITSET_ISSET(reg->region_bitset,
                        (startpage+page)/8, (startpage+page)%8)) {
                    //One of the next pages is occupied:
                    //restart the outer scan from that page.
                    startpage += page;
                    break;
                }
            }

            if (page == pages)
                break; //Found a free run big enough.

            if (force_addr)
                return 0; //Can't allocate at the exact address.
        }
    }

    if (startpage >= limit)
        return 0; //Region exhausted without finding a free run.

    addr = reg->start + (startpage * PMM_PAGE_SIZE);

    /* all OK, fill the buffer */
    for (size_t p = 0; p < pages; p++) {
        BITSET_SET(reg->region_bitset, (startpage+p)/8, (startpage+p)%8);
    }

    if (!force_addr)
        reg->first_free_addr = addr + (pages * PMM_PAGE_SIZE);

    return addr;
}
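
Note that this kernel's bitset API differs from Mesa's: BITSET_SET and BITSET_ISSET take a byte index and a bit index as separate arguments. Hypothetical definitions consistent with the call sites above (the project's real macros are not shown on this page):

/* Hypothetical byte/bit-indexed macros matching the calls above. */
#define BITSET_ISSET(set, byte, bit) (((set)[(byte)] >> (bit)) & 1)
#define BITSET_SET(set, byte, bit)   ((set)[(byte)] |= (uint8_t)(1u << (bit)))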
Example #3
static inline void
update_single_program_texture_state(struct gl_context *ctx,
                                    struct gl_program *prog,
                                    int unit,
                                    BITSET_WORD *enabled_texture_units)
{
   struct gl_texture_object *texObj;

   texObj = update_single_program_texture(ctx, prog, unit);

   _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, texObj);
   BITSET_SET(enabled_texture_units, unit);
   ctx->Texture._MaxEnabledTexImageUnit =
      MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);
}
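
A plausible caller resolves each sampler the program uses to its texture unit and invokes the helper per unit; a sketch (the surrounding loop is an assumption, but u_bit_scan() is the same Mesa helper Example #6 relies on):

   /* Sketch: enable texture state for every sampler the program uses. */
   GLbitfield mask = prog->SamplersUsed;
   while (mask) {
      const int s = u_bit_scan(&mask);        /* extract lowest set bit */
      update_single_program_texture_state(ctx, prog, prog->SamplerUnits[s],
                                          enabled_texture_units);
   }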
Example #4
static bool
set_src_live(nir_src *src, void *void_live)
{
   BITSET_WORD *live = void_live;

   if (!src->is_ssa)
      return true;

   if (src->ssa->live_index == 0)
      return true;   /* undefined variables are never live */

   BITSET_SET(live, src->ssa->live_index);

   return true;
}
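
set_src_live() has the shape of a nir_foreach_src() callback, so a liveness pass would typically run it once per instruction; a minimal sketch (the wrapper name is hypothetical):

/* Sketch: mark every SSA source of one instruction as live. */
static void
mark_srcs_live(nir_instr *instr, BITSET_WORD *live)
{
   nir_foreach_src(instr, set_src_live, live);
}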
Example #5
static void
qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
              struct qreg src)
{
        int var = qir_reg_to_var(src);
        if (var == -1)
                return;

        c->temp_start[var] = MIN2(c->temp_start[var], ip);
        c->temp_end[var] = MAX2(c->temp_end[var], ip);

        /* The use[] bitset marks when the block makes
         * use of a variable without having completely
         * defined that variable within the block.
         */
        if (!BITSET_TEST(block->def, var))
                BITSET_SET(block->use, var);
}
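
The per-block use/def bitsets built here feed the classic backward liveness update, live_in = use | (live_out & ~def). A word-parallel sketch of that step (names are illustrative, not vc4's actual code):

/* Sketch: one liveness data-flow step over whole bitset words.
 * Returns true if live_in changed, i.e. keep iterating to a fixed point. */
static bool
update_live_in(BITSET_WORD *live_in, const BITSET_WORD *live_out,
               const BITSET_WORD *use, const BITSET_WORD *def,
               unsigned words)
{
        bool changed = false;
        for (unsigned i = 0; i < words; i++) {
                BITSET_WORD v = use[i] | (live_out[i] & ~def[i]);
                if (v != live_in[i]) {
                        live_in[i] = v;
                        changed = true;
                }
        }
        return changed;
}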
Example #6
static void
fix_missing_textures_for_atifs(struct gl_context *ctx,
                               struct gl_program *prog,
                               BITSET_WORD *enabled_texture_units)
{
   GLbitfield mask = prog->SamplersUsed;

   while (mask) {
      const int s = u_bit_scan(&mask);
      const int unit = prog->SamplerUnits[s];
      const gl_texture_index target_index = ffs(prog->TexturesUsed[unit]) - 1;

      if (!ctx->Texture.Unit[unit]._Current) {
         struct gl_texture_object *texObj =
            _mesa_get_fallback_texture(ctx, target_index);
         _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, texObj);
         BITSET_SET(enabled_texture_units, unit);
         ctx->Texture._MaxEnabledTexImageUnit =
            MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);
      }
   }
}
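
Here ffs() returns the 1-based position of the lowest set bit in TexturesUsed[unit], so subtracting one yields a target index; since the TEXTURE_x_INDEX values run from high to low priority (see the spec quote in Example #12), the lowest set bit is the highest-priority target used.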
Example #7
File: vir.c Project: imirkin/mesa
struct qreg
vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable,
                                        BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }
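
Note the asymmetry after the two reralloc() calls: the new tail of c->defs is zeroed with memset(), while c->spillable is grown without clearing and every newly added bit is then set one by one, so fresh temporaries start out marked spillable. (The excerpt is truncated; the function goes on to return the new register.)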
Example #8
static void
validate_ssa_def(nir_ssa_def *def, validate_state *state)
{
   assert(def->index < state->impl->ssa_alloc);
   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
   BITSET_SET(state->ssa_defs_found, def->index);

   assert(def->parent_instr == state->instr);

   assert(def->num_components <= 4);

   list_validate(&def->uses);
   list_validate(&def->if_uses);

   ssa_def_validate_state *def_state = ralloc(state->ssa_defs,
                                              ssa_def_validate_state);
   def_state->where_defined = state->impl;
   def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer,
                                      _mesa_key_pointer_equal);
   def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);
   _mesa_hash_table_insert(state->ssa_defs, def, def_state);
}
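
The ssa_defs_found bitset has to be zeroed and sized for impl->ssa_alloc defs before the walk begins; a minimal setup sketch (assuming plain calloc rather than Mesa's ralloc family):

   /* Sketch: allocate a zeroed bitset with room for ssa_alloc defs. */
   BITSET_WORD *ssa_defs_found =
      calloc(BITSET_WORDS(impl->ssa_alloc), sizeof(BITSET_WORD));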
Example #9
static void
update_program_texture_state(struct gl_context *ctx, struct gl_program **prog,
                             BITSET_WORD *enabled_texture_units)
{
    int i;

    for (i = 0; i < MESA_SHADER_STAGES; i++) {
        int s;

        if (!prog[i])
            continue;

        /* We can't use the shifting trick alone as the loop condition: if
         * sampler 31 is active, the next iteration would shift by 32, which
         * is undefined behavior for a 32-bit type.
         */
        for (s = 0; s < MAX_SAMPLERS && (1 << s) <= prog[i]->SamplersUsed; s++) {
            struct gl_texture_object *texObj;

            texObj = update_single_program_texture(ctx, prog[i], s);
            if (texObj) {
                int unit = prog[i]->SamplerUnits[s];
                _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, texObj);
                BITSET_SET(enabled_texture_units, unit);
                ctx->Texture._MaxEnabledTexImageUnit =
                    MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);
            }
        }
    }

    if (prog[MESA_SHADER_FRAGMENT]) {
        const GLuint coordMask = (1 << MAX_TEXTURE_COORD_UNITS) - 1;
        ctx->Texture._EnabledCoordUnits |=
            (prog[MESA_SHADER_FRAGMENT]->InputsRead >> VARYING_SLOT_TEX0) &
            coordMask;
    }
}
Example #10
/*  Initialize the physical memory manager.
    Autodetect RAM regions and report the usable physical RAM found.
*/
int pmm_init(struct mmap_list* mm_list, physaddr_t kstart,
    /*out*/ physaddr_t* kend)
{

    regs_count = mm_list->size;

    /* No regions detected */
    if (regs_count <= 0) {
        kerror("PMM: No regions detected!");
        return 0;
    }

    /* Allocate memory for mmap regions, set kernel end accordingly */
    regs_data = (struct MMAPRegion*)(*kend);
    *kend = (*kend) + (sizeof(struct MMAPRegion) * regs_count);
    memset(regs_data, 0, sizeof(struct MMAPRegion)* (regs_count));

    int ir = 0;
    uint64_t freemem = 0;
    struct MMAPRegion* reg_kernel = NULL;
    for (int i = 0; i < mm_list->size; i++) {
        regs_data[ir].start = mm_list->maps[i].start;
        regs_data[ir].len = mm_list->maps[i].length;
        regs_data[ir].first_free_addr = regs_data[ir].start;

        switch (mm_list->maps[i].type) {
            case MMAP_FREE: regs_data[ir].region_type = PMM_REG_DEFAULT; break;
            default: regs_data[ir].region_type = PMM_REG_HARDWARE; break;
        }

        /* if start and start + len < 1M, then map to legacy */
        if (regs_data[ir].start < 1048576 &&
            (regs_data[ir].start + regs_data[ir].len) < 1048576 &&
            regs_data[ir].region_type == PMM_REG_DEFAULT) {
                regs_data[ir].region_type = PMM_REG_LEGACY;
                regs_data[ir].first_free_addr = 0x1000; //safe area.
        }

        /* if start > 0xb0000 and ends below 1m, then map to rom, because
            this is where the BIOS ROM lies*/
        if (regs_data[ir].start > 0xb0000 &&
            (regs_data[ir].start + regs_data[ir].len) < 1048576 &&
            regs_data[ir].region_type == PMM_REG_HARDWARE) {
                regs_data[ir].region_type = PMM_REG_ROM;

        }

        if (regs_data[ir].region_type == PMM_REG_LEGACY ||
            regs_data[ir].region_type == PMM_REG_DEFAULT) {
            freemem += regs_data[ir].len;
        }

        /* TODO: Treat overlapping areas */

        char* typestr;
        switch (regs_data[ir].region_type) {
            case PMM_REG_DEFAULT: typestr = "default";          break;
            case PMM_REG_LEGACY: typestr = "legacy";            break;
            case PMM_REG_HARDWARE: typestr = "hardware-used";   break;
            case PMM_REG_FAULTY: typestr = "faulty";            break;
            default: typestr = "unknown";                       break;
        }

        knotice("PMM: detected RAM type %s starting at %x, %d bytes",
            typestr, regs_data[ir].start,
            regs_data[ir].len);

        /* Get the area that the kernel occupies */
        if (regs_data[ir].start >= kstart &&
            *kend <= regs_data[ir].start+regs_data[ir].len &&
            regs_data[ir].region_type == PMM_REG_DEFAULT ) {
                reg_kernel = &regs_data[ir];
        }

        ir++;
    }
    regs_count = ir;

    if (!reg_kernel) {
        /* Could not find kernel area */
        kerror("PMM: memory area not found for kernel (0x%x-0x%x)",
            kstart, *kend);
        return 0;
    }

    knotice("PMM: %d kB of usable RAM detected",
        (uint32_t)((freemem / 1024) & 0xffffffff));

    /* Allocate memory for bitsets */
    for (int i = 0; i < regs_count; i++) {
        regs_data[i].region_bitset = (uint8_t*)*kend;
        size_t len = 1+(regs_data[i].len / PMM_PAGE_SIZE / 8);

        memset(regs_data[i].region_bitset, 0, (len + 3) & ~3);

        /*  Each bit means a page (now its 4 kB)
            C only allocates bytes, then we divide by 8 to get the amount of
            bytes for that bitset */

        *kend = (*kend) + len;

        /* Pad the bitset to 4 bytes */
        *kend = (*kend+3) & ~3;
    }

    uint64_t memocc = 0;

    /* Fill memory used by bitsets */
    uintptr_t kmemstart = kstart - reg_kernel->start;
    uintptr_t kmemend = (*kend) - reg_kernel->start;
    for (unsigned kpage = (kmemstart / PMM_PAGE_SIZE);
        kpage <= (kmemend / PMM_PAGE_SIZE);
        kpage++) {

            /* Fast path: mark eight pages at once, but only when kpage is
               byte-aligned, so pages outside the kernel range aren't
               flagged by accident. */
            if ((kpage % 8) == 0 && kpage + 8 < (kmemend / PMM_PAGE_SIZE)) {
                reg_kernel->region_bitset[kpage/8] = 0xff;
                kpage += 7;
                memocc += PMM_PAGE_SIZE*8;
                continue;
            }

            BITSET_SET(reg_kernel->region_bitset, kpage/8, kpage%8);
            memocc += PMM_PAGE_SIZE;

    }

    _kernel_start = kstart;
    _kernel_end = (*kend);
    reg_kernel->first_free_addr = kstart + memocc;
    knotice("PMM: %d kB of occupied RAM",
        (uint32_t)(memocc / 1024) & 0xffffffff);

    kshell_add("mem", "Show available memory", &pmm_shell_cmd);
    return 1;
}
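
To make the bitset sizing concrete: with PMM_PAGE_SIZE = 4096, a 16 MiB region spans 4096 pages, so its bitset needs 4096 / 8 = 512 bytes, plus the extra byte from the 1+ rounding, and *kend is then padded up to a 4-byte boundary.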
Example #11
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;

   calculate_live_intervals();

   int num_vars = live_intervals->num_vars;
   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));

   foreach_block (block, cfg) {
      memcpy(live, live_intervals->bd[block->num].liveout,
             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));

      foreach_inst_in_block_reverse(fs_inst, inst, block) {
         if (inst->dst.file == GRF &&
             !inst->has_side_effects() &&
             !inst->writes_flag()) {
            bool result_live = false;

            if (inst->regs_written == 1) {
               int var = live_intervals->var_from_reg(&inst->dst);
               result_live = BITSET_TEST(live, var);
            } else {
               int var = live_intervals->var_from_vgrf[inst->dst.reg];
               for (int i = 0; i < inst->regs_written; i++) {
                  result_live = result_live || BITSET_TEST(live, var + i);
               }
            }

            if (!result_live) {
               progress = true;

               if (inst->writes_accumulator) {
                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
               } else {
                  inst->opcode = BRW_OPCODE_NOP;
                  continue;
               }
            }
         }

         if (inst->dst.file == GRF) {
            if (!inst->is_partial_write()) {
               int var = live_intervals->var_from_vgrf[inst->dst.reg];
               for (int i = 0; i < inst->regs_written; i++) {
                  BITSET_CLEAR(live, var + inst->dst.reg_offset + i);
               }
            }
         }

         for (int i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == GRF) {
               int var = live_intervals->var_from_vgrf[inst->src[i].reg];

               for (int j = 0; j < inst->regs_read(this, i); j++) {
                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
               }
            }
         }
      }
   }
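
The excerpt cuts off here. Given the ralloc_array(NULL, ...) allocation above and the function's signature, the tail would still have to free the scratch bitset and report progress, along the lines of this sketch (not the verbatim source):

   ralloc_free(live);   /* release the scratch liveness bitset */

   return progress;
}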
Example #12
static void
update_ff_texture_state(struct gl_context *ctx,
                        BITSET_WORD *enabled_texture_units)
{
    int unit;

    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
        struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
        GLbitfield mask;
        bool complete;

        if (texUnit->Enabled == 0x0)
            continue;

        /* If a shader already dictated what texture target was used for this
         * unit, just go along with it.
         */
        if (BITSET_TEST(enabled_texture_units, unit))
            continue;

        /* From the GL 4.4 compat specification, section 16.2 ("Texture Application"):
         *
         *     "Texturing is enabled or disabled using the generic Enable and
         *      Disable commands, respectively, with the symbolic constants
         *      TEXTURE_1D, TEXTURE_2D, TEXTURE_RECTANGLE, TEXTURE_3D, or
         *      TEXTURE_CUBE_MAP to enable the one-, two-, rectangular,
         *      three-dimensional, or cube map texture, respectively. If more
         *      than one of these textures is enabled, the first one enabled
         *      from the following list is used:
         *
         *      • cube map texture
         *      • three-dimensional texture
         *      • rectangular texture
         *      • two-dimensional texture
         *      • one-dimensional texture"
         *
         * Note that the TEXTURE_x_INDEX values are in high to low priority.
         * Also:
         *
         *     "If a texture unit is disabled or has an invalid or incomplete
         *      texture (as defined in section 8.17) bound to it, then blending
         *      is disabled for that texture unit. If the texture environment
         *      for a given enabled texture unit references a disabled texture
         *      unit, or an invalid or incomplete texture that is bound to
         *      another unit, then the results of texture blending are
         *      undefined."
         */
        complete = false;
        mask = texUnit->Enabled;
        while (mask) {
            const int texIndex = u_bit_scan(&mask);
            struct gl_texture_object *texObj = texUnit->CurrentTex[texIndex];
            struct gl_sampler_object *sampler = texUnit->Sampler ?
                                                    texUnit->Sampler : &texObj->Sampler;

            if (!_mesa_is_texture_complete(texObj, sampler)) {
                _mesa_test_texobj_completeness(ctx, texObj);
            }
            if (_mesa_is_texture_complete(texObj, sampler)) {
                _mesa_reference_texobj(&texUnit->_Current, texObj);
                complete = true;
                break;
            }
        }

        if (!complete)
            continue;

        /* if we get here, we know this texture unit is enabled */
        BITSET_SET(enabled_texture_units, unit);
        ctx->Texture._MaxEnabledTexImageUnit =
            MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);

        ctx->Texture._EnabledCoordUnits |= 1 << unit;

        update_tex_combine(ctx, texUnit);
    }
}
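
The priority order from the spec quote falls out of the iteration itself: u_bit_scan() yields the lowest set bit first, and because the TEXTURE_x_INDEX values are ordered from high to low priority, the first complete texture found is automatically the highest-priority enabled target.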
Example #13
void
fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
				 struct fd5_emit *emit)
{
	struct stage s[MAX_STAGES];
	uint32_t pos_regid, psize_regid, color_regid[8];
	uint32_t face_regid, coord_regid, zwcoord_regid;
	uint32_t vcoord_regid, vertex_regid, instance_regid;
	enum a3xx_threadsize fssz;
	uint8_t psize_loc = ~0;
	int i, j;

	setup_stages(emit, s);

	fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;

	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
	vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
	instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);

	if (s[FS].v->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
	}

	/* TODO get these dynamically: */
	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
	vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) |
			COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) |
			COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) |
			COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) |
			COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE));
	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) |
			COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE));

	OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5);
	OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_SP_FS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED));
	OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
	OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
	OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
	OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
	OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
	OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
	OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
	OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
	OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
	OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
	OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
	OUT_RING(ring, 0x00000000);        /* HLSQ_CS_CONSTLEN */
	OUT_RING(ring, 0x00000000);        /* HLSQ_CS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
	OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
			A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
			0x6 | /* XXX seems to be always set? */
			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));

	struct ir3_shader_linkage l = {0};
	ir3_link_shaders(&l, s[VS].v, s[FS].v);

	if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
			!emit->key.binning_pass)
		link_stream_out(&l, s[VS].v);

	BITSET_DECLARE(varbs, 128) = {0};
	uint32_t *varmask = (uint32_t *)varbs;

	for (i = 0; i < l.cnt; i++)
		for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
			BITSET_SET(varbs, l.var[i].loc + j);

	OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
	OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
	OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
	OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
	OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */

	/* a5xx appends pos/psize to end of the linkage map: */
	if (pos_regid != regid(63,0))
		ir3_link_add(&l, pos_regid, 0xf, l.max_loc);

	if (psize_regid != regid(63,0)) {
		psize_loc = l.max_loc;
		ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
	}

	if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
			!emit->key.binning_pass) {
		emit_stream_out(ring, s[VS].v, &l);

		OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, 0x00000000);
	} else {
		OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
		OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
	}

	for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);

		reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
		j++;

		reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
		j++;

		OUT_RING(ring, reg);
	}

	for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);

		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);

		OUT_RING(ring, reg);
	}

	OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */

	if (s[VS].instrlen)
		fd5_emit_shader(ring, s[VS].v);

	// TODO depending on other bits in this reg (if any) set somewhere else?
	OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));

	OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
	OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));

	OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
	OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) |
			COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) |
			0x10000);    // XXX

	fd5_context(ctx)->max_loc = l.max_loc;

	if (emit->key.binning_pass) {
		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
	} else {
		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
	}

	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
	OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
			A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) |
			0x00000880);               /* XXX HLSQ_CONTROL_0 */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
	OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
			A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
			0x0000fcfc);               /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) |
			0x40006 | /* XXX set pretty much everywhere */
			A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
			A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));

	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
	OUT_RING(ring, 0x020fffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
	OUT_RING(ring, 0x0000ffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
	OUT_RING(ring, 0x00000010);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD |
					A5XX_GRAS_CNTL_YCOORD |
					A5XX_GRAS_CNTL_ZCOORD |
					A5XX_GRAS_CNTL_WCOORD |
					A5XX_GRAS_CNTL_UNK3) |
			COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3));

	OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
					A5XX_RB_RENDER_CONTROL0_YCOORD |
					A5XX_RB_RENDER_CONTROL0_ZCOORD |
					A5XX_RB_RENDER_CONTROL0_WCOORD |
					A5XX_RB_RENDER_CONTROL0_UNK3) |
			COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3));
	OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));

	OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8);
	for (i = 0; i < 8; i++) {
		OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
				COND(emit->key.half_precision,
					A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
	}


	OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
	OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
			A5XX_VPC_PACK_PSIZELOC(psize_loc));

	if (!emit->key.binning_pass) {
		uint32_t vinterp[8], vpsrepl[8];

		memset(vinterp, 0, sizeof(vinterp));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* looks like we need to do int varyings in the frag
		 * shader on a5xx (no flatshad reg?  or a420.0 bug?):
		 *
		 *    (sy)(ss)nop
		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
		 *    ldlv.u32 r0.y,l[r0.x+1], 1
		 *    (ss)bary.f (ei)r63.x, 0, r0.x
		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
		 *    (rpt5)nop
		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
		 *
		 * Possibly on later a5xx variants we'll be able to use
		 * something like the code below instead of workaround
		 * in the shader:
		 */
		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = s[FS].v->inputs[j].compmask;

			uint32_t inloc = s[FS].v->inputs[j].inloc;

			if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
						//flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = s[FS].v->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
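
The excerpt is truncated here. As for the packing arithmetic above: each 32-bit vinterp/vpsrepl word holds sixteen 2-bit fields, one per varying component, which is why the code indexes with loc / 16 and shifts by (loc % 16) * 2.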
Example #14
static void
qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
              struct hash_table *partial_update_ht, struct qinst *inst)
{
        /* The def[] bitset marks when an initialization in a
         * block completely screens off previous updates of
         * that variable.
         */
        int var = qir_reg_to_var(inst->dst);
        if (var == -1)
                return;

        c->temp_start[var] = MIN2(c->temp_start[var], ip);
        c->temp_end[var] = MAX2(c->temp_end[var], ip);

        /* If we've already tracked this as a def, or already used it within
         * the block, there's nothing to do.
         */
        if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
                return;

        /* Easy, common case: unconditional full register update. */
        if (inst->cond == QPU_COND_ALWAYS && !inst->dst.pack) {
                BITSET_SET(block->def, var);
                return;
        }

        /* Finally, look at the condition code and packing and mark it as a
         * def.  We need to make sure that we understand sequences of
         * instructions like:
         *
         *     mov.zs t0, t1
         *     mov.zc t0, t2
         *
         * or:
         *
         *     mmov t0.8a, t1
         *     mmov t0.8b, t2
         *     mmov t0.8c, t3
         *     mmov t0.8d, t4
         *
         * as defining the temp within the block, because otherwise dst's live
         * range will get extended up the control flow to the top of the
         * program.
         */
        struct partial_update_state *state =
                get_partial_update_state(partial_update_ht, inst);
        uint8_t mask = qir_channels_written(inst);

        if (inst->cond == QPU_COND_ALWAYS) {
                state->channels |= mask;
        } else {
                for (int i = 0; i < 4; i++) {
                        if (!(mask & (1 << i)))
                                continue;

                        if (state->insts[i] &&
                            state->insts[i]->cond ==
                            qpu_cond_complement(inst->cond))
                                state->channels |= 1 << i;
                        else
                                state->insts[i] = inst;
                }
        }

        if (state->channels == 0xf)
                BITSET_SET(block->def, var);
}
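
Worked through for the mov.zs/mov.zc pair from the comment: the first instruction finds state->insts[i] empty and records itself for each channel it writes; the second instruction's condition is the complement of the first, so each shared channel is promoted into state->channels; once all four channels are covered (0xf), the destination counts as fully defined within the block.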