void r600_streamout_buffers_dirty(struct r600_common_context *rctx)
{
   struct r600_atom *begin = &rctx->streamout.begin_atom;
   unsigned num_bufs = util_bitcount(rctx->streamout.enabled_mask);
   unsigned num_bufs_appended = util_bitcount(rctx->streamout.enabled_mask &
                                              rctx->streamout.append_bitmask);

   if (!num_bufs)
      return;

   rctx->streamout.num_dw_for_end =
      12 + /* flush_vgt_streamout */
      num_bufs * 11; /* STRMOUT_BUFFER_UPDATE, BUFFER_SIZE */

   begin->num_dw = 12; /* flush_vgt_streamout */

   if (rctx->chip_class >= SI) {
      begin->num_dw += num_bufs * 4; /* SET_CONTEXT_REG */
   } else {
      begin->num_dw += num_bufs * 7; /* SET_CONTEXT_REG */

      if (rctx->family >= CHIP_RS780 && rctx->family <= CHIP_RV740)
         begin->num_dw += num_bufs * 5; /* STRMOUT_BASE_UPDATE */
   }

   begin->num_dw +=
      num_bufs_appended * 8 + /* STRMOUT_BUFFER_UPDATE */
      (num_bufs - num_bufs_appended) * 6 + /* STRMOUT_BUFFER_UPDATE */
      (rctx->family > CHIP_R600 && rctx->family < CHIP_RS780 ? 2 : 0); /* SURFACE_BASE_UPDATE */

   begin->dirty = true;

   r600_set_streamout_enable(rctx, true);
}
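/* Every snippet in this collection leans on util_bitcount(), Mesa's 32-bit
 * population count (here it counts enabled streamout buffers to budget
 * command-stream dwords).  As a point of reference only, a minimal portable
 * equivalent is Kernighan's clear-lowest-set-bit loop; the name bitcount32
 * below is illustrative, not Mesa's implementation.
 */
#include <stdint.h>

static unsigned bitcount32(uint32_t v)
{
   unsigned count = 0;
   while (v) {
      v &= v - 1; /* clears exactly one set bit per iteration */
      count++;
   }
   return count;
}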
void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
{
   struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state;
   struct r300_vertex_program_code* code = &vs->code;
   struct r300_screen* r300screen = r300->screen;
   unsigned instruction_count = code->length / 4;

   unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72;
   unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1);
   unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
   unsigned temp_count = MAX2(code->num_temporaries, 1);

   unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,
                                 vtx_mem_size / output_count, 10);
   unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5);

   CS_LOCALS(r300);

   BEGIN_CS(size);

   /* R300_VAP_PVS_CODE_CNTL_0
    * R300_VAP_PVS_CONST_CNTL
    * R300_VAP_PVS_CODE_CNTL_1
    * See the r5xx docs for instructions on how to use these. */
   OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, R300_PVS_FIRST_INST(0) |
              R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
              R300_PVS_LAST_INST(instruction_count - 1));

   OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, instruction_count - 1);

   OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
   OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
   OUT_CS_TABLE(code->body.d, code->length);

   OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
              R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
              R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) |
              R300_PVS_VF_MAX_VTX_NUM(12) |
              (r300->clip_halfz ? R300_DX_CLIP_SPACE_DEF : 0) |
              (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));

   /* Emit flow control instructions.  Even if there are no fc instructions,
    * we still need to write the registers to make sure they are cleared. */
   OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
   if (r300screen->caps.is_r500) {
      OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);
      OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);
   } else {
      OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);
      OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);
   }
   OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);
   OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS);

   END_CS;
}
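/* The VAP sizing above divides a fixed vertex memory pool among inputs,
 * outputs and temporaries, with each count clamped to at least 1 to avoid
 * division by zero.  A worked sketch of the same arithmetic with
 * hypothetical counts (3 inputs, 5 outputs, 6 temporaries on r500):
 */
#include <assert.h>

#define DEMO_MIN2(a, b)    ((a) < (b) ? (a) : (b))
#define DEMO_MIN3(a, b, c) DEMO_MIN2(DEMO_MIN2(a, b), (c))
#define DEMO_MAX2(a, b)    ((a) > (b) ? (a) : (b))

static void demo_pvs_sizing(void)
{
   unsigned vtx_mem_size = 128;             /* r500 pool size */
   unsigned input_count  = DEMO_MAX2(3, 1);
   unsigned output_count = DEMO_MAX2(5, 1);
   unsigned temp_count   = DEMO_MAX2(6, 1);

   /* 128/3 = 42 and 128/5 = 25, both capped at the hardware maximum 10 */
   unsigned slots = DEMO_MIN3(vtx_mem_size / input_count,
                              vtx_mem_size / output_count, 10);
   /* 128/6 = 21, capped at 5 controllers */
   unsigned cntlrs = DEMO_MIN2(vtx_mem_size / temp_count, 5);

   assert(slots == 10 && cntlrs == 5);
}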
/**
 * For GLX_EXT_texture_from_pixmap
 */
XMesaBuffer
XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p,
                               Colormap cmap,
                               int format, int target, int mipmap)
{
   GET_CURRENT_CONTEXT(ctx);
   XMesaBuffer b;

   assert(v);

   b = create_xmesa_buffer((Drawable) p, PIXMAP, v, cmap);
   if (!b)
      return NULL;

   /* get pixmap size */
   xmesa_get_window_size(v->display, b, &b->width, &b->height);

   if (target == 0) {
      /* examine dims */
      if (ctx->Extensions.ARB_texture_non_power_of_two) {
         target = GLX_TEXTURE_2D_EXT;
      }
      else if (   util_bitcount(b->width)  == 1
               && util_bitcount(b->height) == 1) {
         /* power of two size */
         if (b->height == 1) {
            target = GLX_TEXTURE_1D_EXT;
         }
         else {
            target = GLX_TEXTURE_2D_EXT;
         }
      }
      else if (ctx->Extensions.NV_texture_rectangle) {
         target = GLX_TEXTURE_RECTANGLE_EXT;
      }
      else {
         /* non power of two textures not supported */
         XMesaDestroyBuffer(b);
         return 0;
      }
   }

   b->TextureTarget = target;
   b->TextureFormat = format;
   b->TextureMipmap = mipmap;

   if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
                                     (Drawable) p, cmap)) {
      xmesa_free_buffer(b);
      return NULL;
   }

   return b;
}
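/* util_bitcount(x) == 1 above is a readable power-of-two test.  Where no
 * popcount helper is available, the classic bit trick is equivalent; a
 * minimal sketch (names illustrative):
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* x & (x - 1) clears the lowest set bit, so it is zero only when at most
 * one bit was set; x != 0 then excludes zero. */
static bool demo_is_pow2(uint32_t x)
{
   return x != 0 && (x & (x - 1)) == 0;
}

static void demo_pow2_check(void)
{
   assert(demo_is_pow2(1) && demo_is_pow2(64) && demo_is_pow2(1024));
   assert(!demo_is_pow2(0) && !demo_is_pow2(3) && !demo_is_pow2(640));
}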
unsigned int
BitSet::popCount() const
{
   unsigned int count = 0;

   for (unsigned int i = 0; i < (size + 31) / 32; ++i)
      if (data[i])
         count += util_bitcount(data[i]);
   return count;
}
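/* popCount() above rounds the bit-set size up to whole 32-bit words with
 * (size + 31) / 32 and sums per-word popcounts.  The same pattern in plain C
 * (names illustrative; __builtin_popcount is the GCC/Clang builtin):
 */
#include <stdint.h>

static unsigned demo_bitset_popcount(const uint32_t *words, unsigned nbits)
{
   unsigned count = 0;
   /* (nbits + 31) / 32 == number of 32-bit words backing nbits bits */
   for (unsigned i = 0; i < (nbits + 31) / 32; ++i)
      count += __builtin_popcount(words[i]);
   return count;
}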
static void
nv50_sprite_coords_validate(struct nv50_context *nv50)
{
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
   uint32_t pntc[8], mode;
   struct nv50_program *fp = nv50->fragprog;
   unsigned i, c;
   unsigned m = (nv50->state.interpolant_ctrl >> 8) & 0xff;

   if (!nv50->rast->pipe.point_quad_rasterization) {
      if (nv50->state.point_sprite) {
         BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
         for (i = 0; i < 8; ++i)
            PUSH_DATA(push, 0);

         nv50->state.point_sprite = FALSE;
      }
      return;
   } else {
      nv50->state.point_sprite = TRUE;
   }

   memset(pntc, 0, sizeof(pntc));

   for (i = 0; i < fp->in_nr; i++) {
      unsigned n = util_bitcount(fp->in[i].mask);

      if (fp->in[i].sn != TGSI_SEMANTIC_GENERIC) {
         m += n;
         continue;
      }
      if (!(nv50->rast->pipe.sprite_coord_enable & (1 << fp->in[i].si))) {
         m += n;
         continue;
      }

      for (c = 0; c < 4; ++c) {
         if (fp->in[i].mask & (1 << c)) {
            pntc[m / 8] |= (c + 1) << ((m % 8) * 4);
            ++m;
         }
      }
   }

   if (nv50->rast->pipe.sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT)
      mode = 0x00;
   else
      mode = 0x10;

   BEGIN_NV04(push, NV50_3D(POINT_SPRITE_CTRL), 1);
   PUSH_DATA (push, mode);

   BEGIN_NV04(push, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
   PUSH_DATAp(push, pntc, 8);
}
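/* The replace map above packs one 4-bit selector per interpolant, eight
 * nibbles per 32-bit word: entry m goes into word m / 8 at nibble m % 8,
 * and the stored value c + 1 selects texture-coordinate component c.
 * A standalone sketch of that packing (helper names illustrative):
 */
#include <assert.h>
#include <stdint.h>

static void demo_pack_nibble(uint32_t *words, unsigned m, unsigned value)
{
   assert(value <= 0xf);
   words[m / 8] |= value << ((m % 8) * 4);
}

static void demo_replace_map(void)
{
   uint32_t map[8] = {0};

   demo_pack_nibble(map, 0, 1); /* entry 0: component X (c = 0, stored as c + 1) */
   demo_pack_nibble(map, 1, 2); /* entry 1: component Y */
   demo_pack_nibble(map, 9, 3); /* entry 9: word 1, nibble 1 */

   assert(map[0] == 0x21);
   assert(map[1] == 0x30);
}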
bool
fd3_needs_manual_clipping(const struct ir3_shader *shader,
                          const struct pipe_rasterizer_state *rast)
{
   uint64_t outputs = ir3_shader_outputs(shader);

   return (!rast->depth_clip_near ||
           util_bitcount(rast->clip_plane_enable) > 6 ||
           outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
                      (1ULL << VARYING_SLOT_CLIP_DIST0) |
                      (1ULL << VARYING_SLOT_CLIP_DIST1)));
}
static void validate_immediate(struct i915_context *i915,
                               unsigned *batch_space)
{
   unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 |
                     1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 |
                     1 << I915_IMMEDIATE_S4 | 1 << I915_IMMEDIATE_S5 |
                     1 << I915_IMMEDIATE_S6) & i915->immediate_dirty;

   if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo)
      i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo;

   /* one dword for the packet header, plus one per dirty immediate */
   *batch_space = 1 + util_bitcount(dirty);
}
static void si_dump_reg(FILE *file, unsigned offset, uint32_t value,
                        uint32_t field_mask)
{
   int r, f;

   for (r = 0; r < ARRAY_SIZE(sid_reg_table); r++) {
      const struct si_reg *reg = &sid_reg_table[r];
      const char *reg_name = sid_strings + reg->name_offset;

      if (reg->offset == offset) {
         bool first_field = true;

         print_spaces(file, INDENT_PKT);
         fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
                 reg_name);

         if (!reg->num_fields) {
            print_value(file, value, 32);
            return;
         }

         for (f = 0; f < reg->num_fields; f++) {
            const struct si_field *field = sid_fields_table + reg->fields_offset + f;
            const int *values_offsets = sid_strings_offsets + field->values_offset;
            uint32_t val = (value & field->mask) >>
                           (ffs(field->mask) - 1);

            if (!(field->mask & field_mask))
               continue;

            /* Indent the field. */
            if (!first_field)
               print_spaces(file,
                            INDENT_PKT + strlen(reg_name) + 4);

            /* Print the field. */
            fprintf(file, "%s = ", sid_strings + field->name_offset);

            if (val < field->num_values && values_offsets[val] >= 0)
               fprintf(file, "%s\n", sid_strings + values_offsets[val]);
            else
               print_value(file, val,
                           util_bitcount(field->mask));

            first_field = false;
         }
         return;
      }
   }
}
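/* Each register field above is isolated by masking, then shifting down by
 * the position of the mask's lowest set bit; ffs() (POSIX, <strings.h>)
 * returns that position 1-based, hence the - 1.  A standalone sketch:
 */
#include <assert.h>
#include <stdint.h>
#include <strings.h> /* ffs() */

static uint32_t demo_extract_field(uint32_t value, uint32_t mask)
{
   return (value & mask) >> (ffs(mask) - 1);
}

static void demo_field_extraction(void)
{
   /* hypothetical field occupying bits [11:4] */
   assert(demo_extract_field(0x00000ab0, 0x00000ff0) == 0xab);
}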
/**
 * Emit a quad (pass to next stage) with clipping.
 */
static inline void
clip_emit_quad(struct setup_context *setup, struct quad_header *quad)
{
   quad_clip(setup, quad);

   if (quad->inout.mask) {
      struct softpipe_context *sp = setup->softpipe;

#if DEBUG_FRAGS
      setup->numFragsEmitted += util_bitcount(quad->inout.mask);
#endif

      sp->quad.first->run( sp->quad.first, &quad, 1 );
   }
}
/**
 * Do the given flags have a Post Sync or LRI Post Sync operation?
 */
static enum pipe_control_flags
get_post_sync_flags(enum pipe_control_flags flags)
{
   flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
            PIPE_CONTROL_WRITE_DEPTH_COUNT |
            PIPE_CONTROL_WRITE_TIMESTAMP |
            PIPE_CONTROL_LRI_POST_SYNC_OP;

   /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
    * "LRI Post Sync Operation".  So more than one bit set would be illegal.
    */
   assert(util_bitcount(flags) <= 1);

   return flags;
}
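/* The assertion above enforces mutual exclusivity by counting bits.  An
 * equivalent branch-free check is the clear-lowest-bit trick; unlike the
 * power-of-two test, it also accepts zero bits set.  A sketch:
 */
#include <assert.h>
#include <stdint.h>

static int demo_at_most_one_bit(uint32_t x)
{
   /* clearing the lowest set bit must leave zero */
   return (x & (x - 1)) == 0;
}

static void demo_post_sync_check(void)
{
   assert(demo_at_most_one_bit(0x0));  /* no post-sync op: fine */
   assert(demo_at_most_one_bit(0x8));  /* one op: fine */
   assert(!demo_at_most_one_bit(0x9)); /* two ops: illegal combination */
}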
/**
 * Execute fragment shader for the four fragments in the quad.
 * \return TRUE if quad is alive, FALSE if all four pixels are killed
 */
static INLINE boolean
shade_quad(struct quad_stage *qs, struct quad_header *quad)
{
   struct softpipe_context *softpipe = qs->softpipe;
   struct tgsi_exec_machine *machine = softpipe->fs_machine;

   if (softpipe->active_statistics_queries) {
      softpipe->pipeline_statistics.ps_invocations +=
         util_bitcount(quad->inout.mask);
   }

   /* run shader */
   machine->flatshade_color = softpipe->rasterizer->flatshade ? TRUE : FALSE;
   return softpipe->fs_variant->run( softpipe->fs_variant, machine, quad );
}
static void compute_emit_cs(struct r600_context *ctx,
                            const uint *block_layout,
                            const uint *grid_layout)
{
   struct radeon_winsys_cs *cs = ctx->b.gfx.cs;
   unsigned i;

   /* make sure that the gfx ring is only one active */
   if (ctx->b.dma.cs && ctx->b.dma.cs->cdw) {
      ctx->b.dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
   }

   /* Initialize all the compute-related registers.
    *
    * See evergreen_init_atom_start_compute_cs() in this file for the list
    * of registers initialized by the start_compute_cs_cmd atom.
    */
   r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

   /* emit config state */
   if (ctx->b.chip_class == EVERGREEN)
      r600_emit_atom(ctx, &ctx->config_state.atom);

   ctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
   r600_flush_emit(ctx);

   /* Emit colorbuffers. */
   /* XXX support more than 8 colorbuffers (the offsets are not a multiple of 0x3C for CB8-11) */
   for (i = 0; i < 8 && i < ctx->framebuffer.state.nr_cbufs; i++) {
      struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
      unsigned reloc = radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx,
                                                 (struct r600_resource*)cb->base.texture,
                                                 RADEON_USAGE_READWRITE,
                                                 RADEON_PRIO_SHADER_RW_BUFFER);

      radeon_compute_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
      radeon_emit(cs, cb->cb_color_base);   /* R_028C60_CB_COLOR0_BASE */
      radeon_emit(cs, cb->cb_color_pitch);  /* R_028C64_CB_COLOR0_PITCH */
      radeon_emit(cs, cb->cb_color_slice);  /* R_028C68_CB_COLOR0_SLICE */
      radeon_emit(cs, cb->cb_color_view);   /* R_028C6C_CB_COLOR0_VIEW */
      radeon_emit(cs, cb->cb_color_info);   /* R_028C70_CB_COLOR0_INFO */
      radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
      radeon_emit(cs, cb->cb_color_dim);    /* R_028C78_CB_COLOR0_DIM */

      radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
      radeon_emit(cs, reloc);

      if (!ctx->keep_tiling_flags) {
         radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
         radeon_emit(cs, reloc);
      }

      radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
      radeon_emit(cs, reloc);
   }
   if (ctx->keep_tiling_flags) {
      for (; i < 8 ; i++) {
         radeon_compute_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
                                        S_028C70_FORMAT(V_028C70_COLOR_INVALID));
      }
      for (; i < 12; i++) {
         radeon_compute_set_context_reg(cs, R_028E50_CB_COLOR8_INFO + (i - 8) * 0x1C,
                                        S_028C70_FORMAT(V_028C70_COLOR_INVALID));
      }
   }

   /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
   radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
                                  ctx->compute_cb_target_mask);

   /* Emit vertex buffer state */
   ctx->cs_vertex_buffer_state.atom.num_dw =
      12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
   r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

   /* Emit constant buffer state */
   r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

   /* Emit sampler state */
   r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].states.atom);

   /* Emit sampler view (texture resource) state */
   r600_emit_atom(ctx, &ctx->samplers[PIPE_SHADER_COMPUTE].views.atom);

   /* Emit compute shader state */
   r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

   /* Emit dispatch state and dispatch packet */
   evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

   /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
   ctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE |
                   R600_CONTEXT_INV_VERTEX_CACHE |
                   R600_CONTEXT_INV_TEX_CACHE;
   r600_flush_emit(ctx);
   ctx->b.flags = 0;

   if (ctx->b.chip_class >= CAYMAN) {
      cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
      cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4);
      /* DEALLOC_STATE prevents the GPU from hanging when a
       * SURFACE_SYNC packet is emitted some time after a DISPATCH_DIRECT
       * with any of the CB*_DEST_BASE_ENA or DB_DEST_BASE_ENA bits set.
       */
      cs->buf[cs->cdw++] = PKT3C(PKT3_DEALLOC_STATE, 0, 0);
      cs->buf[cs->cdw++] = 0;
   }

#if 0
   COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
   for (i = 0; i < cs->cdw; i++) {
      COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, cs->buf[i]);
   }
#endif
}
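/* Each compute_emit_cs variant in this section sizes the vertex-buffer atom
 * the same way: 12 command-stream dwords per dirty buffer, i.e. 12 times
 * the popcount of the dirty mask.  A trivial sketch of that sizing rule
 * (assuming the 12-dword cost stated in the code above):
 */
#include <assert.h>
#include <stdint.h>

static unsigned demo_vb_atom_num_dw(uint32_t dirty_mask)
{
   return 12 * __builtin_popcount(dirty_mask);
}

static void demo_vb_sizing(void)
{
   assert(demo_vb_atom_num_dw(0x0) == 0);
   assert(demo_vb_atom_num_dw(0xb) == 36); /* buffers 0, 1 and 3 dirty */
}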
static void compute_emit_cs(struct r600_context *ctx,
                            const uint *block_layout,
                            const uint *grid_layout)
{
   struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
   unsigned flush_flags = 0;
   int i;
   struct r600_resource *onebo = NULL;
   struct evergreen_compute_resource *resources =
      ctx->cs_shader_state.shader->resources;

   /* make sure that the gfx ring is only one active */
   if (ctx->rings.dma.cs) {
      ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
   }

   /* Initialize all the compute-related registers.
    *
    * See evergreen_init_atom_start_compute_cs() in this file for the list
    * of registers initialized by the start_compute_cs_cmd atom.
    */
   r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

   ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
   r600_flush_emit(ctx);

   /* Emit colorbuffers. */
   for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
      struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
      unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
                                             (struct r600_resource*)cb->base.texture,
                                             RADEON_USAGE_READWRITE);

      r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
      r600_write_value(cs, cb->cb_color_base);   /* R_028C60_CB_COLOR0_BASE */
      r600_write_value(cs, cb->cb_color_pitch);  /* R_028C64_CB_COLOR0_PITCH */
      r600_write_value(cs, cb->cb_color_slice);  /* R_028C68_CB_COLOR0_SLICE */
      r600_write_value(cs, cb->cb_color_view);   /* R_028C6C_CB_COLOR0_VIEW */
      r600_write_value(cs, cb->cb_color_info);   /* R_028C70_CB_COLOR0_INFO */
      r600_write_value(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
      r600_write_value(cs, cb->cb_color_dim);    /* R_028C78_CB_COLOR0_DIM */

      r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
      r600_write_value(cs, reloc);

      if (!ctx->keep_tiling_flags) {
         r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
         r600_write_value(cs, reloc);
      }

      r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
      r600_write_value(cs, reloc);
   }

   /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
   r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
                                  ctx->compute_cb_target_mask);

   /* Emit vertex buffer state */
   ctx->cs_vertex_buffer_state.atom.num_dw =
      12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
   r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

   /* Emit compute shader state */
   r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

   for (i = 0; i < get_compute_resource_num(); i++) {
      if (resources[i].enabled) {
         int j;
         COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

         for (j = 0; j < resources[i].cs_end; j++) {
            if (resources[i].do_reloc[j]) {
               assert(resources[i].bo);
               evergreen_emit_ctx_reloc(ctx,
                                        resources[i].bo,
                                        resources[i].usage);
            }

            cs->buf[cs->cdw++] = resources[i].cs[j];
         }

         if (resources[i].bo) {
            onebo = resources[i].bo;
            evergreen_emit_ctx_reloc(ctx,
                                     resources[i].bo,
                                     resources[i].usage);

            /* special case for textures */
            if (resources[i].do_reloc[resources[i].cs_end] == 2) {
               evergreen_emit_ctx_reloc(ctx,
                                        resources[i].bo,
                                        resources[i].usage);
            }
         }
      }
   }

   /* Emit dispatch state and dispatch packet */
   evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

   /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
   ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
   r600_flush_emit(ctx);

#if 0
   COMPUTE_DBG("cdw: %i\n", cs->cdw);
   for (i = 0; i < cs->cdw; i++) {
      COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
   }
#endif

   flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
   if (ctx->keep_tiling_flags) {
      flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
   }

   ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags);

   ctx->pm4_dirty_cdwords = 0;
   ctx->flags = 0;

   COMPUTE_DBG("shader started\n");

   ctx->ws->buffer_wait(onebo->buf, 0);

   COMPUTE_DBG("...\n");

   ctx->streamout_start = TRUE;
   ctx->streamout_append_bitmask = ~0;
}
/**
 * Render a horizontal span of quads
 */
static void
flush_spans(struct setup_context *setup)
{
   const int step = MAX_QUADS;
   const int xleft0 = setup->span.left[0];
   const int xleft1 = setup->span.left[1];
   const int xright0 = setup->span.right[0];
   const int xright1 = setup->span.right[1];
   struct quad_stage *pipe = setup->softpipe->quad.first;

   const int minleft = block_x(MIN2(xleft0, xleft1));
   const int maxright = MAX2(xright0, xright1);
   int x;

   /* process quads in horizontal chunks of 16 */
   for (x = minleft; x < maxright; x += step) {
      unsigned skip_left0 = CLAMP(xleft0 - x, 0, step);
      unsigned skip_left1 = CLAMP(xleft1 - x, 0, step);
      unsigned skip_right0 = CLAMP(x + step - xright0, 0, step);
      unsigned skip_right1 = CLAMP(x + step - xright1, 0, step);
      unsigned lx = x;
      unsigned q = 0;

      unsigned skipmask_left0 = (1U << skip_left0) - 1U;
      unsigned skipmask_left1 = (1U << skip_left1) - 1U;

      /* These calculations fail when step == 32 and skip_right == 0. */
      unsigned skipmask_right0 = ~0U << (unsigned)(step - skip_right0);
      unsigned skipmask_right1 = ~0U << (unsigned)(step - skip_right1);

      unsigned mask0 = ~skipmask_left0 & ~skipmask_right0;
      unsigned mask1 = ~skipmask_left1 & ~skipmask_right1;

      if (mask0 | mask1) {
         do {
            unsigned quadmask = (mask0 & 3) | ((mask1 & 3) << 2);
            if (quadmask) {
               setup->quad[q].input.x0 = lx;
               setup->quad[q].input.y0 = setup->span.y;
               setup->quad[q].input.facing = setup->facing;
               setup->quad[q].inout.mask = quadmask;
               setup->quad_ptrs[q] = &setup->quad[q];
               q++;
#if DEBUG_FRAGS
               setup->numFragsEmitted += util_bitcount(quadmask);
#endif
            }
            mask0 >>= 2;
            mask1 >>= 2;
            lx += 2;
         } while (mask0 | mask1);

         pipe->run( pipe, setup->quad_ptrs, q );
      }
   }

   setup->span.y = 0;
   setup->span.right[0] = 0;
   setup->span.right[1] = 0;
   setup->span.left[0] = 1000000;     /* greater than right[0] */
   setup->span.left[1] = 1000000;     /* greater than right[1] */
}
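/* The comment above flags a real C pitfall: with step == 32 and
 * skip_right == 0, ~0U << 32 shifts a 32-bit value by its full width, which
 * is undefined behavior (here step is MAX_QUADS, which the comment above
 * puts at 16, so the case never triggers).  A guarded formulation, as a
 * sketch:
 */
#include <assert.h>
#include <stdint.h>

static uint32_t demo_skipmask_right(unsigned step, unsigned skip)
{
   assert(skip <= step && step <= 32);
   /* handle skip == 0 explicitly so the shift count never reaches 32 */
   return skip == 0 ? 0 : ~0U << (step - skip);
}

static void demo_skipmask(void)
{
   assert(demo_skipmask_right(16, 0) == 0);
   assert(demo_skipmask_right(16, 4) == 0xfffff000); /* bits 12..31 set */
   assert(demo_skipmask_right(32, 0) == 0);          /* safe at step == 32 */
}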
/**
 * Scan the given TGSI shader to collect information such as number of
 * registers used, special instructions used, etc.
 * \return info  the result of the scan
 */
void
tgsi_scan_shader(const struct tgsi_token *tokens,
                 struct tgsi_shader_info *info)
{
   uint procType, i;
   struct tgsi_parse_context parse;

   memset(info, 0, sizeof(*info));
   for (i = 0; i < TGSI_FILE_COUNT; i++)
      info->file_max[i] = -1;

   /**
    ** Setup to begin parsing input shader
    **/
   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
      debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
      return;
   }
   procType = parse.FullHeader.Processor.Processor;
   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
          procType == TGSI_PROCESSOR_VERTEX ||
          procType == TGSI_PROCESSOR_GEOMETRY ||
          procType == TGSI_PROCESSOR_COMPUTE);
   info->processor = procType;

   /**
    ** Loop over incoming program tokens/instructions
    */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {

      info->num_tokens++;

      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            const struct tgsi_full_instruction *fullinst
               = &parse.FullToken.FullInstruction;
            uint i;

            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
            info->opcode_count[fullinst->Instruction.Opcode]++;

            for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
               const struct tgsi_full_src_register *src =
                  &fullinst->Src[i];
               int ind = src->Register.Index;

               /* Mark which inputs are effectively used */
               if (src->Register.File == TGSI_FILE_INPUT) {
                  unsigned usage_mask;
                  usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
                  if (src->Register.Indirect) {
                     for (ind = 0; ind < info->num_inputs; ++ind) {
                        info->input_usage_mask[ind] |= usage_mask;
                     }
                  } else {
                     assert(ind >= 0);
                     assert(ind < PIPE_MAX_SHADER_INPUTS);
                     info->input_usage_mask[ind] |= usage_mask;
                  }

                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      src->Register.File == TGSI_FILE_INPUT &&
                      info->reads_position &&
                      src->Register.Index == 0 &&
                      (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
                     info->reads_z = TRUE;
                  }
               }

               /* check for indirect register reads */
               if (src->Register.Indirect) {
                  info->indirect_files |= (1 << src->Register.File);
               }
            }

            /* check for indirect register writes */
            for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
               const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
               if (dst->Register.Indirect) {
                  info->indirect_files |= (1 << dst->Register.File);
               }
            }

            info->num_instructions++;
         }
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         {
            const struct tgsi_full_declaration *fulldecl
               = &parse.FullToken.FullDeclaration;
            const uint file = fulldecl->Declaration.File;
            uint reg;
            for (reg = fulldecl->Range.First;
                 reg <= fulldecl->Range.Last;
                 reg++) {

               /* only first 32 regs will appear in this bitfield */
               info->file_mask[file] |= (1 << reg);
               info->file_count[file]++;
               info->file_max[file] = MAX2(info->file_max[file], (int) reg);

               if (file == TGSI_FILE_INPUT) {
                  info->input_semantic_name[reg] = (ubyte) fulldecl->Semantic.Name;
                  info->input_semantic_index[reg] = (ubyte) fulldecl->Semantic.Index;
                  info->input_interpolate[reg] = (ubyte) fulldecl->Interp.Interpolate;
                  info->input_centroid[reg] = (ubyte) fulldecl->Interp.Centroid;
                  info->input_cylindrical_wrap[reg] = (ubyte) fulldecl->Interp.CylindricalWrap;
                  info->num_inputs++;

                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
                     info->reads_position = TRUE;
               }
               else if (file == TGSI_FILE_SYSTEM_VALUE) {
                  unsigned index = fulldecl->Range.First;
                  unsigned semName = fulldecl->Semantic.Name;

                  info->system_value_semantic_name[index] = semName;
                  info->num_system_values = MAX2(info->num_system_values,
                                                 index + 1);

                  /*
                  info->system_value_semantic_name[info->num_system_values++] =
                     fulldecl->Semantic.Name;
                  */

                  if (fulldecl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
                     info->uses_instanceid = TRUE;
                  }
                  else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_VERTEXID) {
                     info->uses_vertexid = TRUE;
                  }
                  else if (fulldecl->Semantic.Name == TGSI_SEMANTIC_PRIMID) {
                     info->uses_primid = TRUE;
                  }
               }
               else if (file == TGSI_FILE_OUTPUT) {
                  info->output_semantic_name[reg] = (ubyte) fulldecl->Semantic.Name;
                  info->output_semantic_index[reg] = (ubyte) fulldecl->Semantic.Index;
                  info->num_outputs++;

                  if ((procType == TGSI_PROCESSOR_VERTEX ||
                       procType == TGSI_PROCESSOR_GEOMETRY) &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_CLIPDIST) {
                     info->num_written_clipdistance +=
                        util_bitcount(fulldecl->Declaration.UsageMask);
                  }
                  if ((procType == TGSI_PROCESSOR_VERTEX ||
                       procType == TGSI_PROCESSOR_GEOMETRY) &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_CULLDIST) {
                     info->num_written_culldistance +=
                        util_bitcount(fulldecl->Declaration.UsageMask);
                  }
                  /* extra info for special outputs */
                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION)
                     info->writes_z = TRUE;
                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_STENCIL)
                     info->writes_stencil = TRUE;
                  if (procType == TGSI_PROCESSOR_VERTEX &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG) {
                     info->writes_edgeflag = TRUE;
                  }
                  if (procType == TGSI_PROCESSOR_GEOMETRY &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
                     info->writes_viewport_index = TRUE;
                  }
                  if (procType == TGSI_PROCESSOR_GEOMETRY &&
                      fulldecl->Semantic.Name == TGSI_SEMANTIC_LAYER) {
                     info->writes_layer = TRUE;
                  }
               }
            }
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         {
            uint reg = info->immediate_count++;
            uint file = TGSI_FILE_IMMEDIATE;

            info->file_mask[file] |= (1 << reg);
            info->file_count[file]++;
            info->file_max[file] = MAX2(info->file_max[file], (int) reg);
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         {
            const struct tgsi_full_property *fullprop
               = &parse.FullToken.FullProperty;

            info->properties[info->num_properties].name =
               fullprop->Property.PropertyName;
            memcpy(info->properties[info->num_properties].data,
                   fullprop->u, 8 * sizeof(unsigned));

            ++info->num_properties;
         }
         break;

      default:
         assert( 0 );
      }
   }

   info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] ||
                      info->opcode_count[TGSI_OPCODE_KILP]);

   /* extract simple properties */
   for (i = 0; i < info->num_properties; ++i) {
      switch (info->properties[i].name) {
      case TGSI_PROPERTY_FS_COORD_ORIGIN:
         info->origin_lower_left = info->properties[i].data[0];
         break;
      case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
         info->pixel_center_integer = info->properties[i].data[0];
         break;
      case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
         info->color0_writes_all_cbufs = info->properties[i].data[0];
         break;
      case TGSI_PROPERTY_GS_INPUT_PRIM:
         /* The dimensions of the IN declaration in a geometry shader have
          * to be deduced from the type of the input primitive.
          */
         if (procType == TGSI_PROCESSOR_GEOMETRY) {
            unsigned input_primitive = info->properties[i].data[0];
            int num_verts = u_vertices_per_prim(input_primitive);
            unsigned j;
            info->file_count[TGSI_FILE_INPUT] = num_verts;
            info->file_max[TGSI_FILE_INPUT] =
               MAX2(info->file_max[TGSI_FILE_INPUT], num_verts - 1);
            for (j = 0; j < num_verts; ++j) {
               info->file_mask[TGSI_FILE_INPUT] |= (1 << j);
            }
         }
         break;
      default:
         ;
      }
   }

   tgsi_parse_free (&parse);
}
static void compute_emit_cs(struct r600_context *ctx,
                            const uint *block_layout,
                            const uint *grid_layout)
{
   struct radeon_winsys_cs *cs = ctx->cs;
   int i;
   struct r600_resource *onebo = NULL;
   struct r600_pipe_state *cb_state;
   struct evergreen_compute_resource *resources =
      ctx->cs_shader_state.shader->resources;

   /* Initialize all the compute-related registers.
    *
    * See evergreen_init_atom_start_compute_cs() in this file for the list
    * of registers initialized by the start_compute_cs_cmd atom.
    */
   r600_emit_atom(ctx, &ctx->start_compute_cs_cmd.atom);

   ctx->flags |= R600_CONTEXT_CB_FLUSH;
   r600_flush_emit(ctx);

   /* Emit cb_state */
   cb_state = ctx->states[R600_PIPE_STATE_FRAMEBUFFER];
   r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);

   /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
   r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
                                  ctx->compute_cb_target_mask);

   /* Emit vertex buffer state */
   ctx->cs_vertex_buffer_state.atom.num_dw =
      12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
   r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

   /* Emit compute shader state */
   r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

   for (i = 0; i < get_compute_resource_num(); i++) {
      if (resources[i].enabled) {
         int j;
         COMPUTE_DBG("resnum: %i, cdw: %i\n", i, cs->cdw);

         for (j = 0; j < resources[i].cs_end; j++) {
            if (resources[i].do_reloc[j]) {
               assert(resources[i].bo);
               evergreen_emit_ctx_reloc(ctx,
                                        resources[i].bo,
                                        resources[i].usage);
            }

            cs->buf[cs->cdw++] = resources[i].cs[j];
         }

         if (resources[i].bo) {
            onebo = resources[i].bo;
            evergreen_emit_ctx_reloc(ctx,
                                     resources[i].bo,
                                     resources[i].usage);

            /* special case for textures */
            if (resources[i].do_reloc[resources[i].cs_end] == 2) {
               evergreen_emit_ctx_reloc(ctx,
                                        resources[i].bo,
                                        resources[i].usage);
            }
         }
      }
   }

   /* Emit dispatch state and dispatch packet */
   evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

   /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
   ctx->flags |= R600_CONTEXT_CB_FLUSH;
   r600_flush_emit(ctx);

#if 0
   COMPUTE_DBG("cdw: %i\n", cs->cdw);
   for (i = 0; i < cs->cdw; i++) {
      COMPUTE_DBG("%4i : 0x%08X\n", i, ctx->cs->buf[i]);
   }
#endif

   ctx->ws->cs_flush(ctx->cs, RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE);

   ctx->pm4_dirty_cdwords = 0;
   ctx->flags = 0;

   COMPUTE_DBG("shader started\n");

   ctx->ws->buffer_wait(onebo->buf, 0);

   COMPUTE_DBG("...\n");

   ctx->streamout_start = TRUE;
   ctx->streamout_append_bitmask = ~0;
}
static void compute_emit_cs(struct r600_context *ctx,
                            const uint *block_layout,
                            const uint *grid_layout)
{
   struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
   unsigned flush_flags = 0;
   int i;

   /* make sure that the gfx ring is only one active */
   if (ctx->rings.dma.cs) {
      ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC);
   }

   /* Initialize all the compute-related registers.
    *
    * See evergreen_init_atom_start_compute_cs() in this file for the list
    * of registers initialized by the start_compute_cs_cmd atom.
    */
   r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd);

   ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV;
   r600_flush_emit(ctx);

   /* Emit colorbuffers. */
   for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) {
      struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i];
      unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx,
                                             (struct r600_resource*)cb->base.texture,
                                             RADEON_USAGE_READWRITE);

      r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7);
      r600_write_value(cs, cb->cb_color_base);   /* R_028C60_CB_COLOR0_BASE */
      r600_write_value(cs, cb->cb_color_pitch);  /* R_028C64_CB_COLOR0_PITCH */
      r600_write_value(cs, cb->cb_color_slice);  /* R_028C68_CB_COLOR0_SLICE */
      r600_write_value(cs, cb->cb_color_view);   /* R_028C6C_CB_COLOR0_VIEW */
      r600_write_value(cs, cb->cb_color_info);   /* R_028C70_CB_COLOR0_INFO */
      r600_write_value(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
      r600_write_value(cs, cb->cb_color_dim);    /* R_028C78_CB_COLOR0_DIM */

      r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C60_CB_COLOR0_BASE */
      r600_write_value(cs, reloc);

      if (!ctx->keep_tiling_flags) {
         r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */
         r600_write_value(cs, reloc);
      }

      r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C74_CB_COLOR0_ATTRIB */
      r600_write_value(cs, reloc);
   }

   /* Set CB_TARGET_MASK  XXX: Use cb_misc_state */
   r600_write_compute_context_reg(cs, R_028238_CB_TARGET_MASK,
                                  ctx->compute_cb_target_mask);

   /* Emit vertex buffer state */
   ctx->cs_vertex_buffer_state.atom.num_dw =
      12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
   r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);

   /* Emit constant buffer state */
   r600_emit_atom(ctx, &ctx->constbuf_state[PIPE_SHADER_COMPUTE].atom);

   /* Emit compute shader state */
   r600_emit_atom(ctx, &ctx->cs_shader_state.atom);

   /* Emit dispatch state and dispatch packet */
   evergreen_emit_direct_dispatch(ctx, block_layout, grid_layout);

   /* XXX evergreen_flush_emit() hardcodes the CP_COHER_SIZE to 0xffffffff */
   ctx->flags |= R600_CONTEXT_INVAL_READ_CACHES;
   r600_flush_emit(ctx);

#if 0
   COMPUTE_DBG(ctx->screen, "cdw: %i\n", cs->cdw);
   for (i = 0; i < cs->cdw; i++) {
      COMPUTE_DBG(ctx->screen, "%4i : 0x%08X\n", i, ctx->cs->buf[i]);
   }
#endif

   flush_flags = RADEON_FLUSH_ASYNC | RADEON_FLUSH_COMPUTE;
   if (ctx->keep_tiling_flags) {
      flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
   }

   ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags, ctx->screen->cs_count++);

   ctx->flags = 0;

   COMPUTE_DBG(ctx->screen, "shader started\n");
}
/**
 * Scan the given TGSI shader to collect information such as number of
 * registers used, special instructions used, etc.
 * \return info  the result of the scan
 */
void
tgsi_scan_shader(const struct tgsi_token *tokens,
                 struct tgsi_shader_info *info)
{
   uint procType, i;
   struct tgsi_parse_context parse;

   memset(info, 0, sizeof(*info));
   for (i = 0; i < TGSI_FILE_COUNT; i++)
      info->file_max[i] = -1;
   for (i = 0; i < Elements(info->const_file_max); i++)
      info->const_file_max[i] = -1;

   /**
    ** Setup to begin parsing input shader
    **/
   if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) {
      debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n");
      return;
   }
   procType = parse.FullHeader.Processor.Processor;
   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
          procType == TGSI_PROCESSOR_VERTEX ||
          procType == TGSI_PROCESSOR_GEOMETRY ||
          procType == TGSI_PROCESSOR_COMPUTE);
   info->processor = procType;

   /**
    ** Loop over incoming program tokens/instructions
    */
   while( !tgsi_parse_end_of_tokens( &parse ) ) {

      info->num_tokens++;

      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         {
            const struct tgsi_full_instruction *fullinst
               = &parse.FullToken.FullInstruction;
            uint i;

            assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST);
            info->opcode_count[fullinst->Instruction.Opcode]++;

            for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
               const struct tgsi_full_src_register *src =
                  &fullinst->Src[i];
               int ind = src->Register.Index;

               /* Mark which inputs are effectively used */
               if (src->Register.File == TGSI_FILE_INPUT) {
                  unsigned usage_mask;
                  usage_mask = tgsi_util_get_inst_usage_mask(fullinst, i);
                  if (src->Register.Indirect) {
                     for (ind = 0; ind < info->num_inputs; ++ind) {
                        info->input_usage_mask[ind] |= usage_mask;
                     }
                  } else {
                     assert(ind >= 0);
                     assert(ind < PIPE_MAX_SHADER_INPUTS);
                     info->input_usage_mask[ind] |= usage_mask;
                  }

                  if (procType == TGSI_PROCESSOR_FRAGMENT &&
                      info->reads_position &&
                      src->Register.Index == 0 &&
                      (src->Register.SwizzleX == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleY == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleZ == TGSI_SWIZZLE_Z ||
                       src->Register.SwizzleW == TGSI_SWIZZLE_Z)) {
                     info->reads_z = TRUE;
                  }
               }

               /* check for indirect register reads */
               if (src->Register.Indirect) {
                  info->indirect_files |= (1 << src->Register.File);
               }

               /* MSAA samplers */
               if (src->Register.File == TGSI_FILE_SAMPLER) {
                  assert(fullinst->Instruction.Texture);
                  assert(src->Register.Index < Elements(info->is_msaa_sampler));

                  if (fullinst->Instruction.Texture &&
                      (fullinst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
                       fullinst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA)) {
                     info->is_msaa_sampler[src->Register.Index] = TRUE;
                  }
               }
            }

            /* check for indirect register writes */
            for (i = 0; i < fullinst->Instruction.NumDstRegs; i++) {
               const struct tgsi_full_dst_register *dst = &fullinst->Dst[i];
               if (dst->Register.Indirect) {
                  info->indirect_files |= (1 << dst->Register.File);
               }
            }

            info->num_instructions++;
         }
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         {
            const struct tgsi_full_declaration *fulldecl
               = &parse.FullToken.FullDeclaration;
            const uint file = fulldecl->Declaration.File;
            uint reg;
            for (reg = fulldecl->Range.First;
                 reg <= fulldecl->Range.Last;
                 reg++) {
               unsigned semName = fulldecl->Semantic.Name;
               unsigned semIndex = fulldecl->Semantic.Index;

               /* only first 32 regs will appear in this bitfield */
               info->file_mask[file] |= (1 << reg);
               info->file_count[file]++;
               info->file_max[file] = MAX2(info->file_max[file], (int) reg);

               if (file == TGSI_FILE_CONSTANT) {
                  int buffer = 0;

                  if (fulldecl->Declaration.Dimension)
                     buffer = fulldecl->Dim.Index2D;

                  info->const_file_max[buffer] =
                     MAX2(info->const_file_max[buffer], (int)reg);
               }
               else if (file == TGSI_FILE_INPUT) {
                  info->input_semantic_name[reg] = (ubyte) semName;
                  info->input_semantic_index[reg] = (ubyte) semIndex;
                  info->input_interpolate[reg] = (ubyte)fulldecl->Interp.Interpolate;
                  info->input_interpolate_loc[reg] = (ubyte)fulldecl->Interp.Location;
                  info->input_cylindrical_wrap[reg] = (ubyte)fulldecl->Interp.CylindricalWrap;
                  info->num_inputs++;

                  if (semName == TGSI_SEMANTIC_PRIMID)
                     info->uses_primid = TRUE;
                  else if (procType == TGSI_PROCESSOR_FRAGMENT) {
                     if (semName == TGSI_SEMANTIC_POSITION)
                        info->reads_position = TRUE;
                     else if (semName == TGSI_SEMANTIC_FACE)
                        info->uses_frontface = TRUE;
                  }
               }
               else if (file == TGSI_FILE_SYSTEM_VALUE) {
                  unsigned index = fulldecl->Range.First;

                  info->system_value_semantic_name[index] = semName;
                  info->num_system_values = MAX2(info->num_system_values,
                                                 index + 1);

                  if (semName == TGSI_SEMANTIC_INSTANCEID) {
                     info->uses_instanceid = TRUE;
                  }
                  else if (semName == TGSI_SEMANTIC_VERTEXID) {
                     info->uses_vertexid = TRUE;
                  }
                  else if (semName == TGSI_SEMANTIC_PRIMID) {
                     info->uses_primid = TRUE;
                  }
               }
               else if (file == TGSI_FILE_OUTPUT) {
                  info->output_semantic_name[reg] = (ubyte) semName;
                  info->output_semantic_index[reg] = (ubyte) semIndex;
                  info->num_outputs++;

                  if (procType == TGSI_PROCESSOR_VERTEX ||
                      procType == TGSI_PROCESSOR_GEOMETRY) {
                     if (semName == TGSI_SEMANTIC_CLIPDIST) {
                        info->num_written_clipdistance +=
                           util_bitcount(fulldecl->Declaration.UsageMask);
                     }
                     else if (semName == TGSI_SEMANTIC_CULLDIST) {
                        info->num_written_culldistance +=
                           util_bitcount(fulldecl->Declaration.UsageMask);
                     }
                  }

                  if (procType == TGSI_PROCESSOR_FRAGMENT) {
                     if (semName == TGSI_SEMANTIC_POSITION) {
                        info->writes_z = TRUE;
                     }
                     else if (semName == TGSI_SEMANTIC_STENCIL) {
                        info->writes_stencil = TRUE;
                     }
                  }

                  if (procType == TGSI_PROCESSOR_VERTEX) {
                     if (semName == TGSI_SEMANTIC_EDGEFLAG) {
                        info->writes_edgeflag = TRUE;
                     }
                  }

                  if (procType == TGSI_PROCESSOR_GEOMETRY) {
                     if (semName == TGSI_SEMANTIC_VIEWPORT_INDEX) {
                        info->writes_viewport_index = TRUE;
                     }
                     else if (semName == TGSI_SEMANTIC_LAYER) {
                        info->writes_layer = TRUE;
                     }
                  }
               }
            }
         }
         break;

      case TGSI_TOKEN_TYPE_IMMEDIATE:
         {
            uint reg = info->immediate_count++;
            uint file = TGSI_FILE_IMMEDIATE;

            info->file_mask[file] |= (1 << reg);
            info->file_count[file]++;
            info->file_max[file] = MAX2(info->file_max[file], (int) reg);
         }
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         {
            const struct tgsi_full_property *fullprop
               = &parse.FullToken.FullProperty;
            unsigned name = fullprop->Property.PropertyName;

            assert(name < Elements(info->properties));
            info->properties[name] = fullprop->u[0].Data;
         }
         break;

      default:
         assert( 0 );
      }
   }

   info->uses_kill = (info->opcode_count[TGSI_OPCODE_KILL_IF] ||
                      info->opcode_count[TGSI_OPCODE_KILL]);

   /* The dimensions of the IN declaration in a geometry shader have
    * to be deduced from the type of the input primitive.
    */
   if (procType == TGSI_PROCESSOR_GEOMETRY) {
      unsigned input_primitive =
         info->properties[TGSI_PROPERTY_GS_INPUT_PRIM];
      int num_verts = u_vertices_per_prim(input_primitive);
      int j;
      info->file_count[TGSI_FILE_INPUT] = num_verts;
      info->file_max[TGSI_FILE_INPUT] =
         MAX2(info->file_max[TGSI_FILE_INPUT], num_verts - 1);
      for (j = 0; j < num_verts; ++j) {
         info->file_mask[TGSI_FILE_INPUT] |= (1 << j);
      }
   }

   tgsi_parse_free (&parse);
}
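/* num_written_clipdistance above accumulates the popcount of each CLIPDIST
 * declaration's UsageMask, i.e. the number of components actually written.
 * A tiny sketch of that accounting with hypothetical write masks:
 */
#include <assert.h>

static void demo_clipdist_accounting(void)
{
   /* hypothetical: CLIPDIST0 declared .xyzw, CLIPDIST1 declared .xy */
   unsigned usage_mask0 = 0xf; /* xyzw */
   unsigned usage_mask1 = 0x3; /* xy */

   unsigned num_written_clipdistance =
      __builtin_popcount(usage_mask0) + __builtin_popcount(usage_mask1);

   assert(num_written_clipdistance == 6); /* 6 of the 8 possible distances */
}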
void
si_init_config(struct radv_physical_device *physical_device,
               struct radv_cmd_buffer *cmd_buffer)
{
   unsigned num_rb = MIN2(physical_device->rad_info.num_render_backends, 16);
   unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
   unsigned raster_config, raster_config_1;
   int i;
   struct radeon_winsys_cs *cs = cmd_buffer->cs;

   radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
   radeon_emit(cs, CONTEXT_CONTROL_LOAD_ENABLE(1));
   radeon_emit(cs, CONTEXT_CONTROL_SHADOW_ENABLE(1));

   radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

   /* FIXME calculate these values somehow ??? */
   radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
   radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
   radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);

   radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
   radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

   radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
   radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
   if (physical_device->rad_info.chip_class < CIK)
      radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
                            S_008A14_CLIP_VTX_REORDER_ENA(1));

   radeon_set_context_reg(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
   radeon_set_context_reg(cs, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);

   radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

   for (i = 0; i < 16; i++) {
      radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
      radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
   }

   switch (physical_device->rad_info.family) {
   case CHIP_TAHITI:
   case CHIP_PITCAIRN:
      raster_config = 0x2a00126a;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_VERDE:
      raster_config = 0x0000124a;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_OLAND:
      raster_config = 0x00000082;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_HAINAN:
      raster_config = 0x00000000;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_BONAIRE:
      raster_config = 0x16000012;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_HAWAII:
      raster_config = 0x3a00161a;
      raster_config_1 = 0x0000002e;
      break;
   case CHIP_FIJI:
      if (physical_device->rad_info.cik_macrotile_mode_array[0] == 0x000000e8) {
         /* old kernels with old tiling config */
         raster_config = 0x16000012;
         raster_config_1 = 0x0000002a;
      } else {
         raster_config = 0x3a00161a;
         raster_config_1 = 0x0000002e;
      }
      break;
   case CHIP_POLARIS10:
      raster_config = 0x16000012;
      raster_config_1 = 0x0000002a;
      break;
   case CHIP_POLARIS11:
      raster_config = 0x16000012;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_TONGA:
      raster_config = 0x16000012;
      raster_config_1 = 0x0000002a;
      break;
   case CHIP_ICELAND:
      if (num_rb == 1)
         raster_config = 0x00000000;
      else
         raster_config = 0x00000002;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_CARRIZO:
      raster_config = 0x00000002;
      raster_config_1 = 0x00000000;
      break;
   case CHIP_KAVERI:
      /* KV should be 0x00000002, but that causes problems with radeon */
      raster_config = 0x00000000; /* 0x00000002 */
      raster_config_1 = 0x00000000;
      break;
   case CHIP_KABINI:
   case CHIP_MULLINS:
   case CHIP_STONEY:
      raster_config = 0x00000000;
      raster_config_1 = 0x00000000;
      break;
   default:
      fprintf(stderr,
              "radeonsi: Unknown GPU, using 0 for raster_config\n");
      raster_config = 0x00000000;
      raster_config_1 = 0x00000000;
      break;
   }

   /* Always use the default config when all backends are enabled
    * (or when we failed to determine the enabled backends).
    */
   if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
      radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
                             raster_config);
      if (physical_device->rad_info.chip_class >= CIK)
         radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
                                raster_config_1);
   } else {
      si_write_harvested_raster_configs(physical_device, cs,
                                        raster_config,
                                        raster_config_1);
   }

   radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
                          S_028204_WINDOW_OFFSET_DISABLE(1));
   radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
                          S_028240_WINDOW_OFFSET_DISABLE(1));
   radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
                          S_028244_BR_X(16384) | S_028244_BR_Y(16384));
   radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
   radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
                          S_028034_BR_X(16384) | S_028034_BR_Y(16384));

   radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
   radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
   /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
   radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
   radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);

   radeon_set_context_reg(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
   radeon_set_context_reg(cs, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
   radeon_set_context_reg(cs, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
   radeon_set_context_reg(cs, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));

   radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
   radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
   radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
   radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
                          S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                          S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));

   radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
   radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
   radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);

   if (physical_device->rad_info.chip_class >= CIK) {
      radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
      radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
      radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

      if (physical_device->rad_info.num_good_compute_units /
          (physical_device->rad_info.max_se * physical_device->rad_info.max_sh_per_se) <= 4) {
         /* Too few available compute units per SH. Disallowing
          * VS to run on CU0 could hurt us more than late VS
          * allocation would help.
          *
          * LATE_ALLOC_VS = 2 is the highest safe number.
          */
         radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
         radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
         radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
      } else {
         /* Set LATE_ALLOC_VS == 31. It should be less than
          * the number of scratch waves. Limitations:
          *  - VS can't execute on CU0.
          *  - If HS writes outputs to LDS, LS can't execute on CU0.
          */
         radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
         radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
         radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
      }

      radeon_set_sh_reg(cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
   }

   if (physical_device->rad_info.chip_class >= VI) {
      radeon_set_context_reg(cs, R_028424_CB_DCC_CONTROL,
                             S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
                             S_028424_OVERWRITE_COMBINER_WATERMARK(4));
      radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
      radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
      radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
                             S_028B50_ACCUM_ISOLINE(32) |
                             S_028B50_ACCUM_TRI(11) |
                             S_028B50_ACCUM_QUAD(11) |
                             S_028B50_DONUT_SPLIT(16));
   } else {
      radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
      radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
   }

   if (physical_device->rad_info.family == CHIP_STONEY)
      radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);

   si_init_compute(physical_device, cs);
}
/*
 * Create a new X/Mesa visual.
 * Input:  display - X11 display
 *         visinfo - an XVisualInfo pointer
 *         rgb_flag - GL_TRUE = RGB mode,
 *                    GL_FALSE = color index mode
 *         alpha_flag - alpha buffer requested?
 *         db_flag - GL_TRUE = double-buffered,
 *                   GL_FALSE = single buffered
 *         stereo_flag - stereo visual?
 *         ximage_flag - GL_TRUE = use an XImage for back buffer,
 *                       GL_FALSE = use an off-screen pixmap for back buffer
 *         depth_size - requested bits/depth values, or zero
 *         stencil_size - requested bits/stencil values, or zero
 *         accum_red_size - requested bits/red accum values, or zero
 *         accum_green_size - requested bits/green accum values, or zero
 *         accum_blue_size - requested bits/blue accum values, or zero
 *         accum_alpha_size - requested bits/alpha accum values, or zero
 *         num_samples - number of samples/pixel if multisampling, or zero
 *         level - visual level, usually 0
 *         visualCaveat - ala the GLX extension, usually GLX_NONE
 * Return:  a new XMesaVisual or 0 if error.
 */
PUBLIC
XMesaVisual XMesaCreateVisual( Display *display,
                               XVisualInfo * visinfo,
                               GLboolean rgb_flag,
                               GLboolean alpha_flag,
                               GLboolean db_flag,
                               GLboolean stereo_flag,
                               GLboolean ximage_flag,
                               GLint depth_size,
                               GLint stencil_size,
                               GLint accum_red_size,
                               GLint accum_green_size,
                               GLint accum_blue_size,
                               GLint accum_alpha_size,
                               GLint num_samples,
                               GLint level,
                               GLint visualCaveat )
{
   XMesaDisplay xmdpy = xmesa_init_display(display);
   XMesaVisual v;
   GLint red_bits, green_bits, blue_bits, alpha_bits;

   if (!xmdpy)
      return NULL;

   /* For debugging only */
   if (getenv("MESA_XSYNC")) {
      /* This makes debugging X easier.
       * In your debugger, set a breakpoint on _XError to stop when an
       * X protocol error is generated.
       */
      XSynchronize( display, 1 );
   }

   v = (XMesaVisual) CALLOC_STRUCT(xmesa_visual);
   if (!v) {
      return NULL;
   }

   v->display = display;

   /* Save a copy of the XVisualInfo struct because the user may Xfree()
    * the struct but we may need some of the information contained in it
    * at a later time.
    */
   v->visinfo = malloc(sizeof(*visinfo));
   if (!v->visinfo) {
      free(v);
      return NULL;
   }
   memcpy(v->visinfo, visinfo, sizeof(*visinfo));

   v->ximage_flag = ximage_flag;

   v->mesa_visual.redMask = visinfo->red_mask;
   v->mesa_visual.greenMask = visinfo->green_mask;
   v->mesa_visual.blueMask = visinfo->blue_mask;
   v->visualID = visinfo->visualid;
   v->screen = visinfo->screen;

#if !(defined(__cplusplus) || defined(c_plusplus))
   v->visualType = xmesa_convert_from_x_visual_type(visinfo->class);
#else
   v->visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
#endif

   v->mesa_visual.visualRating = visualCaveat;

   if (alpha_flag)
      v->mesa_visual.alphaBits = 8;

   (void) initialize_visual_and_buffer( v, NULL, rgb_flag, 0, 0 );

   {
      const int xclass = v->visualType;
      if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
         red_bits   = util_bitcount(GET_REDMASK(v));
         green_bits = util_bitcount(GET_GREENMASK(v));
         blue_bits  = util_bitcount(GET_BLUEMASK(v));
      }
      else {
         /* this is an approximation */
         int depth;
         depth = v->visinfo->depth;
         red_bits = depth / 3;
         depth -= red_bits;
         green_bits = depth / 2;
         depth -= green_bits;
         blue_bits = depth;
         alpha_bits = 0;
         assert( red_bits + green_bits + blue_bits == v->visinfo->depth );
      }
      alpha_bits = v->mesa_visual.alphaBits;
   }

   /* initialize visual */
   {
      struct gl_config *vis = &v->mesa_visual;

      vis->rgbMode          = GL_TRUE;
      vis->doubleBufferMode = db_flag;
      vis->stereoMode       = stereo_flag;

      vis->redBits          = red_bits;
      vis->greenBits        = green_bits;
      vis->blueBits         = blue_bits;
      vis->alphaBits        = alpha_bits;
      vis->rgbBits          = red_bits + green_bits + blue_bits;

      vis->indexBits      = 0;
      vis->depthBits      = depth_size;
      vis->stencilBits    = stencil_size;

      vis->accumRedBits   = accum_red_size;
      vis->accumGreenBits = accum_green_size;
      vis->accumBlueBits  = accum_blue_size;
      vis->accumAlphaBits = accum_alpha_size;

      vis->haveAccumBuffer   = accum_red_size > 0;
      vis->haveDepthBuffer   = depth_size > 0;
      vis->haveStencilBuffer = stencil_size > 0;

      vis->numAuxBuffers = 0;
      vis->level = 0;
      vis->sampleBuffers = num_samples > 1;
      vis->samples = num_samples;
   }

   v->stvis.buffer_mask = ST_ATTACHMENT_FRONT_LEFT_MASK;
   if (db_flag)
      v->stvis.buffer_mask |= ST_ATTACHMENT_BACK_LEFT_MASK;
   if (stereo_flag) {
      v->stvis.buffer_mask |= ST_ATTACHMENT_FRONT_RIGHT_MASK;
      if (db_flag)
         v->stvis.buffer_mask |= ST_ATTACHMENT_BACK_RIGHT_MASK;
   }

   v->stvis.color_format = choose_pixel_format(v);

   /* Check format support at requested num_samples (for multisample) */
   if (!xmdpy->screen->is_format_supported(xmdpy->screen,
                                           v->stvis.color_format,
                                           PIPE_TEXTURE_2D, num_samples,
                                           num_samples,
                                           PIPE_BIND_RENDER_TARGET))
      v->stvis.color_format = PIPE_FORMAT_NONE;

   if (v->stvis.color_format == PIPE_FORMAT_NONE) {
      free(v->visinfo);
      free(v);
      return NULL;
   }

   v->stvis.depth_stencil_format =
      choose_depth_stencil_format(xmdpy, depth_size, stencil_size,
                                  num_samples);

   v->stvis.accum_format = (accum_red_size + accum_green_size +
                            accum_blue_size + accum_alpha_size) ?
      PIPE_FORMAT_R16G16B16A16_SNORM : PIPE_FORMAT_NONE;

   v->stvis.samples = num_samples;
   v->stvis.render_buffer = ST_ATTACHMENT_INVALID;

   /* XXX minor hack */
   v->mesa_visual.level = level;
   return v;
}
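/* For TrueColor/DirectColor visuals above, bits per channel fall directly
 * out of popcounts of the X11 channel masks.  A sketch with two common
 * layouts:
 */
#include <assert.h>
#include <stdint.h>

static void demo_channel_bits(void)
{
   /* 24-bit visual: masks 0x00ff0000 / 0x0000ff00 / 0x000000ff -> 8/8/8 */
   assert(__builtin_popcount(0x00ff0000u) == 8);
   assert(__builtin_popcount(0x0000ff00u) == 8);
   assert(__builtin_popcount(0x000000ffu) == 8);

   /* RGB565: masks 0xf800 / 0x07e0 / 0x001f -> 5/6/5 */
   assert(__builtin_popcount(0xf800u) == 5);
   assert(__builtin_popcount(0x07e0u) == 6);
   assert(__builtin_popcount(0x001fu) == 5);
}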