void brw_set_default_compression_control(struct brw_compile *p, enum brw_compression compression_control) { struct brw_context *brw = p->brw; p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); if (brw->gen >= 6) { /* Since we don't use the SIMD32 support in gen6, we translate * the pre-gen6 compression control here. */ switch (compression_control) { case BRW_COMPRESSION_NONE: /* This is the "use the first set of bits of dmask/vmask/arf * according to execsize" option. */ brw_inst_set_qtr_control(brw, p->current, GEN6_COMPRESSION_1Q); break; case BRW_COMPRESSION_2NDHALF: /* For SIMD8, this is "use the second set of 8 bits." */ brw_inst_set_qtr_control(brw, p->current, GEN6_COMPRESSION_2Q); break; case BRW_COMPRESSION_COMPRESSED: /* For SIMD16 instruction compression, use the first set of 16 bits * since we don't do SIMD32 dispatch. */ brw_inst_set_qtr_control(brw, p->current, GEN6_COMPRESSION_1H); break; default: unreachable("not reached"); } } else { brw_inst_set_qtr_control(brw, p->current, compression_control); } }
/* Look up a pipeline-cache entry by its SHA-1 key using open addressing
 * with linear probing.  Caller must hold the cache lock (hence "unlocked"
 * in the name: this helper does no locking itself).
 *
 * Returns the matching entry, or NULL if the key is not present.
 */
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   /* Guard before deriving the probe mask: table_size - 1 would wrap to
    * 0xffffffff for an empty table.
    */
   if (cache->table_size == 0)
      return NULL;

   const uint32_t mask = cache->table_size - 1;

   /* Use the first four bytes of the SHA-1 as the starting probe index.
    * memcpy avoids the unaligned-access/strict-aliasing hazards of casting
    * the byte pointer to uint32_t *.
    */
   uint32_t start;
   memcpy(&start, sha1, sizeof(start));

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      /* An empty slot terminates the probe sequence: the key is absent. */
      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0)
         return entry;
   }

   unreachable("hash table should never be full");
}
/* Append tree node T to the global atomic_trees vector, growing the
 * backing storage as needed.  NOTE(review): the entire body is compiled
 * out via "#if 0", so as built this function is a no-op.
 */
static inline void
atomic_trees_add (tree t)
{
#if 0
  /*assert (TREE_CODE (t) == EMPTY_MARK, "only EMPTY_MARK nodes can be added to atomic_tres");*/

  /* Lazily allocate the initial backing array on first use.  */
  if (atomic_trees_size == 0)
    {
      const size_t initial_size = 32;
      atomic_trees = (tree *) malloc (initial_size * sizeof (tree));
      atomic_trees_size = initial_size;
    }

  /* Double the capacity when the array is full.  */
  if (atomic_trees_idx == atomic_trees_size)
    {
      atomic_trees
        = (tree *) realloc (atomic_trees, 2 * atomic_trees_size * sizeof (tree));
      atomic_trees_size *= 2;
    }

  /* Most likely we don't need to search anything.  */

  {/* For testing purposes only.  */
    /* O(n) duplicate check; aborts on a double insert.  */
    size_t i;
    for (i = 0; i < atomic_trees_idx; i++)
      if (atomic_trees[i] == t)
        unreachable ("double insert of node in atomic_trees");
  }

  atomic_trees[atomic_trees_idx++] = t;
  //printf ("-- atomix_idx = %i\n", (int)atomic_trees_idx);
#endif
}
bool brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) { switch (type) { case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: reg->d = -reg->d; return true; case BRW_REGISTER_TYPE_W: case BRW_REGISTER_TYPE_UW: reg->d = -(int16_t)reg->ud; return true; case BRW_REGISTER_TYPE_F: reg->f = -reg->f; return true; case BRW_REGISTER_TYPE_VF: reg->ud ^= 0x80808080; return true; case BRW_REGISTER_TYPE_DF: reg->df = -reg->df; return true; case BRW_REGISTER_TYPE_UB: case BRW_REGISTER_TYPE_B: unreachable("no UB/B immediates"); case BRW_REGISTER_TYPE_UV: case BRW_REGISTER_TYPE_V: assert(!"unimplemented: negate UV/V immediate"); case BRW_REGISTER_TYPE_UQ: case BRW_REGISTER_TYPE_Q: assert(!"unimplemented: negate UQ/Q immediate"); case BRW_REGISTER_TYPE_HF: assert(!"unimplemented: negate HF immediate"); } return false; }
/* Translate a GL depth/stencil comparison function to the hardware
 * COMPAREFUNCTION encoding.  Aborts on an invalid enum.
 */
int
intel_translate_compare_func(GLenum func)
{
   if (func == GL_NEVER)
      return BRW_COMPAREFUNCTION_NEVER;
   if (func == GL_LESS)
      return BRW_COMPAREFUNCTION_LESS;
   if (func == GL_LEQUAL)
      return BRW_COMPAREFUNCTION_LEQUAL;
   if (func == GL_GREATER)
      return BRW_COMPAREFUNCTION_GREATER;
   if (func == GL_GEQUAL)
      return BRW_COMPAREFUNCTION_GEQUAL;
   if (func == GL_NOTEQUAL)
      return BRW_COMPAREFUNCTION_NOTEQUAL;
   if (func == GL_EQUAL)
      return BRW_COMPAREFUNCTION_EQUAL;
   if (func == GL_ALWAYS)
      return BRW_COMPAREFUNCTION_ALWAYS;

   unreachable("Invalid comparison function.");
}
static uint32_t get_qpitch(const struct isl_surf *surf) { switch (surf->dim_layout) { default: unreachable("Bad isl_surf_dim"); case ISL_DIM_LAYOUT_GEN4_2D: case ISL_DIM_LAYOUT_GEN4_3D: if (GEN_GEN >= 9) { return isl_surf_get_array_pitch_el_rows(surf); } else { /* From the Broadwell PRM for RENDER_SURFACE_STATE.QPitch * * "This field must be set to an integer multiple of the Surface * Vertical Alignment. For compressed textures (BC*, FXT1, * ETC*, and EAC* Surface Formats), this field is in units of * rows in the uncompressed surface, and must be set to an * integer multiple of the vertical alignment parameter "j" * defined in the Common Surface Formats section." */ return isl_surf_get_array_pitch_sa_rows(surf); } case ISL_DIM_LAYOUT_GEN9_1D: /* QPitch is usually expressed as rows of surface elements (where * a surface element is an compression block or a single surface * sample). Skylake 1D is an outlier. * * From the Skylake BSpec >> Memory Views >> Common Surface * Formats >> Surface Layout and Tiling >> 1D Surfaces: * * Surface QPitch specifies the distance in pixels between array * slices. */ return isl_surf_get_array_pitch_el(surf); } }
/* Emit vec4 IR for a NIR intrinsic in the geometry-shader stage.
 * GS-specific intrinsics are handled here; anything else falls through to
 * the generic vec4_visitor implementation.
 */
void
vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   dst_reg dest;
   src_reg src;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input: {
      /* The EmitNoIndirectInput flag guarantees our vertex index will
       * be constant.  We should handle indirects someday.
       */
      nir_const_value *vertex = nir_src_as_const_value(instr->src[0]);
      nir_const_value *offset = nir_src_as_const_value(instr->src[1]);

      /* Make up a type...we have no way of knowing... */
      const glsl_type *const type = glsl_type::ivec(instr->num_components);

      /* Attributes are laid out one full VUE per input vertex. */
      src = src_reg(ATTR, BRW_VARYING_SLOT_COUNT * vertex->u32[0] +
                    instr->const_index[0] + offset->u32[0], type);

      /* gl_PointSize is passed in the .w component of the VUE header */
      if (instr->const_index[0] == VARYING_SLOT_PSIZ)
         src.swizzle = BRW_SWIZZLE_WWWW;

      dest = get_nir_dest(instr->dest, src.type);
      dest.writemask = brw_writemask_for_size(instr->num_components);
      emit(MOV(dest, src));
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should have produced per_vertex intrinsics");

   case nir_intrinsic_emit_vertex_with_counter: {
      /* Update the running vertex count, then emit the vertex on the
       * requested stream.
       */
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      int stream_id = instr->const_index[0];
      gs_emit_vertex(stream_id);
      break;
   }

   case nir_intrinsic_end_primitive_with_counter:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      gs_end_primitive();
      break;

   case nir_intrinsic_set_vertex_count:
      this->vertex_count =
         retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
      break;

   case nir_intrinsic_load_primitive_id:
      assert(gs_prog_data->include_primitive_id);
      dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      /* Reads the primitive ID from g1.0 — assumes the payload was set up
       * with include_primitive_id (asserted above).
       */
      emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
      break;

   case nir_intrinsic_load_invocation_id: {
      src_reg invocation_id =
         src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
      assert(invocation_id.file != BAD_FILE);
      dest = get_nir_dest(instr->dest, invocation_id.type);
      emit(MOV(dest, invocation_id));
      break;
   }

   default:
      /* Not GS-specific: defer to the base class. */
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}
/* Walk a CF-node list, removing dead control flow.  Sets *list_ends_in_jump
 * when the list is terminated by a jump (so anything after it is dead).
 * Returns true if any progress was made; a "return true" mid-walk means the
 * list was mutated and the caller must restart.
 */
static bool
dead_cf_list(struct exec_list *list, bool *list_ends_in_jump)
{
   bool progress = false;
   *list_ends_in_jump = false;

   nir_cf_node *prev = NULL;

   foreach_list_typed(nir_cf_node, cur, node, list) {
      switch (cur->type) {
      case nir_cf_node_block: {
         nir_block *block = nir_cf_node_as_block(cur);
         if (dead_cf_block(block)) {
            /* We just deleted the if or loop after this block, so we may have
             * deleted the block before or after it -- which one is an
             * implementation detail. Therefore, to recover the place we were
             * at, we have to use the previous cf_node.
             */
            if (prev) {
               cur = nir_cf_node_next(prev);
            } else {
               cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
                                    node);
            }

            block = nir_cf_node_as_block(cur);

            progress = true;
         }

         if (ends_in_jump(block)) {
            *list_ends_in_jump = true;

            /* Everything after an unconditional jump is dead code. */
            if (!exec_node_is_tail_sentinel(cur->node.next)) {
               remove_after_cf_node(cur);
               return true;
            }
         }

         break;
      }

      case nir_cf_node_if: {
         nir_if *if_stmt = nir_cf_node_as_if(cur);
         bool then_ends_in_jump, else_ends_in_jump;
         progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
         progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);

         /* If both branches jump away, nothing after the if can execute. */
         if (then_ends_in_jump && else_ends_in_jump) {
            *list_ends_in_jump = true;
            nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
            if (!exec_list_is_empty(&next->instr_list) ||
                !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
               remove_after_cf_node(cur);
               return true;
            }
         }

         break;
      }

      case nir_cf_node_loop: {
         nir_loop *loop = nir_cf_node_as_loop(cur);
         bool dummy;
         /* A jump inside a loop body does not terminate the outer list. */
         progress |= dead_cf_list(&loop->body, &dummy);

         break;
      }

      default:
         unreachable("unknown cf node type");
      }

      prev = cur;
   }

   return progress;
}
/* Compile and upload a fixed-function geometry-shader program for the
 * given key.  On gen6 this generates the transform-feedback ("Stream Out")
 * program; on gen4-5 it generates a primitive-decomposition program.
 */
void
brw_codegen_ff_gs_prog(struct brw_context *brw,
                       struct brw_ff_gs_prog_key *key)
{
   struct brw_ff_gs_compile c;
   const GLuint *program;
   void *mem_ctx;
   GLuint program_size;

   memset(&c, 0, sizeof(c));

   c.key = *key;
   /* The VUE map comes from the currently-bound vertex shader. */
   c.vue_map = brw->vs.prog_data->base.vue_map;
   /* Two VUE slots fit per register; round up. */
   c.nr_regs = (c.vue_map.num_slots + 1)/2;

   mem_ctx = ralloc_context(NULL);

   /* Begin the compilation:
    */
   brw_init_codegen(brw->intelScreen->devinfo, &c.func, mem_ctx);

   c.func.single_program_flow = 1;

   /* For some reason the thread is spawned with only 4 channels
    * unmasked.
    */
   brw_set_default_mask_control(&c.func, BRW_MASK_DISABLE);

   if (brw->gen >= 6) {
      unsigned num_verts;
      bool check_edge_flag;
      /* On Sandybridge, we use the GS for implementing transform feedback
       * (called "Stream Out" in the PRM).
       */
      switch (key->primitive) {
      case _3DPRIM_POINTLIST:
         num_verts = 1;
         check_edge_flag = false;
         break;
      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         num_verts = 2;
         check_edge_flag = false;
         break;
      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
         num_verts = 3;
         check_edge_flag = false;
         break;
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         /* Quads/polygons are fed to SOL as triangles, honoring edge flags. */
         num_verts = 3;
         check_edge_flag = true;
         break;
      default:
         unreachable("Unexpected primitive type in Gen6 SOL program.");
      }
      gen6_sol_program(&c, key, num_verts, check_edge_flag);
   } else {
      /* On Gen4-5, we use the GS to decompose certain types of primitives.
       * Note that primitives which don't require a GS program have already
       * been weeded out by now.
       */
      switch (key->primitive) {
      case _3DPRIM_QUADLIST:
         brw_ff_gs_quads( &c, key );
         break;
      case _3DPRIM_QUADSTRIP:
         brw_ff_gs_quad_strip( &c, key );
         break;
      case _3DPRIM_LINELOOP:
         brw_ff_gs_lines( &c );
         break;
      default:
         /* Unhandled primitive: nothing to compile or upload. */
         ralloc_free(mem_ctx);
         return;
      }
   }

   brw_compact_instructions(&c.func, 0, 0, NULL);

   /* get the program
    */
   program = brw_get_program(&c.func, &program_size);

   if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
      fprintf(stderr, "gs:\n");
      brw_disassemble(brw->intelScreen->devinfo, c.func.store,
                      0, program_size, stderr);
      fprintf(stderr, "\n");
   }

   /* Cache the compiled program; updates brw->ff_gs.prog_offset/prog_data. */
   brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
                    &c.key, sizeof(c.key),
                    program, program_size,
                    &c.prog_data, sizeof(c.prog_data),
                    &brw->ff_gs.prog_offset, &brw->ff_gs.prog_data);
   ralloc_free(mem_ctx);
}
/* Stub: must never be called in this build. */
int
anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle)
{
   unreachable("Unused");
}
/* Stub: must never be called in this build. */
int
anv_gem_destroy_context(struct anv_device *device, int context)
{
   unreachable("Unused");
}
/* Stub: must never be called in this build. */
bool
anv_gem_get_bit6_swizzle(int fd, uint32_t tiling)
{
   unreachable("Unused");
}
/* Compute the Y-flip and pixel-center adjustments needed to reconcile the
 * fragment shader's requested gl_FragCoord conventions with what the
 * driver supports, then emit the adjustment (via emit_wpos_adjustment).
 */
static void
lower_fragcoord(lower_wpos_ytransform_state *state, nir_intrinsic_instr *intr)
{
   const nir_lower_wpos_ytransform_options *options = state->options;
   nir_variable *fragcoord = intr->variables[0]->var;
   /* adjX/adjY accumulate the pixel-center bias; adjY is split by whether
    * inversion happens (see the worked examples below).
    */
   float adjX = 0.0f;
   float adjY[2] = { 0.0f, 0.0f };
   bool invert = false;

   /* Based on logic in emit_wpos():
    *
    * Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
    *
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    *
    * center shift only:
    * i -> h: +0.5
    * h -> i: -0.5
    *
    * inversion only:
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    *
    * inversion and center shift:
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    */
   if (fragcoord->data.origin_upper_left) {
      /* Fragment shader wants origin in upper-left */
      if (options->fs_coord_origin_upper_left) {
         /* the driver supports upper-left origin */
      } else if (options->fs_coord_origin_lower_left) {
         /* the driver supports lower-left origin, need to invert Y */
         invert = true;
      } else {
         unreachable("invalid options");
      }
   } else {
      /* Fragment shader wants origin in lower-left */
      if (options->fs_coord_origin_lower_left) {
         /* the driver supports lower-left origin */
      } else if (options->fs_coord_origin_upper_left) {
         /* the driver supports upper-left origin, need to invert Y */
         invert = true;
      } else {
         unreachable("invalid options");
      }
   }

   if (fragcoord->data.pixel_center_integer) {
      /* Fragment shader wants pixel center integer */
      if (options->fs_coord_pixel_center_integer) {
         /* the driver supports pixel center integer */
         adjY[1] = 1.0f;
      } else if (options->fs_coord_pixel_center_half_integer) {
         /* the driver supports pixel center half integer, need to bias X,Y */
         adjX = -0.5f;
         adjY[0] = -0.5f;
         adjY[1] = 0.5f;
      } else {
         unreachable("invalid options");
      }
   } else {
      /* Fragment shader wants pixel center half integer */
      if (options->fs_coord_pixel_center_half_integer) {
         /* the driver supports pixel center half integer */
      } else if (options->fs_coord_pixel_center_integer) {
         /* the driver supports pixel center integer, need to bias X,Y */
         adjX = adjY[0] = adjY[1] = 0.5f;
      } else {
         unreachable("invalid options");
      }
   }

   emit_wpos_adjustment(state, intr, invert, adjX, adjY);
}
/* Create a CSO for a set of vertex elements: packs one hardware attribute
 * record per element and allocates a BO of default attribute values.
 */
static void *
vc5_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
                        const struct pipe_vertex_element *elements)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_vertex_stateobj *so = CALLOC_STRUCT(vc5_vertex_stateobj);

        if (!so)
                return NULL;

        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
        so->num_elements = num_elements;

        for (int i = 0; i < so->num_elements; i++) {
                const struct pipe_vertex_element *elem = &elements[i];
                const struct util_format_description *desc =
                        util_format_description(elem->src_format);
                uint32_t r_size = desc->channel[0].size;

                struct V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD attr_unpacked = {
                        /* vec_size == 0 means 4 */
                        .vec_size = desc->nr_channels & 3,
                        .signed_int_type = (desc->channel[0].type ==
                                            UTIL_FORMAT_TYPE_SIGNED),
                        .normalized_int_type = desc->channel[0].normalized,
                        .read_as_int_uint = desc->channel[0].pure_integer,
                        .instance_divisor = elem->instance_divisor,
                };

                /* Pick the hardware attribute type from the red channel's
                 * base type and bit size.
                 */
                switch (desc->channel[0].type) {
                case UTIL_FORMAT_TYPE_FLOAT:
                        if (r_size == 32) {
                                attr_unpacked.type = ATTRIBUTE_FLOAT;
                        } else {
                                assert(r_size == 16);
                                attr_unpacked.type = ATTRIBUTE_HALF_FLOAT;
                        }
                        break;

                case UTIL_FORMAT_TYPE_SIGNED:
                case UTIL_FORMAT_TYPE_UNSIGNED:
                        switch (r_size) {
                        case 32:
                                attr_unpacked.type = ATTRIBUTE_INT;
                                break;
                        case 16:
                                attr_unpacked.type = ATTRIBUTE_SHORT;
                                break;
                        case 10:
                                attr_unpacked.type = ATTRIBUTE_INT2_10_10_10;
                                break;
                        case 8:
                                attr_unpacked.type = ATTRIBUTE_BYTE;
                                break;
                        default:
                                fprintf(stderr,
                                        "format %s unsupported\n",
                                        desc->name);
                                attr_unpacked.type = ATTRIBUTE_BYTE;
                                abort();
                        }
                        break;

                default:
                        fprintf(stderr,
                                "format %s unsupported\n",
                                desc->name);
                        abort();
                }

                const uint32_t size =
                        cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
                V3D33_GL_SHADER_STATE_ATTRIBUTE_RECORD_pack(NULL,
                                                            (uint8_t *)&so->attrs[i * size],
                                                            &attr_unpacked);
        }

        /* Set up the default attribute values in case any of the vertex
         * elements use them.
         */
        so->default_attribute_values = vc5_bo_alloc(vc5->screen,
                                                    VC5_MAX_ATTRIBUTES *
                                                    4 * sizeof(float),
                                                    "default attributes");
        uint32_t *attrs = vc5_bo_map(so->default_attribute_values);
        for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) {
                attrs[i * 4 + 0] = 0;
                attrs[i * 4 + 1] = 0;
                attrs[i * 4 + 2] = 0;
                /* W defaults to integer 1 for pure-integer formats,
                 * float 1.0 otherwise.
                 */
                if (i < so->num_elements &&
                    util_format_is_pure_integer(so->pipe[i].src_format)) {
                        attrs[i * 4 + 3] = 1;
                } else {
                        attrs[i * 4 + 3] = fui(1.0);
                }
        }

        return so;
}

/* Bind a vertex-elements CSO. */
static void
vc5_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        vc5->vtx = hwcso;
        vc5->dirty |= VC5_DIRTY_VTXSTATE;
}

/* Set (or unbind, when cb is NULL) a constant buffer for a shader stage. */
static void
vc5_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
                        const struct pipe_constant_buffer *cb)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_constbuf_stateobj *so = &vc5->constbuf[shader];

        util_copy_constant_buffer(&so->cb[index], cb);

        /* Note that the state tracker can unbind constant buffers by
         * passing NULL here.
         */
        if (unlikely(!cb)) {
                so->enabled_mask &= ~(1 << index);
                so->dirty_mask &= ~(1 << index);
                return;
        }

        so->enabled_mask |= 1 << index;
        so->dirty_mask |= 1 << index;
        vc5->dirty |= VC5_DIRTY_CONSTBUF;
}

/* Update framebuffer state, re-referencing surfaces and invalidating the
 * current job.
 */
static void
vc5_set_framebuffer_state(struct pipe_context *pctx,
                          const struct pipe_framebuffer_state *framebuffer)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct pipe_framebuffer_state *cso = &vc5->framebuffer;
        unsigned i;

        vc5->job = NULL;

        for (i = 0; i < framebuffer->nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
        /* Drop references to any previously-bound extra color buffers. */
        for (; i < vc5->framebuffer.nr_cbufs; i++)
                pipe_surface_reference(&cso->cbufs[i], NULL);

        cso->nr_cbufs = framebuffer->nr_cbufs;

        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);

        cso->width = framebuffer->width;
        cso->height = framebuffer->height;

        vc5->dirty |= VC5_DIRTY_FRAMEBUFFER;
}

/* Return the texture state for a shader stage, marking it dirty.
 * Aborts on unsupported stages.
 */
static struct vc5_texture_stateobj *
vc5_get_stage_tex(struct vc5_context *vc5, enum pipe_shader_type shader)
{
        switch (shader) {
        case PIPE_SHADER_FRAGMENT:
                vc5->dirty |= VC5_DIRTY_FRAGTEX;
                return &vc5->fragtex;
                break;
        case PIPE_SHADER_VERTEX:
                vc5->dirty |= VC5_DIRTY_VERTTEX;
                return &vc5->verttex;
                break;
        default:
                fprintf(stderr, "Unknown shader target %d\n", shader);
                abort();
        }
}

/* Translate a gallium wrap mode to the hardware encoding.  GL_CLAMP picks
 * edge- or border-clamp depending on filtering.
 */
static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
{
        switch (pipe_wrap) {
        case PIPE_TEX_WRAP_REPEAT:
                return 0;
        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return 1;
        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return 2;
        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return 3;
        case PIPE_TEX_WRAP_CLAMP:
                return (using_nearest ? 1 : 3);
        default:
                unreachable("Unknown wrap mode");
        }
}

/* Create a sampler CSO: packs wrap modes, LOD clamps and compare func into
 * hardware state.
 */
static void *
vc5_create_sampler_state(struct pipe_context *pctx,
                         const struct pipe_sampler_state *cso)
{
        struct vc5_sampler_state *so = CALLOC_STRUCT(vc5_sampler_state);

        if (!so)
                return NULL;

        memcpy(so, cso, sizeof(*cso));

        /* NOTE(review): img_filter fields are compared against the
         * PIPE_TEX_MIPFILTER_* enum here; the numeric values line up with
         * PIPE_TEX_FILTER_NEAREST but the enum name looks wrong — confirm.
         */
        bool either_nearest =
                (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
                 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);

        struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
                .s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest),
                .t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest),
                .r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest),
        };
        V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
                                                         (uint8_t *)&so->p0,
                                                         &p0_unpacked);

        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
                cl_packet_header(TEXTURE_SHADER_STATE),

                .min_level_of_detail = MAX2(cso->min_lod, 0.0),

                .depth_compare_function = cso->compare_func,

                .fixed_bias = cso->lod_bias,
        };
        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
                      cl_packet_length(TEXTURE_SHADER_STATE));
        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
                                             &state_unpacked);

        return so;
}

/* Bind an array of sampler CSOs to a stage, trimming trailing NULLs. */
static void
vc5_sampler_states_bind(struct pipe_context *pctx,
                        enum pipe_shader_type shader, unsigned start,
                        unsigned nr, void **hwcso)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);

        assert(start == 0);
        unsigned i;
        unsigned new_nr = 0;

        for (i = 0; i < nr; i++) {
                if (hwcso[i])
                        new_nr = i + 1;
                stage_tex->samplers[i] = hwcso[i];
        }

        for (; i < stage_tex->num_samplers; i++) {
                stage_tex->samplers[i] = NULL;
        }

        stage_tex->num_samplers = new_nr;
}

/* Translate a gallium swizzle to the hardware encoding
 * (0/1 constants, then X/Y/Z/W channels offset by 2).
 */
static uint32_t translate_swizzle(unsigned char pipe_swizzle)
{
        switch (pipe_swizzle) {
        case PIPE_SWIZZLE_0:
                return 0;
        case PIPE_SWIZZLE_1:
                return 1;
        case PIPE_SWIZZLE_X:
        case PIPE_SWIZZLE_Y:
        case PIPE_SWIZZLE_Z:
        case PIPE_SWIZZLE_W:
                return 2 + pipe_swizzle;
        default:
                unreachable("unknown swizzle");
        }
}

/* Create a sampler view: packs return-channel config, swizzles and texture
 * shader state for the resource.
 */
static struct pipe_sampler_view *
vc5_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                        const struct pipe_sampler_view *cso)
{
        struct vc5_sampler_view *so = CALLOC_STRUCT(vc5_sampler_view);
        struct vc5_resource *rsc = vc5_resource(prsc);

        if (!so)
                return NULL;

        so->base = *cso;

        pipe_reference(NULL, &prsc->reference);

        /* Request only as many return words as the format has channels. */
        struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
        };

        unpacked.return_word_0_of_texture_data = true;
        if (vc5_get_tex_return_size(cso->format) == 16) {
                unpacked.return_word_1_of_texture_data = true;
        } else {
                int chans = vc5_get_tex_return_channels(cso->format);

                if (chans > 1)
                        unpacked.return_word_1_of_texture_data = true;
                if (chans > 2)
                        unpacked.return_word_2_of_texture_data = true;
                if (chans > 3)
                        unpacked.return_word_3_of_texture_data = true;
        }

        V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
                                                         (uint8_t *)&so->p1,
                                                         &unpacked);

        /* Compute the sampler view's swizzle up front. This will be plugged
         * into either the sampler (for 16-bit returns) or the shader's
         * texture key (for 32)
         */
        uint8_t view_swizzle[4] = {
                cso->swizzle_r,
                cso->swizzle_g,
                cso->swizzle_b,
                cso->swizzle_a
        };
        const uint8_t *fmt_swizzle = vc5_get_format_swizzle(so->base.format);
        util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);

        so->base.texture = prsc;
        so->base.reference.count = 1;
        so->base.context = pctx;

        struct V3D33_TEXTURE_SHADER_STATE state_unpacked = {
                cl_packet_header(TEXTURE_SHADER_STATE),

                .image_width = prsc->width0,
                .image_height = prsc->height0,
                .image_depth = prsc->depth0,

                .texture_type = rsc->tex_format,
                .srgb = util_format_is_srgb(cso->format),

                .base_level = cso->u.tex.first_level,
                .array_stride_64_byte_aligned = rsc->cube_map_stride / 64,
        };

        /* Note: Contrary to the docs, the swizzle still applies even
         * if the return size is 32.  It's just that you probably want
         * to swizzle in the shader, because you need the Y/Z/W
         * channels to be defined.
         */
        if (vc5_get_tex_return_size(cso->format) != 32) {
                state_unpacked.swizzle_r = translate_swizzle(so->swizzle[0]);
                state_unpacked.swizzle_g = translate_swizzle(so->swizzle[1]);
                state_unpacked.swizzle_b = translate_swizzle(so->swizzle[2]);
                state_unpacked.swizzle_a = translate_swizzle(so->swizzle[3]);
        } else {
                state_unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
                state_unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
                state_unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
                state_unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
        }

        /* XXX: While we need to use this flag to enable tiled
         * resource sharing (even a small shared buffer should be UIF,
         * not UBLINEAR or raster), this is also at the moment
         * patching up the fact that our resource layout's decisions
         * about XOR don't quite match the HW's.
         */
        switch (rsc->slices[0].tiling) {
        case VC5_TILING_UIF_NO_XOR:
        case VC5_TILING_UIF_XOR:
                state_unpacked.level_0_is_strictly_uif = true;
                state_unpacked.level_0_xor_enable = false;
                break;
        default:
                break;
        }

        STATIC_ASSERT(ARRAY_SIZE(so->texture_shader_state) ==
                      cl_packet_length(TEXTURE_SHADER_STATE));
        cl_packet_pack(TEXTURE_SHADER_STATE)(NULL, so->texture_shader_state,
                                             &state_unpacked);

        return &so->base;
}

/* Destroy a sampler view, dropping the texture reference. */
static void
vc5_sampler_view_destroy(struct pipe_context *pctx,
                         struct pipe_sampler_view *view)
{
        pipe_resource_reference(&view->texture, NULL);
        free(view);
}

/* Bind an array of sampler views to a stage, trimming trailing NULLs. */
static void
vc5_set_sampler_views(struct pipe_context *pctx,
                      enum pipe_shader_type shader,
                      unsigned start, unsigned nr,
                      struct pipe_sampler_view **views)
{
        struct vc5_context *vc5 = vc5_context(pctx);
        struct vc5_texture_stateobj *stage_tex = vc5_get_stage_tex(vc5, shader);
        unsigned i;
        unsigned new_nr = 0;

        assert(start == 0);

        for (i = 0; i < nr; i++) {
                if (views[i])
                        new_nr = i + 1;
                pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
        }

        for (; i < stage_tex->num_textures; i++) {
                pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
        }

        stage_tex->num_textures = new_nr;
}

/* Create a stream-output (transform feedback) target. */
static struct pipe_stream_output_target *
vc5_create_stream_output_target(struct pipe_context *pctx,
                                struct pipe_resource *prsc,
                                unsigned buffer_offset,
                                unsigned buffer_size)
{
        struct pipe_stream_output_target *target;

        target = CALLOC_STRUCT(pipe_stream_output_target);
        if (!target)
                return NULL;

        pipe_reference_init(&target->reference, 1);
        pipe_resource_reference(&target->buffer, prsc);

        target->context = pctx;
        target->buffer_offset = buffer_offset;
        target->buffer_size = buffer_size;

        return target;
}

/* Destroy a stream-output target, dropping the buffer reference. */
static void
vc5_stream_output_target_destroy(struct pipe_context *pctx,
                                 struct pipe_stream_output_target *target)
{
        pipe_resource_reference(&target->buffer, NULL);
        free(target);
}
/* Stub: terminal size changes are a client-side action, so this entry point
 * must never be reached on the server.
 */
void ssh2channel_send_terminal_size_change(SshChannel *sc, int w, int h)
{
    unreachable("Should never be called in the server");
}
/* Query an attribute of an EGL surface into *value.
 * Returns EGL_TRUE on success; sets an EGL error and returns EGL_FALSE for
 * unsupported attributes or invalid state.
 */
EGLBoolean
_eglQuerySurface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surface,
                 EGLint attribute, EGLint *value)
{
   switch (attribute) {
   case EGL_WIDTH:
      *value = surface->Width;
      break;
   case EGL_HEIGHT:
      *value = surface->Height;
      break;
   case EGL_CONFIG_ID:
      *value = surface->Config->ConfigID;
      break;
   case EGL_LARGEST_PBUFFER:
      if (surface->Type == EGL_PBUFFER_BIT)
         *value = surface->LargestPbuffer;
      break;
   case EGL_TEXTURE_FORMAT:
      /* texture attributes: only for pbuffers, no error otherwise */
      if (surface->Type == EGL_PBUFFER_BIT)
         *value = surface->TextureFormat;
      break;
   case EGL_TEXTURE_TARGET:
      if (surface->Type == EGL_PBUFFER_BIT)
         *value = surface->TextureTarget;
      break;
   case EGL_MIPMAP_TEXTURE:
      if (surface->Type == EGL_PBUFFER_BIT)
         *value = surface->MipmapTexture;
      break;
   case EGL_MIPMAP_LEVEL:
      if (surface->Type == EGL_PBUFFER_BIT)
         *value = surface->MipmapLevel;
      break;
   case EGL_SWAP_BEHAVIOR:
      *value = surface->SwapBehavior;
      break;
   case EGL_RENDER_BUFFER:
      /* From the EGL_KHR_mutable_render_buffer spec (v12):
       *
       *    Querying EGL_RENDER_BUFFER returns the buffer which client API
       *    rendering is requested to use. For a window surface, this is the
       *    attribute value specified when the surface was created or last set
       *    via eglSurfaceAttrib.
       *
       * In other words, querying a window surface returns the value most
       * recently *requested* by the user.
       *
       * The paragraph continues in the EGL 1.5 spec (2014.08.27):
       *
       *    For a pbuffer surface, it is always EGL_BACK_BUFFER . For a pixmap
       *    surface, it is always EGL_SINGLE_BUFFER . To determine the actual
       *    buffer being rendered to by a context, call eglQueryContext.
       */
      switch (surface->Type) {
      default:
         unreachable("bad EGLSurface type");
      case EGL_WINDOW_BIT:
         *value = surface->RequestedRenderBuffer;
         break;
      case EGL_PBUFFER_BIT:
         *value = EGL_BACK_BUFFER;
         break;
      case EGL_PIXMAP_BIT:
         *value = EGL_SINGLE_BUFFER;
         break;
      }
      break;
   case EGL_PIXEL_ASPECT_RATIO:
      *value = surface->AspectRatio;
      break;
   case EGL_HORIZONTAL_RESOLUTION:
      *value = surface->HorizontalResolution;
      break;
   case EGL_VERTICAL_RESOLUTION:
      *value = surface->VerticalResolution;
      break;
   case EGL_MULTISAMPLE_RESOLVE:
      *value = surface->MultisampleResolve;
      break;
   case EGL_VG_ALPHA_FORMAT:
      *value = surface->VGAlphaFormat;
      break;
   case EGL_VG_COLORSPACE:
      *value = surface->VGColorspace;
      break;
   case EGL_GL_COLORSPACE_KHR:
      if (!disp->Extensions.KHR_gl_colorspace)
         return _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
      *value = surface->GLColorspace;
      break;
   case EGL_POST_SUB_BUFFER_SUPPORTED_NV:
      *value = surface->PostSubBufferSupportedNV;
      break;
   case EGL_BUFFER_AGE_EXT:
      /* Both EXT_buffer_age and KHR_partial_update accept EGL_BUFFER_AGE_EXT.
       * To be precise, the KHR one accepts EGL_BUFFER_AGE_KHR which is an
       * alias with the same numeric value.
       */
      if (!disp->Extensions.EXT_buffer_age &&
          !disp->Extensions.KHR_partial_update)
         return _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");

      _EGLContext *ctx = _eglGetCurrentContext();
      EGLint result = drv->API.QueryBufferAge(drv, disp, surface);
      /* error happened */
      if (result < 0)
         return EGL_FALSE;

      /* Buffer age may only be queried on the current draw surface. */
      if (_eglGetContextHandle(ctx) == EGL_NO_CONTEXT ||
          ctx->DrawSurface != surface)
         return _eglError(EGL_BAD_SURFACE, "eglQuerySurface");

      *value = result;
      surface->BufferAgeRead = EGL_TRUE;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_RX_EXT:
      *value = surface->HdrMetadata.display_primary_r.x;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_RY_EXT:
      *value = surface->HdrMetadata.display_primary_r.y;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_GX_EXT:
      *value = surface->HdrMetadata.display_primary_g.x;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_GY_EXT:
      *value = surface->HdrMetadata.display_primary_g.y;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_BX_EXT:
      *value = surface->HdrMetadata.display_primary_b.x;
      break;
   case EGL_SMPTE2086_DISPLAY_PRIMARY_BY_EXT:
      *value = surface->HdrMetadata.display_primary_b.y;
      break;
   case EGL_SMPTE2086_WHITE_POINT_X_EXT:
      *value = surface->HdrMetadata.white_point.x;
      break;
   case EGL_SMPTE2086_WHITE_POINT_Y_EXT:
      *value = surface->HdrMetadata.white_point.y;
      break;
   case EGL_SMPTE2086_MAX_LUMINANCE_EXT:
      *value = surface->HdrMetadata.max_luminance;
      break;
   case EGL_SMPTE2086_MIN_LUMINANCE_EXT:
      *value = surface->HdrMetadata.min_luminance;
      break;
   case EGL_CTA861_3_MAX_CONTENT_LIGHT_LEVEL_EXT:
      *value = surface->HdrMetadata.max_cll;
      break;
   case EGL_CTA861_3_MAX_FRAME_AVERAGE_LEVEL_EXT:
      *value = surface->HdrMetadata.max_fall;
      break;
   default:
      return _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
   }

   return EGL_TRUE;
}
/* Stub: must never be called in this build. */
int
anv_gem_get_param(int fd, uint32_t param)
{
   unreachable("Unused");
}
/* If the texture instruction carries a projector source, multiply the
 * projected sources by its reciprocal and remove the projector.
 */
static void
project_src(nir_builder *b, nir_tex_instr *tex)
{
   const int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      /* Only coordinates and shadow comparators get projected. */
      const bool needs_projection =
         tex->src[i].src_type == nir_tex_src_coord ||
         tex->src[i].src_type == nir_tex_src_comparator;
      if (!needs_projection)
         continue;

      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* The array index lives in the last coordinate channel and must not
       * be projected; rebuild the vector with that channel untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr, &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
}
/* Stub: must never be called in this build. */
int
anv_gem_create_context(struct anv_device *device)
{
   unreachable("Unused");
}
static int radv_init_surface(struct radv_device *device, struct radeon_surf *surface, const struct radv_image_create_info *create_info) { const VkImageCreateInfo *pCreateInfo = create_info->vk_info; unsigned array_mode = radv_choose_tiling(device, create_info); const struct vk_format_description *desc = vk_format_description(pCreateInfo->format); bool is_depth, is_stencil; is_depth = vk_format_has_depth(desc); is_stencil = vk_format_has_stencil(desc); surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format); surface->blk_h = vk_format_get_blockheight(pCreateInfo->format); surface->bpe = vk_format_get_blocksize(vk_format_depth_only(pCreateInfo->format)); /* align byte per element on dword */ if (surface->bpe == 3) { surface->bpe = 4; } surface->flags = RADEON_SURF_SET(array_mode, MODE); switch (pCreateInfo->imageType){ case VK_IMAGE_TYPE_1D: if (pCreateInfo->arrayLayers > 1) surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE); else surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE); break; case VK_IMAGE_TYPE_2D: if (pCreateInfo->arrayLayers > 1) surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE); else surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE); break; case VK_IMAGE_TYPE_3D: surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE); break; default: unreachable("unhandled image type"); } if (is_depth) { surface->flags |= RADEON_SURF_ZBUFFER; if (radv_use_tc_compat_htile_for_image(device, pCreateInfo)) surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE; } if (is_stencil) surface->flags |= RADEON_SURF_SBUFFER; surface->flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE; if (!radv_use_dcc_for_image(device, create_info, pCreateInfo)) surface->flags |= RADEON_SURF_DISABLE_DCC; if (create_info->scanout) surface->flags |= RADEON_SURF_SCANOUT; return 0; }
/* Stub: aperture size queries are not used in this build/configuration, so
 * reaching it is a programming error. */
int
anv_gem_get_aperture(int fd, uint64_t *size)
{
   unreachable("Unused");
}
/**
 * Check if OK to draw arrays/elements.
 *
 * Runs the common _mesa_valid_to_render() check first, then applies the
 * per-API rules (ES1/ES2/core/compat) for whether a draw call may proceed.
 * May generate GL errors via _mesa_error() on the core-profile paths.
 *
 * \return true if drawing is allowed, false otherwise.
 */
static bool
check_valid_to_render(struct gl_context *ctx, const char *function)
{
   if (!_mesa_valid_to_render(ctx, function)) {
      return false;
   }

   switch (ctx->API) {
   case API_OPENGLES2:
      /* For ES2, we can draw if we have a vertex program/shader). */
      return ctx->VertexProgram._Current != NULL;

   case API_OPENGLES:
      /* For OpenGL ES, only draw if we have vertex positions */
      if (!ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled)
         return false;
      break;

   case API_OPENGL_CORE:
      /* Section 10.4 (Drawing Commands Using Vertex Arrays) of the OpenGL 4.5
       * Core Profile spec says:
       *
       *     "An INVALID_OPERATION error is generated if no vertex array
       *     object is bound (see section 10.3.1)."
       */
      if (ctx->Array.VAO == ctx->Array.DefaultVAO) {
         _mesa_error(ctx, GL_INVALID_OPERATION, "%s(no VAO bound)", function);
         return false;
      }

      /* The spec argues that this is allowed because a tess ctrl shader
       * without a tess eval shader can be used with transform feedback.
       * However, glBeginTransformFeedback doesn't allow GL_PATCHES and
       * therefore doesn't allow tessellation.
       *
       * Further investigation showed that this is indeed a spec bug and
       * a tess ctrl shader without a tess eval shader shouldn't have been
       * allowed, because there is no API in GL 4.0 that can make use this
       * to produce something useful.
       *
       * Also, all vendors except one don't support a tess ctrl shader without
       * a tess eval shader anyway.
       */
      if (ctx->TessCtrlProgram._Current && !ctx->TessEvalProgram._Current) {
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "%s(tess eval shader is missing)", function);
         return false;
      }

      /* Section 7.3 (Program Objects) of the OpenGL 4.5 Core Profile spec
       * says:
       *
       *     "If there is no active program for the vertex or fragment shader
       *     stages, the results of vertex and/or fragment processing will be
       *     undefined. However, this is not an error."
       *
       * The fragment shader is not tested here because other state (e.g.,
       * GL_RASTERIZER_DISCARD) affects whether or not we actually care.
       */
      return ctx->VertexProgram._Current != NULL;

   case API_OPENGL_COMPAT:
      if (ctx->VertexProgram._Current != NULL) {
         /* Draw regardless of whether or not we have any vertex arrays.
          * (Ex: could draw a point using a constant vertex pos)
          */
         return true;
      } else {
         /* Draw if we have vertex positions (GL_VERTEX_ARRAY or generic
          * array [0]).
          */
         return (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POS].Enabled ||
                 ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_GENERIC0].Enabled);
      }
      break;

   default:
      unreachable("Invalid API value in check_valid_to_render()");
   }

   return true;
}
/* Stub: dma-buf fd to GEM handle import is not used in this
 * build/configuration, so reaching it is a programming error. */
uint32_t
anv_gem_fd_to_handle(struct anv_device *device, int fd)
{
   unreachable("Unused");
}
/**
 * Check if the given texture target is a legal texture object target
 * for a glTexStorage() command.
 * This is a bit different than legal_teximage_target() when it comes
 * to cube maps.
 *
 * The check runs in two stages: targets that exist in both GLES and
 * desktop GL are handled first; everything that remains (proxy targets,
 * rectangle textures, 1D targets) is desktop-GL only.
 */
static bool
legal_texobj_target(const struct gl_context *ctx, GLuint dims, GLenum target)
{
   if (dims < 1 || dims > 3) {
      _mesa_problem(ctx, "invalid dims=%u in legal_texobj_target()", dims);
      return false;
   }

   /* Stage 1: targets legal in GLES as well as desktop GL. */
   if (dims == 2) {
      if (target == GL_TEXTURE_2D)
         return true;
      if (target == GL_TEXTURE_CUBE_MAP)
         return ctx->Extensions.ARB_texture_cube_map;
   } else if (dims == 3) {
      if (target == GL_TEXTURE_3D)
         return true;
      if (target == GL_TEXTURE_2D_ARRAY)
         return ctx->Extensions.EXT_texture_array;
      if (target == GL_TEXTURE_CUBE_MAP_ARRAY)
         return _mesa_has_texture_cube_map_array(ctx);
   }

   /* Stage 2: everything below exists only in desktop GL. */
   if (!_mesa_is_desktop_gl(ctx))
      return false;

   switch (dims) {
   case 1:
      return target == GL_TEXTURE_1D || target == GL_PROXY_TEXTURE_1D;
   case 2:
      if (target == GL_PROXY_TEXTURE_2D)
         return true;
      if (target == GL_PROXY_TEXTURE_CUBE_MAP)
         return ctx->Extensions.ARB_texture_cube_map;
      if (target == GL_TEXTURE_RECTANGLE ||
          target == GL_PROXY_TEXTURE_RECTANGLE)
         return ctx->Extensions.NV_texture_rectangle;
      if (target == GL_TEXTURE_1D_ARRAY ||
          target == GL_PROXY_TEXTURE_1D_ARRAY)
         return ctx->Extensions.EXT_texture_array;
      return false;
   case 3:
      if (target == GL_PROXY_TEXTURE_3D)
         return true;
      if (target == GL_PROXY_TEXTURE_2D_ARRAY)
         return ctx->Extensions.EXT_texture_array;
      if (target == GL_PROXY_TEXTURE_CUBE_MAP_ARRAY)
         return ctx->Extensions.ARB_texture_cube_map_array;
      return false;
   default:
      unreachable("impossible dimensions");
   }
}
/* dd_function_table::ProgramStringNotify hook: called when an ARB
 * vertex/fragment program's string changes.  Re-creates the program's NIR,
 * precompiles it, and flags driver state dirty when the changed program is
 * the currently-bound one.  Always reports success (true).
 */
static GLboolean
brwProgramStringNotify(struct gl_context *ctx,
                       GLenum target,
                       struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   switch (target) {
   case GL_FRAGMENT_PROGRAM_ARB: {
      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
      const struct brw_fragment_program *curFP =
         brw_fragment_program_const(brw->fragment_program);

      /* Only mark state dirty if the modified program is the bound one. */
      if (newFP == curFP)
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      /* New id so cached compiled variants of the old string aren't reused. */
      newFP->id = get_new_program_id(brw->intelScreen);

      brw_add_texrect_params(prog);

      /* FS is always compiled scalar, hence the 'true'. */
      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);

      brw_fs_precompile(ctx, NULL, prog);
      break;
   }
   case GL_VERTEX_PROGRAM_ARB: {
      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
      const struct brw_vertex_program *curVP =
         brw_vertex_program_const(brw->vertex_program);

      if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      if (newVP->program.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
      }
      newVP->id = get_new_program_id(brw->intelScreen);

      /* Also tell tnl about it: */
      _tnl_program_string(ctx, target, prog);

      brw_add_texrect_params(prog);

      /* Scalar vs. vec4 VS compilation depends on the compiler config. */
      prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                 compiler->scalar_stage[MESA_SHADER_VERTEX]);

      brw_vs_precompile(ctx, NULL, prog);
      break;
   }
   default:
      /*
       * driver->ProgramStringNotify is only called for ARB programs, fixed
       * function vertex programs, and ir_to_mesa (which isn't used by the
       * i965 back-end).  Therefore, even after geometry shaders are added,
       * this function should only ever be called with a target of
       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
       */
      unreachable("Unexpected target in brwProgramStringNotify");
   }

   return true;
}
/* Scalarizes a vector expression assignment: rewrites "vec = op(vec, ...)"
 * into one scalar assignment per channel (or a scalar reduction for ops like
 * dot/any/all_equal).  Operands are first copied into temporaries so each can
 * be read once per channel, then the original assignment is removed.
 */
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[3];

   if (!expr)
      return visit_continue;

   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   /* Find the widest vector operand; that sets how many channels we emit. */
   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
         found_vector = true;
         vector_elements = expr->operands[i]->type->vector_elements;
         break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Interpolation ops must stay whole-vector; don't scalarize them. */
   switch (expr->operation) {
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
      return visit_continue;
   default:
      break;
   }

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
                                           "channel_expressions",
                                           ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
                                          expr->operands[i],
                                          NULL);
      ir->insert_before(assign);
   }

   /* Scalar type matching the LHS base type (1x1). */
   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
                                                           1, 1);

   /* OK, time to break down this vector operation.
    */
   switch (expr->operation) {
   /* Component-wise unary ops: one scalar op per channel. */
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdy:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  NULL));
      }
      break;

   /* Component-wise binary ops: one scalar op per channel. */
   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_imul_high:
   case ir_binop_div:
   case ir_binop_carry:
   case ir_binop_borrow:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1));
      }
      break;

   /* any(v): reduce all channels with logic_or into channel 0. */
   case ir_unop_any: {
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                        element_type,
                                        get_element(op_var[0], 0),
                                        get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
         temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
                                           element_type,
                                           get_element(op_var[0], i),
                                           temp);
      }
      assign(ir, 0, temp);
      break;
   }

   /* dot(a, b): per-channel multiplies folded with adds into channel 0. */
   case ir_binop_dot: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;

         temp = new(mem_ctx) ir_expression(ir_binop_mul,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(ir_binop_add,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      /* GLSL logic ops are scalar-only; hitting this means broken IR.
       * Dump the offending assignment before aborting.
       */
      ir->fprint(stderr);
      fprintf(stderr, "\n");
      unreachable("not reached: expression operates on scalars only");

   /* all_equal/any_nequal: per-channel compares joined with and/or. */
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_expression *temp;
         ir_expression_operation join;

         if (expr->operation == ir_binop_all_equal)
            join = ir_binop_logic_and;
         else
            join = ir_binop_logic_or;

         temp = new(mem_ctx) ir_expression(expr->operation,
                                           element_type,
                                           op0,
                                           op1);
         if (last) {
            last = new(mem_ctx) ir_expression(join,
                                              element_type,
                                              temp,
                                              last);
         } else {
            last = temp;
         }
      }
      assign(ir, 0, last);
      break;
   }
   case ir_unop_noise:
      unreachable("noise should have been broken down to function call");

   case ir_binop_bfm: {
      /* Does not need to be scalarized, since its result will be identical
       * for all channels.
       */
      ir_rvalue *op0 = get_element(op_var[0], 0);
      ir_rvalue *op1 = get_element(op_var[1], 0);

      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
                                               element_type,
                                               op0,
                                               op1));
      break;
   }

   case ir_binop_ubo_load:
      unreachable("not yet supported");

   /* Component-wise ternary ops: one scalar op per channel. */
   case ir_triop_fma:
   case ir_triop_lrp:
   case ir_triop_csel:
   case ir_triop_bitfield_extract:
      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op0 = get_element(op_var[0], i);
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0,
                                                  op1,
                                                  op2));
      }
      break;

   case ir_triop_bfi: {
      /* Only a single BFM is needed for multiple BFIs. */
      ir_rvalue *op0 = get_element(op_var[0], 0);

      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0->clone(mem_ctx, NULL),
                                                  op1,
                                                  op2));
      }
      break;
   }

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_ldexp:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_quadop_vector:
      unreachable("should have been lowered");

   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
   case ir_unop_interpolate_at_centroid:
   case ir_binop_interpolate_at_offset:
   case ir_binop_interpolate_at_sample:
      unreachable("not reached: expression operates on scalars only");
   }

   /* The per-channel assignments fully replace the original. */
   ir->remove();
   this->progress = true;

   return visit_continue;
}
/* Computes the (scaled-down) rectangle to send down the pipeline for an MCS
 * fast color clear of @irb, per the IVB PRM alignment/scaledown rules.
 * The result in *rect is in post-scaledown units and already flipped for
 * window-system framebuffers.
 */
static void
get_fast_clear_rect(struct brw_context *brw, struct gl_framebuffer *fb,
                    struct intel_renderbuffer *irb, struct rect *rect)
{
   unsigned int x_align, y_align;
   unsigned int x_scaledown, y_scaledown;

   if (irb->mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE) {
      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
       *
       *     Clear pass must have a clear rectangle that must follow
       *     alignment rules in terms of pixels and lines as shown in the
       *     table below. Further, the clear-rectangle height and width
       *     must be multiple of the following dimensions. If the height
       *     and width of the render target being cleared do not meet these
       *     requirements, an MCS buffer can be created such that it
       *     follows the requirement and covers the RT.
       *
       * The alignment size in the table that follows is related to the
       * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
       * with X alignment multiplied by 16 and Y alignment multiplied by 32.
       */
      intel_get_non_msrt_mcs_alignment(brw, irb->mt, &x_align, &y_align);
      x_align *= 16;
      y_align *= 32;

      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "Fast Color Clear" bullet (p327):
       *
       *     In order to optimize the performance MCS buffer (when bound to
       *     1X RT) clear similarly to MCS buffer clear for MSRT case,
       *     clear rect is required to be scaled by the following factors
       *     in the horizontal and vertical directions:
       *
       * The X and Y scale down factors in the table that follows are each
       * equal to half the alignment value computed above.
       */
      x_scaledown = x_align / 2;
      y_scaledown = y_align / 2;

      /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
       * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
       * Clear of Non-MultiSampled Render Target Restrictions":
       *
       *     Clear rectangle must be aligned to two times the number of
       *     pixels in the table shown below due to 16x16 hashing across the
       *     slice.
       */
      x_align *= 2;
      y_align *= 2;
   } else {
      /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
       * Target(s)", beneath the "MSAA Compression" bullet (p326):
       *
       *     Clear pass for this case requires that scaled down primitive
       *     is sent down with upper left co-ordinate to coincide with
       *     actual rectangle being cleared. For MSAA, clear rectangle’s
       *     height and width need to as show in the following table in
       *     terms of (width,height) of the RT.
       *
       *     MSAA  Width of Clear Rect  Height of Clear Rect
       *      4X     Ceil(1/8*width)      Ceil(1/2*height)
       *      8X     Ceil(1/2*width)      Ceil(1/2*height)
       *
       * The text "with upper left co-ordinate to coincide with actual
       * rectangle being cleared" is a little confusing--it seems to imply
       * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
       * feed the pipeline using the rectangle (x,y) to
       * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
       * the number of samples.  Experiments indicate that this is not
       * quite correct; actually, what the hardware appears to do is to
       * align whatever rectangle is sent down the pipeline to the nearest
       * multiple of 2x2 blocks, and then scale it up by a factor of N
       * horizontally and 2 vertically.  So the resulting alignment is 4
       * vertically and either 4 or 16 horizontally, and the scaledown
       * factor is 2 vertically and either 2 or 8 horizontally.
       */
      switch (irb->mt->num_samples) {
      case 2:
      case 4:
         x_scaledown = 8;
         break;
      case 8:
         x_scaledown = 2;
         break;
      default:
         unreachable("Unexpected sample count for fast clear");
      }
      y_scaledown = 2;
      x_align = x_scaledown * 2;
      y_align = y_scaledown * 2;
   }

   rect->x0 = fb->_Xmin;
   rect->x1 = fb->_Xmax;
   if (fb->Name != 0) {
      /* User FBO: no y-flip needed. */
      rect->y0 = fb->_Ymin;
      rect->y1 = fb->_Ymax;
   } else {
      /* Window-system framebuffer: flip y to GL's bottom-up convention. */
      rect->y0 = fb->Height - fb->_Ymax;
      rect->y1 = fb->Height - fb->_Ymin;
   }

   /* Grow the rect outward to the required alignment, then scale down. */
   rect->x0 = ROUND_DOWN_TO(rect->x0, x_align) / x_scaledown;
   rect->y0 = ROUND_DOWN_TO(rect->y0, y_align) / y_scaledown;
   rect->x1 = ALIGN(rect->x1, x_align) / x_scaledown;
   rect->y1 = ALIGN(rect->y1, y_align) / y_scaledown;
}
/* Returns a human-readable name for @op, for disassembly/debug output.
 * Hardware opcodes come from the opcode_descs[] table; all virtual backend
 * opcodes are named explicitly below.  The switch has no default so the
 * compiler warns when a new opcode is added without a name.
 */
const char *
brw_instruction_name(enum opcode op)
{
   switch (op) {
   /* Real hardware opcodes: use the name from the descriptor table. */
   case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
      assert(opcode_descs[op].name);
      return opcode_descs[op].name;
   case FS_OPCODE_FB_WRITE:
      return "fb_write";
   case FS_OPCODE_FB_WRITE_LOGICAL:
      return "fb_write_logical";
   case FS_OPCODE_PACK_STENCIL_REF:
      return "pack_stencil_ref";
   case FS_OPCODE_BLORP_FB_WRITE:
      return "blorp_fb_write";
   case FS_OPCODE_REP_FB_WRITE:
      return "rep_fb_write";

   /* Math-box opcodes. */
   case SHADER_OPCODE_RCP:
      return "rcp";
   case SHADER_OPCODE_RSQ:
      return "rsq";
   case SHADER_OPCODE_SQRT:
      return "sqrt";
   case SHADER_OPCODE_EXP2:
      return "exp2";
   case SHADER_OPCODE_LOG2:
      return "log2";
   case SHADER_OPCODE_POW:
      return "pow";
   case SHADER_OPCODE_INT_QUOTIENT:
      return "int_quot";
   case SHADER_OPCODE_INT_REMAINDER:
      return "int_rem";
   case SHADER_OPCODE_SIN:
      return "sin";
   case SHADER_OPCODE_COS:
      return "cos";

   /* Texturing opcodes (and their pre-lowering "logical" forms). */
   case SHADER_OPCODE_TEX:
      return "tex";
   case SHADER_OPCODE_TEX_LOGICAL:
      return "tex_logical";
   case SHADER_OPCODE_TXD:
      return "txd";
   case SHADER_OPCODE_TXD_LOGICAL:
      return "txd_logical";
   case SHADER_OPCODE_TXF:
      return "txf";
   case SHADER_OPCODE_TXF_LOGICAL:
      return "txf_logical";
   case SHADER_OPCODE_TXL:
      return "txl";
   case SHADER_OPCODE_TXL_LOGICAL:
      return "txl_logical";
   case SHADER_OPCODE_TXS:
      return "txs";
   case SHADER_OPCODE_TXS_LOGICAL:
      return "txs_logical";
   case FS_OPCODE_TXB:
      return "txb";
   case FS_OPCODE_TXB_LOGICAL:
      return "txb_logical";
   case SHADER_OPCODE_TXF_CMS:
      return "txf_cms";
   case SHADER_OPCODE_TXF_CMS_LOGICAL:
      return "txf_cms_logical";
   case SHADER_OPCODE_TXF_CMS_W:
      return "txf_cms_w";
   case SHADER_OPCODE_TXF_CMS_W_LOGICAL:
      return "txf_cms_w_logical";
   case SHADER_OPCODE_TXF_UMS:
      return "txf_ums";
   case SHADER_OPCODE_TXF_UMS_LOGICAL:
      return "txf_ums_logical";
   case SHADER_OPCODE_TXF_MCS:
      return "txf_mcs";
   case SHADER_OPCODE_TXF_MCS_LOGICAL:
      return "txf_mcs_logical";
   case SHADER_OPCODE_LOD:
      return "lod";
   case SHADER_OPCODE_LOD_LOGICAL:
      return "lod_logical";
   case SHADER_OPCODE_TG4:
      return "tg4";
   case SHADER_OPCODE_TG4_LOGICAL:
      return "tg4_logical";
   case SHADER_OPCODE_TG4_OFFSET:
      return "tg4_offset";
   case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
      return "tg4_offset_logical";
   case SHADER_OPCODE_SAMPLEINFO:
      return "sampleinfo";

   case SHADER_OPCODE_SHADER_TIME_ADD:
      return "shader_time_add";

   /* Untyped/typed surface access and atomics. */
   case SHADER_OPCODE_UNTYPED_ATOMIC:
      return "untyped_atomic";
   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
      return "untyped_atomic_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
      return "untyped_surface_read";
   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
      return "untyped_surface_read_logical";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
      return "untyped_surface_write";
   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
      return "untyped_surface_write_logical";
   case SHADER_OPCODE_TYPED_ATOMIC:
      return "typed_atomic";
   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
      return "typed_atomic_logical";
   case SHADER_OPCODE_TYPED_SURFACE_READ:
      return "typed_surface_read";
   case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
      return "typed_surface_read_logical";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
      return "typed_surface_write";
   case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
      return "typed_surface_write_logical";
   case SHADER_OPCODE_MEMORY_FENCE:
      return "memory_fence";

   case SHADER_OPCODE_LOAD_PAYLOAD:
      return "load_payload";

   /* Scratch (register spill) access. */
   case SHADER_OPCODE_GEN4_SCRATCH_READ:
      return "gen4_scratch_read";
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
      return "gen4_scratch_write";
   case SHADER_OPCODE_GEN7_SCRATCH_READ:
      return "gen7_scratch_read";

   /* URB access. */
   case SHADER_OPCODE_URB_WRITE_SIMD8:
      return "gen8_urb_write_simd8";
   case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
      return "gen8_urb_write_simd8_per_slot";
   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
      return "gen8_urb_write_simd8_masked";
   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
      return "gen8_urb_write_simd8_masked_per_slot";
   case SHADER_OPCODE_URB_READ_SIMD8:
      return "urb_read_simd8";
   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
      return "urb_read_simd8_per_slot";

   case SHADER_OPCODE_FIND_LIVE_CHANNEL:
      return "find_live_channel";
   case SHADER_OPCODE_BROADCAST:
      return "broadcast";
   case SHADER_OPCODE_EXTRACT_BYTE:
      return "extract_byte";
   case SHADER_OPCODE_EXTRACT_WORD:
      return "extract_word";
   case VEC4_OPCODE_MOV_BYTES:
      return "mov_bytes";
   case VEC4_OPCODE_PACK_BYTES:
      return "pack_bytes";
   case VEC4_OPCODE_UNPACK_UNIFORM:
      return "unpack_uniform";

   /* Fragment-shader-only opcodes. */
   case FS_OPCODE_DDX_COARSE:
      return "ddx_coarse";
   case FS_OPCODE_DDX_FINE:
      return "ddx_fine";
   case FS_OPCODE_DDY_COARSE:
      return "ddy_coarse";
   case FS_OPCODE_DDY_FINE:
      return "ddy_fine";
   case FS_OPCODE_CINTERP:
      return "cinterp";
   case FS_OPCODE_LINTERP:
      return "linterp";
   case FS_OPCODE_PIXEL_X:
      return "pixel_x";
   case FS_OPCODE_PIXEL_Y:
      return "pixel_y";
   case FS_OPCODE_GET_BUFFER_SIZE:
      return "fs_get_buffer_size";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
      return "uniform_pull_const";
   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
      return "uniform_pull_const_gen7";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
      return "varying_pull_const";
   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
      return "varying_pull_const_gen7";
   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
      return "mov_dispatch_to_flags";
   case FS_OPCODE_DISCARD_JUMP:
      return "discard_jump";
   case FS_OPCODE_SET_SAMPLE_ID:
      return "set_sample_id";
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
      return "set_simd4x2_offset";
   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
      return "pack_half_2x16_split";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
      return "unpack_half_2x16_split_x";
   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
      return "unpack_half_2x16_split_y";
   case FS_OPCODE_PLACEHOLDER_HALT:
      return "placeholder_halt";
   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
      return "interp_centroid";
   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
      return "interp_sample";
   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
      return "interp_shared_offset";
   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
      return "interp_per_slot_offset";

   /* Vertex-shader-only opcodes. */
   case VS_OPCODE_URB_WRITE:
      return "vs_urb_write";
   case VS_OPCODE_PULL_CONSTANT_LOAD:
      return "pull_constant_load";
   case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
      return "pull_constant_load_gen7";
   case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
      return "set_simd4x2_header_gen9";
   case VS_OPCODE_GET_BUFFER_SIZE:
      return "vs_get_buffer_size";
   case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
      return "unpack_flags_simd4x2";

   /* Geometry-shader-only opcodes. */
   case GS_OPCODE_URB_WRITE:
      return "gs_urb_write";
   case GS_OPCODE_URB_WRITE_ALLOCATE:
      return "gs_urb_write_allocate";
   case GS_OPCODE_THREAD_END:
      return "gs_thread_end";
   case GS_OPCODE_SET_WRITE_OFFSET:
      return "set_write_offset";
   case GS_OPCODE_SET_VERTEX_COUNT:
      return "set_vertex_count";
   case GS_OPCODE_SET_DWORD_2:
      return "set_dword_2";
   case GS_OPCODE_PREPARE_CHANNEL_MASKS:
      return "prepare_channel_masks";
   case GS_OPCODE_SET_CHANNEL_MASKS:
      return "set_channel_masks";
   case GS_OPCODE_GET_INSTANCE_ID:
      return "get_instance_id";
   case GS_OPCODE_FF_SYNC:
      return "ff_sync";
   case GS_OPCODE_SET_PRIMITIVE_ID:
      return "set_primitive_id";
   case GS_OPCODE_SVB_WRITE:
      return "gs_svb_write";
   case GS_OPCODE_SVB_SET_DST_INDEX:
      return "gs_svb_set_dst_index";
   case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
      return "gs_ff_sync_set_primitives";

   case CS_OPCODE_CS_TERMINATE:
      return "cs_terminate";
   case SHADER_OPCODE_BARRIER:
      return "barrier";
   case SHADER_OPCODE_MULH:
      return "mulh";
   case SHADER_OPCODE_MOV_INDIRECT:
      return "mov_indirect";

   case VEC4_OPCODE_URB_READ:
      return "urb_read";

   /* Tessellation-control-shader-only opcodes. */
   case TCS_OPCODE_GET_INSTANCE_ID:
      return "tcs_get_instance_id";
   case TCS_OPCODE_URB_WRITE:
      return "tcs_urb_write";
   case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
      return "tcs_set_input_urb_offsets";
   case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS:
      return "tcs_set_output_urb_offsets";
   case TCS_OPCODE_GET_PRIMITIVE_ID:
      return "tcs_get_primitive_id";
   case TCS_OPCODE_CREATE_BARRIER_HEADER:
      return "tcs_create_barrier_header";
   case TCS_OPCODE_SRC0_010_IS_ZERO:
      return "tcs_src0<0,1,0>_is_zero";
   case TCS_OPCODE_RELEASE_INPUT:
      return "tcs_release_input";
   case TCS_OPCODE_THREAD_END:
      return "tcs_thread_end";

   /* Tessellation-evaluation-shader-only opcodes. */
   case TES_OPCODE_CREATE_INPUT_READ_HEADER:
      return "tes_create_input_read_header";
   case TES_OPCODE_ADD_INDIRECT_URB_OFFSET:
      return "tes_add_indirect_urb_offset";
   case TES_OPCODE_GET_PRIMITIVE_ID:
      return "tes_get_primitive_id";
   }

   unreachable("not reached");
}
/* Builds and emits the gen8+ 3DSTATE_RASTER packet from current GL state
 * (_NEW_BUFFERS, _NEW_POLYGON, _NEW_POINT, _NEW_LINE, _NEW_SCISSOR,
 * _NEW_TRANSFORM): winding, culling, point/line smoothing, MSAA rasterizer
 * mode, polygon offset, fill modes, scissor and depth-clip enables.
 */
static void
upload_raster(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t dw1 = 0;

   /* _NEW_BUFFERS */
   bool render_to_fbo = _mesa_is_user_fbo(brw->ctx.DrawBuffer);

   /* _NEW_POLYGON */
   /* Front-face winding must be inverted for window-system framebuffers
    * because rendering there is y-flipped.
    */
   if (ctx->Polygon._FrontBit == render_to_fbo)
      dw1 |= GEN8_RASTER_FRONT_WINDING_CCW;

   if (ctx->Polygon.CullFlag) {
      switch (ctx->Polygon.CullFaceMode) {
      case GL_FRONT:
         dw1 |= GEN8_RASTER_CULL_FRONT;
         break;
      case GL_BACK:
         dw1 |= GEN8_RASTER_CULL_BACK;
         break;
      case GL_FRONT_AND_BACK:
         dw1 |= GEN8_RASTER_CULL_BOTH;
         break;
      default:
         unreachable("not reached");
      }
   } else {
      dw1 |= GEN8_RASTER_CULL_NONE;
   }

   /* _NEW_POINT */
   if (ctx->Point.SmoothFlag)
      dw1 |= GEN8_RASTER_SMOOTH_POINT_ENABLE;

   if (_mesa_is_multisample_enabled(ctx))
      dw1 |= GEN8_RASTER_API_MULTISAMPLE_ENABLE;

   /* Polygon offset applies per fill mode. */
   if (ctx->Polygon.OffsetFill)
      dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;

   if (ctx->Polygon.OffsetLine)
      dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;

   if (ctx->Polygon.OffsetPoint)
      dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;

   switch (ctx->Polygon.FrontMode) {
   case GL_FILL:
      dw1 |= GEN6_SF_FRONT_SOLID;
      break;
   case GL_LINE:
      dw1 |= GEN6_SF_FRONT_WIREFRAME;
      break;
   case GL_POINT:
      dw1 |= GEN6_SF_FRONT_POINT;
      break;
   default:
      unreachable("not reached");
   }

   switch (ctx->Polygon.BackMode) {
   case GL_FILL:
      dw1 |= GEN6_SF_BACK_SOLID;
      break;
   case GL_LINE:
      dw1 |= GEN6_SF_BACK_WIREFRAME;
      break;
   case GL_POINT:
      dw1 |= GEN6_SF_BACK_POINT;
      break;
   default:
      unreachable("not reached");
   }

   /* _NEW_LINE */
   if (ctx->Line.SmoothFlag)
      dw1 |= GEN8_RASTER_LINE_AA_ENABLE;

   /* _NEW_SCISSOR */
   if (ctx->Scissor.EnableFlags)
      dw1 |= GEN8_RASTER_SCISSOR_ENABLE;

   /* _NEW_TRANSFORM */
   /* Z clipping is only enabled when depth clamping is off; gen9 split the
    * single enable bit into separate near/far enables.
    */
   if (!ctx->Transform.DepthClamp) {
      if (brw->gen >= 9) {
         dw1 |= GEN9_RASTER_VIEWPORT_Z_NEAR_CLIP_TEST_ENABLE |
                GEN9_RASTER_VIEWPORT_Z_FAR_CLIP_TEST_ENABLE;
      } else {
         dw1 |= GEN8_RASTER_VIEWPORT_Z_CLIP_TEST_ENABLE;
      }
   }

   BEGIN_BATCH(5);
   OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2));
   OUT_BATCH(dw1);
   OUT_BATCH_F(ctx->Polygon.OffsetUnits * 2); /* constant.  copied from gen4 */
   OUT_BATCH_F(ctx->Polygon.OffsetFactor); /* scale */
   OUT_BATCH_F(ctx->Polygon.OffsetClamp); /* global depth offset clamp */
   ADVANCE_BATCH();
}
/**
 * Enable or disable thread dispatch and set the HiZ op appropriately.
 *
 * Assembles and emits the gen6 3DSTATE_WM packet for a blorp operation:
 * selects the HiZ operation bits, configures max threads / dispatch modes,
 * and points at the WM kernel(s) when params carry a WM program.
 */
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
                          const struct brw_blorp_params *params)
{
   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
   uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;

   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
    * mention this explicitly, it notes that the valid range for the field is
    * [1,39] = [2,40] threads, which excludes zero.
    *
    * To be safe (and to minimize extraneous code) we go ahead and fully
    * configure the WM state whether or not there is a WM program.
    */
   dw2 = dw4 = dw5 = dw6 = ksp0 = ksp2 = 0;
   switch (params->hiz_op) {
   case GEN6_HIZ_OP_DEPTH_CLEAR:
      dw4 |= GEN6_WM_DEPTH_CLEAR;
      break;
   case GEN6_HIZ_OP_DEPTH_RESOLVE:
      dw4 |= GEN6_WM_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_HIZ_RESOLVE:
      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
      break;
   case GEN6_HIZ_OP_NONE:
      break;
   default:
      unreachable("not reached");
   }
   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
   dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
   dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
   if (params->wm_prog_data) {
      dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */

      dw4 |= prog_data->first_curbe_grf_0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
      dw4 |= prog_data->first_curbe_grf_2 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;

      ksp0 = params->wm_prog_kernel;
      ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;

      if (params->wm_prog_data->dispatch_8)
         dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
      if (params->wm_prog_data->dispatch_16)
         dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
   }

   if (params->src.mt) {
      /* Sampling from a source surface: enable kill and one sampler. */
      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
   }

   if (params->dst.num_samples > 1) {
      dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
      if (prog_data && prog_data->persample_msaa_dispatch)
         dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
      else
         dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
   } else {
      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
   }

   BEGIN_BATCH(9);
   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
   OUT_BATCH(ksp0);
   OUT_BATCH(dw2);
   OUT_BATCH(0); /* No scratch needed */
   OUT_BATCH(dw4);
   OUT_BATCH(dw5);
   OUT_BATCH(dw6);
   OUT_BATCH(0); /* kernel 1 pointer */
   OUT_BATCH(ksp2);
   ADVANCE_BATCH();
}