static bool run_tests(struct brw_context *brw) { bool fail = false; for (int i = 0; i < ARRAY_SIZE(tests); i++) { for (int align_16 = 0; align_16 <= 1; align_16++) { struct brw_compile *p = rzalloc(NULL, struct brw_compile); brw_init_compile(brw, p, p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); if (align_16) brw_set_default_access_mode(p, BRW_ALIGN_16); else brw_set_default_access_mode(p, BRW_ALIGN_1); tests[i].func(p); assert(p->nr_insn == 1); if (!test_compact_instruction(p, p->store[0])) { fail = true; continue; } if (!test_fuzz_compact_instruction(p, p->store[0])) { fail = true; continue; } ralloc_free(p); } } return fail; }
void vec4_generator::generate_math2_gen4(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src0, struct brw_reg src1) { /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13 * "Message Payload": * * "Operand0[7]. For the INT DIV functions, this operand is the * denominator." * ... * "Operand1[7]. For the INT DIV functions, this operand is the * numerator." */ bool is_int_div = inst->opcode != SHADER_OPCODE_POW; struct brw_reg &op0 = is_int_div ? src1 : src0; struct brw_reg &op1 = is_int_div ? src0 : src1; brw_push_insn_state(p); brw_set_default_saturate(p, false); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1); brw_pop_insn_state(p); gen4_math(p, dst, brw_math_function(inst->opcode), inst->base_mrf, op0, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); }
static void set_predicate_control_flag_value(struct brw_compile *p, struct brw_sf_compile *c, unsigned value) { brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); if (value != 0xff) { if (value != c->flag_value) { brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value)); c->flag_value = value; } brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); } }
/* Points setup - several simplifications as all attributes are * constant across the face of the point (point sprites excluded!) */ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ for (i = 0; i < c->nr_setup_regs; i++) { struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { /* This seems odd as the values are all constant, but the * fragment shader will be expecting it: */ set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } /* The delta values are always zero, just send the starting * coordinate. Again, this is to fit in with the interpolation * code in the fragment shader. */ { set_predicate_control_flag_value(p, c, pc); brw_MOV(p, c->m3C0, a0); /* constant value */ /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
static void gen_f0_0_MOV_GRF_GRF(struct brw_codegen *p) { struct brw_reg g0 = brw_vec8_grf(0, 0); struct brw_reg g2 = brw_vec8_grf(2, 0); brw_push_insn_state(p); brw_set_default_predicate_control(p, true); brw_MOV(p, g0, g2); brw_pop_insn_state(p); }
/* The handling of f0.1 vs f0.0 changes between gen6 and gen7. Explicitly test * it, so that we run the fuzzing can run over all the other bits that might * interact with it. */ static void gen_f0_1_MOV_GRF_GRF(struct brw_codegen *p) { struct brw_reg g0 = brw_vec8_grf(0, 0); struct brw_reg g2 = brw_vec8_grf(2, 0); brw_push_insn_state(p); brw_set_default_predicate_control(p, true); brw_inst *mov = brw_MOV(p, g0, g2); brw_inst_set_flag_subreg_nr(p->devinfo, mov, 1); brw_pop_insn_state(p); }
void brw_clip_ff_sync(struct brw_clip_compile *c) { struct brw_codegen *p = &c->func; if (p->devinfo->gen == 5) { brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z); brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); brw_ff_sync(p, c->reg.R0, 0, c->reg.R0, 1, /* allocate */ 1, /* response length */ 0 /* eot */); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); } }
void brw_clip_ff_sync(struct brw_clip_compile *c) { struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; if (brw->gen == 5) { brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); brw_last_inst->header.destreg__conditionalmod = BRW_CONDITIONAL_Z; brw_IF(p, BRW_EXECUTE_1); { brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); brw_ff_sync(p, c->reg.R0, 0, c->reg.R0, 1, /* allocate */ 1, /* response length */ 0 /* eot */); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); } }
static bool run_tests(const struct gen_device_info *devinfo) { brw_init_compaction_tables(devinfo); bool fail = false; for (unsigned i = 0; i < ARRAY_SIZE(tests); i++) { for (int align_16 = 0; align_16 <= 1; align_16++) { struct brw_codegen *p = rzalloc(NULL, struct brw_codegen); brw_init_codegen(devinfo, p, p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); if (align_16) brw_set_default_access_mode(p, BRW_ALIGN_16); else brw_set_default_access_mode(p, BRW_ALIGN_1); tests[i].func(p); assert(p->nr_insn == 1); if (!test_compact_instruction(p, p->store[0])) { fail = true; continue; } if (!test_fuzz_compact_instruction(p, p->store[0])) { fail = true; continue; } ralloc_free(p); } } return fail; }
/** * Generate the geometry shader program used on Gen6 to perform stream output * (transform feedback). */ void gen6_sol_program(struct brw_ff_gs_compile *c, struct brw_ff_gs_prog_key *key, unsigned num_verts, bool check_edge_flags) { struct brw_codegen *p = &c->func; brw_inst *inst; c->prog_data.svbi_postincrement_value = num_verts; brw_ff_gs_alloc_regs(c, num_verts, true); brw_ff_gs_initialize_header(c); if (key->num_transform_feedback_bindings > 0) { unsigned vertex, binding; struct brw_reg destination_indices_uw = vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); /* Note: since we use the binding table to keep track of buffer offsets * and stride, the GS doesn't need to keep track of a separate pointer * into each buffer; it uses a single pointer which increments by 1 for * each vertex. So we use SVBI0 for this pointer, regardless of whether * transform feedback is in interleaved or separate attribs mode. * * Make sure that the buffers have enough room for all the vertices. */ brw_ADD(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.SVBI, 4)); brw_IF(p, BRW_EXECUTE_1); /* Compute the destination indices to write to. Usually we use SVBI[0] * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the * vertices come down the pipeline in reversed winding order, so we need * to flip the order when writing to the transform feedback buffer. To * ensure that flatshading accuracy is preserved, we need to write them * in order SVBI[0] + (0, 2, 1) if we're using the first provoking * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using * the last provoking vertex convention. * * Note: since brw_imm_v can only be used in instructions in * packed-word execution mode, and SVBI is a double-word, we need to * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), * or (1, 0, 2)) to the destination_indices register, and then add SVBI * using a separate instruction. Also, since the immediate constant is * expressed as packed words, and we need to load double-words into * destination_indices, we need to intersperse zeros to fill the upper * halves of each double-word. */ brw_MOV(p, destination_indices_uw, brw_imm_v(0x00020100)); /* (0, 1, 2) */ if (num_verts == 3) { /* Get primitive type into temp register. */ brw_AND(p, get_element_ud(c->reg.temp, 0), get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as * an 8-wide comparison so that the conditional MOV that follows * moves all 8 words correctly. */ brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, get_element_ud(c->reg.temp, 0), brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); /* If so, then overwrite destination_indices_uw with the appropriate * reordering. */ inst = brw_MOV(p, destination_indices_uw, brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ : 0x00020001)); /* (1, 0, 2) */ brw_inst_set_pred_control(p->devinfo, inst, BRW_PREDICATE_NORMAL); } assert(c->reg.destination_indices.width == BRW_EXECUTE_4); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_ADD(p, c->reg.destination_indices, c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); brw_pop_insn_state(p); /* For each vertex, generate code to output each varying using the * appropriate binding table entry. */ for (vertex = 0; vertex < num_verts; ++vertex) { /* Set up the correct destination index for this vertex */ brw_MOV(p, get_element_ud(c->reg.header, 5), get_element_ud(c->reg.destination_indices, vertex)); for (binding = 0; binding < key->num_transform_feedback_bindings; ++binding) { unsigned char varying = key->transform_feedback_bindings[binding]; unsigned char slot = c->vue_map.varying_to_slot[varying]; /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: * * "Prior to End of Thread with a URB_WRITE, the kernel must * ensure that all writes are complete by sending the final * write as a committed write." */ bool final_write = binding == key->num_transform_feedback_bindings - 1 && vertex == num_verts - 1; struct brw_reg vertex_slot = c->reg.vertex[vertex]; vertex_slot.nr += slot / 2; vertex_slot.subnr = (slot % 2) * 16; /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ vertex_slot.swizzle = varying == VARYING_SLOT_PSIZ ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; brw_set_default_access_mode(p, BRW_ALIGN_16); brw_push_insn_state(p); brw_set_default_exec_size(p, BRW_EXECUTE_4); brw_MOV(p, stride(c->reg.header, 4, 4, 1), retype(vertex_slot, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_svb_write(p, final_write ? c->reg.temp : brw_null_reg(), /* dest */ 1, /* msg_reg_nr */ c->reg.header, /* src0 */ SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */ final_write); /* send_commit_msg */ } } brw_ENDIF(p); /* Now, reinitialize the header register from R0 to restore the parts of * the register that we overwrote while streaming out transform feedback * data. */ brw_ff_gs_initialize_header(c); /* Finally, wait for the write commit to occur so that we can proceed to * other things safely. * * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: * * The write commit does not modify the destination register, but * merely clears the dependency associated with the destination * register. Thus, a simple “mov” instruction using the register as a * source is sufficient to wait for the write commit to occur. */ brw_MOV(p, c->reg.temp, c->reg.temp); } brw_ff_gs_ff_sync(c, 1); brw_ff_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); brw_ff_gs_emit_vue(c, c->reg.vertex[0], true); break; case 2: brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], true); break; case 3: if (check_edge_flags) { /* Only emit vertices 0 and 1 if this is the first triangle of the * polygon. Otherwise they are redundant. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[0], false); brw_ff_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); brw_ff_gs_emit_vue(c, c->reg.vertex[1], false); if (check_edge_flags) { brw_ENDIF(p); /* Only emit vertex 2 in PRIM_END mode if this is the last triangle * of the polygon. Otherwise leave the primitive incomplete because * there are more polygon vertices coming. */ brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), get_element_ud(c->reg.R0, 2), brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); } brw_ff_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_ff_gs_emit_vue(c, c->reg.vertex[2], true); break; } }
void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 1; if (allocate) alloc_regs(c); copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear, pc_coord_replace; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); pc_coord_replace = calculate_point_sprite_mask(c, i); pc_persp &= ~pc_coord_replace; if (pc_persp) { set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); } /* Point sprite coordinate replacement: A texcoord with this * enabled gets replaced with the value (x, y, 0, 1) where x and * y vary from 0 to 1 across the horizontal and vertical of the * point. */ if (pc_coord_replace) { set_predicate_control_flag_value(p, c, pc_coord_replace); /* Caculate 1.0/PointWidth */ gen4_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, 0, c->dx0, BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); brw_set_default_access_mode(p, BRW_ALIGN_16); /* dA/dx, dA/dy */ brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } /* attribute constant offset */ brw_MOV(p, c->m3C0, brw_imm_f(0.0)); if (c->key.sprite_origin_lower_left) { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); } else { brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); } brw_set_default_access_mode(p, BRW_ALIGN_1); } if (pc & ~pc_coord_replace) { set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace); brw_MOV(p, c->m1Cx, brw_imm_ud(0)); brw_MOV(p, c->m2Cy, brw_imm_ud(0)); brw_MOV(p, c->m3C0, a0); /* constant value */ } set_predicate_control_flag_value(p, c, pc); /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 2; if (allocate) alloc_regs(c); invert_det(c); copy_z_inv_w(c); if (c->has_flat_shading) do_flatshade_line(c); for (i = 0; i < c->nr_setup_regs; i++) { /* Pair of incoming attributes: */ struct brw_reg a0 = offset(c->vert[0], i); struct brw_reg a1 = offset(c->vert[1], i); GLushort pc, pc_persp, pc_linear; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); brw_MUL(p, a1, a1, c->inv_w[1]); } /* Calculate coefficients for position, color: */ if (pc_linear) { set_predicate_control_flag_value(p, c, pc_linear); brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); } { set_predicate_control_flag_value(p, c, pc); /* start point for interpolation */ brw_MOV(p, c->m3C0, a0); /* Copy m0..m3 to URB. */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* urb destination offset */ BRW_URB_SWIZZLE_TRANSPOSE); } } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) { struct brw_compile *p = &c->func; GLuint i; c->flag_value = 0xff; c->nr_verts = 3; if (allocate) alloc_regs(c); invert_det(c); copy_z_inv_w(c); if (c->key.do_twoside_color) do_twoside_color(c); if (c->has_flat_shading) do_flatshade_triangle(c); for (i = 0; i < c->nr_setup_regs; i++) { /* Pair of incoming attributes: */ struct brw_reg a0 = offset(c->vert[0], i); struct brw_reg a1 = offset(c->vert[1], i); struct brw_reg a2 = offset(c->vert[2], i); GLushort pc, pc_persp, pc_linear; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { set_predicate_control_flag_value(p, c, pc_persp); brw_MUL(p, a0, a0, c->inv_w[0]); brw_MUL(p, a1, a1, c->inv_w[1]); brw_MUL(p, a2, a2, c->inv_w[2]); } /* Calculate coefficients for interpolated values: */ if (pc_linear) { set_predicate_control_flag_value(p, c, pc_linear); brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); /* calculate dA/dx */ brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); /* calculate dA/dy */ brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); } { set_predicate_control_flag_value(p, c, pc); /* start point for interpolation */ brw_MOV(p, c->m3C0, a0); /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in * the send instruction: */ brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ last ? BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS, 4, /* msg len */ 0, /* response len */ i*4, /* offset */ BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ } } brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
static void brw_clip_test( struct brw_clip_compile *c ) { struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); struct brw_reg v0 = get_tmp(c); struct brw_reg v1 = get_tmp(c); struct brw_reg v2 = get_tmp(c); struct brw_indirect vt0 = brw_indirect(0, 0); struct brw_indirect vt1 = brw_indirect(1, 0); struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_codegen *p = &c->func; struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); brw_MOV(p, v0, deref_4f(vt0, hpos_offset)); brw_MOV(p, v1, deref_4f(vt1, hpos_offset)); brw_MOV(p, v2, deref_4f(vt2, hpos_offset)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ /* clip.xyz < -clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* test farz, xmax, ymax plane */ /* clip.xyz > clip.w */ brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); /* All vertices are outside of a plane, rejected */ brw_AND(p, t, t1, t2); brw_AND(p, t, t, t3); brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); brw_OR(p, tmp0, tmp0, get_element(t, 2)); brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { brw_clip_kill_thread(c); } brw_ENDIF(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); brw_AND(p, t, t, brw_imm_ud(0x1)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); release_tmps(c); }
/* Use mesa's clipping algorithms, translated to GEN4 assembly. */ void brw_clip_tri( struct brw_clip_compile *c ) { struct brw_codegen *p = &c->func; struct brw_indirect vtx = brw_indirect(0, 0); struct brw_indirect vtxPrev = brw_indirect(1, 0); struct brw_indirect vtxOut = brw_indirect(2, 0); struct brw_indirect plane_ptr = brw_indirect(3, 0); struct brw_indirect inlist_ptr = brw_indirect(4, 0); struct brw_indirect outlist_ptr = brw_indirect(5, 0); struct brw_indirect freelist_ptr = brw_indirect(6, 0); GLuint hpos_offset = brw_varying_to_offset(&c->vue_map, VARYING_SLOT_POS); GLint clipdist0_offset = c->key.nr_userclip ? brw_varying_to_offset(&c->vue_map, VARYING_SLOT_CLIP_DIST0) : 0; brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); /* Set the initial vertex source mask: The first 6 planes are the bounds * of the view volume; the next 8 planes are the user clipping planes. */ brw_MOV(p, c->reg.vertex_src_mask, brw_imm_ud(0x3fc0)); /* Set the initial clipdistance offset to be 6 floats before gl_ClipDistance[0]. * We'll increment 6 times before we start hitting actual user clipping. */ brw_MOV(p, c->reg.clipdistance_offset, brw_imm_d(clipdist0_offset - 6*sizeof(float))); brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) */ brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_IF(p, BRW_EXECUTE_1); { /* vtxOut = freelist_ptr++ */ brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); if (c->key.nr_userclip) brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); else brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); brw_DO(p, BRW_EXECUTE_1); { /* vtx = *input_ptr; */ brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); load_clip_distance(c, vtxPrev, c->reg.dpPrev, hpos_offset, BRW_CONDITIONAL_L); /* (prev < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_GE); /* IS_POSITIVE(next) */ brw_IF(p, BRW_EXECUTE_1); { /* Coming back in. */ brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); /* If (vtxOut == 0) vtxOut = vtxPrev */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, false); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ELSE(p); { /* *outlist_ptr++ = vtxPrev; * nr_verts++; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); load_clip_distance(c, vtx, c->reg.dp, hpos_offset, BRW_CONDITIONAL_L); /* (next < 0.0f) */ brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we * know dp != dpPrev from DIFFERENT_SIGNS, above. */ brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); brw_math_invert(p, c->reg.t, c->reg.t); brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); /* If (vtxOut == 0) vtxOut = vtx */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx)); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, true); /* *outlist_ptr++ = vtxOut; * nr_verts++; * vtxOut = 0; */ brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); } brw_ENDIF(p); } brw_ENDIF(p); /* vtxPrev = vtx; * inlist_ptr++; */ brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); /* while (--loopcount != 0) */ brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); } brw_WHILE(p); brw_inst_set_pred_control(p->devinfo, brw_last_inst, BRW_PREDICATE_NORMAL); /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] * inlist = outlist * inlist_ptr = &inlist[0] * outlist_ptr = &outlist[0] */ brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); } brw_ENDIF(p); /* plane_ptr++; */ brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); /* nr_verts >= 3 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_GE, c->reg.nr_verts, brw_imm_ud(3)); brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL); /* && (planemask>>=1) != 0 */ brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); brw_SHR(p, c->reg.vertex_src_mask, c->reg.vertex_src_mask, brw_imm_ud(1)); brw_ADD(p, c->reg.clipdistance_offset, c->reg.clipdistance_offset, brw_imm_w(sizeof(float))); } brw_WHILE(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); }
void vec4_generator::generate_code(const cfg_t *cfg) { struct annotation_info annotation; memset(&annotation, 0, sizeof(annotation)); foreach_block_and_inst (block, vec4_instruction, inst, cfg) { struct brw_reg src[3], dst; if (unlikely(debug_flag)) annotate(brw, &annotation, cfg, inst, p->next_insn_offset); for (unsigned int i = 0; i < 3; i++) { src[i] = inst->get_src(this->prog_data, i); } dst = inst->get_dst(); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); unsigned pre_emit_nr_insn = p->nr_insn; generate_vec4_instruction(inst, dst, src); if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) { assert(p->nr_insn == pre_emit_nr_insn + 1 || !"conditional_mod, no_dd_check, or no_dd_clear set for IR " "emitting more than 1 instruction"); brw_inst *last = &p->store[pre_emit_nr_insn]; brw_inst_set_cond_modifier(brw, last, inst->conditional_mod); brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear); brw_inst_set_no_dd_check(brw, last, inst->no_dd_check); } } brw_set_uip_jip(p); annotation_finalize(&annotation, p->next_insn_offset); int before_size = p->next_insn_offset; brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann); int after_size = p->next_insn_offset; if (unlikely(debug_flag)) { if (shader_prog) { fprintf(stderr, "Native code for %s vertex shader %d:\n", shader_prog->Label ? shader_prog->Label : "unnamed", shader_prog->Name); } else { fprintf(stderr, "Native code for vertex program %d:\n", prog->Id); } fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d" " bytes (%.0f%%)\n", before_size / 16, before_size, after_size, 100.0f * (before_size - after_size) / before_size); dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog); ralloc_free(annotation.ann); } }
/** * Generate assembly for a Vec4 IR instruction. * * \param instruction The Vec4 IR instruction to generate code for. * \param dst The destination register. * \param src An array of up to three source registers. */ void vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, struct brw_reg dst, struct brw_reg *src) { vec4_instruction *inst = (vec4_instruction *) instruction; if (dst.width == BRW_WIDTH_4) { /* This happens in attribute fixups for "dual instanced" geometry * shaders, since they use attributes that are vec4's. Since the exec * width is only 4, it's essential that the caller set * force_writemask_all in order to make sure the instruction is executed * regardless of which channels are enabled. */ assert(inst->force_writemask_all); /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy * the following register region restrictions (from Graphics BSpec: * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions * > Register Region Restrictions) * * 1. ExecSize must be greater than or equal to Width. * * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set * to Width * HorzStride." */ for (int i = 0; i < 3; i++) { if (src[i].file == BRW_GENERAL_REGISTER_FILE) src[i] = stride(src[i], 4, 4, 1); } } switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: brw_ADD(p, dst, src[0], src[1]); break; case BRW_OPCODE_MUL: brw_MUL(p, dst, src[0], src[1]); break; case BRW_OPCODE_MACH: brw_MACH(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAD: assert(brw->gen >= 6); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_FRC: brw_FRC(p, dst, src[0]); break; case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; case BRW_OPCODE_RNDE: brw_RNDE(p, dst, src[0]); break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; case BRW_OPCODE_AND: brw_AND(p, dst, src[0], src[1]); break; case BRW_OPCODE_OR: brw_OR(p, dst, src[0], src[1]); break; case BRW_OPCODE_XOR: brw_XOR(p, dst, src[0], src[1]); break; case BRW_OPCODE_NOT: brw_NOT(p, dst, src[0]); break; case BRW_OPCODE_ASR: brw_ASR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHR: brw_SHR(p, dst, src[0], src[1]); break; case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_DPH: brw_DPH(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP4: brw_DP4(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP3: brw_DP3(p, dst, src[0], src[1]); break; case BRW_OPCODE_DP2: brw_DP2(p, dst, src[0], src[1]); break; case BRW_OPCODE_F32TO16: assert(brw->gen >= 7); brw_F32TO16(p, dst, src[0]); break; case BRW_OPCODE_F16TO32: assert(brw->gen >= 7); brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_LRP: assert(brw->gen >= 6); brw_LRP(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: assert(brw->gen >= 7); /* BFREV only supports UD type for src and dst. */ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: assert(brw->gen >= 7); /* FBH only supports UD type for dst. */ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_FBL: assert(brw->gen >= 7); /* FBL only supports UD type for dst. */ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_CBIT: assert(brw->gen >= 7); /* CBIT only supports UD type for dst. */ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); break; case BRW_OPCODE_ADDC: assert(brw->gen >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: assert(brw->gen >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: brw_MAC(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFE: assert(brw->gen >= 7); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: assert(brw->gen >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: assert(brw->gen >= 7); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: if (inst->src[0].file != BAD_FILE) { /* The instruction has an embedded compare (only allowed on gen6) */ assert(brw->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8); brw_inst_set_pred_control(brw, if_inst, inst->predicate); } break; case BRW_OPCODE_ELSE: brw_ELSE(p); break; case BRW_OPCODE_ENDIF: brw_ENDIF(p); break; case BRW_OPCODE_DO: brw_DO(p, BRW_EXECUTE_8); break; case BRW_OPCODE_BREAK: brw_BREAK(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: brw_CONT(p); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_WHILE: brw_WHILE(p); break; case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], brw_null_reg()); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], brw_null_reg()); } else { generate_math1_gen4(inst, dst, src[0]); } break; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: if (brw->gen >= 7) { gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); } else if (brw->gen == 6) { generate_math_gen6(inst, dst, src[0], src[1]); } else { generate_math2_gen4(inst, dst, src[0], src[1]); } break; case SHADER_OPCODE_TEX: case SHADER_OPCODE_TXD: case SHADER_OPCODE_TXF: case SHADER_OPCODE_TXF_CMS: case SHADER_OPCODE_TXF_MCS: case SHADER_OPCODE_TXL: case SHADER_OPCODE_TXS: case SHADER_OPCODE_TG4: case SHADER_OPCODE_TG4_OFFSET: generate_tex(inst, dst, src[0], src[1]); break; case VS_OPCODE_URB_WRITE: generate_vs_urb_write(inst); break; case SHADER_OPCODE_GEN4_SCRATCH_READ: generate_scratch_read(inst, dst, src[0]); break; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: generate_scratch_write(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD: generate_pull_constant_load(inst, dst, src[0], src[1]); break; case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: generate_pull_constant_load_gen7(inst, dst, src[0], src[1]); break; case GS_OPCODE_URB_WRITE: generate_gs_urb_write(inst); break; case GS_OPCODE_THREAD_END: generate_gs_thread_end(inst); break; case GS_OPCODE_SET_WRITE_OFFSET: generate_gs_set_write_offset(dst, src[0], src[1]); break; case GS_OPCODE_SET_VERTEX_COUNT: generate_gs_set_vertex_count(dst, src[0]); break; case GS_OPCODE_SET_DWORD_2_IMMED: generate_gs_set_dword_2_immed(dst, src[0]); break; case GS_OPCODE_PREPARE_CHANNEL_MASKS: generate_gs_prepare_channel_masks(dst); break; case GS_OPCODE_SET_CHANNEL_MASKS: generate_gs_set_channel_masks(dst, src[0]); break; case GS_OPCODE_GET_INSTANCE_ID: generate_gs_get_instance_id(dst); break; case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], prog_data->base.binding_table.shader_time_start); brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.shader_time_start); break; case SHADER_OPCODE_UNTYPED_ATOMIC: generate_untyped_atomic(inst, dst, src[0], src[1]); break; case SHADER_OPCODE_UNTYPED_SURFACE_READ: generate_untyped_surface_read(inst, dst, src[0]); break; case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: generate_unpack_flags(inst, dst); break; default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n", opcode_descs[inst->opcode].name); } else { _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode); } abort(); } }
void vec4_generator::generate_scratch_write(vec4_instruction *inst, struct brw_reg dst, struct brw_reg src, struct brw_reg index) { struct brw_reg header = brw_vec8_grf(0, 0); bool write_commit; /* If the instruction is predicated, we'll predicate the send, not * the header setup. */ brw_set_default_predicate_control(p, false); gen6_resolve_implied_move(p, &header, inst->base_mrf); generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1), index); brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D), retype(src, BRW_REGISTER_TYPE_D)); uint32_t msg_type; if (brw->gen >= 7) msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else if (brw->gen == 6) msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; else msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE; brw_set_default_predicate_control(p, inst->predicate); /* Pre-gen6, we have to specify write commits to ensure ordering * between reads and writes within a thread. Afterwards, that's * guaranteed and write commits only matter for inter-thread * synchronization. */ if (brw->gen >= 6) { write_commit = false; } else { /* The visitor set up our destination register to be g0. This * means that when the next read comes along, we will end up * reading from g0 and causing a block on the write commit. For * write-after-read, we are relying on the value of the previous * read being used (and thus blocking on completion) before our * write is executed. This means we have to be careful in * instruction scheduling to not violate this assumption. */ write_commit = true; } /* Each of the 8 channel enables is considered for whether each * dword is written. */ brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); brw_set_dest(p, send, dst); brw_set_src0(p, send, header); if (brw->gen < 6) brw_inst_set_cond_modifier(brw, send, inst->base_mrf); brw_set_dp_write_message(p, send, 255, /* binding table index: stateless access */ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, 3, /* mlen */ true, /* header present */ false, /* not a render target write */ write_commit, /* rlen */ false, /* eot */ write_commit); }