/** Emits the interpolation for the varying inputs. */ void fs_visitor::emit_interpolation_setup_gen6() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); fs_builder abld = bld.annotate("compute pixel centers"); if (devinfo->gen >= 8 || dispatch_width == 8) { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): * * "When destination spans two registers, the source may be one or * two registers. The destination elements must be evenly split * between the two registers." * * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to * compute our pixel centers. */ fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8), BRW_REGISTER_TYPE_UW); const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0); dbld.ADD(int_pixel_xy, fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), fs_reg(brw_imm_v(0x11001010))); this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); } else { /* The "Register Region Restrictions" page says for SNB, IVB, HSW: * * "When destination spans two registers, the source MUST span two * registers." * * Since the GRF source of the ADD will only read a single register, we * must do two separate ADDs in SIMD16. */ fs_reg int_pixel_x = vgrf(glsl_type::uint_type); fs_reg int_pixel_y = vgrf(glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; abld.ADD(int_pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), fs_reg(brw_imm_v(0x10101010))); abld.ADD(int_pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. We have * to turn the integer pixel centers into floats for their actual * use. */ this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); abld.MOV(this->pixel_x, int_pixel_x); abld.MOV(this->pixel_y, int_pixel_y); } abld = bld.annotate("compute pos.w"); this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = vgrf(glsl_type::float_type); abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0)); } }
/** Emits the interpolation for the varying inputs. */ void fs_visitor::emit_interpolation_setup_gen6() { struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); fs_builder abld = bld.annotate("compute pixel centers"); if (devinfo->gen >= 8 || dispatch_width == 8) { /* The "Register Region Restrictions" page says for BDW (and newer, * presumably): * * "When destination spans two registers, the source may be one or * two registers. The destination elements must be evenly split * between the two registers." * * Thus we can do a single add(16) in SIMD8 or an add(32) in SIMD16 to * compute our pixel centers. */ fs_reg int_pixel_xy(VGRF, alloc.allocate(dispatch_width / 8), BRW_REGISTER_TYPE_UW); const fs_builder dbld = abld.exec_all().group(dispatch_width * 2, 0); dbld.ADD(int_pixel_xy, fs_reg(stride(suboffset(g1_uw, 4), 1, 4, 0)), fs_reg(brw_imm_v(0x11001010))); this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); abld.emit(FS_OPCODE_PIXEL_X, this->pixel_x, int_pixel_xy); abld.emit(FS_OPCODE_PIXEL_Y, this->pixel_y, int_pixel_xy); } else { /* The "Register Region Restrictions" page says for SNB, IVB, HSW: * * "When destination spans two registers, the source MUST span two * registers." * * Since the GRF source of the ADD will only read a single register, we * must do two separate ADDs in SIMD16. */ fs_reg int_pixel_x = vgrf(glsl_type::uint_type); fs_reg int_pixel_y = vgrf(glsl_type::uint_type); int_pixel_x.type = BRW_REGISTER_TYPE_UW; int_pixel_y.type = BRW_REGISTER_TYPE_UW; abld.ADD(int_pixel_x, fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)), fs_reg(brw_imm_v(0x10101010))); abld.ADD(int_pixel_y, fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)), fs_reg(brw_imm_v(0x11001100))); /* As of gen6, we can no longer mix float and int sources. We have * to turn the integer pixel centers into floats for their actual * use. */ this->pixel_x = vgrf(glsl_type::float_type); this->pixel_y = vgrf(glsl_type::float_type); abld.MOV(this->pixel_x, int_pixel_x); abld.MOV(this->pixel_y, int_pixel_y); } abld = bld.annotate("compute pos.w"); this->pixel_w = fs_reg(brw_vec8_grf(payload.source_w_reg, 0)); this->wpos_w = vgrf(glsl_type::float_type); abld.emit(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(prog_data); uint32_t centroid_modes = wm_prog_data->barycentric_interp_modes & (1 << BRW_BARYCENTRIC_PERSPECTIVE_CENTROID | 1 << BRW_BARYCENTRIC_NONPERSPECTIVE_CENTROID); for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) { uint8_t reg = payload.barycentric_coord_reg[i]; this->delta_xy[i] = fs_reg(brw_vec16_grf(reg, 0)); if (devinfo->needs_unlit_centroid_workaround && (centroid_modes & (1 << i))) { /* Get the pixel/sample mask into f0 so that we know which * pixels are lit. Then, for each channel that is unlit, * replace the centroid data with non-centroid data. */ bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); uint8_t pixel_reg = payload.barycentric_coord_reg[i - 1]; set_predicate_inv(BRW_PREDICATE_NORMAL, true, bld.half(0).MOV(brw_vec8_grf(reg, 0), brw_vec8_grf(pixel_reg, 0))); set_predicate_inv(BRW_PREDICATE_NORMAL, true, bld.half(0).MOV(brw_vec8_grf(reg + 1, 0), brw_vec8_grf(pixel_reg + 1, 0))); if (dispatch_width == 16) { set_predicate_inv(BRW_PREDICATE_NORMAL, true, bld.half(1).MOV(brw_vec8_grf(reg + 2, 0), brw_vec8_grf(pixel_reg + 2, 0))); set_predicate_inv(BRW_PREDICATE_NORMAL, true, bld.half(1).MOV(brw_vec8_grf(reg + 3, 0), brw_vec8_grf(pixel_reg + 3, 0))); } assert(dispatch_width != 32); /* not implemented yet */ } } }