/**
 * Emit a typed surface atomic opcode.
 *
 * \p dims gives the number of address components and \p rsize the number
 * of components of the returned value (either zero or one).  \p src0 and
 * \p src1 are the optional operands of the atomic; an operand whose file
 * is BAD_FILE is treated as absent.
 *
 * \return the result of the emit_send() call that issues the message.
 */
src_reg
emit_typed_atomic(const vec4_builder &bld,
                  const src_reg &surface, const src_reg &addr,
                  const src_reg &src0, const src_reg &src1,
                  unsigned dims, unsigned rsize, unsigned op,
                  brw_predicate pred)
{
   /* Set on Haswell and on gen >= 8.  It is forwarded to emit_insert() and
    * collapses both payload lengths passed to emit_send() to 1.
    */
   const bool simd4x2 = (bld.shader->devinfo->is_haswell ||
                         bld.shader->devinfo->gen >= 8);

   /* Count the operands that were actually supplied. */
   unsigned num_srcs = 0;
   if (src0.file != BAD_FILE)
      num_srcs++;
   if (src1.file != BAD_FILE)
      num_srcs++;

   /* Zip the operands together: they travel as the X and Y components of
    * one vector.
    */
   const dst_reg zipped = bld.vgrf(BRW_REGISTER_TYPE_UD);

   if (num_srcs > 0) {
      bld.MOV(writemask(zipped, WRITEMASK_X), src0);

      if (num_srcs > 1)
         bld.MOV(writemask(zipped, WRITEMASK_Y), src1);
   }

   const unsigned addr_sz = simd4x2 ? 1 : dims;
   const unsigned src_sz = simd4x2 ? 1 : num_srcs;

   return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
                    emit_typed_message_header(bld),
                    emit_insert(bld, addr, dims, simd4x2),
                    addr_sz,
                    emit_insert(bld, src_reg(zipped), num_srcs, simd4x2),
                    src_sz,
                    surface, op, rsize, pred);
}
void vec4_vs_visitor::emit_prolog() { dst_reg sign_recovery_shift; dst_reg normalize_factor; dst_reg es3_normalize_factor; for (int i = 0; i < VERT_ATTRIB_MAX; i++) { if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) { uint8_t wa_flags = key->gl_attrib_wa_flags[i]; dst_reg reg(ATTR, i); dst_reg reg_d = reg; reg_d.type = BRW_REGISTER_TYPE_D; dst_reg reg_ud = reg; reg_ud.type = BRW_REGISTER_TYPE_UD; /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes * come in as floating point conversions of the integer values. */ if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1; emit(MUL(dst, src_reg(dst), src_reg(1.0f / 65536.0f))); } /* Do sign recovery for 2101010 formats if required. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { if (sign_recovery_shift.file == BAD_FILE) { /* shift constant: <22,22,22,30> */ sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), src_reg(22u))); emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), src_reg(30u))); } emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift))); emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift))); } /* Apply BGRA swizzle if required. */ if (wa_flags & BRW_ATTRIB_WA_BGRA) { src_reg temp = src_reg(reg); temp.swizzle = BRW_SWIZZLE4(2,1,0,3); emit(MOV(reg, temp)); } if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) { /* ES 3.0 has different rules for converting signed normalized * fixed-point numbers than desktop GL. 
*/ if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) { /* According to equation 2.2 of the ES 3.0 specification, * signed normalization conversion is done by: * * f = c / (2^(b-1)-1) */ if (es3_normalize_factor.file == BAD_FILE) { /* mul constant: 1 / (2^(b-1) - 1) */ es3_normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<9) - 1)))); emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<1) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg(reg_d))); emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor))); emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), src_reg(-1.0f)); } else { /* The following equations are from the OpenGL 3.2 specification: * * 2.1 unsigned normalization * f = c/(2^n-1) * * 2.2 signed normalization * f = (2c+1)/(2^n-1) * * Both of these share a common divisor, which is represented by * "normalize_factor" in the code below. */ if (normalize_factor.file == BAD_FILE) { /* 1 / (2^b - 1) for b=<10,10,10,2> */ normalize_factor = dst_reg(this, glsl_type::vec4_type); emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ), src_reg(1.0f / ((1<<10) - 1)))); emit(MOV(writemask(normalize_factor, WRITEMASK_W), src_reg(1.0f / ((1<<2) - 1)))); } dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); /* For signed normalization, we want the numerator to be 2c+1. */ if (wa_flags & BRW_ATTRIB_WA_SIGN) { emit(MUL(dst, src_reg(dst), src_reg(2.0f))); emit(ADD(dst, src_reg(dst), src_reg(1.0f))); } emit(MUL(dst, src_reg(dst), src_reg(normalize_factor))); } } if (wa_flags & BRW_ATTRIB_WA_SCALE) { dst_reg dst = reg; dst.type = brw_type_for_base_type(glsl_type::vec4_type); emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud))); } } } }
/*
 * magnify: interactive magnifier loop.
 *
 * Repeatedly reads the position in dev_x/dev_y, outlines the 40x40 region
 * around it with recti(), reads that region from `pw` into a 40x40 memory
 * pixrect, expands it into a 200x200 memory pixrect (5x5 destination cell
 * per source pixel) and writes the result back to `pw` beside the tracked
 * position.  The loop ends when `dev` equals MS_LEFT; the pixrects are then
 * destroyed, clear() is called and writemask(-1) is issued.
 *
 * K&R-style definition with no declared return type and no return value.
 * Uses names declared outside this function: pw, dev, dev_x, dev_y, width,
 * height, color, red, green, blue.
 */
magnify ()
{
	struct pixrect *prr, *prw, *prc;	/* read (40x40), write (200x200), clear (200x200) */
	RCOLORS * rcolors;
	WCOLORS * wcolors;
	WCOLORS * ccolors;
	int x1, x2, y1, y2;
	int last_x1, last_x2;
	int last_y1, last_y2;
	int x, y;
	int i, j, k, l;

	prr = mem_create ( 40, 40, 8 );
	prw = mem_create ( 200, 200, 8 );
	prc = mem_create ( 200, 200, 8 );

	/* Typed views of the pixrect image data, indexed as [i][j] below. */
	rcolors = (RCOLORS *)mpr_d(prr)->md_image;
	wcolors = (WCOLORS *)mpr_d(prw)->md_image;
	/* NOTE(review): ccolors is taken from prw, not prc, so it aliases
	 * wcolors and prc's image is never written in this function.  Confirm
	 * whether prc was intended before changing it: prc is what gets written
	 * over the previous magnified area in the loop below. */
	ccolors = (WCOLORS *)mpr_d(prw)->md_image;

	/* Initialise every pixel of the 200x200 image to value 8. */
	for ( i = 0; i < 200; i++ )
		for ( j = 0; j < 200; j++ ) {
			wcolors[i][j] = ccolors[i][j] = 8;
		}

	/* Map each run of eight consecutive indices n<<3 .. (n<<3)+7, n = 1..8,
	 * to a single RGB value, so the low three bits of an index do not
	 * affect the mapped colour within a run. */
	for ( i = (1<<3); i < (1<<3)+8; i++ )
		mapcolor ( i, 0, 0, 0 );
	for ( i = (2<<3); i < (2<<3)+8; i++ )
		mapcolor ( i, 255, 0, 0 );
	for ( i = (3<<3); i < (3<<3)+8; i++ )
		mapcolor ( i, 0, 255, 0 );
	for ( i = (4<<3); i < (4<<3)+8; i++ )
		mapcolor ( i, 255, 255, 0 );
	for ( i = (5<<3); i < (5<<3)+8; i++ )
		mapcolor ( i, 0, 0, 255 );
	for ( i = (6<<3); i < (6<<3)+8; i++ )
		mapcolor ( i, 255, 0, 255 );
	for ( i = (7<<3); i < (7<<3)+8; i++ )
		mapcolor ( i, 0, 255, 255 );
	for ( i = (8<<3); i < (8<<3)+8; i++ )
		mapcolor ( i, 255, 255, 255 );
	pw_putcolormap ( pw, 0, 64, red, green, blue );

	/* Mask value is bits 3..5; presumably limits drawing to those planes
	 * so the low three bits of the image are preserved -- TODO confirm. */
	writemask ( 7 << 3 );

	last_x1 = last_x2 = 0;
	last_y1 = last_y2 = 0;

	for ( ;; ) {
		/* for ( dev = 0; dev == 0; ) { */
		notify_dispatch ();
		x = dev_x;
		y = dev_y;

		/* Skip positions closer than 20 to an edge, so the 40x40 box
		 * computed below stays within 0..width and 0..height. */
		if ( x < 20 || x > (width-20) ) continue;
		if ( y < 20 || y > (height-20) ) continue;

		x1 = x - 20;
		x2 = x + 20;
		y1 = y - 20;
		y2 = y + 20;

		/* Nothing to redraw if the box has not moved.
		 * NOTE(review): this also skips the MS_LEFT exit test below, so
		 * the loop can only end on an iteration where the position
		 * changed -- confirm that is intended. */
		if ( x1 == last_x1 && y1 == last_y1 ) continue;

		/* Redraw the old box with color 0, then the new one with 16. */
		color = 0;
		recti ( last_x1, last_y1, last_x1+40, last_y1+40 );
		color = 16;
		recti ( x1, y1, x2, y2 );
		last_x1 = x1;
		last_y1 = y1;
		/* } */

		if ( dev == MS_LEFT ) break;

		/* Overwrite the previous magnified area with prc, then read the
		 * 40x40 source region; y is flipped against height for the pw_*
		 * calls. */
		pw_write ( pw, last_x2, last_y2, 200, 200, PIX_SRC, prc, 0, 0 );
		pw_read ( prr, 0, 0, 40, 40, PIX_SRC, pw, x-20, height-y-20 );
		recti ( x1, y1, x2, y2 );

		/* Expand each source pixel into its 5x5 destination cell.  Only
		 * offsets 1..3 along the first index are written (all five along
		 * the second); the rest of the cell keeps its earlier value.  The
		 * stored value is ((low three bits) + 1) << 3. */
		for ( i = 0; i < 40; i++ ) {
			for ( j = 0; j < 40; j++ ) {
				for ( k = 1; k <= 3; k++ )
					for ( l = 0; l <= 4; l++ ) {
						wcolors[i*5+k][j*5+l] = ((rcolors[i][j]&7)+1) << 3;
					}
			}
		}

		/* Place the 200x200 output 60 to the right of the position, or
		 * 260 to its left when that would pass width-200; centre it
		 * vertically and clamp to 0..height-200. */
		x1 = x + 60;
		if ( x1 > (width-200) ) x1 = x - 260;
		x2 = x1 + 199;
		y1 = y - 100;
		if ( y1 < 0 ) y1 = 0;
		if ( y1 > (height-200) ) y1 = height-200;
		y2 = y1 + 199;

		pw_write ( pw, x1, height - y2, 200, 200, PIX_SRC, prw, 0, 0 );
		color = 16;
		recti ( x1, y1, x2, y2 );

		/* Remember where the output went so the next pass can overwrite it. */
		last_x2 = x1;
		last_y2 = height - y2;
	}

	pr_destroy ( prr );
	pr_destroy ( prw );
	pr_destroy ( prc );
	clear ();
	writemask ( -1 );
}
/** * Process a PS input declaration. * We'll emit a declaration like "dcl_texcoord1 v2" */ static boolean ps30_input(struct svga_shader_emitter *emit, struct tgsi_declaration_semantic semantic, unsigned idx) { unsigned usage, index; SVGA3dShaderDestToken reg; if (semantic.Name == TGSI_SEMANTIC_POSITION) { emit->ps_true_pos = src_register( SVGA3DREG_MISCTYPE, SVGA3DMISCREG_POSITION ); emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y ); reg = writemask( dst(emit->ps_true_pos), TGSI_WRITEMASK_XY ); emit->ps_reads_pos = TRUE; if (emit->info.reads_z) { emit->ps_temp_pos = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp ); emit->input_map[idx] = src_register( SVGA3DREG_TEMP, emit->nr_hw_temp ); emit->nr_hw_temp++; if (!ps30_input_emit_depth_fog( emit, &emit->ps_depth_pos )) return FALSE; emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W ); } else { emit->input_map[idx] = emit->ps_true_pos; } return emit_decl( emit, reg, 0, 0 ); } else if (emit->key.fs.light_twoside && (semantic.Name == TGSI_SEMANTIC_COLOR)) { if (!translate_vs_ps_semantic( emit, semantic, &usage, &index )) return FALSE; emit->internal_color_idx[emit->internal_color_count] = idx; emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count ); emit->ps30_input_count++; emit->internal_color_count++; reg = dst( emit->input_map[idx] ); if (!emit_decl( emit, reg, usage, index )) return FALSE; semantic.Name = TGSI_SEMANTIC_BCOLOR; if (!translate_vs_ps_semantic( emit, semantic, &usage, &index )) return FALSE; if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX) return FALSE; reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); if (!emit_decl( emit, reg, usage, index )) return FALSE; if (!emit_vface_decl( emit )) return FALSE; return TRUE; } else if (semantic.Name == TGSI_SEMANTIC_FACE) { if (!emit_vface_decl( emit )) return FALSE; emit->emit_frontface = 
TRUE; emit->internal_frontface_idx = idx; return TRUE; } else if (semantic.Name == TGSI_SEMANTIC_FOG) { assert(semantic.Index == 0); if (!ps30_input_emit_depth_fog( emit, &emit->input_map[idx] )) return FALSE; emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X ); return TRUE; } else { if (!translate_vs_ps_semantic( emit, semantic, &usage, &index )) return FALSE; if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX) return FALSE; emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ ); reg = dst( emit->input_map[idx] ); if (!emit_decl( emit, reg, usage, index )) return FALSE; if (semantic.Name == TGSI_SEMANTIC_GENERIC && emit->key.sprite_origin_lower_left && index >= 1 && emit->key.tex[index - 1].sprite_texgen) { /* This is a sprite texture coord with lower-left origin. * We need to invert the texture T coordinate since the SVGA3D * device only supports an upper-left origin. */ unsigned unit = index - 1; emit->inverted_texcoords |= (1 << unit); /* save original texcoord reg */ emit->ps_true_texcoord[unit] = emit->input_map[idx]; /* this temp register will be the results of the MAD instruction */ emit->ps_inverted_texcoord[unit] = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); emit->nr_hw_temp++; emit->ps_inverted_texcoord_input[unit] = idx; /* replace input_map entry with the temp register */ emit->input_map[idx] = emit->ps_inverted_texcoord[unit]; } return TRUE; } }
/**
 * Emit vec4 IR for a NIR intrinsic in a tessellation control shader.
 *
 * Handles the TCS-specific intrinsics (invocation/primitive ID, patch
 * vertex count, per-vertex input loads, output loads and stores, barrier)
 * and forwards everything else to vec4_visitor::nir_emit_intrinsic().
 *
 * Output loads and stores with a constant offset of 0 or 1 and no indirect
 * offset address gl_TessLevelInner[] / gl_TessLevelOuter[] in the Patch URB
 * header.  Their layout depends on key->tes_primitive_mode, so those
 * accesses are swizzled and masked here (stores skip this for the
 * passthrough shader).
 *
 * \param instr  the intrinsic instruction to translate
 */
void
vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_invocation_id:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD),
               invocation_id));
      break;

   case nir_intrinsic_load_primitive_id:
      emit(TCS_OPCODE_GET_PRIMITIVE_ID,
           get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD));
      break;

   case nir_intrinsic_load_patch_vertices_in:
      emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D),
               brw_imm_d(key->input_vertices)));
      break;

   case nir_intrinsic_load_per_vertex_input: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* Use an immediate for the vertex index when it is a constant. */
      nir_const_value *vertex_const = nir_src_as_const_value(instr->src[0]);
      src_reg vertex_index =
         vertex_const ? src_reg(brw_imm_ud(vertex_const->u32[0]))
                      : get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      emit_input_urb_read(dst, vertex_index, imm_offset,
                          nir_intrinsic_component(instr), indirect_offset);
      break;
   }

   case nir_intrinsic_load_input:
      unreachable("nir_lower_io should use load_per_vertex_input intrinsics");
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output: {
      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
      dst.writemask = brw_writemask_for_size(instr->num_components);

      if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;

         /* This is a read of gl_TessLevelInner[], which lives in the
          * Patch URB header.  The layout depends on the domain.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS: {
            /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
            dst_reg tmp(this, glsl_type::vec4_type);
            emit_output_urb_read(tmp, 0, 0, src_reg());
            emit(MOV(writemask(dst, WRITEMASK_XY),
                     swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
            break;
         }
         case GL_TRIANGLES:
            /* DWord 4; use offset 1 but normal swizzle/writemask. */
            emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
                                 src_reg());
            break;
         case GL_ISOLINES:
            /* All channels are undefined. */
            return;
         default:
            unreachable("Bogus tessellation domain");
         }
      } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
         dst.type = BRW_REGISTER_TYPE_F;
         unsigned swiz = BRW_SWIZZLE_WZYX;

         /* This is a read of gl_TessLevelOuter[], which lives in the
          * high 4 DWords of the Patch URB header, in reverse order.
          */
         switch (key->tes_primitive_mode) {
         case GL_QUADS:
            dst.writemask = WRITEMASK_XYZW;
            break;
         case GL_TRIANGLES:
            dst.writemask = WRITEMASK_XYZ;
            break;
         case GL_ISOLINES:
            /* Isolines are not reversed; swizzle .zw -> .xy
             *
             * This must fall out of the switch so the URB read below is
             * emitted.  Returning here would discard the swizzle and
             * writemask just chosen and leave the destination unwritten,
             * even though the store path does write the isoline outer
             * levels (to .zw at offset 1).
             */
            swiz = BRW_SWIZZLE_ZWZW;
            dst.writemask = WRITEMASK_XY;
            break;
         default:
            unreachable("Bogus tessellation domain");
         }

         dst_reg tmp(this, glsl_type::vec4_type);
         emit_output_urb_read(tmp, 1, 0, src_reg());
         emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
      } else {
         /* Ordinary output: read it as laid out. */
         emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr),
                              indirect_offset);
      }
      break;
   }

   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output: {
      src_reg value = get_nir_src(instr->src[0]);
      unsigned mask = instr->const_index[1];
      unsigned swiz = BRW_SWIZZLE_XYZW;

      src_reg indirect_offset = get_indirect_offset(instr);
      unsigned imm_offset = instr->const_index[0];

      /* The passthrough shader writes the whole patch header as two vec4s;
       * skip all the gl_TessLevelInner/Outer swizzling.
       */
      if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
         if (imm_offset == 0) {
            value.type = BRW_REGISTER_TYPE_F;

            /* Drop components the domain does not have. */
            mask &=
               (1 << tesslevel_inner_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelInner[], which lives in the
             * Patch URB header.  The layout depends on the domain.
             */
            switch (key->tes_primitive_mode) {
            case GL_QUADS:
               /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
                * We use an XXYX swizzle to reverse put .xy in the .wz
                * channels, and use a .zw writemask.
                */
               swiz = BRW_SWIZZLE4(0, 0, 1, 0);
               mask = writemask_for_backwards_vector(mask);
               break;
            case GL_TRIANGLES:
               /* gl_TessLevelInner[].x lives at DWord 4, so we set the
                * writemask to X and bump the URB offset by 1.
                */
               imm_offset = 1;
               break;
            case GL_ISOLINES:
               /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
               return;
            default:
               unreachable("Bogus tessellation domain");
            }
         } else if (imm_offset == 1) {
            value.type = BRW_REGISTER_TYPE_F;

            /* Drop components the domain does not have. */
            mask &=
               (1 << tesslevel_outer_components(key->tes_primitive_mode)) - 1;

            /* This is a write to gl_TessLevelOuter[] which lives in the
             * Patch URB Header at DWords 4-7.  However, it's reversed, so
             * instead of .xyzw we have .wzyx.
             */
            if (key->tes_primitive_mode == GL_ISOLINES) {
               /* Isolines .xy should be stored in .zw, in order. */
               swiz = BRW_SWIZZLE4(0, 0, 0, 1);
               mask <<= 2;
            } else {
               /* Other domains are reversed; store .wzyx instead of .xyzw. */
               swiz = BRW_SWIZZLE_WZYX;
               mask = writemask_for_backwards_vector(mask);
            }
         }
      }

      /* A non-zero starting component shifts both the swizzle and the
       * writemask.  The assert requires that no tess-level swizzle was
       * chosen above in that case.
       */
      unsigned first_component = nir_intrinsic_component(instr);
      if (first_component) {
         assert(swiz == BRW_SWIZZLE_XYZW);
         swiz = BRW_SWZ_COMP_OUTPUT(first_component);
         mask = mask << first_component;
      }

      emit_urb_write(swizzle(value, swiz), mask,
                     imm_offset, indirect_offset);
      break;
   }

   case nir_intrinsic_barrier: {
      /* Build the barrier message header, then send the barrier. */
      dst_reg header = dst_reg(this, glsl_type::uvec4_type);
      emit(TCS_OPCODE_CREATE_BARRIER_HEADER, header);
      emit(SHADER_OPCODE_BARRIER, dst_null_ud(), src_reg(header));
      break;
   }

   default:
      vec4_visitor::nir_emit_intrinsic(instr);
   }
}