/**
 * Build a geometry shader that copies each input attribute of a point to the
 * matching output and emits that single vertex.
 *
 * The shader is declared with point input, point output, at most one output
 * vertex and one invocation.
 *
 * \param pipe              context the shader is created on
 * \param num_attribs       number of attributes to pass through; the local
 *                          register arrays are sized PIPE_MAX_SHADER_INPUTS /
 *                          PIPE_MAX_SHADER_OUTPUTS and no range check is made
 * \param semantic_names    TGSI semantic name of each attribute
 * \param semantic_indexes  TGSI semantic index of each attribute
 * \return the driver shader object, or NULL if the ureg program could not
 *         be created
 */
void *
util_make_geometry_passthrough_shader(struct pipe_context *pipe,
                                      uint num_attribs,
                                      const ubyte *semantic_names,
                                      const ubyte *semantic_indexes)
{
   static const unsigned zero[4] = {0, 0, 0, 0};

   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src emit_operand;
   struct ureg_program *prog;
   unsigned attr;

   prog = ureg_create(PIPE_SHADER_GEOMETRY);
   if (prog == NULL)
      return NULL;

   ureg_property(prog, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS);
   ureg_property(prog, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS);
   ureg_property(prog, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 1);
   ureg_property(prog, TGSI_PROPERTY_GS_INVOCATIONS, 1);

   emit_operand = ureg_DECL_immediate_uint(prog, zero, 4);

   /* Declare one input (addressed at vertex 0 of the primitive) and one
    * output register per attribute.
    */
   for (attr = 0; attr < num_attribs; attr++) {
      struct ureg_src decl = ureg_DECL_input(prog,
                                             semantic_names[attr],
                                             semantic_indexes[attr],
                                             0, 1);

      inputs[attr] = ureg_src_dimension(decl, 0);
      outputs[attr] = ureg_DECL_output(prog,
                                       semantic_names[attr],
                                       semantic_indexes[attr]);
   }

   /* MOV outputs[attr], inputs[attr] */
   for (attr = 0; attr < num_attribs; attr++)
      ureg_MOV(prog, outputs[attr], inputs[attr]);

   /* EMIT IMM[0] */
   ureg_insn(prog, TGSI_OPCODE_EMIT, NULL, 0, &emit_operand, 1, 0);

   /* END */
   ureg_END(prog);

   return ureg_create_shader_and_destroy(prog, pipe);
}
/**
 * Build a vertex shader that moves vertex input i to an output carrying the
 * i-th requested semantic, optionally with stream-output information.
 *
 * \param pipe              context the shader is created on
 * \param num_attribs       number of attributes to pass through
 * \param semantic_names    TGSI semantic name for each output
 * \param semantic_indexes  TGSI semantic index for each output
 * \param window_space      if true, set the
 *                          TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION property
 * \param so                stream output info handed on to shader creation
 * \return the driver shader object, or NULL if the ureg program could not
 *         be created
 */
void *
util_make_vertex_passthrough_shader_with_so(struct pipe_context *pipe,
                                            uint num_attribs,
                                            const uint *semantic_names,
                                            const uint *semantic_indexes,
                                            bool window_space,
                                            const struct pipe_stream_output_info *so)
{
   struct ureg_program *prog = ureg_create(PIPE_SHADER_VERTEX);
   uint attr;

   if (prog == NULL)
      return NULL;

   if (window_space) {
      ureg_property(prog, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
   }

   /* One input declaration, one output declaration and one MOV per attribute. */
   for (attr = 0; attr < num_attribs; attr++) {
      struct ureg_src in = ureg_DECL_vs_input(prog, attr);
      struct ureg_dst out = ureg_DECL_output(prog,
                                             semantic_names[attr],
                                             semantic_indexes[attr]);

      ureg_MOV(prog, out, in);
   }

   ureg_END(prog);

   return ureg_create_shader_with_so_and_destroy(prog, pipe, so);
}
void * st_pbo_create_gs(struct st_context *st) { static const int zero = 0; struct ureg_program *ureg; struct ureg_dst out_pos; struct ureg_dst out_layer; struct ureg_src in_pos; struct ureg_src imm; unsigned i; ureg = ureg_create(PIPE_SHADER_GEOMETRY); if (!ureg) return NULL; ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES); ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP); ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, 3); out_pos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); out_layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); in_pos = ureg_DECL_input(ureg, TGSI_SEMANTIC_POSITION, 0, 0, 1); imm = ureg_DECL_immediate_int(ureg, &zero, 1); for (i = 0; i < 3; ++i) { struct ureg_src in_pos_vertex = ureg_src_dimension(in_pos, i); /* out_pos = in_pos[i] */ ureg_MOV(ureg, out_pos, in_pos_vertex); /* out_layer.x = f2i(in_pos[i].z) */ ureg_F2I(ureg, ureg_writemask(out_layer, TGSI_WRITEMASK_X), ureg_scalar(in_pos_vertex, TGSI_SWIZZLE_Z)); ureg_EMIT(ureg, ureg_scalar(imm, TGSI_SWIZZLE_X)); } ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, st->pipe); }
/** * Emit fragment position/coordinate code. */ static void emit_wpos(struct st_context *st, struct st_translate *t, const struct gl_program *program, struct ureg_program *ureg) { const struct gl_fragment_program *fp = (const struct gl_fragment_program *) program; struct pipe_screen *pscreen = st->pipe->screen; GLfloat adjX = 0.0f; GLfloat adjY[2] = { 0.0f, 0.0f }; boolean invert = FALSE; /* Query the pixel center conventions supported by the pipe driver and set * adjX, adjY to help out if it cannot handle the requested one internally. * * The bias of the y-coordinate depends on whether y-inversion takes place * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are * drawing to an FBO (causes additional inversion), and whether the the pipe * driver origin and the requested origin differ (the latter condition is * stored in the 'invert' variable). * * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): * * center shift only: * i -> h: +0.5 * h -> i: -0.5 * * inversion only: * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 * * inversion and center shift: * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 */ if (fp->OriginUpperLeft) { /* Fragment shader wants origin in upper-left */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { /* the driver supports upper-left origin */ } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { /* the driver supports lower-left origin, need to invert Y */ ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); invert = TRUE; } else assert(0); } else { /* Fragment shader wants origin in lower-left */ if (pscreen->get_param(pscreen, 
PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) /* the driver supports lower-left origin */ ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) /* the driver supports upper-left origin, need to invert Y */ invert = TRUE; else assert(0); } if (fp->PixelCenterInteger) { /* Fragment shader wants pixel center integer */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer */ adjY[1] = 1.0f; ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { /* the driver supports pixel center half integer, need to bias X,Y */ adjX = -0.5f; adjY[0] = -0.5f; adjY[1] = 0.5f; } else assert(0); } else { /* Fragment shader wants pixel center half integer */ if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { /* the driver supports pixel center half integer */ } else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { /* the driver supports pixel center integer, need to bias X,Y */ adjX = adjY[0] = adjY[1] = 0.5f; ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); } else assert(0); } /* we invert after adjustment so that we avoid the MOV to temporary, * and reuse the adjustment ADD instead */ emit_wpos_adjustment(t, program, invert, adjX, adjY); }
/** * Translate a geometry program to create a new variant. */ static struct st_gp_variant * st_translate_geometry_program(struct st_context *st, struct st_geometry_program *stgp, const struct st_gp_variant_key *key) { GLuint inputMapping[VARYING_SLOT_MAX]; GLuint outputMapping[VARYING_SLOT_MAX]; struct pipe_context *pipe = st->pipe; GLuint attr; uint gs_num_inputs = 0; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte gs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; ubyte gs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint gs_num_outputs = 0; GLint i; struct ureg_program *ureg; struct pipe_shader_state state = {0}; struct st_gp_variant *gpv; gpv = CALLOC_STRUCT(st_gp_variant); if (!gpv) return NULL; ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY); if (ureg == NULL) { free(gpv); return NULL; } memset(inputMapping, 0, sizeof(inputMapping)); memset(outputMapping, 0, sizeof(outputMapping)); /* * Convert Mesa program inputs to TGSI input register semantics. 
*/ for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { if ((stgp->Base.Base.InputsRead & BITFIELD64_BIT(attr)) != 0) { const GLuint slot = gs_num_inputs++; inputMapping[attr] = slot; switch (attr) { case VARYING_SLOT_PRIMITIVE_ID: input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; input_semantic_index[slot] = 0; break; case VARYING_SLOT_POS: input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; input_semantic_index[slot] = 0; break; case VARYING_SLOT_COL0: input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; input_semantic_index[slot] = 0; break; case VARYING_SLOT_COL1: input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; input_semantic_index[slot] = 1; break; case VARYING_SLOT_FOGC: input_semantic_name[slot] = TGSI_SEMANTIC_FOG; input_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_VERTEX: input_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; input_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST0: input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; input_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST1: input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; input_semantic_index[slot] = 1; break; case VARYING_SLOT_PSIZ: input_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; input_semantic_index[slot] = 0; break; case VARYING_SLOT_TEX0: case VARYING_SLOT_TEX1: case VARYING_SLOT_TEX2: case VARYING_SLOT_TEX3: case VARYING_SLOT_TEX4: case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: if (st->needs_texcoord_semantic) { input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; input_semantic_index[slot] = attr - VARYING_SLOT_TEX0; break; } /* fall through */ case VARYING_SLOT_VAR0: default: assert(attr >= VARYING_SLOT_VAR0 && attr < VARYING_SLOT_MAX); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; input_semantic_index[slot] = st_get_generic_varying_index(st, attr); break; } } } /* initialize output semantics to defaults */ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { gs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; 
gs_output_semantic_index[i] = 0; } /* * Determine number of outputs, the (default) output register * mapping and the semantic information for each output. */ for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { if (stgp->Base.Base.OutputsWritten & BITFIELD64_BIT(attr)) { GLuint slot = gs_num_outputs++; outputMapping[attr] = slot; switch (attr) { case VARYING_SLOT_POS: assert(slot == 0); gs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL0: gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_COL1: gs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; gs_output_semantic_index[slot] = 1; break; case VARYING_SLOT_BFC0: gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_BFC1: gs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; gs_output_semantic_index[slot] = 1; break; case VARYING_SLOT_FOGC: gs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_PSIZ: gs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_VERTEX: gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPVERTEX; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST0: gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_CLIP_DIST1: gs_output_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; gs_output_semantic_index[slot] = 1; break; case VARYING_SLOT_LAYER: gs_output_semantic_name[slot] = TGSI_SEMANTIC_LAYER; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_PRIMITIVE_ID: gs_output_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; gs_output_semantic_index[slot] = 0; break; case VARYING_SLOT_VIEWPORT: gs_output_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; gs_output_semantic_index[slot] = 0; break; case 
VARYING_SLOT_TEX0: case VARYING_SLOT_TEX1: case VARYING_SLOT_TEX2: case VARYING_SLOT_TEX3: case VARYING_SLOT_TEX4: case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: if (st->needs_texcoord_semantic) { gs_output_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; gs_output_semantic_index[slot] = attr - VARYING_SLOT_TEX0; break; } /* fall through */ case VARYING_SLOT_VAR0: default: assert(slot < ARRAY_SIZE(gs_output_semantic_name)); assert(attr >= VARYING_SLOT_VAR0); gs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; gs_output_semantic_index[slot] = st_get_generic_varying_index(st, attr); break; } } } ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM, stgp->Base.InputType); ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, stgp->Base.OutputType); ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, stgp->Base.VerticesOut); ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS, stgp->Base.Invocations); st_translate_program(st->ctx, TGSI_PROCESSOR_GEOMETRY, ureg, stgp->glsl_to_tgsi, &stgp->Base.Base, /* inputs */ gs_num_inputs, inputMapping, input_semantic_name, input_semantic_index, NULL, NULL, /* outputs */ gs_num_outputs, outputMapping, gs_output_semantic_name, gs_output_semantic_index, FALSE, FALSE); state.tokens = ureg_get_tokens(ureg, NULL); ureg_destroy(ureg); st_translate_stream_output_info(stgp->glsl_to_tgsi, outputMapping, &state.stream_output); if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) { _mesa_print_program(&stgp->Base.Base); debug_printf("\n"); } if (ST_DEBUG & DEBUG_TGSI) { tgsi_dump(state.tokens, 0); debug_printf("\n"); } /* fill in new variant */ gpv->driver_shader = pipe->create_gs_state(pipe, &state); gpv->key = *key; ureg_free_tokens(state.tokens); return gpv; }
/** * Translate a Mesa fragment shader into a TGSI shader using extra info in * the key. * \return new fragment program variant */ static struct st_fp_variant * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp, const struct st_fp_variant_key *key) { struct pipe_context *pipe = st->pipe; struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant); GLboolean deleteFP = GL_FALSE; GLuint outputMapping[FRAG_RESULT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ GLuint interpLocation[PIPE_MAX_SHADER_INPUTS]; GLuint attr; GLbitfield64 inputsRead; struct ureg_program *ureg; GLboolean write_all = GL_FALSE; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; uint fs_num_inputs = 0; ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; uint fs_num_outputs = 0; if (!variant) return NULL; assert(!(key->bitmap && key->drawpixels)); if (key->bitmap) { /* glBitmap drawing */ struct gl_fragment_program *fp; /* we free this temp program below */ st_make_bitmap_fragment_program(st, &stfp->Base, &fp, &variant->bitmap_sampler); variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters); stfp = st_fragment_program(fp); deleteFP = GL_TRUE; } else if (key->drawpixels) { /* glDrawPixels drawing */ struct gl_fragment_program *fp; /* we free this temp program below */ if (key->drawpixels_z || key->drawpixels_stencil) { fp = st_make_drawpix_z_stencil_program(st, key->drawpixels_z, key->drawpixels_stencil); } else { /* RGBA */ st_make_drawpix_fragment_program(st, &stfp->Base, &fp); variant->parameters = _mesa_clone_parameter_list(fp->Base.Parameters); deleteFP = GL_TRUE; } stfp = st_fragment_program(fp); } if (!stfp->glsl_to_tgsi) _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT); /* * Convert Mesa program inputs to TGSI input register semantics. 
*/ inputsRead = stfp->Base.Base.InputsRead; for (attr = 0; attr < VARYING_SLOT_MAX; attr++) { if ((inputsRead & BITFIELD64_BIT(attr)) != 0) { const GLuint slot = fs_num_inputs++; inputMapping[attr] = slot; if (stfp->Base.IsCentroid & BITFIELD64_BIT(attr)) interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTROID; else if (stfp->Base.IsSample & BITFIELD64_BIT(attr)) interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE; else interpLocation[slot] = TGSI_INTERPOLATE_LOC_CENTER; if (key->persample_shading) interpLocation[slot] = TGSI_INTERPOLATE_LOC_SAMPLE; switch (attr) { case VARYING_SLOT_POS: input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case VARYING_SLOT_COL0: input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; input_semantic_index[slot] = 0; interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr], TRUE); break; case VARYING_SLOT_COL1: input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; input_semantic_index[slot] = 1; interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr], TRUE); break; case VARYING_SLOT_FOGC: input_semantic_name[slot] = TGSI_SEMANTIC_FOG; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; case VARYING_SLOT_FACE: input_semantic_name[slot] = TGSI_SEMANTIC_FACE; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; break; case VARYING_SLOT_PRIMITIVE_ID: input_semantic_name[slot] = TGSI_SEMANTIC_PRIMID; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; break; case VARYING_SLOT_LAYER: input_semantic_name[slot] = TGSI_SEMANTIC_LAYER; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; break; case VARYING_SLOT_VIEWPORT: input_semantic_name[slot] = TGSI_SEMANTIC_VIEWPORT_INDEX; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_CONSTANT; break; case VARYING_SLOT_CLIP_DIST0: input_semantic_name[slot] = 
TGSI_SEMANTIC_CLIPDIST; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; case VARYING_SLOT_CLIP_DIST1: input_semantic_name[slot] = TGSI_SEMANTIC_CLIPDIST; input_semantic_index[slot] = 1; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; /* In most cases, there is nothing special about these * inputs, so adopt a convention to use the generic * semantic name and the mesa VARYING_SLOT_ number as the * index. * * All that is required is that the vertex shader labels * its own outputs similarly, and that the vertex shader * generates at least every output required by the * fragment shader plus fixed-function hardware (such as * BFC). * * However, some drivers may need us to identify the PNTC and TEXi * varyings if, for example, their capability to replace them with * sprite coordinates is limited. */ case VARYING_SLOT_PNTC: if (st->needs_texcoord_semantic) { input_semantic_name[slot] = TGSI_SEMANTIC_PCOORD; input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; } /* fall through */ case VARYING_SLOT_TEX0: case VARYING_SLOT_TEX1: case VARYING_SLOT_TEX2: case VARYING_SLOT_TEX3: case VARYING_SLOT_TEX4: case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: if (st->needs_texcoord_semantic) { input_semantic_name[slot] = TGSI_SEMANTIC_TEXCOORD; input_semantic_index[slot] = attr - VARYING_SLOT_TEX0; interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr], FALSE); break; } /* fall through */ case VARYING_SLOT_VAR0: default: /* Semantic indices should be zero-based because drivers may choose * to assign a fixed slot determined by that index. * This is useful because ARB_separate_shader_objects uses location * qualifiers for linkage, and if the semantic index corresponds to * these locations, linkage passes in the driver become unecessary. 
* * If needs_texcoord_semantic is true, no semantic indices will be * consumed for the TEXi varyings, and we can base the locations of * the user varyings on VAR0. Otherwise, we use TEX0 as base index. */ assert(attr >= VARYING_SLOT_TEX0); input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; input_semantic_index[slot] = st_get_generic_varying_index(st, attr); if (attr == VARYING_SLOT_PNTC) interpMode[slot] = TGSI_INTERPOLATE_LINEAR; else interpMode[slot] = st_translate_interp(stfp->Base.InterpQualifier[attr], FALSE); break; } } else { inputMapping[attr] = -1; } } /* * Semantics and mapping for outputs */ { uint numColors = 0; GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; /* if z is written, emit that first */ if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; fs_output_semantic_index[fs_num_outputs] = 0; outputMapping[FRAG_RESULT_DEPTH] = fs_num_outputs; fs_num_outputs++; outputsWritten &= ~(1 << FRAG_RESULT_DEPTH); } if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL; fs_output_semantic_index[fs_num_outputs] = 0; outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs; fs_num_outputs++; outputsWritten &= ~(1 << FRAG_RESULT_STENCIL); } if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) { fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_SAMPLEMASK; fs_output_semantic_index[fs_num_outputs] = 0; outputMapping[FRAG_RESULT_SAMPLE_MASK] = fs_num_outputs; fs_num_outputs++; outputsWritten &= ~(1 << FRAG_RESULT_SAMPLE_MASK); } /* handle remaining outputs (color) */ for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { if (outputsWritten & BITFIELD64_BIT(attr)) { switch (attr) { case FRAG_RESULT_DEPTH: case FRAG_RESULT_STENCIL: case FRAG_RESULT_SAMPLE_MASK: /* handled above */ assert(0); break; case FRAG_RESULT_COLOR: write_all = GL_TRUE; /* fallthrough */ default: assert(attr == FRAG_RESULT_COLOR || 
(FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; fs_output_semantic_index[fs_num_outputs] = numColors; outputMapping[attr] = fs_num_outputs; numColors++; break; } fs_num_outputs++; } } } ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT ); if (ureg == NULL) { free(variant); return NULL; } if (ST_DEBUG & DEBUG_MESA) { _mesa_print_program(&stfp->Base.Base); _mesa_print_program_parameters(st->ctx, &stfp->Base.Base); debug_printf("\n"); } if (write_all == GL_TRUE) ureg_property(ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1); if (stfp->Base.FragDepthLayout != FRAG_DEPTH_LAYOUT_NONE) { switch (stfp->Base.FragDepthLayout) { case FRAG_DEPTH_LAYOUT_ANY: ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, TGSI_FS_DEPTH_LAYOUT_ANY); break; case FRAG_DEPTH_LAYOUT_GREATER: ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, TGSI_FS_DEPTH_LAYOUT_GREATER); break; case FRAG_DEPTH_LAYOUT_LESS: ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, TGSI_FS_DEPTH_LAYOUT_LESS); break; case FRAG_DEPTH_LAYOUT_UNCHANGED: ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT, TGSI_FS_DEPTH_LAYOUT_UNCHANGED); break; default: assert(0); } } if (stfp->glsl_to_tgsi) st_translate_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, stfp->glsl_to_tgsi, &stfp->Base.Base, /* inputs */ fs_num_inputs, inputMapping, input_semantic_name, input_semantic_index, interpMode, interpLocation, /* outputs */ fs_num_outputs, outputMapping, fs_output_semantic_name, fs_output_semantic_index, FALSE, key->clamp_color ); else st_translate_mesa_program(st->ctx, TGSI_PROCESSOR_FRAGMENT, ureg, &stfp->Base.Base, /* inputs */ fs_num_inputs, inputMapping, input_semantic_name, input_semantic_index, interpMode, /* outputs */ fs_num_outputs, outputMapping, fs_output_semantic_name, fs_output_semantic_index, FALSE, key->clamp_color); variant->tgsi.tokens = ureg_get_tokens( ureg, NULL ); ureg_destroy( ureg ); if (ST_DEBUG & DEBUG_TGSI) { tgsi_dump(variant->tgsi.tokens, 
0/*TGSI_DUMP_VERBOSE*/); debug_printf("\n"); } /* fill in variant */ variant->driver_shader = pipe->create_fs_state(pipe, &variant->tgsi); variant->key = *key; if (deleteFP) { /* Free the temporary program made above */ struct gl_fragment_program *fp = &stfp->Base; _mesa_reference_fragprog(st->ctx, &fp, NULL); } return variant; }
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type, unsigned num_layers) { unsigned vs_blit_property; void **vs; switch (type) { case UTIL_BLITTER_ATTRIB_NONE: vs = num_layers > 1 ? &sctx->vs_blit_pos_layered : &sctx->vs_blit_pos; vs_blit_property = SI_VS_BLIT_SGPRS_POS; break; case UTIL_BLITTER_ATTRIB_COLOR: vs = num_layers > 1 ? &sctx->vs_blit_color_layered : &sctx->vs_blit_color; vs_blit_property = SI_VS_BLIT_SGPRS_POS_COLOR; break; case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW: assert(num_layers == 1); vs = &sctx->vs_blit_texcoord; vs_blit_property = SI_VS_BLIT_SGPRS_POS_TEXCOORD; break; default: assert(0); return NULL; } if (*vs) return *vs; struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); if (!ureg) return NULL; /* Tell the shader to load VS inputs from SGPRs: */ ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS, vs_blit_property); ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true); /* This is just a pass-through shader with 1-3 MOV instructions. */ ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0), ureg_DECL_vs_input(ureg, 0)); if (type != UTIL_BLITTER_ATTRIB_NONE) { ureg_MOV(ureg, ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0), ureg_DECL_vs_input(ureg, 1)); } if (num_layers > 1) { struct ureg_src instance_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_INSTANCEID, 0); struct ureg_dst layer = ureg_DECL_output(ureg, TGSI_SEMANTIC_LAYER, 0); ureg_MOV(ureg, ureg_writemask(layer, TGSI_WRITEMASK_X), ureg_scalar(instance_id, TGSI_SWIZZLE_X)); } ureg_END(ureg); *vs = ureg_create_shader_and_destroy(ureg, &sctx->b); return *vs; }
/* Create a compute shader implementing clear_buffer or copy_buffer. */ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy) { assert(util_is_power_of_two_nonzero(num_dwords_per_thread)); unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT; if (dst_stream_cache_policy) store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY; /* Don't cache loads, because there is no reuse. */ unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY; unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4); unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned)); for (unsigned i = 0; i < num_mem_ops; i++) { if (i*4 < num_dwords_per_thread) inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4); } struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE); if (!ureg) return NULL; ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1); struct ureg_src value; if (!is_copy) { ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]); value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0); } struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0); struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0); struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false)); struct ureg_src srcbuf; struct ureg_src *values = NULL; if (is_copy) { srcbuf = ureg_DECL_buffer(ureg, 1, false); values = malloc(num_mem_ops * sizeof(struct ureg_src)); } /* If there are multiple stores, the first store writes into 0+tid, * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc. 
*/ ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid); /* Convert from a "store size unit" into bytes. */ ureg_UMUL(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[0])); ureg_MOV(ureg, load_addr, ureg_src(store_addr)); /* Distance between a load and a store for latency hiding. */ unsigned load_store_distance = is_copy ? 8 : 0; for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) { int d = i - load_store_distance; if (is_copy && i < num_mem_ops) { if (i) { ureg_UADD(ureg, load_addr, ureg_src(load_addr), ureg_imm1u(ureg, 4 * inst_dwords[i] * 64)); } values[i] = ureg_src(ureg_DECL_temporary(ureg)); struct ureg_dst dst = ureg_writemask(ureg_dst(values[i]), u_bit_consecutive(0, inst_dwords[i])); struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)}; ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2, load_qualifier, TGSI_TEXTURE_BUFFER, 0); } if (d >= 0) { if (d) { ureg_UADD(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[d] * 64)); } struct ureg_dst dst = ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d])); struct ureg_src srcs[] = {ureg_src(store_addr), is_copy ? values[d] : value}; ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2, store_qualifier, TGSI_TEXTURE_BUFFER, 0); } } ureg_END(ureg); struct pipe_compute_state state = {}; state.ir_type = PIPE_SHADER_IR_TGSI; state.prog = ureg_get_tokens(ureg, NULL); void *cs = ctx->create_compute_state(ctx, &state); ureg_destroy(ureg); free(values); return cs; }