/**
 * Emit the TGSI instructions that shift and conditionally flip WPOS.
 *
 * The fix-up has to be generated inside the shader because the flip
 * depends on whether an FBO is bound, which is only available through the
 * STATE_FB_WPOS_Y_TRANSFORM state constant.
 *
 * \param ctx      GL context; Const.GLSLFragCoordIsSysVal selects whether the
 *                 position is taken from a system value or a regular input
 * \param t        translation state; its position source is redirected to
 *                 the adjusted temporary before returning
 * \param program  program whose parameter list receives the state reference
 * \param invert   selects which half (.xy or .zw) of the transform constant
 *                 is applied to y
 * \param adjX     offset added to the x coordinate
 * \param adjY     two candidate y offsets; when they differ the choice is
 *                 made at run time from the transform constant
 */
static void
emit_wpos_adjustment(struct gl_context *ctx,
                     struct st_translate *t,
                     const struct gl_program *program,
                     boolean invert,
                     GLfloat adjX, GLfloat adjY[2])
{
   /* Fragment program uses fragment position input.  Every read of
    * INPUT[WPOS] is redirected to a temporary that holds the shifted
    * position with y run through the transform constant.
    */
   static const gl_state_index y_transform_state[STATE_LENGTH] = {
      STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0
   };
   struct ureg_program *ureg = t->ureg;

   /* XXX: note we are modifying the incoming shader here!  This must
    * happen before the constant declarations are emitted, or the new
    * state reference will be missed.
    */
   unsigned xform_index = _mesa_add_state_reference(program->Parameters,
                                                    y_transform_state);
   struct ureg_src xform = ureg_DECL_constant(ureg, xform_index);
   struct ureg_dst adjusted = ureg_DECL_temporary(ureg);
   struct ureg_src *pos_slot;
   struct ureg_src pos;
   unsigned scale_chan, bias_chan;

   if (ctx->Const.GLSLFragCoordIsSysVal)
      pos_slot = &t->systemValues[SYSTEM_VALUE_FRAG_COORD];
   else
      pos_slot = &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
   pos = *pos_slot;

   /* Step 1: the coordinate shift. */
   if (!(adjX || adjY[0] || adjY[1])) {
      /* Nothing to add: MOV adjusted, input[wpos] */
      ureg_MOV(ureg, adjusted, pos);
   } else {
      if (adjY[0] == adjY[1]) {
         /* Same y offset either way, so a plain ADD is enough. */
         ureg_ADD(ureg, adjusted, pos,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      } else {
         /* Pick adjY[0] or adjY[1] at run time, depending on whether the
          * inversion is actually going to be applied.  That is decided by
          * testing the sign of the inversion state variable used in step 2,
          * which is either +1 or -1.
          */
         struct ureg_dst shift = ureg_DECL_temporary(ureg);

         ureg_CMP(ureg, shift,
                  ureg_scalar(xform, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, adjusted, pos, ureg_src(shift));
      }

      /* From now on the shifted value is the flip's input. */
      pos = ureg_src(adjusted);
   }

   /* Step 2: the conditional y flip.  STATE_FB_WPOS_Y_TRANSFORM.xy/zw will
    * be inversion/identity, or the other way around if we're drawing to an
    * FBO.  With invert set the .x/.y pair is the scale/bias, otherwise the
    * .z/.w pair is.
    */
   if (invert) {
      scale_chan = 0;
      bias_chan = 1;
   } else {
      scale_chan = 2;
      bias_chan = 3;
   }

   /* MAD adjusted.y, pos, xform.<scale>, xform.<bias> */
   ureg_MAD(ureg,
            ureg_writemask(adjusted, TGSI_WRITEMASK_Y),
            pos,
            ureg_scalar(xform, scale_chan),
            ureg_scalar(xform, bias_chan));

   /* Use the adjusted temporary as position input from here on: */
   *pos_slot = ureg_src(adjusted);
}
/** * Translate Mesa program to TGSI format. * \param program the program to translate * \param numInputs number of input registers used * \param inputMapping maps Mesa fragment program inputs to TGSI generic * input indexes * \param inputSemanticName the TGSI_SEMANTIC flag for each input * \param inputSemanticIndex the semantic index (ex: which texcoord) for * each input * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input * \param numOutputs number of output registers used * \param outputMapping maps Mesa fragment program outputs to TGSI * generic outputs * \param outputSemanticName the TGSI_SEMANTIC flag for each output * \param outputSemanticIndex the semantic index (ex: which texcoord) for * each output * * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY */ enum pipe_error st_translate_mesa_program( struct gl_context *ctx, uint procType, struct ureg_program *ureg, const struct gl_program *program, GLuint numInputs, const GLuint inputMapping[], const ubyte inputSemanticName[], const ubyte inputSemanticIndex[], const GLuint interpMode[], GLuint numOutputs, const GLuint outputMapping[], const ubyte outputSemanticName[], const ubyte outputSemanticIndex[]) { struct st_translate translate, *t; unsigned i; enum pipe_error ret = PIPE_OK; assert(numInputs <= ARRAY_SIZE(t->inputs)); assert(numOutputs <= ARRAY_SIZE(t->outputs)); t = &translate; memset(t, 0, sizeof *t); t->procType = procType; t->inputMapping = inputMapping; t->outputMapping = outputMapping; t->ureg = ureg; /*_mesa_print_program(program);*/ /* * Declare input attributes. */ if (procType == TGSI_PROCESSOR_FRAGMENT) { for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_fs_input(ureg, inputSemanticName[i], inputSemanticIndex[i], interpMode[i]); } if (program->InputsRead & VARYING_BIT_POS) { /* Must do this after setting up t->inputs, and before * emitting constant references, below: */ emit_wpos(st_context(ctx), t, program, ureg); } /* * Declare output attributes. 
*/ for (i = 0; i < numOutputs; i++) { switch (outputSemanticName[i]) { case TGSI_SEMANTIC_POSITION: t->outputs[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_POSITION, /* Z / Depth */ outputSemanticIndex[i] ); t->outputs[i] = ureg_writemask( t->outputs[i], TGSI_WRITEMASK_Z ); break; case TGSI_SEMANTIC_STENCIL: t->outputs[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_STENCIL, /* Stencil */ outputSemanticIndex[i] ); t->outputs[i] = ureg_writemask( t->outputs[i], TGSI_WRITEMASK_Y ); break; case TGSI_SEMANTIC_COLOR: t->outputs[i] = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, outputSemanticIndex[i] ); break; default: debug_assert(0); return 0; } } } else if (procType == TGSI_PROCESSOR_GEOMETRY) { for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i], inputSemanticIndex[i], 0, 1); } for (i = 0; i < numOutputs; i++) { t->outputs[i] = ureg_DECL_output( ureg, outputSemanticName[i], outputSemanticIndex[i] ); } } else { assert(procType == TGSI_PROCESSOR_VERTEX); for (i = 0; i < numInputs; i++) { t->inputs[i] = ureg_DECL_vs_input(ureg, i); } for (i = 0; i < numOutputs; i++) { t->outputs[i] = ureg_DECL_output( ureg, outputSemanticName[i], outputSemanticIndex[i] ); if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ ureg_MOV(ureg, ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); } } } /* Declare address register. 
*/ if (program->NumAddressRegs > 0) { debug_assert( program->NumAddressRegs == 1 ); t->address[0] = ureg_DECL_address( ureg ); } /* Declare misc input registers */ { GLbitfield sysInputs = program->SystemValuesRead; for (i = 0; sysInputs; i++) { if (sysInputs & (1 << i)) { unsigned semName = _mesa_sysval_to_semantic[i]; t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); if (semName == TGSI_SEMANTIC_INSTANCEID || semName == TGSI_SEMANTIC_VERTEXID) { /* From Gallium perspective, these system values are always * integer, and require native integer support. However, if * native integer is supported on the vertex stage but not the * pixel stage (e.g, i915g + draw), Mesa will generate IR that * assumes these system values are floats. To resolve the * inconsistency, we insert a U2F. */ struct st_context *st = st_context(ctx); struct pipe_screen *pscreen = st->pipe->screen; assert(procType == TGSI_PROCESSOR_VERTEX); assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); (void) pscreen; /* silence non-debug build warnings */ if (!ctx->Const.NativeIntegers) { struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); } } if (procType == TGSI_PROCESSOR_FRAGMENT && semName == TGSI_SEMANTIC_POSITION) emit_wpos(st_context(ctx), t, program, ureg); sysInputs &= ~(1 << i); } } } if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { /* If temps are accessed with indirect addressing, declare temporaries * in sequential order. Else, we declare them on demand elsewhere. */ for (i = 0; i < program->NumTemporaries; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ t->temps[i] = ureg_DECL_temporary( t->ureg ); } } /* Emit constants and immediates. Mesa uses a single index space * for these, so we put all the translated regs in t->constants. 
*/ if (program->Parameters) { t->constants = calloc( program->Parameters->NumParameters, sizeof t->constants[0] ); if (t->constants == NULL) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out; } for (i = 0; i < program->Parameters->NumParameters; i++) { switch (program->Parameters->Parameters[i].Type) { case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: t->constants[i] = ureg_DECL_constant( ureg, i ); break; /* Emit immediates only when there's no indirect addressing of * the const buffer. * FIXME: Be smarter and recognize param arrays: * indirect addressing is only valid within the referenced * array. */ case PROGRAM_CONSTANT: if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST) t->constants[i] = ureg_DECL_constant( ureg, i ); else t->constants[i] = ureg_DECL_immediate( ureg, (const float*) program->Parameters->ParameterValues[i], 4 ); break; default: break; } } } /* texture samplers */ for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { if (program->SamplersUsed & (1 << i)) { t->samplers[i] = ureg_DECL_sampler( ureg, i ); } } /* Emit each instruction in turn: */ for (i = 0; i < program->NumInstructions; i++) { set_insn_start( t, ureg_get_instruction_number( ureg )); compile_instruction(ctx, t, &program->Instructions[i]); } /* Fix up all emitted labels: */ for (i = 0; i < t->labels_count; i++) { ureg_fixup_label( ureg, t->labels[i].token, t->insn[t->labels[i].branch_target] ); } out: free(t->insn); free(t->labels); free(t->constants); if (t->error) { debug_printf("%s: translate error flag set\n", __func__); } return ret; }
static void * create_deint_frag_shader(struct vl_deint_filter *filter, unsigned field, struct vertex2f *sizes, bool spatial_filter) { struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler_cur; struct ureg_src sampler_prevprev; struct ureg_src sampler_prev; struct ureg_src sampler_next; struct ureg_dst o_fragment; struct ureg_dst t_tex; struct ureg_dst t_comp_top, t_comp_bot; struct ureg_dst t_diff; struct ureg_dst t_a, t_b; struct ureg_dst t_weave, t_linear; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) { return NULL; } t_tex = ureg_DECL_temporary(shader); t_comp_top = ureg_DECL_temporary(shader); t_comp_bot = ureg_DECL_temporary(shader); t_diff = ureg_DECL_temporary(shader); t_a = ureg_DECL_temporary(shader); t_b = ureg_DECL_temporary(shader); t_weave = ureg_DECL_temporary(shader); t_linear = ureg_DECL_temporary(shader); i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler_prevprev = ureg_DECL_sampler(shader, 0); sampler_prev = ureg_DECL_sampler(shader, 1); sampler_cur = ureg_DECL_sampler(shader, 2); sampler_next = ureg_DECL_sampler(shader, 3); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); // we don't care about ZW interpolation (allows better optimization) ureg_MOV(shader, t_tex, i_vtex); ureg_MOV(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0)); // sample between texels for cheap lowpass ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * 0.5f, sizes->y * -0.5f, 0, 0)); ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, sizes->x * -0.5f, sizes->y * 0.5f, 1.0f, 0)); if (field == 0) { /* interpolating top field -> current field is a bottom field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prevprev); ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), 
ureg_src(t_b)); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_next); ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); } else { /* interpolating bottom field -> current field is a top field */ // cur vs prev2 ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_prevprev); ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_a), ureg_src(t_b)); // prev vs next ureg_TEX(shader, t_a, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); ureg_TEX(shader, t_b, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_next); ureg_SUB(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_Y), ureg_src(t_a), ureg_src(t_b)); } // absolute maximum of differences ureg_MAX(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_abs(ureg_src(t_diff)), ureg_scalar(ureg_abs(ureg_src(t_diff)), TGSI_SWIZZLE_Y)); if (field == 0) { /* weave with prev top field */ ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_tex), sampler_prev); /* get linear interpolation from current bottom field */ ureg_ADD(shader, t_comp_top, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * -1.0f, 1.0f, 0)); ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_top), sampler_cur); } else { /* weave with prev bottom field */ ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, 0, 1.0f, 0)); ureg_TEX(shader, t_weave, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_prev); /* get linear interpolation from current top field */ ureg_ADD(shader, t_comp_bot, ureg_src(t_tex), ureg_imm4f(shader, 0, sizes->y * 1.0f, 0, 0)); ureg_TEX(shader, t_linear, TGSI_TEXTURE_2D_ARRAY, ureg_src(t_comp_bot), sampler_cur); } // mix between weave and linear // fully weave if diff < 6 (0.02353), fully interpolate if diff 
> 14 (0.05490) ureg_ADD(shader, ureg_writemask(t_diff, TGSI_WRITEMASK_X), ureg_src(t_diff), ureg_imm4f(shader, -0.02353f, 0, 0, 0)); ureg_MUL(shader, ureg_saturate(ureg_writemask(t_diff, TGSI_WRITEMASK_X)), ureg_src(t_diff), ureg_imm4f(shader, 31.8750f, 0, 0, 0)); ureg_LRP(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X), ureg_src(t_diff), ureg_src(t_linear), ureg_src(t_weave)); ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_comp_top); ureg_release_temporary(shader, t_comp_bot); ureg_release_temporary(shader, t_diff); ureg_release_temporary(shader, t_a); ureg_release_temporary(shader, t_b); ureg_release_temporary(shader, t_weave); ureg_release_temporary(shader, t_linear); ureg_END(shader); return ureg_create_shader_and_destroy(shader, filter->pipe); }
/* Create a compute shader implementing clear_buffer or copy_buffer. */ void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread, bool dst_stream_cache_policy, bool is_copy) { assert(util_is_power_of_two_nonzero(num_dwords_per_thread)); unsigned store_qualifier = TGSI_MEMORY_COHERENT | TGSI_MEMORY_RESTRICT; if (dst_stream_cache_policy) store_qualifier |= TGSI_MEMORY_STREAM_CACHE_POLICY; /* Don't cache loads, because there is no reuse. */ unsigned load_qualifier = store_qualifier | TGSI_MEMORY_STREAM_CACHE_POLICY; unsigned num_mem_ops = MAX2(1, num_dwords_per_thread / 4); unsigned *inst_dwords = alloca(num_mem_ops * sizeof(unsigned)); for (unsigned i = 0; i < num_mem_ops; i++) { if (i*4 < num_dwords_per_thread) inst_dwords[i] = MIN2(4, num_dwords_per_thread - i*4); } struct ureg_program *ureg = ureg_create(PIPE_SHADER_COMPUTE); if (!ureg) return NULL; ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 64); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 1); ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 1); struct ureg_src value; if (!is_copy) { ureg_property(ureg, TGSI_PROPERTY_CS_USER_DATA_DWORDS, inst_dwords[0]); value = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_CS_USER_DATA, 0); } struct ureg_src tid = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_THREAD_ID, 0); struct ureg_src blk = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_BLOCK_ID, 0); struct ureg_dst store_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst load_addr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_X); struct ureg_dst dstbuf = ureg_dst(ureg_DECL_buffer(ureg, 0, false)); struct ureg_src srcbuf; struct ureg_src *values = NULL; if (is_copy) { srcbuf = ureg_DECL_buffer(ureg, 1, false); values = malloc(num_mem_ops * sizeof(struct ureg_src)); } /* If there are multiple stores, the first store writes into 0+tid, * the 2nd store writes into 64+tid, the 3rd store writes into 128+tid, etc. 
*/ ureg_UMAD(ureg, store_addr, blk, ureg_imm1u(ureg, 64 * num_mem_ops), tid); /* Convert from a "store size unit" into bytes. */ ureg_UMUL(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[0])); ureg_MOV(ureg, load_addr, ureg_src(store_addr)); /* Distance between a load and a store for latency hiding. */ unsigned load_store_distance = is_copy ? 8 : 0; for (unsigned i = 0; i < num_mem_ops + load_store_distance; i++) { int d = i - load_store_distance; if (is_copy && i < num_mem_ops) { if (i) { ureg_UADD(ureg, load_addr, ureg_src(load_addr), ureg_imm1u(ureg, 4 * inst_dwords[i] * 64)); } values[i] = ureg_src(ureg_DECL_temporary(ureg)); struct ureg_dst dst = ureg_writemask(ureg_dst(values[i]), u_bit_consecutive(0, inst_dwords[i])); struct ureg_src srcs[] = {srcbuf, ureg_src(load_addr)}; ureg_memory_insn(ureg, TGSI_OPCODE_LOAD, &dst, 1, srcs, 2, load_qualifier, TGSI_TEXTURE_BUFFER, 0); } if (d >= 0) { if (d) { ureg_UADD(ureg, store_addr, ureg_src(store_addr), ureg_imm1u(ureg, 4 * inst_dwords[d] * 64)); } struct ureg_dst dst = ureg_writemask(dstbuf, u_bit_consecutive(0, inst_dwords[d])); struct ureg_src srcs[] = {ureg_src(store_addr), is_copy ? values[d] : value}; ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &dst, 1, srcs, 2, store_qualifier, TGSI_TEXTURE_BUFFER, 0); } } ureg_END(ureg); struct pipe_compute_state state = {}; state.ir_type = PIPE_SHADER_IR_TGSI; state.prog = ureg_get_tokens(ureg, NULL); void *cs = ctx->create_compute_state(ctx, &state); ureg_destroy(ureg); free(values); return cs; }
static void * create_mismatch_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src addr[2]; struct ureg_dst m[8][2]; struct ureg_dst fragment; unsigned i; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); for (i = 0; i < 8; ++i) { m[i][0] = ureg_DECL_temporary(shader); m[i][1] = ureg_DECL_temporary(shader); } for (i = 0; i < 8; ++i) { increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height); } for (i = 0; i < 8; ++i) { struct ureg_src s_addr[2]; s_addr[0] = ureg_src(m[i][0]); s_addr[1] = ureg_src(m[i][1]); fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false); } for (i = 1; i < 8; ++i) { ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0])); ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1])); } ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1])); ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14)); ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14)); ureg_FRC(shader, m[0][0], ureg_src(m[0][0])); ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0]))); ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])), ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15))); ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X)); ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1])); ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1])); for (i = 0; i < 8; ++i) { 
ureg_release_temporary(shader, m[i][0]); ureg_release_temporary(shader, m[i][1]); } ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); }
static void radial_gradient(struct ureg_program *ureg, struct ureg_dst out, struct ureg_src pos, struct ureg_src sampler, struct ureg_src coords, struct ureg_src const0124, struct ureg_src matrow0, struct ureg_src matrow1, struct ureg_src matrow2) { struct ureg_dst temp0 = ureg_DECL_temporary(ureg); struct ureg_dst temp1 = ureg_DECL_temporary(ureg); struct ureg_dst temp2 = ureg_DECL_temporary(ureg); struct ureg_dst temp3 = ureg_DECL_temporary(ureg); struct ureg_dst temp4 = ureg_DECL_temporary(ureg); struct ureg_dst temp5 = ureg_DECL_temporary(ureg); ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), pos); ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_Z), ureg_scalar(const0124, TGSI_SWIZZLE_Y)); ureg_DP3(ureg, temp1, matrow0, ureg_src(temp0)); ureg_DP3(ureg, temp2, matrow1, ureg_src(temp0)); ureg_DP3(ureg, temp3, matrow2, ureg_src(temp0)); ureg_RCP(ureg, temp3, ureg_src(temp3)); ureg_MUL(ureg, temp1, ureg_src(temp1), ureg_src(temp3)); ureg_MUL(ureg, temp2, ureg_src(temp2), ureg_src(temp3)); ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_X), ureg_src(temp1)); ureg_MOV(ureg, ureg_writemask(temp5, TGSI_WRITEMASK_Y), ureg_src(temp2)); ureg_MUL(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); ureg_MAD(ureg, temp1, ureg_scalar(coords, TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp0)); ureg_ADD(ureg, temp1, ureg_src(temp1), ureg_src(temp1)); ureg_MUL(ureg, temp3, ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_Y)); ureg_MAD(ureg, temp4, ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp5), TGSI_SWIZZLE_X), ureg_src(temp3)); ureg_MOV(ureg, temp4, ureg_negate(ureg_src(temp4))); ureg_MUL(ureg, temp2, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_src(temp4)); ureg_MUL(ureg, temp0, ureg_scalar(const0124, TGSI_SWIZZLE_W), ureg_src(temp2)); ureg_MUL(ureg, temp3, ureg_src(temp1), ureg_src(temp1)); ureg_SUB(ureg, temp2, 
ureg_src(temp3), ureg_src(temp0)); ureg_RSQ(ureg, temp2, ureg_abs(ureg_src(temp2))); ureg_RCP(ureg, temp2, ureg_src(temp2)); ureg_SUB(ureg, temp1, ureg_src(temp2), ureg_src(temp1)); ureg_ADD(ureg, temp0, ureg_scalar(coords, TGSI_SWIZZLE_Z), ureg_scalar(coords, TGSI_SWIZZLE_Z)); ureg_RCP(ureg, temp0, ureg_src(temp0)); ureg_MUL(ureg, temp2, ureg_src(temp1), ureg_src(temp0)); ureg_TEX(ureg, out, TGSI_TEXTURE_1D, ureg_src(temp2), sampler); ureg_release_temporary(ureg, temp0); ureg_release_temporary(ureg, temp1); ureg_release_temporary(ureg, temp2); ureg_release_temporary(ureg, temp3); ureg_release_temporary(ureg, temp4); ureg_release_temporary(ureg, temp5); }
/** * Make simple fragment texture shader: * IMM {0,0,0,1} // (if writemask != 0xf) * MOV TEMP[0], IMM[0] // (if writemask != 0xf) * TEX TEMP[0].writemask, IN[0], SAMP[0], 2D; * .. optional SINT <-> UINT clamping .. * MOV OUT[0], TEMP[0] * END; * * \param tex_target one of PIPE_TEXTURE_x * \parma interp_mode either TGSI_INTERPOLATE_LINEAR or PERSPECTIVE * \param writemask mask of TGSI_WRITEMASK_x */ void * util_make_fragment_tex_shader_writemask(struct pipe_context *pipe, unsigned tex_target, unsigned interp_mode, unsigned writemask, enum tgsi_return_type stype, enum tgsi_return_type dtype, bool load_level_zero, bool use_txf) { struct ureg_program *ureg; struct ureg_src sampler; struct ureg_src tex; struct ureg_dst temp; struct ureg_dst out; assert((stype == TGSI_RETURN_TYPE_FLOAT) == (dtype == TGSI_RETURN_TYPE_FLOAT)); assert(interp_mode == TGSI_INTERPOLATE_LINEAR || interp_mode == TGSI_INTERPOLATE_PERSPECTIVE); ureg = ureg_create( PIPE_SHADER_FRAGMENT ); if (!ureg) return NULL; sampler = ureg_DECL_sampler( ureg, 0 ); ureg_DECL_sampler_view(ureg, 0, tex_target, stype, stype, stype, stype); tex = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_GENERIC, 0, interp_mode ); out = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 ); temp = ureg_DECL_temporary(ureg); if (writemask != TGSI_WRITEMASK_XYZW) { struct ureg_src imm = ureg_imm4f( ureg, 0, 0, 0, 1 ); ureg_MOV( ureg, out, imm ); } if (tex_target == TGSI_TEXTURE_BUFFER) ureg_TXF(ureg, ureg_writemask(temp, writemask), tex_target, tex, sampler); else ureg_load_tex(ureg, ureg_writemask(temp, writemask), tex, sampler, tex_target, load_level_zero, use_txf); if (stype != dtype) { if (stype == TGSI_RETURN_TYPE_SINT) { assert(dtype == TGSI_RETURN_TYPE_UINT); ureg_IMAX(ureg, temp, ureg_src(temp), ureg_imm1i(ureg, 0)); } else { assert(stype == TGSI_RETURN_TYPE_UINT); assert(dtype == TGSI_RETURN_TYPE_SINT); ureg_UMIN(ureg, temp, ureg_src(temp), ureg_imm1u(ureg, (1u << 31) - 1)); } } ureg_MOV(ureg, out, ureg_src(temp)); ureg_END( ureg 
); return ureg_create_shader_and_destroy( ureg, pipe ); }
/**
 * Build the vertex shader used for YCbCr block rendering.
 *
 * The texture-coordinate output is produced by \p vs_callback; this
 * function writes the flags output and, when the macroblock size equals
 * VL_MACROBLOCK_HEIGHT, a conditional per-field y adjustment of the
 * position.
 *
 * \param r              MC state supplying buffer and macroblock sizes and
 *                       the pipe context
 * \param vs_callback    invoked once with VS_O_VTEX and the position temp
 * \param callback_priv  opaque pointer handed to \p vs_callback
 * \return the shader handle, or NULL if the ureg program cannot be created
 */
static void *
create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
{
   struct vertex2f scale = {
      (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size,
      (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size
   };
   struct ureg_program *ureg;
   struct ureg_src rect_in, pos_in;
   struct ureg_src vtex_src, odd_line;
   struct ureg_dst pos_tmp, vtex_tmp;
   struct ureg_dst pos_out, flags_out;
   unsigned endif_label;

   ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
   if (!ureg)
      return NULL;

   rect_in = ureg_DECL_vs_input(ureg, VS_I_RECT);
   pos_in = ureg_DECL_vs_input(ureg, VS_I_VPOS);

   pos_tmp = calc_position(r, ureg, ureg_imm2f(ureg, scale.x, scale.y));
   vtex_tmp = ureg_DECL_temporary(ureg);

   pos_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
   flags_out = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);

   vtex_src = ureg_src(vtex_tmp);
   /* CMP selects its first operand when the condition is < 0, so negating
    * vtex.z turns "z > 0" into the selector.
    */
   odd_line = ureg_negate(ureg_scalar(vtex_src, TGSI_SWIZZLE_Z));

   /*
    * o_vtex.xy = t_vpos
    * o_flags.z = intra * 0.5
    * o_flags.w = -1
    *
    * if(interlaced) {
    *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y, 0 }
    *    t_vtex.z = frac(vpos.y * 0.5)
    *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
    *    o_vpos.y = t_vtex.y + t_vpos.y
    *
    *    o_flags.w = t_vtex.z ? 0 : 1
    * }
    */

   vs_callback(callback_priv, r, ureg, VS_O_VTEX, pos_tmp);

   ureg_MUL(ureg, ureg_writemask(flags_out, TGSI_WRITEMASK_Z),
            ureg_scalar(pos_in, TGSI_SWIZZLE_Z), ureg_imm1f(ureg, 0.5f));
   ureg_MOV(ureg, ureg_writemask(flags_out, TGSI_WRITEMASK_W),
            ureg_imm1f(ureg, -1.0f));

   if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) { /* TODO */
      ureg_IF(ureg, ureg_scalar(pos_in, TGSI_SWIZZLE_W), &endif_label);

         ureg_CMP(ureg, ureg_writemask(vtex_tmp, TGSI_WRITEMASK_XY),
                  ureg_negate(ureg_scalar(rect_in, TGSI_SWIZZLE_Y)),
                  ureg_imm2f(ureg, 0.0f, scale.y),
                  ureg_imm2f(ureg, -scale.y, 0.0f));
         ureg_MUL(ureg, ureg_writemask(vtex_tmp, TGSI_WRITEMASK_Z),
                  ureg_scalar(pos_in, TGSI_SWIZZLE_Y), ureg_imm1f(ureg, 0.5f));

         ureg_FRC(ureg, ureg_writemask(vtex_tmp, TGSI_WRITEMASK_Z), vtex_src);

         ureg_CMP(ureg, ureg_writemask(vtex_tmp, TGSI_WRITEMASK_Y),
                  odd_line,
                  ureg_scalar(vtex_src, TGSI_SWIZZLE_X),
                  ureg_scalar(vtex_src, TGSI_SWIZZLE_Y));
         ureg_ADD(ureg, ureg_writemask(pos_out, TGSI_WRITEMASK_Y),
                  ureg_src(pos_tmp), vtex_src);

         ureg_CMP(ureg, ureg_writemask(flags_out, TGSI_WRITEMASK_W),
                  odd_line,
                  ureg_imm1f(ureg, 0.0f), ureg_imm1f(ureg, 1.0f));

      ureg_fixup_label(ureg, endif_label, ureg_get_instruction_number(ureg));
      ureg_ENDIF(ureg);
   }

   ureg_release_temporary(ureg, vtex_tmp);
   ureg_release_temporary(ureg, pos_tmp);

   ureg_END(ureg);

   return ureg_create_shader_and_destroy(ureg, r->pipe);
}
static void * create_frag_shader(struct vl_zscan *zscan) { struct ureg_program *shader; struct ureg_src *vtex; struct ureg_src samp_src, samp_scan, samp_quant; struct ureg_dst *tmp; struct ureg_dst quant, fragment; unsigned i; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_src)); tmp = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); for (i = 0; i < zscan->num_channels; ++i) vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); samp_src = ureg_DECL_sampler(shader, 0); samp_scan = ureg_DECL_sampler(shader, 1); samp_quant = ureg_DECL_sampler(shader, 2); for (i = 0; i < zscan->num_channels; ++i) tmp[i] = ureg_DECL_temporary(shader); quant = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * tmp.x = tex(vtex, 1) * tmp.y = vtex.z * fragment = tex(tmp, 0) * quant */ for (i = 0; i < zscan->num_channels; ++i) ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); for (i = 0; i < zscan->num_channels; ++i) ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); for (i = 0; i < zscan->num_channels; ++i) { ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); } ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); for (i = 0; i < zscan->num_channels; ++i) ureg_release_temporary(shader, tmp[i]); ureg_END(shader); FREE(vtex); FREE(tmp); return ureg_create_shader_and_destroy(shader, zscan->pipe); }
static void * create_vert_shader(struct vl_zscan *zscan) { struct ureg_program *shader; struct ureg_src scale; struct ureg_src vrect, vpos, block_num; struct ureg_dst tmp; struct ureg_dst o_vpos; struct ureg_dst *o_vtex; signed i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / zscan->buffer_width, (float)VL_BLOCK_HEIGHT / zscan->buffer_height); vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); tmp = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); for (i = 0; i < zscan->num_channels; ++i) o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); /* * o_vpos.xy = (vpos + vrect) * scale * o_vpos.zw = 1.0f * * tmp.xy = InstanceID / blocks_per_line * tmp.x = frac(tmp.x) * tmp.y = floor(tmp.y) * * o_vtex.x = vrect.x / blocks_per_line + tmp.x * o_vtex.y = vrect.y * o_vtex.z = tmp.z * blocks_per_line / blocks_total */ ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); for (i = 0; i < zscan->num_channels; ++i) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2))); ureg_MAD(shader, ureg_writemask(o_vtex[i], 
TGSI_WRITEMASK_X), vrect, ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); } ureg_release_temporary(shader, tmp); ureg_END(shader); FREE(o_vtex); return ureg_create_shader_and_destroy(shader, zscan->pipe); }
static void * create_frag_shader(struct vl_median_filter *filter, struct vertex2f *offsets, unsigned num_offsets) { struct pipe_screen *screen = filter->pipe->screen; struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst *t_array = MALLOC(sizeof(struct ureg_dst) * num_offsets); struct ureg_dst o_fragment; const unsigned median = num_offsets >> 1; unsigned i, j; assert(num_offsets & 1); /* we need an odd number of offsets */ if (!(num_offsets & 1)) { /* yeah, we REALLY need an odd number of offsets!!! */ FREE(t_array); return NULL; } if (num_offsets > screen->get_shader_param( screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS)) { FREE(t_array); return NULL; } shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { FREE(t_array); return NULL; } i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); for (i = 0; i < num_offsets; ++i) t_array[i] = ureg_DECL_temporary(shader); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * t_array[0..*] = vtex + offset[0..*] * t_array[0..*] = tex(t_array[0..*], sampler) * result = partial_bubblesort(t_array)[mid] */ for (i = 0; i < num_offsets; ++i) { if (!is_vec_zero(offsets[i])) { ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY), i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0.0f)); } } for (i = 0; i < num_offsets; ++i) { struct ureg_src src = is_vec_zero(offsets[i]) ? i_vtex : ureg_src(t_array[i]); ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, src, sampler); } // TODO: Couldn't this be improved even more? 
for (i = 0; i <= median; ++i) { for (j = 1; j < (num_offsets - i - 1); ++j) { struct ureg_dst tmp = ureg_DECL_temporary(shader); ureg_MOV(shader, tmp, ureg_src(t_array[j])); ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); ureg_MIN(shader, t_array[j - 1], ureg_src(tmp), ureg_src(t_array[j - 1])); ureg_release_temporary(shader, tmp); } if (i == median) ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); else ureg_MIN(shader, t_array[j - 1], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); } ureg_MOV(shader, o_fragment, ureg_src(t_array[median])); ureg_END(shader); FREE(t_array); return ureg_create_shader_and_destroy(shader, filter->pipe); }
static void * create_fs(struct st_context *st, bool download, enum pipe_texture_target target) { struct pipe_context *pipe = st->pipe; struct pipe_screen *screen = pipe->screen; struct ureg_program *ureg; bool have_layer; struct ureg_dst out; struct ureg_src sampler; struct ureg_src pos; struct ureg_src layer; struct ureg_src const0; struct ureg_src const1; struct ureg_dst temp0; have_layer = st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY || target == PIPE_TEXTURE_2D_ARRAY || target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY); ureg = ureg_create(PIPE_SHADER_FRAGMENT); if (!ureg) return NULL; if (!download) { out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); } else { struct ureg_src image; /* writeonly images do not require an explicitly given format. */ image = ureg_DECL_image(ureg, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE, true, false); out = ureg_dst(image); } sampler = ureg_DECL_sampler(ureg, 0); if (screen->get_param(screen, PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { pos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); } else { pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_LINEAR); } if (have_layer) { layer = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_LAYER, 0, TGSI_INTERPOLATE_CONSTANT); } const0 = ureg_DECL_constant(ureg, 0); const1 = ureg_DECL_constant(ureg, 1); temp0 = ureg_DECL_temporary(ureg); /* Note: const0 = [ -xoffset + skip_pixels, -yoffset, stride, image_height ] */ /* temp0.xy = f2i(temp0.xy) */ ureg_F2I(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), ureg_swizzle(pos, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); /* temp0.xy = temp0.xy + const0.xy */ ureg_UADD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_XY), ureg_swizzle(ureg_src(temp0), TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y), ureg_swizzle(const0, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y)); /* temp0.x = const0.z * temp0.y + temp0.x */ 
ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), ureg_scalar(const0, TGSI_SWIZZLE_Z), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); if (have_layer) { /* temp0.x = const0.w * layer + temp0.x */ ureg_UMAD(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_X), ureg_scalar(const0, TGSI_SWIZZLE_W), ureg_scalar(layer, TGSI_SWIZZLE_X), ureg_scalar(ureg_src(temp0), TGSI_SWIZZLE_X)); } /* temp0.w = 0 */ ureg_MOV(ureg, ureg_writemask(temp0, TGSI_WRITEMASK_W), ureg_imm1u(ureg, 0)); if (download) { struct ureg_dst temp1; struct ureg_src op[2]; temp1 = ureg_DECL_temporary(ureg); /* temp1.xy = pos.xy */ ureg_F2I(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_XY), pos); /* temp1.zw = 0 */ ureg_MOV(ureg, ureg_writemask(temp1, TGSI_WRITEMASK_ZW), ureg_imm1u(ureg, 0)); if (have_layer) { struct ureg_dst temp1_layer = ureg_writemask(temp1, target == PIPE_TEXTURE_1D_ARRAY ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_Z); /* temp1.y/z = layer */ ureg_MOV(ureg, temp1_layer, ureg_scalar(layer, TGSI_SWIZZLE_X)); if (target == PIPE_TEXTURE_3D) { /* temp1.z += layer_offset */ ureg_UADD(ureg, temp1_layer, ureg_scalar(ureg_src(temp1), TGSI_SWIZZLE_Z), ureg_scalar(const1, TGSI_SWIZZLE_X)); } } /* temp1 = txf(sampler, temp1) */ ureg_TXF(ureg, temp1, util_pipe_tex_to_tgsi_tex(target, 1), ureg_src(temp1), sampler); /* store(out, temp0, temp1) */ op[0] = ureg_src(temp0); op[1] = ureg_src(temp1); ureg_memory_insn(ureg, TGSI_OPCODE_STORE, &out, 1, op, 2, 0, TGSI_TEXTURE_BUFFER, PIPE_FORMAT_NONE); ureg_release_temporary(ureg, temp1); } else { /* out = txf(sampler, temp0.x) */ ureg_TXF(ureg, out, TGSI_TEXTURE_BUFFER, ureg_src(temp0), sampler); } ureg_release_temporary(ureg, temp0); ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }
/**
 * Build a fragment shader for the feature set encoded in fs_traits.
 *
 * fs_traits is a bitmask of FS_* flags.  Exactly one of the fill,
 * composite and yuv paths is expected (see the debug_assert below):
 *  - composite: the source is sampled through xrender_tex();
 *  - fill:      the source is a solid colour input or a linear/radial
 *               gradient;
 *  - yuv:       the work is delegated to create_yuv_shader().
 * An optional mask is sampled and combined through src_in_mask(), and
 * luminance handling is applied to the source and/or destination.
 *
 * \param pipe       context the shader is created on.
 * \param fs_traits  bitmask of FS_* flags selecting the shader variant.
 * \return the result of ureg_create_shader_and_destroy() (or of
 *         create_yuv_shader() on the yuv path), or 0 if the ureg program
 *         cannot be created.
 */
static void *
create_fs(struct pipe_context *pipe, unsigned fs_traits)
{
    struct ureg_program *ureg;
    struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler;
    struct ureg_src /*dst_pos, */ src_input, mask_pos;
    struct ureg_dst src, mask;
    struct ureg_dst out;
    struct ureg_src imm0 = { 0 };
    /* Decode the trait bits once; each flag below is 0 or 1, except
     * comp_alpha_mask which keeps the raw masked bit.
     */
    unsigned has_mask = (fs_traits & FS_MASK) != 0;
    unsigned is_fill = (fs_traits & FS_FILL) != 0;
    unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0;
    unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0;
    unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0;
    unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0;
    unsigned comp_alpha_mask = fs_traits & FS_COMPONENT_ALPHA;
    unsigned is_yuv = (fs_traits & FS_YUV) != 0;
    unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0;
    unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0;
    unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0;
    unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0;
    unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0;
    unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0;
    unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0;
    unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0;
    unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0;

    /* Debug aid: flip to "#if 1" to dump the traits.  The else branch only
     * references the helper so it does not count as unused.
     */
#if 0
    print_fs_traits(fs_traits);
#else
    (void)print_fs_traits;
#endif

    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (ureg == NULL)
	return 0;

    /* it has to be either a fill, a composite op or a yuv conversion */
    debug_assert((is_fill ^ is_composite) ^ is_yuv);
    (void)is_yuv;

    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

    /* imm0 = (0, 0, 0, 1); only declared when one of the consumers below is
     * enabled, otherwise it stays zero-initialised.
     */
    if (src_repeat_none || mask_repeat_none ||
	src_set_alpha || mask_set_alpha || src_luminance) {
	imm0 = ureg_imm4f(ureg, 0, 0, 0, 1);
    }
    /* Declare the source input: texcoord for composite, colour for solid
     * fills, position for the remaining fills.
     */
    if (is_composite) {
	src_sampler = ureg_DECL_sampler(ureg, 0);
	src_input = ureg_DECL_fs_input(ureg,
				       TGSI_SEMANTIC_GENERIC, 0,
				       TGSI_INTERPOLATE_PERSPECTIVE);
    } else if (is_fill) {
	if (is_solid)
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_COLOR, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
	else
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_POSITION, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
    } else {
	/* Neither composite nor fill: hand the program to the yuv builder. */
	debug_assert(is_yuv);
	return create_yuv_shader(pipe, ureg);
    }

    if (has_mask) {
	mask_sampler = ureg_DECL_sampler(ureg, 1);
	mask_pos = ureg_DECL_fs_input(ureg,
				      TGSI_SEMANTIC_GENERIC, 1,
				      TGSI_INTERPOLATE_PERSPECTIVE);
    }
#if 0				/* unused right now */
    dst_sampler = ureg_DECL_sampler(ureg, 2);
    dst_pos = ureg_DECL_fs_input(ureg,
				 TGSI_SEMANTIC_POSITION, 2,
				 TGSI_INTERPOLATE_PERSPECTIVE);
#endif

    /* Produce the source colour.  It goes straight to "out" unless a later
     * stage (mask, source luminance, destination luminance) still has to
     * post-process it, in which case it is kept in "src".
     */
    if (is_composite) {
	if (has_mask || src_luminance || dst_luminance)
	    src = ureg_DECL_temporary(ureg);
	else
	    src = out;
	xrender_tex(ureg, src, src_input, src_sampler, imm0,
		    src_repeat_none, src_swizzle, src_set_alpha);
    } else if (is_fill) {
	if (is_solid) {
	    /* NOTE(review): in the else branch "src" is left unassigned; it
	     * is not read afterwards because all three post-processing flags
	     * are zero on that path.
	     */
	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_dst(src_input);
	    else
		ureg_MOV(ureg, out, src_input);
	} else if (is_lingrad || is_radgrad) {
	    struct ureg_src coords, const0124, matrow0, matrow1, matrow2;

	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_DECL_temporary(ureg);
	    else
		src = out;

	    coords = ureg_DECL_constant(ureg, 0);
	    const0124 = ureg_DECL_constant(ureg, 1);
	    matrow0 = ureg_DECL_constant(ureg, 2);
	    matrow1 = ureg_DECL_constant(ureg, 3);
	    matrow2 = ureg_DECL_constant(ureg, 4);

	    /* NOTE(review): src_sampler is only assigned in the is_composite
	     * branch above, yet it is passed to the gradient helpers on this
	     * fill path - confirm whether a sampler declaration is missing.
	     */
	    if (is_lingrad) {
		linear_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    } else if (is_radgrad) {
		radial_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    }
	} else
	    debug_assert(!"Unknown fill type!");
    }
    if (src_luminance) {
	/* Broadcast src.x to all channels, then overwrite xyz with imm0.x. */
	ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X));
	ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ),
		 ureg_scalar(imm0, TGSI_SWIZZLE_X));
	if (!has_mask && !dst_luminance)
	    ureg_MOV(ureg, out, ureg_src(src));
    }

    if (has_mask) {
	mask = ureg_DECL_temporary(ureg);
	xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0,
		    mask_repeat_none, mask_swizzle, mask_set_alpha);
	/* src IN mask; the result stays in "src" when the destination
	 * luminance step below still needs it.
	 */

	src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src),
		    ureg_src(mask),
		    comp_alpha_mask, mask_luminance);

	ureg_release_temporary(ureg, mask);
    }

    if (dst_luminance) {
	/*
	 * Make sure the alpha channel goes into the output L8 surface.
	 */
	ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W));
    }

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
static void * create_ref_vert_shader(struct vl_mc *r) { struct ureg_program *shader; struct ureg_src mv_scale; struct ureg_src vmv[2]; struct ureg_dst t_vpos; struct ureg_dst o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); t_vpos = calc_position(r, shader, ureg_imm2f(shader, (float)VL_MACROBLOCK_WIDTH / r->buffer_width, (float)VL_MACROBLOCK_HEIGHT / r->buffer_height) ); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); /* * mv_scale.xy = 0.5 / (dst.width, dst.height); * mv_scale.z = 1.0f / 4.0f * mv_scale.w = 1.0f / 255.0f * * // Apply motion vectors * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos * o_vmv[0..1].zw = vmv[0..1] * mv_scale * */ mv_scale = ureg_imm4f(shader, 0.5f / r->buffer_width, 0.5f / r->buffer_height, 1.0f / 4.0f, 1.0f / PIPE_VIDEO_MV_WEIGHT_MAX); for (i = 0; i < 2; ++i) { ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos)); ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]); } ureg_release_temporary(shader, t_vpos); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
void * util_make_fs_msaa_resolve_bilinear(struct pipe_context *pipe, enum tgsi_texture_type tgsi_tex, unsigned nr_samples, enum tgsi_return_type stype) { struct ureg_program *ureg; struct ureg_src sampler, coord; struct ureg_dst out, tmp, top, bottom; struct ureg_dst tmp_coord[4], tmp_sum[4]; unsigned i, c; ureg = ureg_create(PIPE_SHADER_FRAGMENT); if (!ureg) return NULL; /* Declarations. */ sampler = ureg_DECL_sampler(ureg, 0); ureg_DECL_sampler_view(ureg, 0, tgsi_tex, stype, stype, stype, stype); coord = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR); out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); for (c = 0; c < 4; c++) tmp_sum[c] = ureg_DECL_temporary(ureg); for (c = 0; c < 4; c++) tmp_coord[c] = ureg_DECL_temporary(ureg); tmp = ureg_DECL_temporary(ureg); top = ureg_DECL_temporary(ureg); bottom = ureg_DECL_temporary(ureg); /* Instructions. */ for (c = 0; c < 4; c++) ureg_MOV(ureg, tmp_sum[c], ureg_imm1f(ureg, 0)); /* Get 4 texture coordinates for the bilinear filter. */ ureg_F2U(ureg, tmp_coord[0], coord); /* top-left */ ureg_UADD(ureg, tmp_coord[1], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 1, 0, 0, 0)); /* top-right */ ureg_UADD(ureg, tmp_coord[2], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 0, 1, 0, 0)); /* bottom-left */ ureg_UADD(ureg, tmp_coord[3], ureg_src(tmp_coord[0]), ureg_imm4u(ureg, 1, 1, 0, 0)); /* bottom-right */ for (i = 0; i < nr_samples; i++) { for (c = 0; c < 4; c++) { /* Read one sample. */ ureg_MOV(ureg, ureg_writemask(tmp_coord[c], TGSI_WRITEMASK_W), ureg_imm1u(ureg, i)); ureg_TXF(ureg, tmp, tgsi_tex, ureg_src(tmp_coord[c]), sampler); if (stype == TGSI_RETURN_TYPE_UINT) ureg_U2F(ureg, tmp, ureg_src(tmp)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_I2F(ureg, tmp, ureg_src(tmp)); /* Add it to the sum.*/ ureg_ADD(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_src(tmp)); } } /* Calculate the average. 
*/ for (c = 0; c < 4; c++) ureg_MUL(ureg, tmp_sum[c], ureg_src(tmp_sum[c]), ureg_imm1f(ureg, 1.0 / nr_samples)); /* Take the 4 average values and apply a standard bilinear filter. */ ureg_FRC(ureg, tmp, coord); ureg_LRP(ureg, top, ureg_scalar(ureg_src(tmp), 0), ureg_src(tmp_sum[1]), ureg_src(tmp_sum[0])); ureg_LRP(ureg, bottom, ureg_scalar(ureg_src(tmp), 0), ureg_src(tmp_sum[3]), ureg_src(tmp_sum[2])); ureg_LRP(ureg, tmp, ureg_scalar(ureg_src(tmp), 1), ureg_src(bottom), ureg_src(top)); /* Convert to the texture format and return. */ if (stype == TGSI_RETURN_TYPE_UINT) ureg_F2U(ureg, out, ureg_src(tmp)); else if (stype == TGSI_RETURN_TYPE_SINT) ureg_F2I(ureg, out, ureg_src(tmp)); else ureg_MOV(ureg, out, ureg_src(tmp)); ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }
static void * create_ref_frag_shader(struct vl_mc *r) { const float y_scale = r->buffer_height / 2 * r->macroblock_size / VL_MACROBLOCK_HEIGHT; struct ureg_program *shader; struct ureg_src tc[2], sampler; struct ureg_dst ref, field; struct ureg_dst fragment; unsigned label; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ref = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); field = calc_line(shader); /* * ref = field.z ? tc[1] : tc[0] * * // Adjust tc acording to top/bottom field selection * if (|ref.z|) { * ref.y *= y_scale * ref.y = floor(ref.y) * ref.y += ref.z * ref.y /= y_scale * } * fragment.xyz = tex(ref, sampler[0]) */ ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]); ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]); ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label); ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_imm1f(shader, y_scale)); ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref)); ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z)); ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler); ureg_release_temporary(shader, ref); ureg_release_temporary(shader, field); ureg_END(shader); return 
ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_frag_shader_weave(struct vl_compositor *c) { struct ureg_program *shader; struct ureg_src i_tc[2]; struct ureg_src csc[3]; struct ureg_src sampler[3]; struct ureg_dst t_tc[2]; struct ureg_dst t_texel[2]; struct ureg_dst o_fragment; unsigned i, j; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return false; i_tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR); i_tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR); for (i = 0; i < 3; ++i) { csc[i] = ureg_DECL_constant(shader, i); sampler[i] = ureg_DECL_sampler(shader, i); } for (i = 0; i < 2; ++i) { t_tc[i] = ureg_DECL_temporary(shader); t_texel[i] = ureg_DECL_temporary(shader); } o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* calculate the texture offsets * t_tc.x = i_tc.x * t_tc.y = (round(i_tc.y - 0.5) + 0.5) / height * 2 */ for (i = 0; i < 2; ++i) { ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_X), i_tc[i]); ureg_SUB(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), i_tc[i], ureg_imm1f(shader, 0.5f)); ureg_ROUND(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i])); ureg_MOV(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_W), ureg_imm1f(shader, i ? 1.0f : 0.0f)); ureg_ADD(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_YZ), ureg_src(t_tc[i]), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Y), ureg_src(t_tc[i]), ureg_scalar(i_tc[0], TGSI_SWIZZLE_W)); ureg_MUL(shader, ureg_writemask(t_tc[i], TGSI_WRITEMASK_Z), ureg_src(t_tc[i]), ureg_scalar(i_tc[1], TGSI_SWIZZLE_W)); } /* fetch the texels * texel[0..1].x = tex(t_tc[0..1][0]) * texel[0..1].y = tex(t_tc[0..1][1]) * texel[0..1].z = tex(t_tc[0..1][2]) */ for (i = 0; i < 2; ++i) for (j = 0; j < 3; ++j) { struct ureg_src src = ureg_swizzle(ureg_src(t_tc[i]), TGSI_SWIZZLE_X, j ? 
TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); ureg_TEX(shader, ureg_writemask(t_texel[i], TGSI_WRITEMASK_X << j), TGSI_TEXTURE_2D_ARRAY, src, sampler[j]); } /* calculate linear interpolation factor * factor = |round(i_tc.y) - i_tc.y| * 2 */ ureg_ROUND(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), i_tc[0]); ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), ureg_src(t_tc[0]), ureg_negate(i_tc[0])); ureg_MUL(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_YZ), ureg_abs(ureg_src(t_tc[0])), ureg_imm1f(shader, 2.0f)); ureg_LRP(shader, t_texel[0], ureg_swizzle(ureg_src(t_tc[0]), TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z), ureg_src(t_texel[0]), ureg_src(t_texel[1])); /* and finally do colour space transformation * fragment = csc * texel */ ureg_MOV(shader, ureg_writemask(t_texel[0], TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); for (i = 0; i < 3; ++i) ureg_DP4(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(t_texel[0])); ureg_MOV(shader, ureg_writemask(o_fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f)); for (i = 0; i < 2; ++i) { ureg_release_temporary(shader, t_texel[i]); ureg_release_temporary(shader, t_tc[i]); } ureg_END(shader); return ureg_create_shader_and_destroy(shader, c->pipe); }
/**
 * Build the YCbCr fragment shader of the motion-compensation stage.
 *
 * Fragments for which the calc_line() result equals flags.w are killed.
 * For all others the caller-supplied callback emits the fetch code into a
 * temporary, which is then scaled, biased by flags.z, optionally negated
 * and written out with alpha 1.
 *
 * \param r              mc state, passed on to the callback; r->pipe is
 *                       used to create the shader.
 * \param scale          factor applied to the fetched value; 1.0f skips
 *                       the multiply.
 * \param invert         negate the xyz result when true.
 * \param fs_callback    emits the instructions that fill the temporary.
 * \param callback_priv  opaque pointer handed to fs_callback.
 * \return the result of ureg_create_shader_and_destroy(), or NULL if the
 *         ureg program cannot be created.
 */
static void *
create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
{
   struct ureg_program *prog = ureg_create(TGSI_PROCESSOR_FRAGMENT);
   struct ureg_src flags;
   struct ureg_src bias;
   struct ureg_dst work, work_xyz;
   struct ureg_dst color;
   unsigned label;

   if (!prog)
      return NULL;

   flags = ureg_DECL_fs_input(prog, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS,
                              TGSI_INTERPOLATE_LINEAR);

   color = ureg_DECL_output(prog, TGSI_SEMANTIC_COLOR, 0);

   work = calc_line(prog);
   work_xyz = ureg_writemask(work, TGSI_WRITEMASK_XYZ);
   bias = ureg_scalar(flags, TGSI_SWIZZLE_Z);

   /*
    * if (line == flags.w)
    *    kill();
    * else {
    *    color.xyz = (callback result * scale + flags.z) * (invert ? -1 : 1)
    *    color.w = 1.0f
    * }
    */

   ureg_SEQ(prog, ureg_writemask(work, TGSI_WRITEMASK_Y),
            ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(work));

   ureg_IF(prog, ureg_scalar(ureg_src(work), TGSI_SWIZZLE_Y), &label);

      ureg_KILL(prog);

   ureg_fixup_label(prog, label, ureg_get_instruction_number(prog));
   ureg_ELSE(prog, &label);

      fs_callback(callback_priv, r, prog, VS_O_VTEX, work);

      /* A scale of exactly one only needs the bias added. */
      if (scale == 1.0f) {
         ureg_ADD(prog, work_xyz, ureg_src(work), bias);
      } else {
         ureg_MAD(prog, work_xyz, ureg_src(work),
                  ureg_imm1f(prog, scale), bias);
      }

      ureg_MUL(prog, ureg_writemask(color, TGSI_WRITEMASK_XYZ),
               ureg_src(work), ureg_imm1f(prog, invert ? -1.0f : 1.0f));
      ureg_MOV(prog, ureg_writemask(color, TGSI_WRITEMASK_W),
               ureg_imm1f(prog, 1.0f));

   ureg_fixup_label(prog, label, ureg_get_instruction_number(prog));
   ureg_ENDIF(prog);

   ureg_release_temporary(prog, work);

   ureg_END(prog);

   return ureg_create_shader_and_destroy(prog, r->pipe);
}
static void * create_vert_shader(struct vl_compositor *c) { struct ureg_program *shader; struct ureg_src vpos, vtex, color; struct ureg_dst tmp; struct ureg_dst o_vpos, o_vtex, o_color; struct ureg_dst o_vtop, o_vbottom; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return false; vpos = ureg_DECL_vs_input(shader, 0); vtex = ureg_DECL_vs_input(shader, 1); color = ureg_DECL_vs_input(shader, 2); tmp = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_color = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, VS_O_COLOR); o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX); o_vtop = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vbottom = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); /* * o_vpos = vpos * o_vtex = vtex * o_color = color */ ureg_MOV(shader, o_vpos, vpos); ureg_MOV(shader, o_vtex, vtex); ureg_MOV(shader, o_color, color); /* * tmp.x = vtex.w / 2 * tmp.y = vtex.w / 4 * * o_vtop.x = vtex.x * o_vtop.y = vtex.y * tmp.x + 0.25f * o_vtop.z = vtex.y * tmp.y + 0.25f * o_vtop.w = 1 / tmp.x * * o_vbottom.x = vtex.x * o_vbottom.y = vtex.y * tmp.x - 0.25f * o_vbottom.z = vtex.y * tmp.y - 0.25f * o_vbottom.w = 1 / tmp.y */ ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.5f)); ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.25f)); ureg_MOV(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_X), vtex); ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.25f)); ureg_MAD(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.25f)); ureg_RCP(shader, ureg_writemask(o_vtop, TGSI_WRITEMASK_W), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); ureg_MOV(shader, 
ureg_writemask(o_vbottom, TGSI_WRITEMASK_X), vtex); ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Y), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, -0.25f)); ureg_MAD(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, -0.25f)); ureg_RCP(shader, ureg_writemask(o_vbottom, TGSI_WRITEMASK_W), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); ureg_END(shader); return ureg_create_shader_and_destroy(shader, c->pipe); }
static void * create_stage1_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src vrect, vpos; struct ureg_src scale; struct ureg_dst t_tex, t_start; struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2]; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); t_start = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0); o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1); /* * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) * * t_vpos = vpos + vrect * o_vpos.xy = t_vpos * scale * o_vpos.zw = vpos * * o_l_addr = calc_addr(...) * o_r_addr = calc_addr(...) * */ scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / idct->buffer_width, (float)VL_BLOCK_HEIGHT / idct->buffer_height); ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex)); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4); calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, VL_BLOCK_WIDTH / 4); ureg_release_temporary(shader, t_tex); ureg_release_temporary(shader, t_start); ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); }
static void * create_stage1_frag_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src l_addr[2], r_addr[2]; struct ureg_dst l[4][2], r[2]; struct ureg_dst *fragment; int i, j; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) return NULL; fragment = MALLOC(idct->nr_of_render_targets * sizeof(struct ureg_dst)); l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR); l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR); r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR); r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR); for (i = 0; i < idct->nr_of_render_targets; ++i) fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i); for (i = 0; i < 4; ++i) { l[i][0] = ureg_DECL_temporary(shader); l[i][1] = ureg_DECL_temporary(shader); } r[0] = ureg_DECL_temporary(shader); r[1] = ureg_DECL_temporary(shader); for (i = 0; i < 4; ++i) { increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height); } for (i = 0; i < 4; ++i) { struct ureg_src s_addr[2]; s_addr[0] = ureg_src(l[i][0]); s_addr[1] = ureg_src(l[i][1]); fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false); } for (i = 0; i < idct->nr_of_render_targets; ++i) { struct ureg_src s_addr[2]; increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, VL_BLOCK_HEIGHT); s_addr[0] = ureg_src(r[0]); s_addr[1] = ureg_src(r[1]); fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false); for (j = 0; j < 4; ++j) { matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r); } } for (i = 0; i < 4; ++i) { ureg_release_temporary(shader, l[i][0]); ureg_release_temporary(shader, l[i][1]); } ureg_release_temporary(shader, r[0]); ureg_release_temporary(shader, r[1]); ureg_END(shader); FREE(fragment); return 
ureg_create_shader_and_destroy(shader, idct->pipe); }