static void * create_ref_vert_shader(struct vl_mc *r) { struct ureg_program *shader; struct ureg_src mv_scale; struct ureg_src vmv[2]; struct ureg_dst t_vpos; struct ureg_dst o_vmv[2]; unsigned i; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP); vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM); t_vpos = calc_position(r, shader, ureg_imm2f(shader, (float)VL_MACROBLOCK_WIDTH / r->buffer_width, (float)VL_MACROBLOCK_HEIGHT / r->buffer_height) ); o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP); o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM); /* * mv_scale.xy = 0.5 / (dst.width, dst.height); * mv_scale.z = 1.0f / 4.0f * mv_scale.w = 1.0f / 255.0f * * // Apply motion vectors * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos * o_vmv[0..1].zw = vmv[0..1] * mv_scale * */ mv_scale = ureg_imm4f(shader, 0.5f / r->buffer_width, 0.5f / r->buffer_height, 1.0f / 4.0f, 1.0f / PIPE_VIDEO_MV_WEIGHT_MAX); for (i = 0; i < 2; ++i) { ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos)); ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]); } ureg_release_temporary(shader, t_vpos); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_frag_shader(struct vl_matrix_filter *filter, unsigned num_offsets, struct vertex2f *offsets, const float *matrix_values) { struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst tmp; struct ureg_dst t_sum; struct ureg_dst o_fragment; unsigned i; shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { return NULL; } i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); tmp = ureg_DECL_temporary(shader); t_sum = ureg_DECL_temporary(shader); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); ureg_MOV(shader, t_sum, ureg_imm1f(shader, 0.0f)); for (i = 0; i < num_offsets; ++i) { if (matrix_values[i] == 0.0f) continue; if (!is_vec_zero(offsets[i])) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0.0f)); ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(tmp), sampler); } else { ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, i_vtex, sampler); } ureg_MAD(shader, t_sum, ureg_src(tmp), ureg_imm1f(shader, matrix_values[i]), ureg_src(t_sum)); } ureg_MOV(shader, o_fragment, ureg_src(t_sum)); ureg_END(shader); return ureg_create_shader_and_destroy(shader, filter->pipe); }
static void * create_mismatch_vert_shader(struct vl_idct *idct) { struct ureg_program *shader; struct ureg_src vpos; struct ureg_src scale; struct ureg_dst t_tex; struct ureg_dst o_vpos, o_addr[2]; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_tex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0); o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1); /* * scale = (VL_BLOCK_WIDTH, VL_BLOCK_HEIGHT) / (dst.width, dst.height) * * t_vpos = vpos + 7 / VL_BLOCK_WIDTH * o_vpos.xy = t_vpos * scale * * o_addr = calc_addr(...) * */ scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / idct->buffer_width, (float)VL_BLOCK_HEIGHT / idct->buffer_height); ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale); calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); ureg_release_temporary(shader, t_tex); ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); }
void vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader, unsigned first_output, struct ureg_dst tex) { struct ureg_src vrect, vpos; struct ureg_src scale; struct ureg_dst t_start; struct ureg_dst o_l_addr[2], o_r_addr[2]; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_start = ureg_DECL_temporary(shader); --first_output; o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0); o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1); o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0); o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1); scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / idct->buffer_width, (float)VL_BLOCK_HEIGHT / idct->buffer_height); ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z), ureg_scalar(vrect, TGSI_SWIZZLE_X), ureg_imm1f(shader, VL_BLOCK_WIDTH / idct->nr_of_render_targets)); ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale); calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, VL_BLOCK_WIDTH / 4); calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4); ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex)); ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex)); }
static void * create_vert_shader(struct vl_zscan *zscan) { struct ureg_program *shader; struct ureg_src scale; struct ureg_src vrect, vpos, block_num; struct ureg_dst tmp; struct ureg_dst o_vpos; struct ureg_dst *o_vtex; unsigned i; shader = ureg_create(PIPE_SHADER_VERTEX); if (!shader) return NULL; o_vtex = MALLOC(zscan->num_channels * sizeof(struct ureg_dst)); scale = ureg_imm2f(shader, (float)VL_BLOCK_WIDTH / zscan->buffer_width, (float)VL_BLOCK_HEIGHT / zscan->buffer_height); vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM); tmp = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); for (i = 0; i < zscan->num_channels; ++i) o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i); /* * o_vpos.xy = (vpos + vrect) * scale * o_vpos.zw = 1.0f * * tmp.xy = InstanceID / blocks_per_line * tmp.x = frac(tmp.x) * tmp.y = floor(tmp.y) * * o_vtex.x = vrect.x / blocks_per_line + tmp.x * o_vtex.y = vrect.y * o_vtex.z = tmp.z * blocks_per_line / blocks_total */ ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect); ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); for (i = 0; i < zscan->num_channels; ++i) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2))); ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); } ureg_release_temporary(shader, tmp); ureg_END(shader); FREE(o_vtex); return ureg_create_shader_and_destroy(shader, zscan->pipe); }
static void * create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv) { struct ureg_program *shader; struct ureg_src vrect, vpos; struct ureg_dst t_vpos, t_vtex; struct ureg_dst o_vpos, o_flags; struct vertex2f scale = { (float)VL_BLOCK_WIDTH / r->buffer_width * VL_MACROBLOCK_WIDTH / r->macroblock_size, (float)VL_BLOCK_HEIGHT / r->buffer_height * VL_MACROBLOCK_HEIGHT / r->macroblock_size }; unsigned label; shader = ureg_create(TGSI_PROCESSOR_VERTEX); if (!shader) return NULL; vrect = ureg_DECL_vs_input(shader, VS_I_RECT); vpos = ureg_DECL_vs_input(shader, VS_I_VPOS); t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y)); t_vtex = ureg_DECL_temporary(shader); o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS); o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS); /* * o_vtex.xy = t_vpos * o_flags.z = intra * 0.5 * * if(interlaced) { * t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 } * t_vtex.z = vpos.y % 2 * t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y * o_vpos.y = t_vtex.y + t_vpos.y * * o_flags.w = t_vtex.z ? 0 : 1 * } * */ vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos); ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f)); ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f)); if (r->macroblock_size == VL_MACROBLOCK_HEIGHT) { //TODO ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label); ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)), ureg_imm2f(shader, 0.0f, scale.y), ureg_imm2f(shader, -scale.y, 0.0f)); ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f)); ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex)); ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y)); ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y), ureg_src(t_vpos), ureg_src(t_vtex)); ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)), ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f)); ureg_fixup_label(shader, label, ureg_get_instruction_number(shader)); ureg_ENDIF(shader); } ureg_release_temporary(shader, t_vtex); ureg_release_temporary(shader, t_vpos); ureg_END(shader); return ureg_create_shader_and_destroy(shader, r->pipe); }
static void * create_frag_shader(struct vl_median_filter *filter, struct vertex2f *offsets, unsigned num_offsets) { struct pipe_screen *screen = filter->pipe->screen; struct ureg_program *shader; struct ureg_src i_vtex; struct ureg_src sampler; struct ureg_dst *t_array = MALLOC(sizeof(struct ureg_dst) * num_offsets); struct ureg_dst o_fragment; const unsigned median = num_offsets >> 1; unsigned i, j; assert(num_offsets & 1); /* we need an odd number of offsets */ if (!(num_offsets & 1)) { /* yeah, we REALLY need an odd number of offsets!!! */ FREE(t_array); return NULL; } if (num_offsets > screen->get_shader_param( screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS)) { FREE(t_array); return NULL; } shader = ureg_create(PIPE_SHADER_FRAGMENT); if (!shader) { FREE(t_array); return NULL; } i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX, TGSI_INTERPOLATE_LINEAR); sampler = ureg_DECL_sampler(shader, 0); ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT, TGSI_RETURN_TYPE_FLOAT); for (i = 0; i < num_offsets; ++i) t_array[i] = ureg_DECL_temporary(shader); o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); /* * t_array[0..*] = vtex + offset[0..*] * t_array[0..*] = tex(t_array[0..*], sampler) * result = partial_bubblesort(t_array)[mid] */ for (i = 0; i < num_offsets; ++i) { if (!is_vec_zero(offsets[i])) { ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY), i_vtex, ureg_imm2f(shader, offsets[i].x, offsets[i].y)); ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 0.0f)); } } for (i = 0; i < num_offsets; ++i) { struct ureg_src src = is_vec_zero(offsets[i]) ? i_vtex : ureg_src(t_array[i]); ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, src, sampler); } // TODO: Couldn't this be improved even more? for (i = 0; i <= median; ++i) { for (j = 1; j < (num_offsets - i - 1); ++j) { struct ureg_dst tmp = ureg_DECL_temporary(shader); ureg_MOV(shader, tmp, ureg_src(t_array[j])); ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); ureg_MIN(shader, t_array[j - 1], ureg_src(tmp), ureg_src(t_array[j - 1])); ureg_release_temporary(shader, tmp); } if (i == median) ureg_MAX(shader, t_array[j], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); else ureg_MIN(shader, t_array[j - 1], ureg_src(t_array[j]), ureg_src(t_array[j - 1])); } ureg_MOV(shader, o_fragment, ureg_src(t_array[median])); ureg_END(shader); FREE(t_array); return ureg_create_shader_and_destroy(shader, filter->pipe); }