예제 #1
0
파일: filters_msa.c 프로젝트: 1vanK/Urho3D
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
                                            const uint8_t* ppred,
                                            uint8_t* poutput, int stride,
                                            int size) {
  int w;
  const v16i8 zero = { 0 };
  while (size >= 16) {
    v16u8 pred0, dst0;
    v8i16 a0, a1, b0, b1, c0, c1;
    const v16u8 tmp0 = LD_UB(ppred - 1);
    const v16u8 tmp1 = LD_UB(ppred - stride);
    const v16u8 tmp2 = LD_UB(ppred - stride - 1);
    const v16u8 src0 = LD_UB(pinput);
    ILVRL_B2_SH(zero, tmp0, a0, a1);
    ILVRL_B2_SH(zero, tmp1, b0, b1);
    ILVRL_B2_SH(zero, tmp2, c0, c1);
    ADD2(a0, b0, a1, b1, a0, a1);
    SUB2(a0, c0, a1, c1, a0, a1);
    CLIP_SH2_0_255(a0, a1);
    pred0 = (v16u8)__msa_pckev_b((v16i8)a1, (v16i8)a0);
    dst0 = src0 - pred0;
    ST_UB(dst0, poutput);
    ppred += 16;
    pinput += 16;
    poutput += 16;
    size -= 16;
  }
  for (w = 0; w < size; ++w) {
    const int pred = ppred[w - 1] + ppred[w - stride] - ppred[w - stride - 1];
    poutput[w] = pinput[w] - (pred < 0 ? 0 : pred > 255 ? 255 : pred);
  }
}
예제 #2
0
static void hevc_addblk_4x4_msa(int16_t *coeffs, uint8_t *dst, int32_t stride)
{
    uint32_t dst0, dst1, dst2, dst3;
    v8i16 dst_r0, dst_l0, in0, in1;
    v4i32 dst_vec = { 0 };
    v16u8 zeros = { 0 };

    LD_SH2(coeffs, 8, in0, in1);
    LW4(dst, stride, dst0, dst1, dst2, dst3);
    INSERT_W4_SW(dst0, dst1, dst2, dst3, dst_vec);
    ILVRL_B2_SH(zeros, dst_vec, dst_r0, dst_l0);
    ADD2(dst_r0, in0, dst_l0, in1, dst_r0, dst_l0);
    CLIP_SH2_0_255(dst_r0, dst_l0);
    dst_vec = (v4i32) __msa_pckev_b((v16i8) dst_l0, (v16i8) dst_r0);
    ST4x4_UB(dst_vec, dst_vec, 0, 1, 2, 3, dst, stride);
}
예제 #3
0
파일: dct-c.c 프로젝트: 0x0B501E7E/x264
static void avc_idct4x4_addblk_dc_msa( uint8_t *p_dst, int16_t *p_src,
                                       int32_t i_dst_stride )
{
    int16_t i_dc;
    uint32_t i_src0, i_src1, i_src2, i_src3;
    v16u8 pred = { 0 };
    v16i8 out;
    v8i16 input_dc, pred_r, pred_l;

    i_dc = ( p_src[0] + 32 ) >> 6;
    input_dc = __msa_fill_h( i_dc );
    p_src[ 0 ] = 0;

    LW4( p_dst, i_dst_stride, i_src0, i_src1, i_src2, i_src3 );
    INSERT_W4_UB( i_src0, i_src1, i_src2, i_src3, pred );
    UNPCK_UB_SH( pred, pred_r, pred_l );

    pred_r += input_dc;
    pred_l += input_dc;

    CLIP_SH2_0_255( pred_r, pred_l );
    out = __msa_pckev_b( ( v16i8 ) pred_l, ( v16i8 ) pred_r );
    ST4x4_UB( out, out, 0, 1, 2, 3, p_dst, i_dst_stride );
}