Example #1
0
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    if(((int)dest) & 15){
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
        return;
    }
    if (offset) {
        __asm__ volatile("movq       (%0), %%xmm3\n\t"
                         "movdqa    %%xmm3, %%xmm4\n\t"
                         "psrlq       $24, %%xmm3\n\t"
                         "psllq       $40, %%xmm4\n\t"
                         "por       %%xmm4, %%xmm3\n\t"
                         :: "r"(dither)
                         );
    } else {
Example #2
0
static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
                           const int16_t **src, uint8_t *dest, int dstW,
                           const uint8_t *dither, int offset)
{
    if(((uintptr_t)dest) & 15){
        yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
        return;
    }
    filterSize--;
/*
 * MAIN_FUNCTION: the string-literal body of the vertical filter loop. It is
 * pasted into an __asm__ statement after xmm3 has been loaded with the
 * dither bytes (the macro itself never loads xmm3 from memory).
 *
 * Operand numbers, as bound by the asm statement that expands this macro
 * in this function:
 *   %0 = filter, %1 = dest - offset, %2 = dstW + offset,
 *   %3 = offset, %4 = filterSize.
 *
 * Setup (runs once):
 *   - xmm0 is zeroed and the low 8 bytes of xmm3 are widened to 16-bit words.
 *   - The low word of %4 is broadcast to all eight words of xmm1, shifted
 *     left by 3, added to xmm3, and the sum is arithmetically shifted right
 *     by 4.
 *   - That value is the start value of both accumulators (xmm3, xmm4); a
 *     copy is kept in xmm7 so they can be reset for every output group.
 *   - ecx is loaded from %3 and is used (as FF_REG_c) as the running index.
 *     NOTE(review): FF_REG_c is presumably the full-width name of ecx; it is
 *     defined outside this view.
 *
 * Loop (both the inner and the outer branch target the same label "1:"):
 *   - FF_REG_d walks a list of 16-byte entries starting at %0: a source
 *     pointer at offset 0 and the coefficient at offset 8 (duplicated into
 *     both halves of xmm0 by movddup).
 *   - For each entry, 2 x 16 bytes are read from source + 2 * index,
 *     multiplied with pmulhw (high 16 bits of the signed product) and added
 *     to the accumulators.
 *   - The inner loop repeats while the next entry's source pointer is
 *     non-zero.
 *   - Then both accumulators are shifted right by 3, packed to 16 unsigned
 *     saturated bytes and written with a non-temporal store to %1 + index.
 *   - The index advances by 16, the accumulators are reset from xmm7,
 *     FF_REG_d is rewound to %0, and the outer loop repeats while
 *     index < %2 (unsigned compare).
 *
 * movdqa and movntdq fault on addresses that are not 16-byte aligned.
 *
 * Registers written: xmm0-xmm5, xmm7, FF_REG_d, FF_REG_S, FF_REG_c.
 */
#define MAIN_FUNCTION \
        "pxor       %%xmm0, %%xmm0 \n\t" \
        "punpcklbw  %%xmm0, %%xmm3 \n\t" \
        "movd           %4, %%xmm1 \n\t" \
        "punpcklwd  %%xmm1, %%xmm1 \n\t" \
        "punpckldq  %%xmm1, %%xmm1 \n\t" \
        "punpcklqdq %%xmm1, %%xmm1 \n\t" \
        "psllw          $3, %%xmm1 \n\t" \
        "paddw      %%xmm1, %%xmm3 \n\t" \
        "psraw          $4, %%xmm3 \n\t" \
        "movdqa     %%xmm3, %%xmm4 \n\t" \
        "movdqa     %%xmm3, %%xmm7 \n\t" \
        "movl           %3, %%ecx  \n\t" \
        "mov                                 %0, %%"FF_REG_d"        \n\t"\
        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
        ".p2align                             4             \n\t" /* FIXME Unroll? */\
        "1:                                                 \n\t"\
        "movddup                  8(%%"FF_REG_d"), %%xmm0   \n\t" /* filterCoeff */\
        "movdqa              (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
        "movdqa            16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
        "add                                $16, %%"FF_REG_d"        \n\t"\
        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
        "test                         %%"FF_REG_S", %%"FF_REG_S"     \n\t"\
        "pmulhw                           %%xmm0, %%xmm2      \n\t"\
        "pmulhw                           %%xmm0, %%xmm5      \n\t"\
        "paddw                            %%xmm2, %%xmm3      \n\t"\
        "paddw                            %%xmm5, %%xmm4      \n\t"\
        " jnz                                1b             \n\t"\
        "psraw                               $3, %%xmm3      \n\t"\
        "psraw                               $3, %%xmm4      \n\t"\
        "packuswb                         %%xmm4, %%xmm3      \n\t"\
        "movntdq                          %%xmm3, (%1, %%"FF_REG_c") \n\t"\
        "add                         $16, %%"FF_REG_c"        \n\t"\
        "cmp                          %2, %%"FF_REG_c"        \n\t"\
        "movdqa                   %%xmm7, %%xmm3            \n\t" \
        "movdqa                   %%xmm7, %%xmm4            \n\t" \
        "mov                                 %0, %%"FF_REG_d"        \n\t"\
        "mov                        (%%"FF_REG_d"), %%"FF_REG_S"     \n\t"\
        "jb                                  1b             \n\t"

    if (offset) {
        __asm__ volatile(
            "movq          %5, %%xmm3  \n\t"
            "movdqa    %%xmm3, %%xmm4  \n\t"
            "psrlq        $24, %%xmm3  \n\t"
            "psllq        $40, %%xmm4  \n\t"
            "por       %%xmm4, %%xmm3  \n\t"
            MAIN_FUNCTION
              :: "g" (filter),
              "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
              "m"(filterSize), "m"(((uint64_t *) dither)[0])
              : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
                "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
              );
    } else {