static void fn(yuv2yuv)(uint8_t *_dst[3], const ptrdiff_t dst_stride[3],
                        uint8_t *_src[3], const ptrdiff_t src_stride[3],
                        int w, int h, const int16_t c[3][3][8],
                        const int16_t yuv_offset[2][8])
{
    opixel **dst = (opixel **) _dst;
    ipixel **src = (ipixel **) _src;
    const ipixel *src0 = src[0], *src1 = src[1], *src2 = src[2];
    opixel *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
    int y, x;
    const int sh = 14 + IN_BIT_DEPTH - OUT_BIT_DEPTH;
    const int rnd = 1 << (sh - 1);
    int y_off_in = yuv_offset[0][0];
    int y_off_out = yuv_offset[1][0] << sh;
    const int uv_off_in = 128 << (IN_BIT_DEPTH - 8);
    const int uv_off_out = rnd + (128 << (OUT_BIT_DEPTH - 8 + sh));
    int cyy = c[0][0][0], cyu = c[0][1][0], cyv = c[0][2][0];
    int cuu = c[1][1][0], cuv = c[1][2][0], cvu = c[2][1][0], cvv = c[2][2][0];

    av_assert2(c[1][0][0] == 0);
    av_assert2(c[2][0][0] == 0);
    w = AV_CEIL_RSHIFT(w, SS_W);
    h = AV_CEIL_RSHIFT(h, SS_H);
    for (y = 0; y < h; y++) {
        for (x = 0; x < w; x++) {
            int y00 = src0[x << SS_W] - y_off_in;
#if SS_W == 1
            int y01 = src0[2 * x + 1] - y_off_in;
#if SS_H == 1
            int y10 = src0[src_stride[0] / sizeof(ipixel) + 2 * x] - y_off_in;
            int y11 = src0[src_stride[0] / sizeof(ipixel) + 2 * x + 1] - y_off_in;
#endif
#endif
            int u = src1[x] - uv_off_in, v = src2[x] - uv_off_in;
            int uv_val = cyu * u + cyv * v + rnd + y_off_out;

            dst0[x << SS_W] = av_clip_pixel((cyy * y00 + uv_val) >> sh);
#if SS_W == 1
            dst0[x * 2 + 1] = av_clip_pixel((cyy * y01 + uv_val) >> sh);
#if SS_H == 1
            dst0[x * 2 + 0 + dst_stride[0] / sizeof(opixel)] =
                              av_clip_pixel((cyy * y10 + uv_val) >> sh);
            dst0[x * 2 + 1 + dst_stride[0] / sizeof(opixel)] =
                              av_clip_pixel((cyy * y11 + uv_val) >> sh);
#endif
#endif

            dst1[x] = av_clip_pixel((u * cuu + v * cuv + uv_off_out) >> sh);
            dst2[x] = av_clip_pixel((u * cvu + v * cvv + uv_off_out) >> sh);
        }

        dst0 += (dst_stride[0] * (1 << SS_H)) / sizeof(opixel);
        dst1 += dst_stride[1] / sizeof(opixel);
        dst2 += dst_stride[2] / sizeof(opixel);
        src0 += (src_stride[0] * (1 << SS_H)) / sizeof(ipixel);
        src1 += src_stride[1] / sizeof(ipixel);
        src2 += src_stride[2] / sizeof(ipixel);
    }
}
示例#2
0
static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
        ptrdiff_t dst_stride,
        ptrdiff_t src_stride,
        x86_reg block_w, x86_reg block_h,
        x86_reg src_x, x86_reg src_y,
        x86_reg w, x86_reg h,
        emu_edge_vfix_func * const *vfix_tbl,
        emu_edge_vvar_func *v_extend_var,
        emu_edge_hfix_func * const *hfix_tbl,
        emu_edge_hvar_func *h_extend_var)
{
    x86_reg start_y, start_x, end_y, end_x, src_y_add = 0, p;

    if (!w || !h)
        return;

    av_assert2(block_w <= FFABS(dst_stride));

    if (src_y >= h) {
        src -= src_y*src_stride;
        src_y_add = h - 1;
        src_y     = h - 1;
    } else if (src_y <= -block_h) {
        src -= src_y*src_stride;
        src_y_add = 1 - block_h;
        src_y     = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    start_y = FFMAX(0, -src_y);
    start_x = FFMAX(0, -src_x);
    end_y   = FFMIN(block_h, h-src_y);
    end_x   = FFMIN(block_w, w-src_x);
    av_assert2(start_x < end_x && block_w > 0);
    av_assert2(start_y < end_y && block_h > 0);

    // fill in the to-be-copied part plus all above/below
    src += (src_y_add + start_y) * src_stride + start_x;
    w = end_x - start_x;
    if (w <= 22) {
        vfix_tbl[w - 1](dst + start_x, dst_stride, src, src_stride,
                        start_y, end_y, block_h);
    } else {
        v_extend_var(dst + start_x, dst_stride, src, src_stride,
                     start_y, end_y, block_h, w);
    }

    // fill left
    if (start_x) {
        if (start_x <= 22) {
            hfix_tbl[(start_x - 1) >> 1](dst, dst_stride, start_x, block_h);
        } else {
示例#3
0
static inline void x8_select_ac_table(IntraX8Context * const w , int mode){
  MpegEncContext * const s= w->s;
  int table_index;

  av_assert2(mode<4);

  if( w->j_ac_vlc[mode] ) return;

  table_index = get_bits(&s->gb, 3);
  w->j_ac_vlc[mode] = &j_ac_vlc[w->quant<13][mode>>1][table_index];//2 modes use same tables
  av_assert2(w->j_ac_vlc[mode]);
}
示例#4
0
文件: transform.c 项目: 1c0n/xbmc
int avfilter_transform(const uint8_t *src, uint8_t *dst,
                        int src_stride, int dst_stride,
                        int width, int height, const float *matrix,
                        enum InterpolateMethod interpolate,
                        enum FillMethod fill)
{
    int x, y;
    float x_s, y_s;
    uint8_t def = 0;
    uint8_t (*func)(float, float, const uint8_t *, int, int, int, uint8_t) = NULL;

    switch(interpolate) {
        case INTERPOLATE_NEAREST:
            func = interpolate_nearest;
            break;
        case INTERPOLATE_BILINEAR:
            func = interpolate_bilinear;
            break;
        case INTERPOLATE_BIQUADRATIC:
            func = interpolate_biquadratic;
            break;
        default:
            return AVERROR(EINVAL);
    }

    for (y = 0; y < height; y++) {
        for(x = 0; x < width; x++) {
            x_s = x * matrix[0] + y * matrix[1] + matrix[2];
            y_s = x * matrix[3] + y * matrix[4] + matrix[5];

            switch(fill) {
                case FILL_ORIGINAL:
                    def = src[y * src_stride + x];
                    break;
                case FILL_CLAMP:
                    y_s = av_clipf(y_s, 0, height - 1);
                    x_s = av_clipf(x_s, 0, width - 1);
                    def = src[(int)y_s * src_stride + (int)x_s];
                    break;
                case FILL_MIRROR:
                    x_s = mirror(x_s,  width-1);
                    y_s = mirror(y_s, height-1);

                    av_assert2(x_s >= 0 && y_s >= 0);
                    av_assert2(x_s < width && y_s < height);
                    def = src[(int)y_s * src_stride + (int)x_s];
            }

            dst[y * dst_stride + x] = func(x_s, y_s, src, width, height, src_stride, def);
        }
    }
    return 0;
}
示例#5
0
文件: resample.c 项目: cmtsij/FFmpeg
/**
 * 0th order modified bessel function of the first kind.
 */
static double bessel(double x){
    double v=1;
    double lastv=0;
    double t=1;
    int i;
    static const double inv[100]={
 1.0/( 1* 1), 1.0/( 2* 2), 1.0/( 3* 3), 1.0/( 4* 4), 1.0/( 5* 5), 1.0/( 6* 6), 1.0/( 7* 7), 1.0/( 8* 8), 1.0/( 9* 9), 1.0/(10*10),
 1.0/(11*11), 1.0/(12*12), 1.0/(13*13), 1.0/(14*14), 1.0/(15*15), 1.0/(16*16), 1.0/(17*17), 1.0/(18*18), 1.0/(19*19), 1.0/(20*20),
 1.0/(21*21), 1.0/(22*22), 1.0/(23*23), 1.0/(24*24), 1.0/(25*25), 1.0/(26*26), 1.0/(27*27), 1.0/(28*28), 1.0/(29*29), 1.0/(30*30),
 1.0/(31*31), 1.0/(32*32), 1.0/(33*33), 1.0/(34*34), 1.0/(35*35), 1.0/(36*36), 1.0/(37*37), 1.0/(38*38), 1.0/(39*39), 1.0/(40*40),
 1.0/(41*41), 1.0/(42*42), 1.0/(43*43), 1.0/(44*44), 1.0/(45*45), 1.0/(46*46), 1.0/(47*47), 1.0/(48*48), 1.0/(49*49), 1.0/(50*50),
 1.0/(51*51), 1.0/(52*52), 1.0/(53*53), 1.0/(54*54), 1.0/(55*55), 1.0/(56*56), 1.0/(57*57), 1.0/(58*58), 1.0/(59*59), 1.0/(60*60),
 1.0/(61*61), 1.0/(62*62), 1.0/(63*63), 1.0/(64*64), 1.0/(65*65), 1.0/(66*66), 1.0/(67*67), 1.0/(68*68), 1.0/(69*69), 1.0/(70*70),
 1.0/(71*71), 1.0/(72*72), 1.0/(73*73), 1.0/(74*74), 1.0/(75*75), 1.0/(76*76), 1.0/(77*77), 1.0/(78*78), 1.0/(79*79), 1.0/(80*80),
 1.0/(81*81), 1.0/(82*82), 1.0/(83*83), 1.0/(84*84), 1.0/(85*85), 1.0/(86*86), 1.0/(87*87), 1.0/(88*88), 1.0/(89*89), 1.0/(90*90),
 1.0/(91*91), 1.0/(92*92), 1.0/(93*93), 1.0/(94*94), 1.0/(95*95), 1.0/(96*96), 1.0/(97*97), 1.0/(98*98), 1.0/(99*99), 1.0/(10000)
    };

    x= x*x/4;
    for(i=0; v != lastv; i++){
        lastv=v;
        t *= x*inv[i];
        v += t;
        av_assert2(i<99);
    }
    return v;
}
示例#6
0
static int tm2_read_deltas(TM2Context *ctx, int stream_id)
{
    int d, mb;
    int i, v;

    d  = get_bits(&ctx->gb, 9);
    mb = get_bits(&ctx->gb, 5);

    av_assert2(mb < 32);
    if ((d < 1) || (d > TM2_DELTAS) || (mb < 1)) {
        av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect delta table: %i deltas x %i bits\n", d, mb);
        return AVERROR_INVALIDDATA;
    }

    for (i = 0; i < d; i++) {
        v = get_bits_long(&ctx->gb, mb);
        if (v & (1 << (mb - 1)))
            ctx->deltas[stream_id][i] = v - (1 << mb);
        else
            ctx->deltas[stream_id][i] = v;
    }
    for (; i < TM2_DELTAS; i++)
        ctx->deltas[stream_id][i] = 0;

    return 0;
}
示例#7
0
int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int *consumed, int src_size, int dst_size, int update_ctx){
    int dst_index, i;
    int index= c->index;
    int frac= c->frac;
    int dst_incr_frac= c->dst_incr % c->src_incr;
    int dst_incr=      c->dst_incr / c->src_incr;

    av_assert1(c->filter_shift == FILTER_SHIFT);
    av_assert1(c->felem_size == sizeof(FELEM));

    if (c->filter_length == 1 && c->phase_shift == 0) {
        int64_t index2= (1LL<<32)*c->frac/c->src_incr + (1LL<<32)*index;
        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
        int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr;

        dst_size= FFMIN(dst_size, new_size);

        for(dst_index=0; dst_index < dst_size; dst_index++){
            dst[dst_index] = src[index2>>32];
            index2 += incr;
        }
        index += dst_index * dst_incr;
        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
        av_assert2(index >= 0);
        *consumed= index;
        index = 0;
    } else if (index >= 0 &&
示例#8
0
static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
                                  int16_t *block, int n, int qscale)
{
    int level, qmul, qadd;
    int nCoeffs;

    av_assert2(s->block_last_index[n]>=0);

    qmul = qscale << 1;

    if (!s->h263_aic) {
        if (n < 4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd = (qscale - 1) | 1;
    }else{
        qadd = 0;
        level = block[0];
    }
    if(s->ac_pred)
        nCoeffs=63;
    else
        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

    ff_dct_unquantize_h263_armv5te(block, qmul, qadd, nCoeffs + 1);
    block[0] = level;
}
int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int *consumed, int src_size, int dst_size, int update_ctx){
    int dst_index, i;
    int index= c->index;
    int frac= c->frac;
    int dst_incr_frac= c->dst_incr % c->src_incr;
    int dst_incr=      c->dst_incr / c->src_incr;
    int compensation_distance= c->compensation_distance;

    av_assert1(c->filter_shift == FILTER_SHIFT);
    av_assert1(c->felem_size == sizeof(FELEM));

    if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){
        int64_t index2= ((int64_t)index)<<32;
        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr);

        for(dst_index=0; dst_index < dst_size; dst_index++){
            dst[dst_index] = src[index2>>32];
            index2 += incr;
        }
        index += dst_index * dst_incr;
        index += (frac + dst_index * (int64_t)dst_incr_frac) / c->src_incr;
        frac   = (frac + dst_index * (int64_t)dst_incr_frac) % c->src_incr;
        av_assert2(index >= 0);
        *consumed= index >> c->phase_shift;
        index &= c->phase_mask;
    }else if(compensation_distance == 0 && !c->linear && index >= 0){
示例#10
0
文件: ra288.c 项目: AddictXQ/FFmpeg
/**
 * Hybrid window filtering, see blocks 36 and 49 of the G.728 specification.
 *
 * @param order   filter order
 * @param n       input length
 * @param non_rec number of non-recursive samples
 * @param out     filter output
 * @param hist    pointer to the input history of the filter
 * @param out     pointer to the non-recursive part of the output
 * @param out2    pointer to the recursive part of the output
 * @param window  pointer to the windowing function table
 */
static void do_hybrid_window(RA288Context *ractx,
                             int order, int n, int non_rec, float *out,
                             float *hist, float *out2, const float *window)
{
    int i;
    float buffer1[MAX_BACKWARD_FILTER_ORDER + 1];
    float buffer2[MAX_BACKWARD_FILTER_ORDER + 1];
    LOCAL_ALIGNED(32, float, work, [FFALIGN(MAX_BACKWARD_FILTER_ORDER +
                                            MAX_BACKWARD_FILTER_LEN   +
                                            MAX_BACKWARD_FILTER_NONREC, 16)]);

    av_assert2(order>=0);

    ractx->fdsp->vector_fmul(work, window, hist, FFALIGN(order + n + non_rec, 16));

    convolve(buffer1, work + order    , n      , order);
    convolve(buffer2, work + order + n, non_rec, order);

    for (i=0; i <= order; i++) {
        out2[i] = out2[i] * 0.5625 + buffer1[i];
        out [i] = out2[i]          + buffer2[i];
    }

    /* Multiply by the white noise correcting factor (WNCF). */
    *out *= 257.0 / 256.0;
}
示例#11
0
文件: aviobuf.c 项目: jskew/FFmpeg
void avio_w8(AVIOContext *s, int b)
{
    av_assert2(b>=-128 && b<=255);
    *s->buf_ptr++ = b;
    if (s->buf_ptr >= s->buf_end)
        flush_buffer(s);
}
示例#12
0
static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
                                  int16_t *block, int n, int qscale)
{
    x86_reg qmul, qadd, nCoeffs;

    qmul = qscale << 1;
    qadd = (qscale - 1) | 1;

    av_assert2(s->block_last_index[n]>=0 || s->h263_aic);

    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

__asm__ volatile(
                "movd %1, %%mm6                 \n\t" //qmul
                "packssdw %%mm6, %%mm6          \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "movd %2, %%mm5                 \n\t" //qadd
                "pxor %%mm7, %%mm7              \n\t"
                "packssdw %%mm5, %%mm5          \n\t"
                "packssdw %%mm5, %%mm5          \n\t"
                "psubw %%mm5, %%mm7             \n\t"
                "pxor %%mm4, %%mm4              \n\t"
                ".p2align 4                     \n\t"
                "1:                             \n\t"
                "movq (%0, %3), %%mm0           \n\t"
                "movq 8(%0, %3), %%mm1          \n\t"

                "pmullw %%mm6, %%mm0            \n\t"
                "pmullw %%mm6, %%mm1            \n\t"

                "movq (%0, %3), %%mm2           \n\t"
                "movq 8(%0, %3), %%mm3          \n\t"

                "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
                "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0

                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"

                "paddw %%mm7, %%mm0             \n\t"
                "paddw %%mm7, %%mm1             \n\t"

                "pxor %%mm0, %%mm2              \n\t"
                "pxor %%mm1, %%mm3              \n\t"

                "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0

                "pandn %%mm2, %%mm0             \n\t"
                "pandn %%mm3, %%mm1             \n\t"

                "movq %%mm0, (%0, %3)           \n\t"
                "movq %%mm1, 8(%0, %3)          \n\t"

                "add $16, %3                    \n\t"
                "jng 1b                         \n\t"
                ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
                : "memory"
        );
}
示例#13
0
文件: lzwenc.c 项目: 0Soul/FFmpeg
/**
 * Hash function adding character
 * @param head LZW code for prefix
 * @param add Character to add
 * @return New hash value
 */
static inline int hash(int head, const int add)
{
    head ^= (add << LZW_HASH_SHIFT);
    if (head >= LZW_HASH_SIZE)
        head -= LZW_HASH_SIZE;
    av_assert2(head >= 0 && head < LZW_HASH_SIZE);
    return head;
}
示例#14
0
文件: fifo.c 项目: jicky1984/FFmpeg
/** Discard data from the FIFO. */
void av_fifo_drain(AVFifoBuffer *f, int size)
{
    av_assert2(av_fifo_size(f) >= size);
    f->rptr += size;
    if (f->rptr >= f->end)
        f->rptr -= f->end - f->buffer;
    f->rndx += size;
}
示例#15
0
int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len)
{
    int ch;
    int off=0;
    const int os= (out->planar ? 1 :out->ch_count) *out->bps;
    unsigned misaligned = 0;

    av_assert0(ctx->channels == out->ch_count);

    if (ctx->in_simd_align_mask) {
        int planes = in->planar ? in->ch_count : 1;
        unsigned m = 0;
        for (ch = 0; ch < planes; ch++)
            m |= (intptr_t)in->ch[ch];
        misaligned |= m & ctx->in_simd_align_mask;
    }
    if (ctx->out_simd_align_mask) {
        int planes = out->planar ? out->ch_count : 1;
        unsigned m = 0;
        for (ch = 0; ch < planes; ch++)
            m |= (intptr_t)out->ch[ch];
        misaligned |= m & ctx->out_simd_align_mask;
    }

    //FIXME optimize common cases

    if(ctx->simd_f && !ctx->ch_map && !misaligned){
        off = len&~15;
        av_assert1(off>=0);
        av_assert1(off<=len);
        av_assert2(ctx->channels == SWR_CH_MAX || !in->ch[ctx->channels]);
        if(off>0){
            if(out->planar == in->planar){
                int planes = out->planar ? out->ch_count : 1;
                for(ch=0; ch<planes; ch++){
                    ctx->simd_f(out->ch+ch, (const uint8_t **)in->ch+ch, off * (out->planar ? 1 :out->ch_count));
                }
            }else{
                ctx->simd_f(out->ch, (const uint8_t **)in->ch, off);
            }
        }
        if(off == len)
            return 0;
    }

    for(ch=0; ch<ctx->channels; ch++){
        const int ich= ctx->ch_map ? ctx->ch_map[ch] : ch;
        const int is= ich < 0 ? 0 : (in->planar ? 1 : in->ch_count) * in->bps;
        const uint8_t *pi= ich < 0 ? ctx->silence : in->ch[ich];
        uint8_t       *po= out->ch[ch];
        uint8_t *end= po + os*len;
        if(!po)
            continue;
        ctx->conv_f(po+off*os, pi+off*is, is, os, end);
    }
    return 0;
}
示例#16
0
int av_reduce(int *dst_num, int *dst_den,
              int64_t num, int64_t den, int64_t max)
{
    AVRational a0 = { 0, 1 }, a1 = { 1, 0 };
    int sign = (num < 0) ^ (den < 0);
    int64_t gcd = av_gcd(FFABS(num), FFABS(den));

    if (gcd) {
        num = FFABS(num) / gcd;
        den = FFABS(den) / gcd;
    }
    if (num <= max && den <= max) {
        a1 = (AVRational) { num, den };
        den = 0;
    }

    while (den) {
        uint64_t x        = num / den;
        int64_t next_den  = num - den * x;
        int64_t a2n       = x * a1.num + a0.num;
        int64_t a2d       = x * a1.den + a0.den;

        if (a2n > max || a2d > max) {
            if (a1.num) x =          (max - a0.num) / a1.num;
            if (a1.den) x = FFMIN(x, (max - a0.den) / a1.den);

            if (den * (2 * x * a1.den + a0.den) > num * a1.den)
                a1 = (AVRational) { x * a1.num + a0.num, x * a1.den + a0.den };
            break;
        }

        a0  = a1;
        a1  = (AVRational) { a2n, a2d };
        num = den;
        den = next_den;
    }
    av_assert2(av_gcd(a1.num, a1.den) <= 1U);
    av_assert2(a1.num <= max && a1.den <= max);

    *dst_num = sign ? -a1.num : a1.num;
    *dst_den = a1.den;

    return den == 0;
}
示例#17
0
static av_always_inline void emulated_edge_mc(uint8_t *buf, const uint8_t *src,
                                              ptrdiff_t linesize_arg,
                                              int block_w, int block_h,
                                              int src_x, int src_y,
                                              int w, int h,
                                              emu_edge_core_func *core_fn)
{
    int start_y, start_x, end_y, end_x, src_y_add = 0;
    int linesize = linesize_arg;

    if(!w || !h)
        return;

    if (src_y >= h) {
        src -= src_y*linesize;
        src_y_add = h - 1;
        src_y     = h - 1;
    } else if (src_y <= -block_h) {
        src -= src_y*linesize;
        src_y_add = 1 - block_h;
        src_y     = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    start_y = FFMAX(0, -src_y);
    start_x = FFMAX(0, -src_x);
    end_y   = FFMIN(block_h, h-src_y);
    end_x   = FFMIN(block_w, w-src_x);
    av_assert2(start_x < end_x && block_w > 0);
    av_assert2(start_y < end_y && block_h > 0);

    // fill in the to-be-copied part plus all above/below
    src += (src_y_add + start_y) * linesize + start_x;
    buf += start_x;
    core_fn(buf, src, linesize, start_y, end_y,
            block_h, start_x, end_x, block_w);
}
示例#18
0
static int get_qPy_pred(HEVCContext *s, int xC, int yC,
                        int xBase, int yBase, int log2_cb_size)
{
    HEVCLocalContext *lc     = s->HEVClc;
    int ctb_size_mask        = (1 << s->sps->log2_ctb_size) - 1;
    int MinCuQpDeltaSizeMask = (1 << (s->sps->log2_ctb_size -
                                      s->pps->diff_cu_qp_delta_depth)) - 1;
    int xQgBase              = xBase - (xBase & MinCuQpDeltaSizeMask);
    int yQgBase              = yBase - (yBase & MinCuQpDeltaSizeMask);
    int min_cb_width         = s->sps->min_cb_width;
    int x_cb                 = xQgBase >> s->sps->log2_min_cb_size;
    int y_cb                 = yQgBase >> s->sps->log2_min_cb_size;
    int availableA           = (xBase   & ctb_size_mask) &&
                               (xQgBase & ctb_size_mask);
    int availableB           = (yBase   & ctb_size_mask) &&
                               (yQgBase & ctb_size_mask);
    int qPy_pred, qPy_a, qPy_b;

    // qPy_pred
    if (lc->first_qp_group || (!xQgBase && !yQgBase)) {
        lc->first_qp_group = !lc->tu.is_cu_qp_delta_coded;
        qPy_pred = s->sh.slice_qp;
    } else {
        qPy_pred = lc->qPy_pred;
    }

    // qPy_a
    if (availableA == 0)
        qPy_a = qPy_pred;
    else
        qPy_a = s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width];

    // qPy_b
    if (availableB == 0)
        qPy_b = qPy_pred;
    else
        qPy_b = s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width];

    av_assert2(qPy_a >= -s->sps->qp_bd_offset && qPy_a < 52);
    av_assert2(qPy_b >= -s->sps->qp_bd_offset && qPy_b < 52);

    return (qPy_a + qPy_b + 1) >> 1;
}
ERROR
#endif

void RENAME(swri_noise_shaping)(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count){
    int pos = s->dither.ns_pos;
    int i, j, ch;
    int taps  = s->dither.ns_taps;
    float S   = s->dither.ns_scale;
    float S_1 = s->dither.ns_scale_1;

    av_assert2((taps&3) != 2);
    av_assert2((taps&3) != 3 || s->dither.ns_coeffs[taps] == 0);

    for (ch=0; ch<srcs->ch_count; ch++) {
        const float *noise = ((const float *)noises->ch[ch]) + s->dither.noise_pos;
        const DELEM *src = (const DELEM*)srcs->ch[ch];
        DELEM *dst = (DELEM*)dsts->ch[ch];
        float *ns_errors = s->dither.ns_errors[ch];
        const float *ns_coeffs = s->dither.ns_coeffs;
        pos  = s->dither.ns_pos;
        for (i=0; i<count; i++) {
            double d1, d = src[i]*S_1;
            for(j=0; j<taps-2; j+=4) {
                d -= ns_coeffs[j    ] * ns_errors[pos + j    ]
                    +ns_coeffs[j + 1] * ns_errors[pos + j + 1]
                    +ns_coeffs[j + 2] * ns_errors[pos + j + 2]
                    +ns_coeffs[j + 3] * ns_errors[pos + j + 3];
            }
            if(j < taps)
                d -= ns_coeffs[j] * ns_errors[pos + j];
            pos = pos ? pos - 1 : taps - 1;
            d1 = rint(d + noise[i]);
            ns_errors[pos + taps] = ns_errors[pos] = d1 - d;
            d1 *= S;
            CLIP(d1);
            dst[i] = d1;
        }
    }

    s->dither.ns_pos = pos;
}
示例#20
0
文件: me_cmp.c 项目: xkfz007/src.x265
static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
                                uint8_t *dummy, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    av_assert2(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0], src[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2], src[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4], src[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6], src[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum +=
            BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
            + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
            + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
            + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }

    sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return sum;
}
示例#21
0
static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
                                  int16_t *block, int n, int qscale)
{
    int qmul, qadd;
    int nCoeffs;

    av_assert2(s->block_last_index[n]>=0);

    qadd = (qscale - 1) | 1;
    qmul = qscale << 1;

    nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];

    ff_dct_unquantize_h263_armv5te(block, qmul, qadd, nCoeffs + 1);
}
static void  set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v){
    uint8_t *p;
    if(av_sample_fmt_is_planar(f)){
        f= av_get_alt_sample_fmt(f, 0);
        p= a[ch];
    }else{
        p= a[0];
        index= ch + index*ch_count;
    }
    switch(f){
    case AV_SAMPLE_FMT_U8 : ((uint8_t*)p)[index]= (v+1.0)*255.0/2; break;
    case AV_SAMPLE_FMT_S16: ((int16_t*)p)[index]= v*32767;         break;
    case AV_SAMPLE_FMT_S32: ((int32_t*)p)[index]= v*2147483647;    break;
    case AV_SAMPLE_FMT_FLT: ((float  *)p)[index]= v;               break;
    case AV_SAMPLE_FMT_DBL: ((double *)p)[index]= v;               break;
    default: av_assert2(0);
    }
}
示例#23
0
文件: resample.c 项目: KangLin/FFmpeg
static int swri_resample(ResampleContext *c,
                         uint8_t *dst, const uint8_t *src, int *consumed,
                         int src_size, int dst_size, int update_ctx)
{
    if (c->filter_length == 1 && c->phase_count == 1) {
        int index= c->index;
        int frac= c->frac;
        int64_t index2= (1LL<<32)*c->frac/c->src_incr + (1LL<<32)*index;
        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
        int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr;

        dst_size= FFMIN(dst_size, new_size);
        c->dsp.resample_one(dst, src, dst_size, index2, incr);

        index += dst_size * c->dst_incr_div;
        index += (frac + dst_size * (int64_t)c->dst_incr_mod) / c->src_incr;
        av_assert2(index >= 0);
        *consumed= index;
        if (update_ctx) {
            c->frac   = (frac + dst_size * (int64_t)c->dst_incr_mod) % c->src_incr;
            c->index = 0;
        }
    } else {
        int64_t end_index = (1LL + src_size - c->filter_length) * c->phase_count;
        int64_t delta_frac = (end_index - c->index) * c->src_incr - c->frac;
        int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr;

        dst_size = FFMIN(dst_size, delta_n);
        if (dst_size > 0) {
            /* resample_linear and resample_common should have same behavior
             * when frac and dst_incr_mod are zero */
            if (c->linear && (c->frac || c->dst_incr_mod))
                *consumed = c->dsp.resample_linear(c, dst, src, dst_size, update_ctx);
            else
                *consumed = c->dsp.resample_common(c, dst, src, dst_size, update_ctx);
        } else {
            *consumed = 0;
        }
    }

    return dst_size;
}
示例#24
0
static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
{
    int i;

    for (i = 0; i < nb_coefs; i++) {
        int e;
        int v = abs(coef[i]);
        if (v == 0)
            e = 24;
        else {
            e = 23 - av_log2(v);
            if (e >= 24) {
                e = 24;
                coef[i] = 0;
            }
            av_assert2(e >= 0);
        }
        exp[i] = e;
    }
}
示例#25
0
文件: elbg.c 项目: aroelhaqi/FFmpeg
static int get_high_utility_cell(elbg_data *elbg)
{
    int i=0;
    /* Using linear search, do binary if it ever turns to be speed critical */
    uint64_t r;

    if (elbg->utility_inc[elbg->numCB-1] < INT_MAX) {
        r = av_lfg_get(elbg->rand_state) % (unsigned int)elbg->utility_inc[elbg->numCB-1] + 1;
    } else {
        r = av_lfg_get(elbg->rand_state);
        r = (av_lfg_get(elbg->rand_state) + (r<<32)) % elbg->utility_inc[elbg->numCB-1] + 1;
    }

    while (elbg->utility_inc[i] < r) {
        i++;
    }

    av_assert2(elbg->cells[i]);

    return i;
}
示例#26
0
static av_always_inline void decode_line(FFV1Context *s, int w,
                                         int16_t *sample[2],
                                         int plane_index, int bits)
{
    PlaneContext *const p = &s->plane[plane_index];
    RangeCoder *const c   = &s->c;
    int x;
    int run_count = 0;
    int run_mode  = 0;
    int run_index = s->run_index;

    if (s->slice_coding_mode == 1) {
        int i;
        for (x = 0; x < w; x++) {
            int v = 0;
            for (i=0; i<bits; i++) {
                uint8_t state = 128;
                v += v + get_rac(c, &state);
            }
            sample[1][x] = v;
        }
        return;
    }

    for (x = 0; x < w; x++) {
        int diff, context, sign;

        context = get_context(p, sample[1] + x, sample[0] + x, sample[1] + x);
        if (context < 0) {
            context = -context;
            sign    = 1;
        } else
            sign = 0;

        av_assert2(context < p->context_count);

        if (s->ac) {
            diff = get_symbol_inline(c, p->state[context], 1);
        } else {
            if (context == 0 && run_mode == 0)
                run_mode = 1;

            if (run_mode) {
                if (run_count == 0 && run_mode == 1) {
                    if (get_bits1(&s->gb)) {
                        run_count = 1 << ff_log2_run[run_index];
                        if (x + run_count <= w)
                            run_index++;
                    } else {
                        if (ff_log2_run[run_index])
                            run_count = get_bits(&s->gb, ff_log2_run[run_index]);
                        else
                            run_count = 0;
                        if (run_index)
                            run_index--;
                        run_mode = 2;
                    }
                }
                run_count--;
                if (run_count < 0) {
                    run_mode  = 0;
                    run_count = 0;
                    diff      = get_vlc_symbol(&s->gb, &p->vlc_state[context],
                                               bits);
                    if (diff >= 0)
                        diff++;
                } else
                    diff = 0;
            } else
                diff = get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);

            ff_dlog(s->avctx, "count:%d index:%d, mode:%d, x:%d pos:%d\n",
                    run_count, run_index, run_mode, x, get_bits_count(&s->gb));
        }

        if (sign)
            diff = -diff;

        sample[1][x] = av_mod_uintp2(predict(sample[1] + x, sample[0] + x) + diff, bits);
    }
    s->run_index = run_index;
}
示例#27
0
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
                        int total_gain)
{
    int v, bsize, ch, coef_nb_bits, parse_exponents;
    float mdct_norm;
    int nb_coefs[MAX_CHANNELS];
    static const int fixed_exp[25] = {
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20,
        20, 20, 20, 20, 20
    };

    // FIXME remove duplication relative to decoder
    if (s->use_variable_block_len) {
        av_assert0(0); // FIXME not implemented
    } else {
        /* fixed block len */
        s->next_block_len_bits = s->frame_len_bits;
        s->prev_block_len_bits = s->frame_len_bits;
        s->block_len_bits      = s->frame_len_bits;
    }

    s->block_len = 1 << s->block_len_bits;
//     av_assert0((s->block_pos + s->block_len) <= s->frame_len);
    bsize = s->frame_len_bits - s->block_len_bits;

    // FIXME factor
    v = s->coefs_end[bsize] - s->coefs_start;
    for (ch = 0; ch < s->avctx->channels; ch++)
        nb_coefs[ch] = v;
    {
        int n4 = s->block_len / 2;
        mdct_norm = 1.0 / (float) n4;
        if (s->version == 1)
            mdct_norm *= sqrt(n4);
    }

    if (s->avctx->channels == 2)
        put_bits(&s->pb, 1, !!s->ms_stereo);

    for (ch = 0; ch < s->avctx->channels; ch++) {
        // FIXME only set channel_coded when needed, instead of always
        s->channel_coded[ch] = 1;
        if (s->channel_coded[ch])
            init_exp(s, ch, fixed_exp);
    }

    for (ch = 0; ch < s->avctx->channels; ch++) {
        if (s->channel_coded[ch]) {
            WMACoef *coefs1;
            float *coefs, *exponents, mult;
            int i, n;

            coefs1    = s->coefs1[ch];
            exponents = s->exponents[ch];
            mult      = pow(10, total_gain * 0.05) / s->max_exponent[ch];
            mult     *= mdct_norm;
            coefs     = src_coefs[ch];
            if (s->use_noise_coding && 0) {
                av_assert0(0); // FIXME not implemented
            } else {
                coefs += s->coefs_start;
                n      = nb_coefs[ch];
                for (i = 0; i < n; i++) {
                    double t = *coefs++ / (exponents[i] * mult);
                    if (t < -32768 || t > 32767)
                        return -1;

                    coefs1[i] = lrint(t);
                }
            }
        }
    }

    v = 0;
    for (ch = 0; ch < s->avctx->channels; ch++) {
        int a = s->channel_coded[ch];
        put_bits(&s->pb, 1, a);
        v |= a;
    }

    if (!v)
        return 1;

    for (v = total_gain - 1; v >= 127; v -= 127)
        put_bits(&s->pb, 7, 127);
    put_bits(&s->pb, 7, v);

    coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);

    if (s->use_noise_coding) {
        for (ch = 0; ch < s->avctx->channels; ch++) {
            if (s->channel_coded[ch]) {
                int i, n;
                n = s->exponent_high_sizes[bsize];
                for (i = 0; i < n; i++) {
                    put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
                    if (0)
                        nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
                }
            }
        }
    }

    parse_exponents = 1;
    if (s->block_len_bits != s->frame_len_bits)
        put_bits(&s->pb, 1, parse_exponents);

    if (parse_exponents) {
        for (ch = 0; ch < s->avctx->channels; ch++) {
            if (s->channel_coded[ch]) {
                if (s->use_exp_vlc) {
                    encode_exp_vlc(s, ch, fixed_exp);
                } else {
                    av_assert0(0); // FIXME not implemented
//                    encode_exp_lsp(s, ch);
                }
            }
        }
    } else
        av_assert0(0); // FIXME not implemented

    for (ch = 0; ch < s->avctx->channels; ch++) {
        if (s->channel_coded[ch]) {
            int run, tindex;
            WMACoef *ptr, *eptr;
            tindex = (ch == 1 && s->ms_stereo);
            ptr    = &s->coefs1[ch][0];
            eptr   = ptr + nb_coefs[ch];

            run = 0;
            for (; ptr < eptr; ptr++) {
                if (*ptr) {
                    int level     = *ptr;
                    int abs_level = FFABS(level);
                    int code      = 0;
                    if (abs_level <= s->coef_vlcs[tindex]->max_level)
                        if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
                            code = run + s->int_table[tindex][abs_level - 1];

                    av_assert2(code < s->coef_vlcs[tindex]->n);
                    put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
                             s->coef_vlcs[tindex]->huffcodes[code]);

                    if (code == 0) {
                        if (1 << coef_nb_bits <= abs_level)
                            return -1;

                        put_bits(&s->pb, coef_nb_bits, abs_level);
                        put_bits(&s->pb, s->frame_len_bits, run);
                    }
                    // FIXME the sign is flipped somewhere
                    put_bits(&s->pb, 1, level < 0);
                    run = 0;
                } else
                    run++;
            }
            if (run)
                put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
                         s->coef_vlcs[tindex]->huffcodes[1]);
        }
        if (s->version == 1 && s->avctx->channels >= 2)
            avpriv_align_put_bits(&s->pb);
    }
    return 0;
}
示例#28
0
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
                                     int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;

    av_assert2(s->block_last_index[n]>=0);

    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
    else                 qscale <<= 1;

    if(s->alternate_scan) nCoeffs= 63; //FIXME
    else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];

        quant_matrix = s->inter_matrix;
__asm__ volatile(
                "pcmpeqw %%mm7, %%mm7           \n\t"
                "psrlq $48, %%mm7               \n\t"
                "movd %2, %%mm6                 \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "mov %3, %%"REG_a"              \n\t"
                ".p2align 4                     \n\t"
                "1:                             \n\t"
                "movq (%0, %%"REG_a"), %%mm0    \n\t"
                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
                "movq (%1, %%"REG_a"), %%mm4    \n\t"
                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                "pxor %%mm2, %%mm2              \n\t"
                "pxor %%mm3, %%mm3              \n\t"
                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*2*q
                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*2*q
                "paddw %%mm4, %%mm0             \n\t" // (abs(block[i])*2 + 1)*q
                "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
                "pxor %%mm4, %%mm4              \n\t"
                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                "psrlw $5, %%mm0                \n\t"
                "psrlw $5, %%mm1                \n\t"
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t"
                "psubw %%mm3, %%mm1             \n\t"
                "pandn %%mm0, %%mm4             \n\t"
                "pandn %%mm1, %%mm5             \n\t"
                "pxor %%mm4, %%mm7              \n\t"
                "pxor %%mm5, %%mm7              \n\t"
                "movq %%mm4, (%0, %%"REG_a")    \n\t"
                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

                "add $16, %%"REG_a"             \n\t"
                "jng 1b                         \n\t"
                "movd 124(%0, %3), %%mm0        \n\t"
                "movq %%mm7, %%mm6              \n\t"
                "psrlq $32, %%mm7               \n\t"
                "pxor %%mm6, %%mm7              \n\t"
                "movq %%mm7, %%mm6              \n\t"
                "psrlq $16, %%mm7               \n\t"
                "pxor %%mm6, %%mm7              \n\t"
                "pslld $31, %%mm7               \n\t"
                "psrlq $15, %%mm7               \n\t"
                "pxor %%mm7, %%mm0              \n\t"
                "movd %%mm0, 124(%0, %3)        \n\t"

                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
                : "%"REG_a, "memory"
        );
}
示例#29
0
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
                                     int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;
    int block0;

    av_assert2(s->block_last_index[n]>=0);

    if (s->q_scale_type) qscale = ff_mpeg2_non_linear_qscale[qscale];
    else                 qscale <<= 1;

    if(s->alternate_scan) nCoeffs= 63; //FIXME
    else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];

    if (n < 4)
        block0 = block[0] * s->y_dc_scale;
    else
        block0 = block[0] * s->c_dc_scale;
    quant_matrix = s->intra_matrix;
__asm__ volatile(
                "pcmpeqw %%mm7, %%mm7           \n\t"
                "psrlw $15, %%mm7               \n\t"
                "movd %2, %%mm6                 \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "mov %3, %%"REG_a"              \n\t"
                ".p2align 4                     \n\t"
                "1:                             \n\t"
                "movq (%0, %%"REG_a"), %%mm0    \n\t"
                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
                "movq (%1, %%"REG_a"), %%mm4    \n\t"
                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                "pxor %%mm2, %%mm2              \n\t"
                "pxor %%mm3, %%mm3              \n\t"
                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
                "pxor %%mm4, %%mm4              \n\t"
                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                "psraw $4, %%mm0                \n\t"
                "psraw $4, %%mm1                \n\t"
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t"
                "psubw %%mm3, %%mm1             \n\t"
                "pandn %%mm0, %%mm4             \n\t"
                "pandn %%mm1, %%mm5             \n\t"
                "movq %%mm4, (%0, %%"REG_a")    \n\t"
                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

                "add $16, %%"REG_a"             \n\t"
                "jng 1b                         \n\t"
                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
                : "%"REG_a, "memory"
        );
    block[0]= block0;
        //Note, we do not do mismatch control for intra as errors cannot accumulate
}
示例#30
0
static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
                                     int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;

    av_assert2(s->block_last_index[n]>=0);

    nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;

        quant_matrix = s->inter_matrix;
__asm__ volatile(
                "pcmpeqw %%mm7, %%mm7           \n\t"
                "psrlw $15, %%mm7               \n\t"
                "movd %2, %%mm6                 \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "packssdw %%mm6, %%mm6          \n\t"
                "mov %3, %%"REG_a"              \n\t"
                ".p2align 4                     \n\t"
                "1:                             \n\t"
                "movq (%0, %%"REG_a"), %%mm0    \n\t"
                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
                "movq (%1, %%"REG_a"), %%mm4    \n\t"
                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
                "pxor %%mm2, %%mm2              \n\t"
                "pxor %%mm3, %%mm3              \n\t"
                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
                "paddw %%mm7, %%mm0             \n\t" // abs(block[i])*2 + 1
                "paddw %%mm7, %%mm1             \n\t" // abs(block[i])*2 + 1
                "pmullw %%mm4, %%mm0            \n\t" // (abs(block[i])*2 + 1)*q
                "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
                "pxor %%mm4, %%mm4              \n\t"
                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                "psraw $4, %%mm0                \n\t"
                "psraw $4, %%mm1                \n\t"
                "psubw %%mm7, %%mm0             \n\t"
                "psubw %%mm7, %%mm1             \n\t"
                "por %%mm7, %%mm0               \n\t"
                "por %%mm7, %%mm1               \n\t"
                "pxor %%mm2, %%mm0              \n\t"
                "pxor %%mm3, %%mm1              \n\t"
                "psubw %%mm2, %%mm0             \n\t"
                "psubw %%mm3, %%mm1             \n\t"
                "pandn %%mm0, %%mm4             \n\t"
                "pandn %%mm1, %%mm5             \n\t"
                "movq %%mm4, (%0, %%"REG_a")    \n\t"
                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

                "add $16, %%"REG_a"             \n\t"
                "js 1b                          \n\t"
                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
                : "%"REG_a, "memory"
        );
}