예제 #1
0
// Refine the motion vector stored for block (xpos, ypos) from pixel accuracy
// to m_precision extra bits of sub-pixel accuracy.
//
// xpos, ypos     - block coordinates; they index m_mv_array and m_cost_array
// mv_prediction  - predicted vector, used for the motion-vector cost term and
//                  tried directly as a candidate
// lambda         - weighting passed to SetTotal() and to the candidate tests
//
// On return m_mv_array[ypos][xpos] and m_cost_array[ypos][xpos] hold the
// chosen vector and its costs. The search is skipped when either the stored
// vector or the prediction already gives a SAD below twice the block area,
// and the prediction is used if the search ends a stage more than 10% worse
// than the prediction.
void BlockMatcher::RefineMatchSubp(const int xpos, const int ypos,
                                   const MVector& mv_prediction,
                                   const float lambda)
{

    BlockDiffParams dparams;
    dparams.SetBlockLimits( m_bparams , m_pic_data , xpos , ypos);

    // Re-cost the stored vector (scaled up by m_precision bits) against the
    // prediction
    m_cost_array[ypos][xpos].mvcost = GetVarUp( mv_prediction, 
                                                m_mv_array[ypos][xpos]<<m_precision );
    m_cost_array[ypos][xpos].SetTotal( lambda );

    // Initialise to the best pixel value
    MvCostData best_costs( m_cost_array[ypos][xpos] );
    MVector best_mv( m_mv_array[ypos][xpos] );

    // If the integer value is good enough, bail out. The same exit is taken
    // when m_precision < 1: there is then no refinement stage to run, and
    // the predictor test below would index m_subpeldiff with m_precision-1,
    // i.e. a negative index.
    if ( m_precision < 1 || best_costs.SAD < 2*dparams.Xl()*dparams.Yl() )
    {
        m_mv_array[ypos][xpos] = m_mv_array[ypos][xpos]<<m_precision;
        return;
    }

    // Next, test the predictor. If that's good enough, bail out
    MvCostData pred_costs;
    pred_costs.mvcost = 0;
    pred_costs.SAD = m_subpeldiff[m_precision-1]->Diff( dparams, mv_prediction);
    pred_costs.total = pred_costs.SAD;

    if ( pred_costs.SAD < 2*dparams.Xl()*dparams.Yl() )
    {
        m_mv_array[ypos][xpos] = mv_prediction;
        m_cost_array[ypos][xpos] = pred_costs;
        return;
    }

    // Now, let's see if we can do better than this

    // Stage 1 offsets, tested in the order: x-1, x+1, y-1, y+1
    static const int stage1_dx[4] = { -1, 1,  0, 0 };
    static const int stage1_dy[4] = {  0, 0, -1, 1 };

    MVector cand_mv;

    for (int i=1; i<=m_precision; ++i )
    {
        // Move to the next finer grid: one more bit of precision
        best_mv = best_mv<<1;
        MVector temp_best_mv = best_mv;

        // Stage 1 - look at the 4 nearest points.
        // NOTE(review): Diff() is defined elsewhere; it is expected to
        // update best_costs and temp_best_mv when the candidate is better.
        for (int k=0; k<4; ++k )
        {
            cand_mv.x = best_mv.x + stage1_dx[k];
            cand_mv.y = best_mv.y + stage1_dy[k];
            m_subpeldiff[i-1]->Diff( dparams, cand_mv ,
                                     GetVarUp( mv_prediction, 
                                               cand_mv<<(m_precision-i) ) ,
                                     lambda , best_costs ,
                                     temp_best_mv);
        }

        // Stage 2. If we've done better than the original value, look at
        // the two neighbours of the new best point along the other axis.
        // A change in x takes priority over a change in y.
        const bool moved_in_x = ( temp_best_mv.x != best_mv.x );
        const bool moved_in_y = ( temp_best_mv.y != best_mv.y );

        if ( moved_in_x || moved_in_y )
        {
            // Fixed copy: temp_best_mv may change during the tests below
            const MVector new_best_mv = temp_best_mv;

            for (int step=-1; step<=1; step+=2 )
            {
                cand_mv.x = new_best_mv.x + ( moved_in_x ? 0 : step );
                cand_mv.y = new_best_mv.y + ( moved_in_x ? step : 0 );
                m_subpeldiff[i-1]->Diff( dparams, cand_mv ,
                                         GetVarUp( mv_prediction, 
                                                   cand_mv<<(m_precision-i) ) ,
                                         lambda , best_costs ,
                                         temp_best_mv);
            }
        }

        best_mv = temp_best_mv;

        // Bail out if we can't do better than 10% worse than the predictor at
        // each stage
        if ( best_costs.total>1.1*pred_costs.total )
        {
            m_mv_array[ypos][xpos] = mv_prediction;
            m_cost_array[ypos][xpos] = pred_costs;
            return;
        }

    }//i


    // Write the results in the arrays //
    /////////////////////////////////////

    m_mv_array[ypos][xpos] = best_mv;
    m_cost_array[ypos][xpos] = best_costs;

}
예제 #2
0
    // Compute the DC (integer mean) of a block and the sum of absolute
    // differences between the block and that DC, handling four samples per
    // step with MMX and any leftover columns with scalar code.
    //
    // dparams   - gives the block origin (Xp, Yp) and size (Xl, Yl)
    // pic_data  - picture samples; LengthX() is used as the row pitch
    // dc_val    - output: block sum divided by (Xl*Yl), cast to ValueType
    // returns   - sum over the block of |sample - dc_val|
    //
    // NOTE(review): the packed 16-bit operations assume ValueType is a
    // 16-bit signed type - confirm against its declaration.
    CalcValueType simple_intra_block_diff_mmx_4 ( 
            const BlockDiffParams& dparams, 
            const PicArray& pic_data, ValueType &dc_val)
    {
        const int rows = dparams.Yl();
        const int cols = dparams.Xl();
        // Number of columns covered by whole groups of four samples
        const int simd_cols = (cols >> 2) << 2;
        const int row_pitch = pic_data.LengthX();

        ValueType* const block_start = &(pic_data[dparams.Yp()][dparams.Xp()]);

        const __m64 zero = _mm_set_pi16(0, 0, 0, 0);

        // Two 32-bit partial sums, viewed as an MMX register or as ints
        u_mmx_val acc;
        acc.i[0] = 0;
        acc.i[1] = 0;

        // Pass 1: sum all the samples in the block
        CalcValueType tail_total = 0;
        ValueType* row = block_start;
        for (int y = 0; y < rows; ++y, row += row_pitch)
        {
            ValueType* p = row;
            for (int x = 0; x < simd_cols; x += 4, p += 4)
            {
                const __m64 quad = *(__m64 *)p;
                // Upper two samples: interleave with zero, then shift up and
                // arithmetically back down to sign-extend to 32 bits
                __m64 upper = _mm_unpackhi_pi16(quad, zero);
                upper = _mm_srai_pi32(_mm_slli_pi32(upper, 16), 16);
                // Lower two samples: duplicate each word, then an arithmetic
                // shift leaves the sign-extended value
                const __m64 lower = _mm_srai_pi32(_mm_unpacklo_pi16(quad, quad), 16);
                acc.m = _mm_add_pi32(acc.m, _mm_add_pi32(lower, upper));
            }
            // Columns not covered by a group of four
            for (int x = simd_cols; x < cols; ++x, ++p)
            {
                tail_total += *p;
            }
        }

        const CalcValueType mean = (acc.i[0] + acc.i[1] + tail_total)/(cols*rows);
        dc_val = static_cast<ValueType>( mean );

        // Pass 2: sum of absolute differences against the DC value
        const __m64 dc_quad = _mm_set_pi16 ( dc_val, dc_val , dc_val , dc_val);
        acc.m = _mm_xor_si64(acc.m, acc.m); // reset both partial sums to 0
        tail_total = 0;

        row = block_start;
        for (int y = 0; y < rows; ++y, row += row_pitch)
        {
            ValueType* p = row;
            for (int x = 0; x < simd_cols; x += 4, p += 4)
            {
                __m64 diff = _mm_sub_pi16 (*(__m64 *)p, dc_quad);
                // abs(diff) via the sign mask m: (diff ^ m) - m
                const __m64 sign = _mm_srai_pi16(diff, 15);
                diff = _mm_sub_pi16 (_mm_xor_si64(diff, sign), sign);
                // Widen to 32 bits and accumulate
                const __m64 upper = _mm_unpackhi_pi16(diff, zero);
                const __m64 lower = _mm_srai_pi32 (_mm_unpacklo_pi16(diff, diff), 16);
                acc.m = _mm_add_pi32 (acc.m, _mm_add_pi32 (lower, upper));
            }
            // Columns not covered by a group of four
            for (int x = simd_cols; x < cols; ++x, ++p)
            {
                tail_total += std::abs(*p - dc_val);
            }
        }

        const CalcValueType intra_cost = acc.i[0] + acc.i[1] + tail_total;
        // Leave MMX state so later floating-point code is safe
        _mm_empty();

        return intra_cost;

    }