void dct_16(Int32 vec[], Int flag)
{
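    /*
     *  16-point DCT computed in place over vec[0..15] using an even/odd
     *  split: tmp0..tmp7 hold the sums vec[i] + vec[15-i], tmp_o0..tmp_o7
     *  the scaled differences.  When flag == 0 the odd-split intermediates
     *  are negated before the second-stage butterflies (callers presumably
     *  use this to fold a sign change from the enclosing transform into
     *  this stage).
     */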
    Int32 tmp0;
    Int32 tmp1;
    Int32 tmp2;
    Int32 tmp3;
    Int32 tmp4;
    Int32 tmp5;
    Int32 tmp6;
    Int32 tmp7;
    Int32 tmp_o0;
    Int32 tmp_o1;
    Int32 tmp_o2;
    Int32 tmp_o3;
    Int32 tmp_o4;
    Int32 tmp_o5;
    Int32 tmp_o6;
    Int32 tmp_o7;
    Int32 itmp_e0;
    Int32 itmp_e1;
    Int32 itmp_e2;

    /*  split input vector */


    tmp_o0 = fxp_mul32_by_16((vec[ 0] - vec[15]), Qfmt15(0.50241928618816F));
    tmp0   =  vec[ 0] + vec[15];

    tmp_o7 = fxp_mul32_Q31((vec[ 7] - vec[ 8]) << 3, Qfmt_31(0.63764357733614F));
    tmp7   =  vec[ 7] + vec[ 8];

    itmp_e0 = (tmp0 + tmp7);
    tmp7    = fxp_mul32_by_16((tmp0 - tmp7), Qfmt15(0.50979557910416F));

    tmp_o1 = fxp_mul32_by_16((vec[ 1] - vec[14]), Qfmt15(0.52249861493969F));
    tmp1   =  vec[ 1] + vec[14];
    tmp_o6 = fxp_mul32_by_16((vec[ 6] - vec[ 9]) << 1, Qfmt15(0.86122354911916F));
    tmp6   =  vec[ 6] + vec[ 9];

    itmp_e1 = (tmp1 + tmp6);
    tmp6    = fxp_mul32_by_16((tmp1 - tmp6), Qfmt15(0.60134488693505F));

    tmp_o2 = fxp_mul32_by_16((vec[ 2] - vec[13]), Qfmt15(0.56694403481636F));
    tmp2   =  vec[ 2] + vec[13];
    tmp_o5 = fxp_mul32_by_16((vec[ 5] - vec[10]) << 1, Qfmt15(0.53033884299517F));
    tmp5   =  vec[ 5] + vec[10];

    itmp_e2 = (tmp2 + tmp5);
    tmp5    = fxp_mul32_by_16((tmp2 - tmp5), Qfmt15(0.89997622313642F));

    tmp_o3 = fxp_mul32_by_16((vec[ 3] - vec[12]), Qfmt15(0.64682178335999F));
    tmp3   =  vec[ 3] + vec[12];
    tmp_o4 = fxp_mul32_by_16((vec[ 4] - vec[11]), Qfmt15(0.78815462345125F));
    tmp4   =  vec[ 4] + vec[11];

    tmp1   = (tmp3 + tmp4);
    tmp4   =  fxp_mul32_Q31((tmp3 - tmp4) << 2, Qfmt_31(0.64072886193538F));

    /*  split even part of tmp_e */

    tmp0 = (itmp_e0 + tmp1);
    tmp1 = fxp_mul32_by_16((itmp_e0 - tmp1), Qfmt15(0.54119610014620F));


    tmp3 = fxp_mul32_by_16((itmp_e1 - itmp_e2) << 1, Qfmt15(0.65328148243819F));
    tmp2 = (itmp_e1 + itmp_e2);

    vec[ 0]  = tmp0 >> 1;
    vec[ 0] += tmp2 >> 1;
    vec[ 8]  = fxp_mul32_by_16((tmp0 - tmp2), Qfmt15(0.70710678118655F));
    vec[12]  = fxp_mul32_by_16((tmp1 - tmp3) << 1, Qfmt15(0.70710678118655F));
    vec[ 4]  =  tmp1 + tmp3;
    vec[ 4] +=  vec[12];

    /*  split odd part of tmp_e */

    tmp1 = fxp_mul32_by_16((tmp7 - tmp4) << 1, Qfmt15(0.54119610014620F));
    tmp7 += tmp4;
    tmp3 = fxp_mul32_Q31((tmp6 - tmp5) << 2, Qfmt_31(0.65328148243819F));

    tmp6 += tmp5;

    vec[10]  = fxp_mul32_by_16((tmp7 - tmp6) << 1, Qfmt15(0.70710678118655F));
    vec[ 2]  =  tmp7 + tmp6;
    vec[14]  = fxp_mul32_by_16((tmp1 - tmp3) << 1, Qfmt15(0.70710678118655F));

    tmp1    +=  tmp3 + vec[14];
    vec[ 2] +=  tmp1;
    vec[ 6]  =  tmp1 + vec[10];

    vec[10] += vec[14];


    // dct8: second-stage 8-point transform on the odd-split terms (tmp_o0..tmp_o7)

    tmp7 = tmp_o0 + tmp_o7;
    tmp_o7 = fxp_mul32_by_16((tmp_o0 - tmp_o7) << 1, Qfmt15(0.50979557910416F));

    tmp6 = tmp_o1 + tmp_o6;
    tmp_o1 = fxp_mul32_by_16((tmp_o1 - tmp_o6) << 1, Qfmt15(0.60134488693505F));

    tmp5 = tmp_o2 + tmp_o5;
    tmp_o5 = fxp_mul32_by_16((tmp_o2 - tmp_o5) << 1, Qfmt15(0.89997622313642F));

    tmp4 = tmp_o3 + tmp_o4;

    tmp_o3 = fxp_mul32_Q31((tmp_o3 - tmp_o4) << 3, Qfmt_31(0.6407288619354F));

    if (!flag)
    {
        tmp7   = -tmp7;
        tmp_o7 = -tmp_o7;
        tmp6   = -tmp6;
        tmp_o1 = -tmp_o1;
        tmp5   = -tmp5;
        tmp_o5 = -tmp_o5;
        tmp4   = -tmp4;
        tmp_o3 = -tmp_o3;
    }

    // even part

    tmp1 = fxp_mul32_by_16((tmp7 - tmp4) << 1, Qfmt15(0.54119610014620F));
    tmp0 =  tmp7 + tmp4;
    tmp3 = fxp_mul32_Q31((tmp6 - tmp5) << 2, Qfmt_31(0.65328148243819F));
    tmp2 =  tmp6 + tmp5;

    vec[ 9]  = fxp_mul32_Q31((tmp0 - tmp2) << 1, Qfmt_31(0.70710678118655F));
    vec[ 1]  =  tmp0 + tmp2;
    vec[13]  = fxp_mul32_Q31((tmp1 - tmp3) << 1, Qfmt_31(0.70710678118655F));

    vec[ 5]  =  tmp1 + tmp3 + vec[13];

    // odd part

    tmp0 =  tmp_o7 + tmp_o3;
    tmp1 = fxp_mul32_by_16((tmp_o7 - tmp_o3) << 1, Qfmt15(0.54119610014620F));
    tmp2 =  tmp_o1 + tmp_o5;
    tmp3 = fxp_mul32_Q31((tmp_o1 - tmp_o5) << 2, Qfmt_31(0.65328148243819F));

    vec[11]  = fxp_mul32_Q31((tmp0 - tmp2) << 1, Qfmt_31(0.70710678118655F));
    vec[ 3]  =  tmp0 + tmp2;
    vec[15]  = fxp_mul32_Q31((tmp1 - tmp3) << 1, Qfmt_31(0.70710678118655F));
    vec[ 7]  =  tmp1 + tmp3 + vec[15];


    vec[ 3] += vec[ 7];
    vec[ 7] += vec[11];
    vec[11] += vec[15];

    vec[ 1] += vec[ 3];
    vec[ 3] += vec[ 5];
    vec[ 5] += vec[ 7];
    vec[ 7] += vec[ 9];
    vec[ 9] += vec[11];
    vec[11] += vec[13];
    vec[13] += vec[15];


}
void idct_32(Int32 vec[], Int32 scratch_mem[])   /* scratch_mem size 32 */
{
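    /*
     *  32-point IDCT: even-index inputs are copied to scratch_mem while the
     *  odd-index inputs are folded into running sums in place, each half is
     *  transformed with idct_16(), and the two halves are recombined with
     *  cosine factors (mostly from CosTable_16) into vec[0..31].
     */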
    Int32 *temp_even = scratch_mem;

    Int32 i;
    const Int32 *pt_cos = CosTable_16;
    Int32 tmp1, tmp2;
    Int32 *pt_even = temp_even;
    Int32 *pt_odd  = vec;
    Int32 *pt_vec  = vec;
    Int32 *pt_vecN_1;
    Int32 tmp3;


    *(pt_even++) = *(pt_vec++);
    tmp1         = *(pt_vec++);
    tmp2 = 0;

    for (i = 7; i != 0; i--)
    {
        *(pt_odd++) = tmp2 + tmp1;
        *(pt_even++) = *(pt_vec++);
        tmp2         = *(pt_vec++);
        *(pt_even++) = *(pt_vec++);
        *(pt_odd++) = tmp2 + tmp1;
        tmp1         = *(pt_vec++);
    }

    *(pt_odd++) = tmp2 + tmp1;
    *(pt_even++) = *(pt_vec++);
    tmp2         = *(pt_vec++);
    *(pt_odd++) = tmp2 + tmp1;


    idct_16(temp_even, &scratch_mem[16]);
    idct_16(vec, &scratch_mem[24]);


    pt_cos = &CosTable_16[13];

    pt_vec  = &vec[15];

    pt_even = &temp_even[15];
    pt_vecN_1  = &vec[16];

    tmp1 = *(pt_even--);


    tmp3  = fxp_mul32_Q31(*(pt_vec) << 3, Qfmt31(0.63687550772175F)) << 2;
    tmp2 = *(pt_even--);
    *(pt_vecN_1++)  = tmp1 - tmp3;
    *(pt_vec--)     = tmp1 + tmp3;
    tmp3  = fxp_mul32_Q31(*(pt_vec) << 3, Qfmt31(0.85190210461718F));

    tmp1 = *(pt_even--);
    *(pt_vecN_1++)  = tmp2 - tmp3;
    *(pt_vec--)     = tmp2 + tmp3;

    for (i = 2; i != 0; i--)
    {
        tmp3  = fxp_mul32_Q29(*(pt_vec), *(pt_cos--));
        tmp2 = *(pt_even--);
        *(pt_vecN_1++)  = tmp1 - tmp3;
        *(pt_vec--)     = tmp1 + tmp3;
        tmp3  = fxp_mul32_Q29(*(pt_vec), *(pt_cos--));
        tmp1 = *(pt_even--);
        *(pt_vecN_1++)  = tmp2 - tmp3;
        *(pt_vec--)     = tmp2 + tmp3;
    }

    for (i = 5; i != 0; i--)
    {
        tmp3  = fxp_mul32_Q31(*(pt_vec) << 1, *(pt_cos--));
        tmp2 = *(pt_even--);
        *(pt_vecN_1++)  = tmp1 - tmp3;
        *(pt_vec--)     = tmp1 + tmp3;
        tmp3  = fxp_mul32_Q31(*(pt_vec) << 1, *(pt_cos--));
        tmp1 = *(pt_even--);
        *(pt_vecN_1++)  = tmp2 - tmp3;
        *(pt_vec--)     = tmp2 + tmp3;
    }
}
void dst_32(Int32 vec[], Int32 scratch_mem[])   /* scratch_mem size 32 */
{
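    /*
     *  32-point DST, built like idct_32(): even-index inputs go to
     *  scratch_mem, adjacent odd-index inputs are summed in place, both
     *  halves go through dst_16(), and the recombination stage applies the
     *  same cosine factors with a vec[31]/2 correction term (tmp0).
     */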
    Int32 *temp_even = scratch_mem;

    Int32 i;
    const Int32 *pt_cos = &CosTable_16[13];
    Int32 tmp0 = vec[31] >> 1;
    Int32 tmp1, tmp2;
    Int32 *pt_even = temp_even;
    Int32 *pt_odd  = vec;
    Int32 *pt_vec  = vec;
    Int32 *pt_vecN_1  = vec;
    Int32 tmp3;


    tmp1 = 0;

    for (i = 5; i != 0; i--)
    {
        *(pt_even++) = *(pt_vec++);
        tmp2         = *(pt_vec++);
        *(pt_even++) = *(pt_vec++);
        tmp3         = *(pt_vec++);
        *(pt_even++) = *(pt_vec++);
        *(pt_odd++) = tmp2 + tmp1;
        tmp1         = *(pt_vec++);
        *(pt_odd++) = tmp3 + tmp2;
        *(pt_odd++) = tmp1 + tmp3;
    }

    *(pt_even) = *(pt_vec++);
    *(pt_odd)  = *(pt_vec) + tmp1;


    dst_16(temp_even, &scratch_mem[16]);
    dst_16(vec, &scratch_mem[24]);


    pt_vecN_1  = &vec[16];

    tmp1 = temp_even[15];

    tmp3  = fxp_mul32_Q31((vec[15] - tmp0) << 3, Qfmt31(0.63687550772175F)) << 2;
    tmp2  = temp_even[14];
    *(pt_vecN_1++)  = tmp3 - tmp1;
    vec[15]         = tmp3 + tmp1;
    tmp1  = temp_even[13];
    tmp3  = fxp_mul32_Q31((vec[14] + tmp0) << 3, Qfmt31(0.85190210461718F));
    *(pt_vecN_1++)  = tmp3 - tmp2;
    vec[14]         = tmp3 + tmp2;

    pt_even = &temp_even[12];
    pt_vec  = &vec[13];

    for (i = 2; i != 0; i--)
    {
        tmp3  = fxp_mul32_Q29((*(pt_vec) - tmp0), *(pt_cos--));
        tmp2 = *(pt_even--);
        *(pt_vec--)     = tmp3 + tmp1;
        *(pt_vecN_1++)  = tmp3 - tmp1;
        tmp3  = fxp_mul32_Q29((*(pt_vec) + tmp0), *(pt_cos--));
        tmp1 = *(pt_even--);
        *(pt_vec--)     = tmp3 + tmp2;
        *(pt_vecN_1++)  = tmp3 - tmp2;
    }

    for (i = 5; i != 0; i--)
    {
        tmp3  = fxp_mul32_Q31((*(pt_vec) - tmp0), *(pt_cos--)) << 1;
        tmp2 = *(pt_even--);
        *(pt_vec--)     = tmp3 + tmp1;
        *(pt_vecN_1++)  = tmp3 - tmp1;
        tmp3  = fxp_mul32_Q31((*(pt_vec) + tmp0), *(pt_cos--)) << 1;
        tmp1 = *(pt_even--);
        *(pt_vec--)     = tmp3 + tmp2;
        *(pt_vecN_1++)  = tmp3 - tmp2;
    }


}
void idct_16(Int32 vec[], Int32 scratch_mem[])    /* scratch_mem size 8 */
{
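    /*
     *  16-point IDCT: same split/recombine pattern as idct_32(), but with
     *  idct_8() on each half and CosTable_8i supplying the twiddle factors.
     */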
    Int32 *temp_even = scratch_mem;

    Int32 i;
    const Int32 *pt_cos = CosTable_8i;
    Int32 tmp1, tmp2;
    Int32 *pt_even = temp_even;
    Int32 *pt_odd  = vec;
    Int32 *pt_vec  = vec;

    Int32 tmp3;
    Int32 *pt_vecN_1;


    *(pt_even++) = *(pt_vec++);
    tmp1         = *(pt_vec++);
    *(pt_odd++) = tmp1;

    for (i = 2; i != 0; i--)
    {
        *(pt_even++) = *(pt_vec++);
        tmp2         = *(pt_vec++);
        *(pt_even++) = *(pt_vec++);
        tmp3         = *(pt_vec++);
        *(pt_odd++) = tmp2 + tmp1;
        *(pt_odd++) = tmp3 + tmp2;
        tmp1         = tmp3;
    }

    *(pt_even++) = *(pt_vec++);
    tmp2         = *(pt_vec++);
    *(pt_even++) = *(pt_vec++);
    tmp3         = *(pt_vec++);
    *(pt_odd++) = tmp2 + tmp1;
    *(pt_odd++) = tmp3 + tmp2;


    *(pt_even)   = *(pt_vec++);
    *(pt_odd++) = *(pt_vec) + tmp3;


    idct_8(temp_even);
    idct_8(vec);


    pt_cos = &CosTable_8i[7];

    pt_vec  = &vec[7];

    pt_even = &temp_even[7];
    pt_vecN_1  = &vec[8];

    tmp1 = *(pt_even--);

    for (i = 2; i != 0; i--)
    {
        tmp3  = fxp_mul32_Q28(*(pt_vec), *(pt_cos--));
        tmp2 = *(pt_even--);
        *(pt_vecN_1++)  = tmp1 - tmp3;
        *(pt_vec--)     = tmp1 + tmp3;
        tmp3  = fxp_mul32_Q28(*(pt_vec), *(pt_cos--));
        tmp1 = *(pt_even--);
        *(pt_vecN_1++)  = tmp2 - tmp3;
        *(pt_vec--)     = tmp2 + tmp3;
    }

    tmp3  = fxp_mul32_Q31(*(pt_vec), *(pt_cos--)) << 1;
    tmp2 = *(pt_even--);
    *(pt_vecN_1++)  = tmp1 - tmp3;
    *(pt_vec--)     = tmp1 + tmp3;
    tmp3  = fxp_mul32_Q31(*(pt_vec), *(pt_cos--)) << 1;
    tmp1 = *(pt_even--);
    *(pt_vecN_1++)  = tmp2 - tmp3;
    *(pt_vec--)     = tmp2 + tmp3;
    tmp3  = fxp_mul32_Q31(*(pt_vec), *(pt_cos--)) << 1;
    tmp2 = *(pt_even--);
    *(pt_vecN_1++)  = tmp1 - tmp3;
    *(pt_vec--)     = tmp1 + tmp3;
    tmp3  = fxp_mul32_Q31(*(pt_vec), *(pt_cos)) << 1;
    *(pt_vecN_1)  = tmp2 - tmp3;
    *(pt_vec)     = tmp2 + tmp3;

}
void synthesis_sub_band(Int32 Sr[], Int32 Si[], Int16 data[])
{
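    /*
     *  Complex (HQ) SBR synthesis sub-band filtering: Sr[] is scaled in
     *  place and Si[] is reversed while being scaled by the CosTable_64
     *  factors, both are transformed with dct_64() (data[] is passed in,
     *  apparently as workspace, before being overwritten with the output),
     *  and the results are combined, scaled by SCALE_DOWN_HQ and written
     *  out as 128 Int16 samples.
     */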


    Int32 i ;
    Int16 *pt_data_1;
    Int16 *pt_data_2;
    Int32 *pt_Sr_1;
    Int32 *pt_Sr_2;
    Int32 *pt_Si_1;
    Int32 *pt_Si_2;

    Int32 tmp1;
    Int32 tmp2;
    Int32 tmp3;
    Int32 tmp4;

    Int32 cosx;
    const Int32 *pt_CosTable = CosTable_64;


    pt_Sr_1 = &Sr[0];
    pt_Sr_2 = &Sr[63];

    pt_Si_1 = &Si[0];
    pt_Si_2 = &Si[63];


    tmp3 = *pt_Sr_1;

    for (i = 32; i != 0; i--)
    {
        tmp4 = *pt_Si_2;
        cosx = *(pt_CosTable++);
        *(pt_Sr_1++) = fxp_mul32_Q31(tmp3, cosx);
        tmp3 = *pt_Si_1;
        *(pt_Si_1++) = fxp_mul32_Q31(tmp4, cosx);
        tmp4 = *pt_Sr_2;
        cosx = *(pt_CosTable++);
        *(pt_Si_2--) = fxp_mul32_Q31(tmp3, cosx);
        *(pt_Sr_2--) = fxp_mul32_Q31(tmp4, cosx);
        tmp3 = *pt_Sr_1;
    }


    dct_64(Sr, (Int32 *)data);
    dct_64(Si, (Int32 *)data);


    pt_data_1 = &data[0];
    pt_data_2 = &data[127];

    pt_Sr_1 = &Sr[0];
    pt_Si_1 = &Si[0];

    tmp1 = *(pt_Sr_1++);
    tmp3 = *(pt_Sr_1++);
    tmp2 = *(pt_Si_1++);
    tmp4 = *(pt_Si_1++);

    for (i = 32; i != 0; i--)
    {
        *(pt_data_1++) = (Int16) fxp_mul32_Q31((tmp2 - tmp1), SCALE_DOWN_HQ);
        *(pt_data_1++) = (Int16) fxp_mul32_Q31(-(tmp3 + tmp4), SCALE_DOWN_HQ);
        *(pt_data_2--) = (Int16) fxp_mul32_Q31((tmp1 + tmp2), SCALE_DOWN_HQ);
        *(pt_data_2--) = (Int16) fxp_mul32_Q31((tmp3 - tmp4), SCALE_DOWN_HQ);

        tmp1 = *(pt_Sr_1++);
        tmp3 = *(pt_Sr_1++);
        tmp2 = *(pt_Si_1++);
        tmp4 = *(pt_Si_1++);
    }

}
void synthesis_sub_band_LC(Int32 Sr[], Int16 data[])
{
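    /*
     *  Low-complexity (real-valued) synthesis sub-band filtering: the 64
     *  inputs are folded into sums (kept in Sr[]) and CosTable_48-weighted
     *  differences (kept in data[], reused as an Int32 work buffer), each
     *  half is split with pv_split_LC() and transformed with two dct_16()
     *  calls, and the merged result is scaled by SCALE_DOWN_LP and expanded
     *  into the 128-sample output below.
     */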

    Int32 *temp_o1 = (Int32 *) & data[0];

    Int   i;
    Int32 *pt_temp_e;
    Int32 *pt_temp_o = temp_o1;
    Int32 *pt_temp_x = &Sr[63];
    Int32 temp1;
    Int32 temp2;
    Int32 temp3;
    Int32 temp11;

    Int16 *pt_data_1;
    Int16 *pt_data_2;

    Int32 *pt_Sr_1 = Sr;
    Int16 tmp1;
    Int16 tmp2;
    Int16 tmp11;
    Int16 tmp22;
    const Int32 *pt_cosTerms = CosTable_48;


    temp2 = *(pt_temp_x--);
    for (i = 20; i != 0; i--)
    {
        temp1 = *(pt_Sr_1);
        temp3 = *(pt_cosTerms++);
        *(pt_Sr_1++) =   temp1  + temp2;
        *(pt_temp_o++) = fxp_mul32_Q31((temp1 - temp2), temp3) << 1;
        temp2 = *(pt_temp_x--);
    }

    for (i = 12; i != 0; i--)
    {
        temp1 = *(pt_Sr_1);
        temp3 = *(pt_cosTerms++);
        *(pt_Sr_1++) =   temp1  + temp2;
        *(pt_temp_o++) = fxp_mul32_Q26((temp1 - temp2), temp3);
        temp2 = *(pt_temp_x--);
    }


    pv_split_LC(temp_o1, &Sr[32]);

    dct_16(temp_o1, 1);     // Even terms
    dct_16(&Sr[32], 1);     // Odd  terms

    /* merge */


    pt_Sr_1 = &temp_o1[31];
    pt_temp_e   =  &temp_o1[15];
    pt_temp_o   =  &Sr[47];

    temp1 = *(pt_temp_o--);
    *(pt_Sr_1--) = temp1;
    for (i = 5; i != 0; i--)
    {
        temp2 = *(pt_temp_o--);
        *(pt_Sr_1--) = *(pt_temp_e--);
        *(pt_Sr_1--) = temp1 + temp2;
        temp3 = *(pt_temp_o--);
        *(pt_Sr_1--) = *(pt_temp_e--);
        *(pt_Sr_1--) = temp2 + temp3;
        temp1 = *(pt_temp_o--);
        *(pt_Sr_1--) = *(pt_temp_e--);
        *(pt_Sr_1--) = temp1 + temp3;
    }


    pv_split_LC(Sr, &Sr[32]);

    dct_16(Sr, 1);     // Even terms
    dct_16(&Sr[32], 1);     // Odd  terms


    pt_temp_x   =  &temp_o1[31];
    pt_temp_e   =  &Sr[15];
    pt_temp_o   =  &Sr[47];

    pt_data_1 = &data[95];

    temp2  = *(pt_temp_x--);
    temp11 = *(pt_temp_x--);
    temp1  = *(pt_temp_o--);

    *(pt_data_1--) = (Int16) fxp_mul32_Q31(temp2, SCALE_DOWN_LP);
    *(pt_data_1--) = (Int16) fxp_mul32_Q31(temp1, SCALE_DOWN_LP);

    for (i = 5; i != 0; i--)
    {
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp2), SCALE_DOWN_LP);
        temp3         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31(*(pt_temp_e--), SCALE_DOWN_LP);
        temp2          = *(pt_temp_o--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp3), SCALE_DOWN_LP);
        temp11         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp1 + temp2), SCALE_DOWN_LP);


        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp3), SCALE_DOWN_LP);
        temp1         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31(*(pt_temp_e--), SCALE_DOWN_LP);
        temp3          = *(pt_temp_o--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp1), SCALE_DOWN_LP);
        temp11         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp2 + temp3), SCALE_DOWN_LP);


        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp1), SCALE_DOWN_LP);
        temp2         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31(*(pt_temp_e--), SCALE_DOWN_LP);
        temp1          = *(pt_temp_o--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp2), SCALE_DOWN_LP);
        temp11         = *(pt_temp_x--);
        *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp1 + temp3), SCALE_DOWN_LP);
    }

    *(pt_data_1--) = (Int16) fxp_mul32_Q31((temp11 + temp2), SCALE_DOWN_LP);
    *(pt_data_1--) = (Int16) fxp_mul32_Q31(*(pt_temp_e), SCALE_DOWN_LP);

    /* ---- merge ends---- */
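    /*
     *  Expand the 64 merged samples in data[32..95] to the full 128-sample
     *  window: data[96] = 0, data[96 + k] = -data[96 - k] (odd symmetry
     *  about index 96) and data[32 - k] = data[32 + k] (even symmetry
     *  about index 32).
     */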


    pt_data_1 = &data[95];
    pt_data_2 = &data[96];

    *(pt_data_2++) =   0;
    tmp1  =  *(pt_data_1--);
    tmp2  =  *(pt_data_1--);
    tmp11 =  *(pt_data_1--);
    tmp22 =  *(pt_data_1--);

    for (i = 7; i != 0; i--)
    {
        *(pt_data_2++) = (-tmp1);
        *(pt_data_2++) = (-tmp2);
        *(pt_data_2++) = (-tmp11);
        *(pt_data_2++) = (-tmp22);
        tmp1  =  *(pt_data_1--);
        tmp2  =  *(pt_data_1--);
        tmp11 =  *(pt_data_1--);
        tmp22 =  *(pt_data_1--);
    }


    *(pt_data_2++) = (-tmp1);
    *(pt_data_2++) = (-tmp2);
    *(pt_data_2++) = (-tmp11);

    pt_data_2 = &data[0];

    *(pt_data_2++) =  tmp22;
    tmp1  =  *(pt_data_1--);
    tmp2  =  *(pt_data_1--);
    tmp11 =  *(pt_data_1--);
    tmp22 =  *(pt_data_1--);

    for (i = 7; i != 0; i--)
    {
        *(pt_data_2++) =  tmp1;
        *(pt_data_2++) =  tmp2;
        *(pt_data_2++) =  tmp11;
        *(pt_data_2++) =  tmp22;
        tmp1  =  *(pt_data_1--);
        tmp2  =  *(pt_data_1--);
        tmp11 =  *(pt_data_1--);
        tmp22 =  *(pt_data_1--);
    }

    *(pt_data_2++) =  tmp1;
    *(pt_data_2++) =  tmp2;
    *(pt_data_2++) =  tmp11;
    *(pt_data_2)   =  tmp22;

}
void eight_ch_filtering(const Int32 *pQmfReal,
                        const Int32 *pQmfImag,
                        Int32 *mHybridReal,
                        Int32 *mHybridImag,
                        Int32 scratch_mem[])

{
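    /*
     *  8-channel hybrid analysis filter (parametric stereo): each of the
     *  eight pre-FFT values is built from one or two QMF taps scaled by
     *  fixed filter coefficients (phase rotations folded in; the centre
     *  tap pQmf*[HYBRID_FILTER_DELAY] >> 3 feeds bin 0), and ps_fft_rx8()
     *  then completes the split across the eight hybrid channels.
     */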

    Int32 real;
    Int32 imag;
    Int32 tmp1;
    Int32 tmp2;

    real  = fxp_mul32_Q29(Q29_fmt(-0.06989827306334f), pQmfReal[ 4]);

    real  = fxp_mac32_Q31(real, Qfmt31(0.01055120626280f), pQmfReal[12]);
    imag  = fxp_mul32_Q29(Q29_fmt(-0.06989827306334f), pQmfImag[ 4]);

    imag  = fxp_mac32_Q31(imag, Qfmt31(0.01055120626280f), pQmfImag[12]);

    mHybridReal[2] = (imag - real);
    mHybridImag[2] = -(imag + real);

    real  = fxp_mul32_Q29(Q29_fmt(-0.07266113929591f), pQmfReal[ 3]);

    real  = fxp_mac32_Q31(real, Qfmt31(0.04540841899650f), pQmfReal[11]);
    imag  = fxp_mul32_Q29(Q29_fmt(-0.07266113929591f), pQmfImag[ 3]);

    imag  = fxp_mac32_Q31(imag, Qfmt31(0.04540841899650f), pQmfImag[11]);

    tmp1           =  fxp_mul32_Q29(Q29_fmt(-0.38268343236509f), real);
    mHybridReal[3] =  fxp_mac32_Q29(Q29_fmt(0.92387953251129f), imag, tmp1);
    tmp2           =  fxp_mul32_Q29(Q29_fmt(-0.92387953251129f), real);
    mHybridImag[3] =  fxp_mac32_Q29(Q29_fmt(-0.38268343236509f), imag, tmp2);


    mHybridImag[4] = fxp_mul32_Q31(Qfmt31(0.09093731860946f), (pQmfReal[ 2] - pQmfReal[10]));
    mHybridReal[4] = fxp_mul32_Q31(Qfmt31(0.09093731860946f), (pQmfImag[10] - pQmfImag[ 2]));


    real  = fxp_mul32_Q29(Q29_fmt(-0.02270420949825f), pQmfReal[ 1]);

    real  = fxp_mac32_Q31(real, Qfmt31(0.14532227859182f), pQmfReal[ 9]);
    imag  = fxp_mul32_Q29(Q29_fmt(-0.02270420949825f), pQmfImag[ 1]);

    imag  = fxp_mac32_Q31(imag, Qfmt31(0.14532227859182f), pQmfImag[ 9]);

    tmp1           =  fxp_mul32_Q29(Q29_fmt(0.92387953251129f), imag);

    mHybridReal[5] =  fxp_mac32_Q31(tmp1, Qfmt31(0.76536686473018f), real);
    tmp2           =  fxp_mul32_Q29(Q29_fmt(-0.92387953251129f), real);

    mHybridImag[5] =  fxp_mac32_Q31(tmp2, Qfmt31(0.76536686473018f), imag);

    real  = fxp_mul32_Q29(Q29_fmt(-0.00527560313140f), pQmfReal[ 0]);

    real  = fxp_mac32_Q31(real, Qfmt31(0.13979654612668f), pQmfReal[ 8]);
    imag  = fxp_mul32_Q29(Q29_fmt(-0.00527560313140f), pQmfImag[ 0]);

    imag  = fxp_mac32_Q31(imag, Qfmt31(0.13979654612668f), pQmfImag[ 8]);

    mHybridReal[6] = (imag + real);
    mHybridImag[6] = (imag - real);

    tmp1            =  fxp_mul32_Q31(Qfmt31(0.21791935610828f), pQmfReal[ 7]);
    mHybridReal[7]  =  fxp_mac32_Q31(tmp1, Qfmt31(0.09026515280366f), pQmfImag[ 7]);

    tmp2            =  fxp_mul32_Q29(Q29_fmt(-0.04513257640183f), pQmfReal[ 7]);

    mHybridImag[7]  =  fxp_mac32_Q31(tmp2, Qfmt31(0.21791935610828f), pQmfImag[ 7]);

    mHybridReal[0] = pQmfReal[HYBRID_FILTER_DELAY] >> 3;
    mHybridImag[0] = pQmfImag[HYBRID_FILTER_DELAY] >> 3;

    tmp1           =  fxp_mul32_Q29(Q29_fmt(-0.04513257640183f), pQmfImag[ 5]);

    mHybridReal[1] =  fxp_mac32_Q31(tmp1, Qfmt31(0.21791935610828f), pQmfReal[ 5]);


    tmp2            =  fxp_mul32_Q31(Qfmt31(0.21791935610828f), pQmfImag[ 5]);
    mHybridImag[1]  =  fxp_mac32_Q31(tmp2, Qfmt31(0.09026515280366f), pQmfReal[ 5]);

    /*
     *  8*ifft
     */

    ps_fft_rx8(mHybridReal, mHybridImag, scratch_mem);

}
void two_ch_filtering(const Int32 *pQmf_r,
                      const Int32 *pQmf_i,
                      Int32 *mHybrid_r,
                      Int32 *mHybrid_i)
{
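    /*
     *  2-channel hybrid analysis filter: a symmetric real FIR combines QMF
     *  taps 1/11, 3/9 and 5/7 around the centre tap
     *  pQmf_r/i[HYBRID_FILTER_DELAY] >> 1, and the two hybrid channels are
     *  centre + sum and centre - sum.  The non-ANDROID_DEFAULT_CODE path
     *  uses saturating qadd()/qsub().
     */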

    Int32 cum0;
    Int32 cum1;
    Int32 cum2;
    Int32 tmp1;
    Int32 tmp2;

#ifndef ANDROID_DEFAULT_CODE
    tmp1 = qadd(pQmf_r[ 1], pQmf_r[11]);
    tmp2 = qadd(pQmf_i[ 1], pQmf_i[11]);
#else
    tmp1 = pQmf_r[ 1] + pQmf_r[11];
    tmp2 = pQmf_i[ 1] + pQmf_i[11];
#endif

    cum1 =   fxp_mul32_Q31(Qfmt31(0.03798975052098f), tmp1);
    cum2 =   fxp_mul32_Q31(Qfmt31(0.03798975052098f), tmp2);

#ifndef ANDROID_DEFAULT_CODE
    tmp1 = qadd(pQmf_r[ 3], pQmf_r[9]);
    tmp2 = qadd(pQmf_i[ 3], pQmf_i[9]);
#else    
    tmp1 = pQmf_r[ 3] + pQmf_r[ 9];
    tmp2 = pQmf_i[ 3] + pQmf_i[ 9];
#endif
    
    cum1 =   fxp_msu32_Q31(cum1, Qfmt31(0.14586278335076f), tmp1);
    cum2 =   fxp_msu32_Q31(cum2, Qfmt31(0.14586278335076f), tmp2);

#ifndef ANDROID_DEFAULT_CODE
    tmp1 = qadd(pQmf_r[ 5], pQmf_r[7]);
    tmp2 = qadd(pQmf_i[ 5], pQmf_i[7]);
#else    
    tmp1 = pQmf_r[ 5] + pQmf_r[ 7];
    tmp2 = pQmf_i[ 5] + pQmf_i[ 7];
#endif
    
    cum1 =   fxp_mac32_Q31(cum1, Qfmt31(0.61193261090336f), tmp1);
    cum2 =   fxp_mac32_Q31(cum2, Qfmt31(0.61193261090336f), tmp2);

    cum0 = pQmf_r[HYBRID_FILTER_DELAY] >> 1;  /* HYBRID_FILTER_DELAY == 6 */

#ifndef ANDROID_DEFAULT_CODE
    mHybrid_r[0] = qadd(cum0, cum1);
    mHybrid_r[1] = qsub(cum0, cum1);
#else
    mHybrid_r[0] = (cum0 + cum1);
    mHybrid_r[1] = (cum0 - cum1);
#endif

    cum0 = pQmf_i[HYBRID_FILTER_DELAY] >> 1;  /* HYBRID_FILTER_DELAY == 6 */

#ifndef ANDROID_DEFAULT_CODE
    mHybrid_i[0] = qadd(cum0, cum2);
    mHybrid_i[1] = qsub(cum0, cum2);
#else
    mHybrid_i[0] = (cum0 + cum2);
    mHybrid_i[1] = (cum0 - cum2);
#endif    

}