/**
 * Forward 2-D DWT for one decomposition level of a square block (SSE2 path).
 *
 * A vertical pass writes the L and H halves into the scratch area; two
 * horizontal passes then write the four sub-bands back into `buffer` in
 * HL, LH, HH, LL order, at consecutive offsets of
 * subband_width * subband_width coefficients.
 *
 * @param buffer         coefficients to transform; receives the four sub-bands
 * @param dwt            scratch area; L half at offset 0, H half at
 *                       2 * subband_width * subband_width (presumably needs
 *                       4 * subband_width^2 elements - confirm against the
 *                       vertical pass)
 * @param subband_width  side length of each resulting sub-band
 *
 * The return type is stated explicitly: nothing is returned, and a definition
 * without a type specifier (implicit int) is not valid since C99.
 * NOTE(review): the only callers visible here live in this file; consider
 * `static` if nothing else references this function.
 */
void rfx_dwt_2d_encode_block_sse2(INT16* buffer, INT16* dwt, int subband_width)
{
	const int band_size = subband_width * subband_width;

	/* Scratch layout: low half first, high half after it. */
	INT16* const low = dwt;
	INT16* const high = dwt + band_size * 2;

	/* Destination layout inside buffer: HL(0), LH(1), HH(2), LL(3). */
	INT16* const hl = buffer;
	INT16* const lh = buffer + band_size;
	INT16* const hh = buffer + band_size * 2;
	INT16* const ll = buffer + band_size * 3;

	_mm_prefetch_buffer((char*) dwt, subband_width * 4 * sizeof(INT16));

	/* Vertical direction: buffer -> L and H in the scratch area. */
	rfx_dwt_2d_encode_block_vert_sse2(buffer, low, high, subband_width);

	/* Horizontal direction: L yields LL and HL, H yields LH and HH. */
	rfx_dwt_2d_encode_block_horiz_sse2(low, ll, hl, subband_width);
	rfx_dwt_2d_encode_block_horiz_sse2(high, lh, hh, subband_width);
}
/**
 * Inverse 2-D DWT for one decomposition level of a square block (SSE2 path).
 *
 * `buffer` holds four sub-bands in HL, LH, HH, LL order, each
 * subband_width * subband_width coefficients long. Two horizontal passes
 * rebuild the L and H halves in the scratch area, then a vertical pass
 * writes the result back over `buffer`.
 *
 * @param buffer         the four sub-bands on entry; receives the result
 * @param idwt           scratch area; L half at offset 0, H half at
 *                       2 * subband_width * subband_width (presumably needs
 *                       4 * subband_width^2 elements - confirm against the
 *                       horizontal pass)
 * @param subband_width  side length of each input sub-band
 *
 * The return type is stated explicitly: nothing is returned, and a definition
 * without a type specifier (implicit int) is not valid since C99.
 * NOTE(review): the only callers visible here live in this file; consider
 * `static` if nothing else references this function.
 */
void rfx_dwt_2d_decode_block_sse2(INT16* buffer, INT16* idwt, int subband_width)
{
	const int band_size = subband_width * subband_width;

	/* Source layout inside buffer: HL(0), LH(1), HH(2), LL(3). */
	INT16* const hl = buffer;
	INT16* const lh = buffer + band_size;
	INT16* const hh = buffer + band_size * 2;
	INT16* const ll = buffer + band_size * 3;

	/* Scratch layout: low half first, high half after it. */
	INT16* const low = idwt;
	INT16* const high = idwt + band_size * 2;

	_mm_prefetch_buffer((char*) idwt, subband_width * 4 * sizeof(INT16));

	/* Horizontal direction: LL + HL rebuild L, LH + HH rebuild H. */
	rfx_dwt_2d_decode_block_horiz_sse2(ll, hl, low, subband_width);
	rfx_dwt_2d_decode_block_horiz_sse2(lh, hh, high, subband_width);

	/* Vertical direction: L and H are merged back into the original buffer. */
	rfx_dwt_2d_decode_block_vert_sse2(low, high, buffer, subband_width);
}
/**
 * Three-level forward 2-D DWT over a 4096-coefficient buffer (SSE2 path).
 *
 * Runs the block encoder with sub-band widths 32, 16 and 8. The second and
 * third passes start at offsets 3072 (3 * 32 * 32) and 3840
 * (3072 + 3 * 16 * 16), which is where the block encoder places the LL
 * sub-band of the pass before.
 *
 * @param buffer      coefficients, transformed in place
 * @param dwt_buffer  scratch area shared by all three passes
 */
static void rfx_dwt_2d_encode_sse2(INT16* buffer, INT16* dwt_buffer)
{
	enum
	{
		LEVEL2_OFFSET = 3072,
		LEVEL3_OFFSET = 3840
	};

	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16));

	/* Largest level first; each later pass works on a smaller region. */
	rfx_dwt_2d_encode_block_sse2(&buffer[0], dwt_buffer, 32);
	rfx_dwt_2d_encode_block_sse2(&buffer[LEVEL2_OFFSET], dwt_buffer, 16);
	rfx_dwt_2d_encode_block_sse2(&buffer[LEVEL3_OFFSET], dwt_buffer, 8);
}
/**
 * Three-level inverse 2-D DWT over a 4096-coefficient buffer (SSE2 path).
 *
 * Runs the block decoder with sub-band widths 8, 16 and 32, starting at
 * offsets 3840, 3072 and 0 respectively, i.e. smallest region first.
 *
 * @param buffer      coefficients, reconstructed in place
 * @param dwt_buffer  scratch area shared by all three passes
 */
static void rfx_dwt_2d_decode_sse2(INT16* buffer, INT16* dwt_buffer)
{
	INT16* const level3 = buffer + 3840;
	INT16* const level2 = buffer + 3072;
	INT16* const level1 = buffer;

	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16));

	rfx_dwt_2d_decode_block_sse2(level3, dwt_buffer, 8);
	rfx_dwt_2d_decode_block_sse2(level2, dwt_buffer, 16);
	rfx_dwt_2d_decode_block_sse2(level1, dwt_buffer, 32);
}
/**
 * Runs the quantization-decode step over the ten sub-bands of a
 * 4096-coefficient buffer (SSE2 path).
 *
 * Sub-bands are laid out back to back: three of 1024 coefficients, three of
 * 256, then four of 64. Each one is handed to the block routine together
 * with its entry from quantVals minus 1.
 *
 * @param buffer     coefficients, processed in place
 * @param quantVals  ten values; indices 0..9 are read, mapped to sub-bands as
 *                   annotated on each call below
 *
 * No range check is done here: assuming UINT32 is unsigned, an entry of 0
 * would wrap around when 1 is subtracted.
 * NOTE(review): a second definition of rfx_quantization_decode_sse2 with the
 * same signature is visible in this file; a translation unit can hold only
 * one - confirm which variant is meant to be built.
 */
static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantVals)
{
	enum
	{
		LEVEL1_COUNT = 1024,
		LEVEL2_COUNT = 256,
		LEVEL3_COUNT = 64
	};
	INT16* band = buffer; /* cursor: start of the sub-band being processed */

	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16));

	/* Level 1: offsets 0, 1024, 2048. */
	rfx_quantization_decode_block_sse2(band, LEVEL1_COUNT, quantVals[8] - 1); /* HL1 */
	band += LEVEL1_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL1_COUNT, quantVals[7] - 1); /* LH1 */
	band += LEVEL1_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL1_COUNT, quantVals[9] - 1); /* HH1 */
	band += LEVEL1_COUNT;

	/* Level 2: offsets 3072, 3328, 3584. */
	rfx_quantization_decode_block_sse2(band, LEVEL2_COUNT, quantVals[5] - 1); /* HL2 */
	band += LEVEL2_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL2_COUNT, quantVals[4] - 1); /* LH2 */
	band += LEVEL2_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL2_COUNT, quantVals[6] - 1); /* HH2 */
	band += LEVEL2_COUNT;

	/* Level 3: offsets 3840, 3904, 3968, 4032. */
	rfx_quantization_decode_block_sse2(band, LEVEL3_COUNT, quantVals[2] - 1); /* HL3 */
	band += LEVEL3_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL3_COUNT, quantVals[1] - 1); /* LH3 */
	band += LEVEL3_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL3_COUNT, quantVals[3] - 1); /* HH3 */
	band += LEVEL3_COUNT;
	rfx_quantization_decode_block_sse2(band, LEVEL3_COUNT, quantVals[0] - 1); /* LL3 */
}
/**
 * Runs the quantization-decode step over a 4096-coefficient buffer
 * (SSE2 path) in two stages.
 *
 * First the whole buffer is passed to the block routine with a fixed factor
 * of 5; then each of the ten sub-bands is passed again with its entry from
 * quantization_values minus 6.
 *
 * @param buffer               coefficients, processed in place
 * @param quantization_values  ten values; indices 0..9 are read, mapped to
 *                             sub-bands as annotated on each call below
 *
 * No range check is done here: assuming UINT32 is unsigned, an entry below 6
 * would wrap around in the subtraction.
 * NOTE(review): a second definition of rfx_quantization_decode_sse2 with the
 * same signature is visible in this file; a translation unit can hold only
 * one - confirm which variant is meant to be built.
 */
static void rfx_quantization_decode_sse2(INT16* buffer, const UINT32* quantization_values)
{
	const UINT32* const q = quantization_values;

	_mm_prefetch_buffer((char*) buffer, 4096 * sizeof(INT16));

	/* Stage 1: common factor applied to every coefficient. */
	rfx_quantization_decode_block_sse2(&buffer[0], 4096, 5);

	/* Stage 2: per-sub-band remainder, level 1 (1024 coefficients each). */
	rfx_quantization_decode_block_sse2(&buffer[0], 1024, q[8] - 6);    /* HL1 */
	rfx_quantization_decode_block_sse2(&buffer[1024], 1024, q[7] - 6); /* LH1 */
	rfx_quantization_decode_block_sse2(&buffer[2048], 1024, q[9] - 6); /* HH1 */

	/* Level 2 (256 coefficients each). */
	rfx_quantization_decode_block_sse2(&buffer[3072], 256, q[5] - 6); /* HL2 */
	rfx_quantization_decode_block_sse2(&buffer[3328], 256, q[4] - 6); /* LH2 */
	rfx_quantization_decode_block_sse2(&buffer[3584], 256, q[6] - 6); /* HH2 */

	/* Level 3 (64 coefficients each). */
	rfx_quantization_decode_block_sse2(&buffer[3840], 64, q[2] - 6); /* HL3 */
	rfx_quantization_decode_block_sse2(&buffer[3904], 64, q[1] - 6); /* LH3 */
	rfx_quantization_decode_block_sse2(&buffer[3968], 64, q[3] - 6); /* HH3 */
	rfx_quantization_decode_block_sse2(&buffer[4032], 64, q[0] - 6); /* LL3 */
}