/*
 * Picks the final-stage horizontal inverse transform routine for the
 * SSE4.2 code path.
 *
 * Dedicated `_sse4_2` routines are returned only when active_bits is 10,
 * sample_size is 4 or 2, and the wavelet is LeGall 5/3, Haar (no shift) or
 * Haar (single shift).  Every other combination is forwarded unchanged to
 * get_invhtransformfinal_c().
 */
InplaceTransformFinal get_invhtransformfinal_sse4_2(int wavelet_index, int active_bits, int sample_size) {
  const bool samples32 = (sample_size == 4);
  const bool samples16 = (sample_size == 2);

  if (active_bits == 10 && (samples32 || samples16)) {
    switch (wavelet_index) {
    case VC2DECODER_WFT_LEGALL_5_3:
      if (samples32)
        return LeGall_5_3_invtransform_H_final_1_10_sse4_2<int32_t>;
      return LeGall_5_3_invtransform_H_final_1_10_sse4_2<int16_t>;

    case VC2DECODER_WFT_HAAR_NO_SHIFT:
      if (samples32)
        return Haar_invtransform_H_final_1_10_sse4_2_int32_t<0>;
      return Haar_invtransform_H_final_1_10_sse4_2_int16_t<0>;

    case VC2DECODER_WFT_HAAR_SINGLE_SHIFT:
      if (samples32)
        return Haar_invtransform_H_final_1_10_sse4_2_int32_t<1>;
      return Haar_invtransform_H_final_1_10_sse4_2_int16_t<1>;

    default:
      /* No specialised routine for this wavelet: use the fallback below. */
      break;
    }
  }

  return get_invhtransformfinal_c(wavelet_index, active_bits, sample_size);
}
int perform_invhtransformfinaltest(invhtransformfinaltest_data &data, void *idata_pre, const int width, const int height, const int stride, bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) { int r = 0; (void)HAS_AVX;(void)HAS_AVX2; void *idata = ALIGNED_ALLOC(32, height*stride*data.sample_size); memcpy(idata, idata_pre, height*stride*data.sample_size); struct offsets_t offsets[] = { { 0, 0, 0, 0 }, { 32, 0, 0, 0 }, { 0, 32, 0, 0 }, { 32, 32, 0, 0 }, { 0, 0, 32, 0 }, { 0, 0, 0, 32 }, { 0, 0, 32, 32 }, { 32, 0, 32, 0 }, { 32, 0, 0, 32 }, { 32, 0, 32, 32 }, { 0, 32, 32, 0 }, { 0, 32, 0, 32 }, { 0, 0, 32, 32 }, { 32, 32, 32, 0 }, { 32, 32, 0, 32 }, { 32, 32, 32, 32 }, }; for (int i = 0; r==0 && i < (int)(sizeof(offsets)/sizeof(struct offsets_t)); i++) { char *cdata = (char *)malloc(height*stride*sizeof(uint16_t)); memset(cdata, 0, height*stride*sizeof(uint16_t)); printf("%-20s: H 0/* (%2d,%2d,%2d,%2d) (active %d-bit) ", VC2DecoderWaveletFilterTypeString[data.wavelet], offsets[i].left, offsets[i].right, offsets[i].top, offsets[i].bottom, data.active_bits); if (data.sample_size == 2) printf("16-bit "); else printf("32-bit "); /* Use C version to generate comparison value */ printf("C [ "); InplaceTransformFinal ctrans = NULL; try { ctrans = get_invhtransformfinal_c(data.wavelet, data.active_bits, data.sample_size); } catch(...) 
{ printf(" NONE ]"); r = 1; free(cdata); break; } ctrans(idata, stride, cdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom); if (memcmp(idata, idata_pre, height*stride*data.sample_size) != 0) { printf(" BAD ]\n"); printf(" c function overwrites input data!\n"); r = 1; free(cdata); printf("\n"); break; } for (int y = 0; y < offsets[i].top; y++) { for (int x = 0; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } for (int y = offsets[i].top; y < height - offsets[i].bottom; y++) { for (int x = 0; x < offsets[i].left; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } for (int x = offsets[i].left; x < width - offsets[i].right; x++) { if (((uint16_t *)cdata)[y*stride + x] >= (1 << data.active_bits)) { printf(" BAD ]\n"); printf(" c function does not clip values to correct number of bits!\n"); r = 1; free(cdata); printf("\n"); break; } } for (int x = width - offsets[i].right; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } for (int y = height - offsets[i].bottom; y < height; y++) { for (int x = 0; x < stride; x++) { if (((uint16_t *)cdata)[y*stride + x] != 0) { printf(" BAD ]\n"); printf(" c function writes outside of specified memory area!\n"); r = 1; free(cdata); printf("\n"); break; } } } printf(" GOOD ] "); /* Test SSE4_2 version */ if (HAS_SSE4_2 && data.SSE4_2) { printf("SSE4.2 ["); InplaceTransformFinal trans = get_invhtransformfinal_sse4_2(data.wavelet, data.active_bits, data.sample_size); if 
(trans == ctrans) { printf("NONE]"); } else { char *tdata = (char *)malloc(height*stride*sizeof(uint16_t)); memset(tdata, 0, height*stride*sizeof(uint16_t)); trans(idata, stride, tdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom); if (memcmp(cdata, tdata, height*stride*sizeof(uint16_t))) { printf("FAIL]\n"); for (int i = 0; i < (int)(height*stride*sizeof(uint16_t)); i++) { if (cdata[i] != tdata[i]) { printf("\nFirst difference at byte %d, 0x%02x =/= 0x%02x\n\n", i, ((uint8_t *)cdata)[i], ((uint8_t *)tdata)[i]); break; } } r = 1; } else { printf(" OK ] "); } free(tdata); } } free(cdata); printf("\n"); } ALIGNED_FREE(idata); return r; }