/*
 * Pick the SSE4.2 in-place vertical inverse transform for the given wavelet,
 * level, depth and sample size (in bytes).
 *
 * Specialised kernels exist only for the combinations listed below; for
 * anything else the lookup is delegated to get_invvtransform_c() with the
 * same arguments, so the result is whatever that function returns (or throws).
 */
InplaceTransform get_invvtransform_sse4_2(int wavelet_index, int level, int depth, int sample_size) {
  /* The kernels are chosen by this value: 1 selects the <2> instantiation,
     0 selects the <1> instantiation. */
  const int selector = depth - level - 1;

  if (sample_size == 4) {
    /* 32-bit samples */
    if (wavelet_index == VC2DECODER_WFT_LEGALL_5_3) {
      if (selector == 1)
        return LeGall_5_3_invtransform_V_inplace_sse4_2_int32_t<2>;
      if (selector == 0)
        return LeGall_5_3_invtransform_V_inplace_sse4_2_int32_t<1>;
    } else if (wavelet_index == VC2DECODER_WFT_HAAR_NO_SHIFT ||
               wavelet_index == VC2DECODER_WFT_HAAR_SINGLE_SHIFT) {
      /* Both Haar variants share the same vertical kernel. */
      if (selector == 0)
        return Haar_invtransform_V_inplace_sse4_2<1>;
    }
  } else if (sample_size == 2) {
    /* 16-bit samples: only LeGall 5/3 has an SSE4.2 kernel here. */
    if (wavelet_index == VC2DECODER_WFT_LEGALL_5_3) {
      if (selector == 1)
        return LeGall_5_3_invtransform_V_inplace_sse4_2_int16_t<2>;
      if (selector == 0)
        return LeGall_5_3_invtransform_V_inplace_sse4_2_int16_t<1>;
    }
  }

  /* No specialised kernel matched: use the plain C lookup. */
  return get_invvtransform_c(wavelet_index, level, depth, sample_size);
}
/*
 * Run one vertical inverse-transform test case and print a one-line report.
 *
 * The C implementation is run on a copy of idata_pre to produce the
 * comparison output. If HAS_SSE4_2 is set and the test case enables SSE4_2,
 * the SSE4.2 implementation is run on a second copy and the two buffers are
 * compared byte-for-byte. When the SSE4.2 lookup returns the very same
 * function as the C lookup there is nothing to compare and "NONE" is printed.
 *
 * data       test case description (wavelet, level, depth, sample_size, SSE4_2)
 * idata_pre  input samples; read only, height*stride*sample_size bytes are copied
 * width, height, stride
 *            passed through to the transform unchanged
 * HAS_SSE4_2 whether the SSE4.2 variant may be executed
 * HAS_AVX, HAS_AVX2
 *            currently unused
 *
 * Returns 0 on success; 1 if the C lookup throws, a working buffer cannot be
 * allocated, or the SSE4.2 output differs from the C output.
 */
int perform_invvtransformtest(invvtransformtest_data &data,
                              void *idata_pre,
                              const int width,
                              const int height,
                              const int stride,
                              bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) {
  int r = 0;
  (void)HAS_AVX;(void)HAS_AVX2;

  /* Buffer size, computed once in size_t so that the product cannot
     overflow int for large frames. */
  const size_t nbytes = (size_t)height*(size_t)stride*(size_t)data.sample_size;

  printf("%-20s: V %d/%d  ", VC2DecoderWaveletFilterTypeString[data.wavelet], data.level, data.depth);
  if (data.sample_size == 2)
    printf("16-bit ");
  else
    printf("32-bit ");

  /* Use C version to generate comparison value */
  printf("C [ ");
  void *cdata = ALIGNED_ALLOC(32, nbytes);
  /* NOTE(review): assumes ALIGNED_ALLOC signals failure by returning NULL --
     confirm against the macro definition. */
  if (cdata == NULL) {
    printf(" ALLOC FAILED ]\n");
    return 1;
  }
  memcpy(cdata, idata_pre, nbytes);
  InplaceTransform ctrans = NULL;
  try {
    ctrans = get_invvtransform_c(data.wavelet, data.level, data.depth, data.sample_size);
  } catch(...) {
    /* No C implementation for this configuration: nothing to compare against. */
    printf(" NONE ]");
    r = 1;
    goto out;
  }
  printf("EXISTS ] ");
  ctrans(cdata, stride, width, height);

  /* Test SSE4_2 version */
  if (HAS_SSE4_2 && data.SSE4_2) {
    printf("SSE4.2 [");
    InplaceTransform trans = get_invvtransform_sse4_2(data.wavelet, data.level, data.depth, data.sample_size);
    if (trans == ctrans) {
      /* The SSE4.2 lookup fell back to the C function. */
      printf("NONE]");
    } else {
      void *tdata = ALIGNED_ALLOC(32, nbytes);
      if (tdata == NULL) {
        printf("ALLOC FAILED]");
        r = 1;
      } else {
        memcpy(tdata, idata_pre, nbytes);
        trans(tdata, stride, width, height);
        if (memcmp(cdata, tdata, nbytes)) {
          printf("FAIL]\n");
          r = 1;
        } else {
          printf(" OK ] ");
        }
        ALIGNED_FREE(tdata);
      }
    }
  }
 out:

  printf("\n");

  ALIGNED_FREE(cdata);

  return r;
}