Exemplo n.º 1
0
/*
 * Select the SSE4.2 implementation of the final inverse horizontal
 * transform for the given wavelet, active bit depth and sample size
 * (in bytes).
 *
 * This selector only offers SSE4.2 kernels when active_bits == 10 and
 * sample_size is 4 (int32_t kernels) or 2 (int16_t kernels), for the
 * LeGall 5/3 and the two Haar wavelets. Every other combination is
 * delegated to get_invhtransformfinal_c().
 */
InplaceTransformFinal get_invhtransformfinal_sse4_2(int wavelet_index, int active_bits, int sample_size) {
  if (active_bits == 10) {
    const bool is_legall      = (wavelet_index == VC2DECODER_WFT_LEGALL_5_3);
    const bool is_haar_shift0 = (wavelet_index == VC2DECODER_WFT_HAAR_NO_SHIFT);
    const bool is_haar_shift1 = (wavelet_index == VC2DECODER_WFT_HAAR_SINGLE_SHIFT);

    switch (sample_size) {
    case 4:
      /* 32-bit coefficient kernels */
      if (is_legall)
        return LeGall_5_3_invtransform_H_final_1_10_sse4_2<int32_t>;
      if (is_haar_shift0)
        return Haar_invtransform_H_final_1_10_sse4_2_int32_t<0>;
      if (is_haar_shift1)
        return Haar_invtransform_H_final_1_10_sse4_2_int32_t<1>;
      break;

    case 2:
      /* 16-bit coefficient kernels */
      if (is_legall)
        return LeGall_5_3_invtransform_H_final_1_10_sse4_2<int16_t>;
      if (is_haar_shift0)
        return Haar_invtransform_H_final_1_10_sse4_2_int16_t<0>;
      if (is_haar_shift1)
        return Haar_invtransform_H_final_1_10_sse4_2_int16_t<1>;
      break;

    default:
      break;
    }
  }

  /* No specialised kernel available: use the plain C selection. */
  return get_invhtransformfinal_c(wavelet_index, active_bits, sample_size);
}
/*
 * Exercise the final inverse horizontal transform for one test
 * configuration and compare the SSE4.2 implementation against the C
 * reference.
 *
 * For each combination of a 0 or 32 sample crop on the four sides of the
 * picture the function:
 *   1. runs the C transform from a private copy of the input into a
 *      zero-filled 16-bit output buffer,
 *   2. checks that the input copy was not modified,
 *   3. checks that every output sample outside the cropped window is
 *      still zero and every sample inside it is < (1 << active_bits),
 *   4. if HAS_SSE4_2 and data.SSE4_2 are both set, runs the SSE4.2
 *      transform into a second zero-filled buffer and requires it to be
 *      byte-identical to the C output.
 *
 * Parameters:
 *   data       - test description (wavelet, active_bits, sample_size,
 *                SSE4_2 flag are read).
 *   idata_pre  - pristine input coefficients, height*stride samples of
 *                data.sample_size bytes each; never written.
 *   width, height, stride - picture geometry in samples.
 *   HAS_SSE4_2 - whether the SSE4.2 variant may be executed.
 *   HAS_AVX, HAS_AVX2 - unused.
 *
 * Returns 0 when every case passes, 1 on the first failure (including an
 * allocation failure). Progress is reported on stdout.
 */
int perform_invhtransformfinaltest(invhtransformfinaltest_data &data,
                                   void *idata_pre,
                                   const int width,
                                   const int height,
                                   const int stride,
                                   bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) {
  int r = 0;
  (void)HAS_AVX;(void)HAS_AVX2;

  /* Compute buffer sizes once, in size_t, so the products cannot overflow int. */
  const size_t input_bytes  = (size_t)height*stride*data.sample_size;
  const size_t output_bytes = (size_t)height*stride*sizeof(uint16_t);

  void *idata = ALIGNED_ALLOC(32, input_bytes);
  if (idata == NULL) { /* assumes ALIGNED_ALLOC reports failure as NULL */
    printf("Unable to allocate input buffer\n");
    return 1;
  }
  memcpy(idata, idata_pre, input_bytes);

  /* All 16 combinations of a 0/32 crop in each of the four positions. */
  struct offsets_t offsets[] = { {  0,  0,  0,  0 },
                                 { 32,  0,  0,  0 },
                                 {  0, 32,  0,  0 },
                                 { 32, 32,  0,  0 },
                                 {  0,  0, 32,  0 },
                                 {  0,  0,  0, 32 },
                                 {  0,  0, 32, 32 },
                                 { 32,  0, 32,  0 },
                                 { 32,  0,  0, 32 },
                                 { 32,  0, 32, 32 },
                                 {  0, 32, 32,  0 },
                                 {  0, 32,  0, 32 },
                                 {  0, 32, 32, 32 },
                                 { 32, 32, 32,  0 },
                                 { 32, 32,  0, 32 },
                                 { 32, 32, 32, 32 },
  };
  const int n_offsets = (int)(sizeof(offsets)/sizeof(offsets[0]));

  for (int i = 0; r==0 && i < n_offsets; i++) {
    /* Output window of this case, in samples. */
    const int x_begin = offsets[i].left;
    const int x_end   = width - offsets[i].right;
    const int y_begin = offsets[i].top;
    const int y_end   = height - offsets[i].bottom;

    char *cdata = (char *)malloc(output_bytes);
    if (cdata == NULL) {
      printf("Unable to allocate reference output buffer\n");
      r = 1;
      break;
    }
    memset(cdata, 0, output_bytes);

    printf("%-20s: H 0/* (%2d,%2d,%2d,%2d) (active %d-bit)  ", VC2DecoderWaveletFilterTypeString[data.wavelet], offsets[i].left, offsets[i].right, offsets[i].top, offsets[i].bottom, data.active_bits);
    if (data.sample_size == 2)
      printf("16-bit ");
    else
      printf("32-bit ");

    /* Use C version to generate comparison value */
    printf("C [ ");
    InplaceTransformFinal ctrans = NULL;
    try {
      ctrans = get_invhtransformfinal_c(data.wavelet, data.active_bits, data.sample_size);
    } catch(...) {
      printf(" NONE ]");
      r = 1;
      free(cdata);
      break;
    }

    ctrans(idata, stride, cdata + (y_begin*stride + x_begin)*2, stride, width, height, x_begin, y_begin, x_end - x_begin, y_end - y_begin);
    if (memcmp(idata, idata_pre, input_bytes) != 0) {
      printf(" BAD  ]\n");
      printf("   c function overwrites input data!\n");
      r = 1;
      free(cdata);
      printf("\n");
      break;
    }

    /*
     * Scan the whole output buffer in row-major order. Samples inside the
     * window must fit in active_bits; every other sample must still be
     * zero. The scan stops at the first violation so that cdata is
     * released exactly once and never read after being freed.
     */
    const uint16_t *csamples = (const uint16_t *)cdata;
    const char *fault = NULL;
    for (int y = 0; fault == NULL && y < height; y++) {
      const bool row_in_window = (y >= y_begin && y < y_end);
      for (int x = 0; fault == NULL && x < stride; x++) {
        const uint16_t v = csamples[y*stride + x];
        if (row_in_window && x >= x_begin && x < x_end) {
          if (v >= (1 << data.active_bits))
            fault = "   c function does not clip values to correct number of bits!\n";
        } else if (v != 0) {
          fault = "   c function writes outside of specified memory area!\n";
        }
      }
    }
    if (fault != NULL) {
      printf(" BAD  ]\n");
      printf("%s", fault);
      r = 1;
      free(cdata);
      printf("\n");
      break;
    }
    printf(" GOOD ] ");

    /* Test SSE4_2 version */
    if (HAS_SSE4_2 && data.SSE4_2) {
      printf("SSE4.2 [");
      InplaceTransformFinal trans = get_invhtransformfinal_sse4_2(data.wavelet, data.active_bits, data.sample_size);
      if (trans == ctrans) {
        /* The selector fell back to the C routine: nothing new to test. */
        printf("NONE]");
      } else {
        char *tdata = (char *)malloc(output_bytes);
        if (tdata == NULL) {
          printf("FAIL]\n");
          printf("Unable to allocate test output buffer\n");
          r = 1;
        } else {
          memset(tdata, 0, output_bytes);
          trans(idata, stride, tdata + (y_begin*stride + x_begin)*2, stride, width, height, x_begin, y_begin, x_end - x_begin, y_end - y_begin);
          if (memcmp(cdata, tdata, output_bytes)) {
            printf("FAIL]\n");

            /* Report the first differing byte to help locate the problem. */
            for (int b = 0; b < (int)output_bytes; b++) {
              if (cdata[b] != tdata[b]) {
                printf("\nFirst difference at byte %d, 0x%02x =/= 0x%02x\n\n", b, ((uint8_t *)cdata)[b], ((uint8_t *)tdata)[b]);
                break;
              }
            }

            r = 1;
          } else {
            printf(" OK ] ");
          }
          free(tdata);
        }
      }
    }
    free(cdata);
    printf("\n");
  }

  ALIGNED_FREE(idata);

  return r;
}