Exemple #1
0
/*
 * Build the table of quantisation matrices for quantiser indices 0..qindex_max.
 *
 * One QuantisationMatrix is produced per index q. For every level l in
 * [0, depth] it holds four factor vectors and four offset vectors; each vector
 * is a single 32-bit value replicated with _mm_set1_epi32().
 *
 * The index fed to quant_factor()/quant_offset() is q minus the matching entry
 * of DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][l][s],
 * clamped so it never goes below zero. The stored offset is quant_offset() + 2.
 *
 * Logs an error and throws VC2DECODER_NOTIMPLEMENTED when depth is above 4.
 *
 * The matrix array, the two pointer tables and the two aligned value stores are
 * all allocated here and handed to the caller; nothing in this function
 * releases them.
 */
VC2HQDECODE_API QuantisationMatrix *quantisation_matrices(uint32_t wavelet_index, int depth, int qindex_max) {
	if (depth > 4) {
		writelog(LOG_ERROR, "%s:%d:  Could not form quantisation matrices, depth greater than 4 not supported\n", __FILE__, __LINE__);
		throw VC2DECODER_NOTIMPLEMENTED;
	}

	const int levels  = depth + 1;       /* levels stored per quantiser index */
	const int indices = qindex_max + 1;  /* number of quantiser indices */

	QuantisationMatrix *table = new QuantisationMatrix[indices];

	/* Per-level pointer tables, one run of `levels` pointers per index. */
	__m128i **factor_rows = new __m128i*[levels*indices];
	__m128i **offset_rows = new __m128i*[levels*indices];

	/* 16-byte aligned backing stores: four vectors per (index, level) pair. */
	__m128i *factor_store = (__m128i *)ALIGNED_ALLOC(16, 4 * levels*indices*sizeof(__m128i));
	__m128i *offset_store = (__m128i *)ALIGNED_ALLOC(16, 4 * levels*indices*sizeof(__m128i));

	for (int q = 0; q < indices; q++) {
		QuantisationMatrix &entry = table[q];
		entry.qfactor = factor_rows + levels*q;
		entry.qoffset = offset_rows + levels*q;

		for (int l = 0; l < levels; l++) {
			const int first = 4 * (levels*q + l);
			entry.qfactor[l] = factor_store + first;
			entry.qoffset[l] = offset_store + first;

			for (int s = 0; s < 4; s++) {
				/* Adjusted index, clamped at zero. */
				int qi = 0;
				if (q > DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][l][s]) {
					qi = q - DEFAULT_QUANTISATION_MATRIX_ADJUSTMENTS[wavelet_index][depth][l][s];
				}
				entry.qfactor[l][s] = _mm_set1_epi32(quant_factor(qi));
				entry.qoffset[l][s] = _mm_set1_epi32(quant_offset(qi) + 2);
			}
		}
	}

	return table;
}
/*
 * Run one inverse horizontal transform test case.
 *
 * The C implementation selected by get_invhtransform_c() is applied to a
 * private copy of idata_pre to produce the reference picture. When SSE4.2 is
 * both available (HAS_SSE4_2) and enabled for this case (data.SSE4_2), the
 * SSE4.2 implementation is applied to a second copy and compared byte for
 * byte against the reference. If the SSE4.2 lookup returns the same function
 * as the C lookup, the comparison is skipped and "NONE" is printed.
 *
 * Returns 0 when the case passes (or nothing needed comparing), 1 when
 * get_invhtransform_c() throws or the two outputs differ.
 * HAS_AVX and HAS_AVX2 are accepted but not used.
 */
int perform_invhtransformtest(invhtransformtest_data &data,
                              void *idata_pre,
                              const int width,
                              const int height,
                              const int stride,
                              bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) {
  (void)HAS_AVX;
  (void)HAS_AVX2;

  int result = 0;
  const size_t nbytes = height*stride*data.sample_size;

  printf("%-20s: H %d/%d  ", VC2DecoderWaveletFilterTypeString[data.wavelet], data.level, data.depth);
  printf("%s", (data.sample_size == 2) ? "16-bit " : "32-bit ");

  /* Reference output comes from the C version */
  printf("C [ ");
  void *expected = ALIGNED_ALLOC(32, nbytes);
  memcpy(expected, idata_pre, nbytes);

  InplaceTransform ctrans = NULL;
  bool have_reference = true;
  try {
    ctrans = get_invhtransform_c(data.wavelet, data.level, data.depth, data.sample_size);
  } catch(...) {
    printf(" NONE ]");
    result = 1;
    have_reference = false;
  }

  if (have_reference) {
    printf("EXISTS ] ");
    ctrans(expected, stride, width, height);

    /* Compare the SSE4_2 version against the reference */
    if (HAS_SSE4_2 && data.SSE4_2) {
      printf("SSE4.2 [");
      InplaceTransform trans = get_invhtransform_sse4_2(data.wavelet, data.level, data.depth, data.sample_size);
      if (trans != ctrans) {
        void *actual = ALIGNED_ALLOC(32, nbytes);
        memcpy(actual, idata_pre, nbytes);
        trans(actual, stride, width, height);
        if (memcmp(expected, actual, nbytes) == 0) {
          printf(" OK ] ");
        } else {
          printf("FAIL]\n");
          result = 1;
        }
        ALIGNED_FREE(actual);
      } else {
        /* Same function pointer: no separate SSE4.2 variant to test */
        printf("NONE]");
      }
    }
  }

  printf("\n");

  ALIGNED_FREE(expected);

  return result;
}
Exemple #3
0
// Open the Ogg Vorbis file at resource.getPath() and set up decoding buffers.
//
// Allocates the decode buffer sized for VPBUFFER_FRAMES frames of the stream's
// channel count, records that size in half_buffer_size, hands the stream's
// sample rate and channel count to the owner via setOutBuffers(), and then
// zero-fills both of the output buffers returned in bout.
//
// Returns 0 on success, -1 when ov_fopen() reports an error.
int OGGDecoder::open(VPResource resource)
{
    if (ov_fopen(resource.getPath().c_str(),&vf) < 0) {
        DBG("open fail");
        return -1;
    }

    // channel count of logical bitstream 0, used for all sizing below
    const int channels = ov_info(&vf,0)->channels;

    buffer = (float *) ALIGNED_ALLOC(sizeof(float)*VPBUFFER_FRAMES*channels);
    half_buffer_size = VPBUFFER_FRAMES*channels*sizeof(float);

    // describe the stream format; the owner supplies the actual buffers
    VPBuffer format;
    format.srate = ov_info(&vf,0)->rate;
    format.chans = channels;
    format.buffer[0] = NULL;
    format.buffer[1] = NULL;

    owner->setOutBuffers(&format,&bout);

    // start from silence in both output buffers
    for (unsigned idx=0; idx<VPBUFFER_FRAMES*bout->chans; ++idx) {
        bout->buffer[0][idx]=0.0f;
        bout->buffer[1][idx]=0.0f;
    }

    return 0;
}
Exemple #4
0
/*
 * Return the calling thread's ptst_t and call gc_enter() on it.
 *
 * The first time a thread calls this (no value stored under
 * gc_global->ptst_key) a ptst_t is obtained for it: either an existing list
 * entry whose count is 0 is claimed, or a new one is allocated, initialised
 * and linked onto gc_global->ptst_list. The result is then stored with
 * pthread_setspecific() so later calls from the same thread take the short
 * path.
 *
 * Calls exit(1) if a new ptst_t is needed and the allocation fails.
 */
ptst_t *critical_enter(gc_global_t *gc_global)
{
    ptst_t *ptst, *next, *new_next;
#ifdef NEED_ID
    unsigned int id, oid;
#endif

    /* Short path: this thread already has a ptst_t stored under the key. */
    ptst = (ptst_t *)pthread_getspecific(gc_global->ptst_key);
    if ( ptst == NULL )
    {
        /*
         * Look for an existing entry with count == 0 and try to claim it by
         * swapping count from 0 to 1.
         * NOTE(review): presumably CASIO is a compare-and-swap that returns
         * the previous value, so "== 0" means the claim succeeded - confirm.
         */
        for ( ptst = _ptst_first(gc_global); ptst != NULL; ptst = ptst_next(ptst) )
        {
            if ( (ptst->count == 0) && (CASIO(&ptst->count, 0, 1) == 0) )
            {
                break;
            }
        }

        /* Nothing could be claimed: create and publish a fresh entry. */
        if ( ptst == NULL )
        {
            ptst = ALIGNED_ALLOC(sizeof(*ptst));
            if ( ptst == NULL ) exit(1);
            memset(ptst, 0, sizeof(*ptst));
            ptst->gc = gc_init(gc_global);
            rand_init(ptst);
            ptst->count = 1;
#ifdef NEED_ID
            /* Take the current next_id and advance it; retry with the value
             * CASIO hands back until the swap goes through. */
            id = gc_global->next_id;
            while ( (oid = CASIO(&gc_global->next_id, id, id+1)) != id ) id = oid;
            ptst->id = id;
#endif
            /*
             * Push onto the head of gc_global->ptst_list: point our next at
             * the head we last saw, then try to swing the head to us; repeat
             * with the newly observed head while the swap reports a different
             * value than expected. WMB_NEAR_CAS() sits between the store to
             * ptst->next and the swap.
             */
            new_next = gc_global->ptst_list;
            do {
                ptst->next = next = new_next;
                WMB_NEAR_CAS();
            }
            while ( (new_next = CASPO(&gc_global->ptst_list, next, ptst)) != next );
        }

        /* Remember the entry for this thread. */
        pthread_setspecific(gc_global->ptst_key, ptst);
    }

    gc_enter(ptst);
    return(ptst);
}
Exemple #5
0
void TranspositionTable::resize(size_t mbSize) {

  size_t newClusterCount = size_t(1) << msb((mbSize * 1024 * 1024) / sizeof(Cluster));

  if (newClusterCount == clusterCount)
    return;

  clusterCount = newClusterCount;

  if (table) {
    ALIGNED_FREE(table);
    table = nullptr;
  }
  table = (Cluster*)ALIGNED_ALLOC(
    sizeof(Cluster), clusterCount * sizeof(Cluster));

  if (!table)
  {
    SYNCCOUT << "info string Failed to allocate " << mbSize
      << "MB for transposition table." << SYNCENDL;
    exit(EXIT_FAILURE);
  }
}
Exemple #6
0
/**
 * Initialise the cascaded-shadow-map (csm) render path.
 *
 * Allocates and zeroes the global g_csm state, then creates in order: the
 * shadow render targets, (only when ENG_FLAG_DEV is set) the preview shaders,
 * preview render targets and the "gfx_debugcsm" console command, the csm
 * shaders, an optional shared uniform buffer (only when
 * GFX_FEATURE_RANGED_CBUFFERS is supported), the constant blocks and skinning
 * tbuffer, and finally the render states.
 *
 * @param width  not referenced by this function
 * @param height not referenced by this function
 * @return RET_OK on success, RET_OUTOFMEMORY if g_csm cannot be allocated,
 *         RET_FAIL (after err_print) if any later step fails.
 *
 * NOTE(review): failure paths return without releasing g_csm or anything
 * created so far; presumably the caller runs the matching release routine on
 * failure - confirm.
 */
result_t gfx_csm_init(uint width, uint height)
{
    result_t r;

    log_printf(LOG_INFO, "init csm render-path ...");

    struct allocator* lsr_alloc = eng_get_lsralloc();
    struct allocator* tmp_alloc = tsk_get_tmpalloc(0);

    g_csm = (struct gfx_csm*)ALIGNED_ALLOC(sizeof(struct gfx_csm), MID_GFX);
    if (g_csm == NULL)
        return RET_OUTOFMEMORY;
    memset(g_csm, 0x00, sizeof(struct gfx_csm));

    /* render targets and buffers */
    r = csm_create_shadowrt(CSM_SHADOW_SIZE, CSM_SHADOW_SIZE);
    if (IS_FAIL(r)) {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create shadow map buffers");
        return RET_FAIL;
    }

    /* developer-only preview resources */
    if (BIT_CHECK(eng_get_params()->flags, ENG_FLAG_DEV))   {
        if (!csm_load_prev_shaders(lsr_alloc))  {
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not load preview shaders");
            return RET_FAIL;
        }

        r = csm_create_prevrt(CSM_PREV_SIZE, CSM_PREV_SIZE);
        if (IS_FAIL(r)) {
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create prev buffers");
            return RET_FAIL;
        }

        /* console commands */
        con_register_cmd("gfx_debugcsm", csm_console_debugcsm, NULL, "gfx_debugcsm [1*/0]");
    }

    /* shaders */
    if (!csm_load_shaders(lsr_alloc))   {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not load shaders");
        return RET_FAIL;
    }

    /* cblocks */
    if (gfx_check_feature(GFX_FEATURE_RANGED_CBUFFERS)) {
        g_csm->sharedbuff = gfx_sharedbuffer_create(GFX_DEFAULT_RENDER_OBJ_CNT*GFX_INSTANCES_MAX*48);
        if (g_csm->sharedbuff == NULL)  {
            /* message previously named the deferred path, which misattributed the failure */
            err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create uniform buffer");
            return RET_FAIL;
        }
    }

    /* sharedbuff stays NULL (from the memset) when ranged cbuffers are unsupported */
    g_csm->cb_frame = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame", NULL);
    g_csm->cb_xforms = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_xforms", g_csm->sharedbuff);
    g_csm->cb_frame_gs = gfx_shader_create_cblock(lsr_alloc, tmp_alloc,
        gfx_shader_get(g_csm->shaders[0].shader_id), "cb_frame_gs", NULL);
    g_csm->tb_skins = gfx_shader_create_cblock_tbuffer(mem_heap(),
        gfx_shader_get(gfx_csm_getshader(CMP_OBJTYPE_MODEL, GFX_RPATH_SKINNED | GFX_RPATH_CSMSHADOW)),
        "tb_skins", sizeof(struct vec4f)*3*GFX_INSTANCES_MAX*GFX_SKIN_BONES_MAX);
    if (g_csm->cb_frame == NULL || g_csm->cb_xforms == NULL || g_csm->cb_frame_gs == NULL ||
        g_csm->tb_skins == NULL)
    {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create cblocks");
        return RET_FAIL;
    }

    /* states */
    r = csm_create_states();
    if (IS_FAIL(r)) {
        err_print(__FILE__, __LINE__, "gfx-csm init failed: could not create states");
        return RET_FAIL;
    }

    g_csm->shadowmap_size = (float)CSM_SHADOW_SIZE;

    return RET_OK;
}
Exemple #7
0
// Open and configure the "default" ALSA playback device for the format of
// `in`, set up sample-rate conversion and output buffers, start the inotify
// watch used for auto pause, and launch the worker thread.
//
// v   player that owns this plugin (stored in `owner`)
// in  input buffer description; its chans and srate drive the configuration
//
// Returns 0 on success and -1 when the hardware parameters cannot be applied
// or the sample-rate converter cannot be created. If the PCM device cannot be
// opened the process exits.
//
// The sample format is the widest of S32, S24, S16, S8 present in the format
// mask, and `multiplier` is set to that format's largest positive sample
// value. If none of the four is present neither the format nor `multiplier`
// is set here.
int VPOutPluginAlsa::init(VPlayer *v, VPBuffer *in)
{
    DBG("Alsa:init");
    owner = v;
    bin = in;
    if (snd_pcm_open(&handle, "default", SND_PCM_STREAM_PLAYBACK, SND_PCM_NO_AUTO_RESAMPLE) < 0){
        DBG("Alsa:init: failed to open pcm");
        // NOTE(review): this terminates the whole process with status 0, so
        // the return below is never reached - confirm that is intended.
        exit(0);
        return -1;
    }

    // start playback once the buffer holds all but one period
    snd_pcm_sw_params_t *swparams;
    snd_pcm_sw_params_malloc(&swparams);
    snd_pcm_sw_params_current (handle, swparams);
    snd_pcm_sw_params_set_start_threshold (handle, swparams, VPBUFFER_FRAMES - PERIOD_SIZE);
    snd_pcm_sw_params (handle, swparams);
    snd_pcm_sw_params_free(swparams);

    // NOTE(review): `params` is not declared in this function, and the
    // *_alloca helper presumably uses stack storage that ends when init()
    // returns - verify nothing reads `params` afterwards.
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(handle, params);
    snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    snd_pcm_format_mask_t *mask;
    snd_pcm_format_mask_alloca(&mask);
    snd_pcm_hw_params_get_format_mask(params, mask);
    if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S32))
    {
        DBG("bit depth is 32");
        snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S32);
        // 2^31 - 1 written as a literal: (1<<31) - 1 overflows a 32-bit int
        multiplier = 0x7FFFFFFF;
        DBG(multiplier);
    }
    else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S24))
    {
        DBG("bit depth is 24");
        snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S24);
        multiplier = (1<<23) -1;
    }
    else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S16))
    {
        DBG("bit depth is 16");
        snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16);
        multiplier = (1<<15) -1;
    }
    else if (snd_pcm_format_mask_test(mask, SND_PCM_FORMAT_S8))
    {
        DBG("bit depth is 8");
        snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S8);
        multiplier = (1<<7) -1;
    }

    snd_pcm_hw_params_set_channels(handle, params, bin->chans);

    snd_pcm_hw_params_set_period_size(handle, params, PERIOD_SIZE, 0);

    if (snd_pcm_hw_params(handle, params) < 0) {
        DBG("Alsa:init: failed to set pcm params");
        return -1;
    }

    // read back the rate the device actually settled on
    snd_pcm_hw_params_current(handle, params);
    int dir;
    snd_pcm_hw_params_get_rate(params, &out_srate, &dir);
    in_srate = bin->srate;
    int rerr;
    rs = src_new(SRC_SINC_FASTEST, bin->chans, &rerr);
    if (!rs){
        DBG("SRC error"<<rerr);
        return -1;
    }

    // output buffers are sized at twice the resampled frame count
    rd.src_ratio = (out_srate*1.0)/(in_srate*1.0);
    out_frames = (VPBUFFER_FRAMES*rd.src_ratio)*2;

    out_buf = (float *)ALIGNED_ALLOC(sizeof(float)*out_frames*bin->chans);
    out_buf_i = (int *)ALIGNED_ALLOC(sizeof(int)*out_frames*bin->chans);

    DBG("target rate "<<out_srate);
    work = true;
    paused = false;
    pause_check = false;

    FULL_MEMORY_BARRIER;
    in_fd = inotify_init();
    if ( in_fd < 0 ) {
        DBG("error initializing inotify, auto pause won't work");
    } else {
        in_wd[0]=inotify_add_watch( in_fd, "/dev/snd/pcmC0D0p", IN_OPEN | IN_CLOSE );
        // only touch the descriptor when inotify_init() actually returned one
        fcntl(in_fd, F_SETFL, O_NONBLOCK);
    }

    worker = new std::thread((void(*)(void*))worker_run, this);
    worker->high_priority();
    DBG("alsa thread made");
    DBG((void *)VPOutPluginAlsa::check_contention);
    VPEvents::getSingleton()->schedulerAddJob((VPEvents::VPJob) VPOutPluginAlsa::check_contention, this,0);
    return 0;
}
Exemple #8
0
// Hand the decoder its output buffer description and, when the stream format
// changed, rebuild the whole output chain.
//
// outprop  format requested by the decoder (srate, chans); on a rebuild its
//          buffer[] and cursor fields are filled in as well
// out      receives the address of the player's own bout
//
// The cursor and both sample counters are always reset. If the sample rate and
// channel count match the current bout, nothing else is touched ("gapless").
// Otherwise the old output plugin is deleted, the old buffers are freed, a new
// output plugin and new buffers are created, every effect is re-initialised in
// chain order (inactive ones are finalised again straight away), and the
// output plugin is initialised on a copy of the last effect's output buffer.
void VPlayer::setOutBuffers(VPBuffer *outprop, VPBuffer **out)
{
    *out = &bout;
    bufferCursor = 0;
    bufferSamples[0] = 0;
    bufferSamples[1] = 0;

    const bool same_format = (bout.srate == outprop->srate) && (bout.chans == outprop->chans);
    if (same_format) {
        DBG("gapless");
        return;
    }

    // drop the old output plugin first, then the buffers
    delete vpout;
    vpout = NULL;

    if (bout.buffer[0] != NULL) {
        for (int k = 0; k < 2; k++) {
            ALIGNED_FREE(bout.buffer[k]);
            bout.buffer[k] = NULL;
        }
    }

    vpout = VPOutFactory::getSingleton()->create();
    assert(vpout);

    const size_t buffer_bytes = sizeof(float)*VPBUFFER_FRAMES*outprop->chans;
    outprop->buffer[0] = (float*)ALIGNED_ALLOC(buffer_bytes);
    outprop->buffer[1] = (float*)ALIGNED_ALLOC(buffer_bytes);
    outprop->cursor = &bufferCursor;

    assert(outprop->buffer[0] && outprop->buffer[1]);

    bout.chans = outprop->chans;
    bout.srate = outprop->srate;
    bout.buffer[0] = outprop->buffer[0];
    bout.buffer[1] = outprop->buffer[1];
    bout.cursor = &bufferCursor;

    bufferCursor = 0;

    DBG("track chs: "<<bout.chans);
    DBG("track rate: "<<bout.srate);

    // bout - source to first dsp, output of input plugin
    // bin - source to vpout

    // chain_end starts at bout; each effect's init() may redirect it
    VPBuffer *chain_end = &bout;
    DBG(eff_count);
    for (int e = 0; e < eff_count; e++) {
        effects[e].in = chain_end;
        effects[e].eff->init(this, chain_end, &chain_end);
        effects[e].out = chain_end;
        if (!effects[e].active) {
            effects[e].eff->finit();
        }
    }
    memcpy(&bin, chain_end, sizeof(VPBuffer));

    vpout->init(this, &bin);
}
/*
 * Run all inverse transform test tables against shared input pictures.
 *
 * A 1920x1080 16-bit picture (row stride rounded up to a multiple of 1024
 * samples) is filled by randomiser(), scaled down by two bits, and widened
 * into a 32-bit copy. Each entry of INVHTRANSFORMTEST_DATA,
 * INVHTRANSFORMFINALTEST_DATA and INVVTRANSFORMTEST_DATA is then run with the
 * picture matching its sample_size. Testing stops at the first failing entry.
 *
 * Returns 0 when every entry passes, otherwise the non-zero result of the
 * first failure; returns 1 if the input pictures cannot be allocated or
 * filled. Both input pictures are released on every path.
 */
int test_invtransform(bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) {
  printf("--------------------------------------------------------------------------------\n");
  printf("  Inverse Transform Tests\n");
  printf("\n");
  /* Load some input data for the tests */
  const int height = 1080;
  const int width  = 1920;
  const int stride = ((width + 1023)/1024)*1024;
  void *idata16 = ALIGNED_ALLOC(32, height*stride*sizeof(int16_t));
  void *idata32 = ALIGNED_ALLOC(32, height*stride*sizeof(int32_t));

  if (idata16 == NULL || idata32 == NULL) {
    printf("Error Allocating Test Data\n");
    if (idata16 != NULL) {
      ALIGNED_FREE(idata16);
    }
    if (idata32 != NULL) {
      ALIGNED_FREE(idata32);
    }
    return 1;
  }

  if (!randomiser((char *)idata16, height*stride*sizeof(int16_t))) {
    printf("Error Getting Random Data\n");
    /* release the pictures instead of leaking them on this early exit */
    ALIGNED_FREE(idata16);
    ALIGNED_FREE(idata32);
    return 1;
  }

  /* Need to make sure the samples aren't so large they overflow during the calculations */
  int16_t *samples16 = (int16_t *)idata16;
  int32_t *samples32 = (int32_t *)idata32;
  for (int y = 0; y < height; y++) {
    for (int x = 0; x < stride; x++) {
      samples16[y*stride + x] >>= 2;
      samples32[y*stride + x] = samples16[y*stride + x];
    }
  }

  int r = 0;
  for (int i = 0; !r && i < INVHTRANSFORMTEST_DATA_NUM; i++) {
    void * idata = (INVHTRANSFORMTEST_DATA[i].sample_size == 2)?idata16:idata32;
    r = perform_invhtransformtest(INVHTRANSFORMTEST_DATA[i],
                                  idata,
                                  width,
                                  height,
                                  stride,
                                  HAS_SSE4_2, HAS_AVX, HAS_AVX2);
  }

  for (int i = 0; !r && i < INVHTRANSFORMFINALTEST_DATA_NUM; i++) {
    void * idata = (INVHTRANSFORMFINALTEST_DATA[i].sample_size == 2)?idata16:idata32;
    r = perform_invhtransformfinaltest(INVHTRANSFORMFINALTEST_DATA[i],
                                       idata,
                                       width,
                                       height,
                                       stride,
                                       HAS_SSE4_2, HAS_AVX, HAS_AVX2);
  }

  for (int i = 0; !r && i < INVVTRANSFORMTEST_DATA_NUM; i++) {
    void * idata = (INVVTRANSFORMTEST_DATA[i].sample_size == 2)?idata16:idata32;
    r = perform_invvtransformtest(INVVTRANSFORMTEST_DATA[i],
                                  idata,
                                  width,
                                  height,
                                  stride,
                                  HAS_SSE4_2, HAS_AVX, HAS_AVX2);
  }

  ALIGNED_FREE(idata16);
  ALIGNED_FREE(idata32);

  printf("--------------------------------------------------------------------------------\n");
  return r;
}
/*
 * Scan a 16-bit output picture produced by a "final" horizontal transform.
 *
 * Samples inside the active window (rows [top, height-bottom), columns
 * [left, width-right)) must be below 1 << active_bits; every other sample in
 * the height x stride buffer must still be zero. Rows are scanned top to
 * bottom and each row left to right.
 *
 * Returns NULL when the picture is clean, otherwise the diagnostic line for
 * the first offending sample.
 */
static const char *invhfinal_find_output_error(const uint16_t *out,
                                               const int width, const int height, const int stride,
                                               const int left, const int right,
                                               const int top, const int bottom,
                                               const int active_bits) {
  static const char *const WROTE_OUTSIDE = "   c function writes outside of specified memory area!\n";
  static const char *const NOT_CLIPPED   = "   c function does not clip values to correct number of bits!\n";

  const int limit = 1 << active_bits;

  for (int y = 0; y < height; y++) {
    const uint16_t *row = out + y*stride;
    const bool active_row = (y >= top) && (y < height - bottom);
    for (int x = 0; x < stride; x++) {
      const bool active = active_row && (x >= left) && (x < width - right);
      if (active) {
        if (row[x] >= limit)
          return NOT_CLIPPED;
      } else if (row[x] != 0) {
        return WROTE_OUTSIDE;
      }
    }
  }
  return NULL;
}

/*
 * Run one "final" inverse horizontal transform test case over every
 * combination of 0/32-sample margins on the four picture edges.
 *
 * For each combination the C implementation writes into a zeroed 16-bit
 * output picture. The test then checks that the input was left untouched,
 * that nothing outside the requested window was written, and that values
 * inside it fit in data.active_bits. When SSE4.2 is available (HAS_SSE4_2)
 * and enabled for the case (data.SSE4_2), the SSE4.2 implementation is run
 * the same way and its output compared byte for byte with the C output.
 *
 * Returns 0 if every combination passes and 1 at the first failure
 * (including allocation failure or get_invhtransformfinal_c() throwing).
 * Exceptions from get_invhtransformfinal_sse4_2() are not caught here.
 * HAS_AVX and HAS_AVX2 are accepted but not used.
 */
int perform_invhtransformfinaltest(invhtransformfinaltest_data &data,
                                   void *idata_pre,
                                   const int width,
                                   const int height,
                                   const int stride,
                                   bool HAS_SSE4_2, bool HAS_AVX, bool HAS_AVX2) {
  int r = 0;
  (void)HAS_AVX;(void)HAS_AVX2;

  const size_t in_bytes  = height*stride*data.sample_size;
  const size_t out_bytes = height*stride*sizeof(uint16_t);

  void *idata = ALIGNED_ALLOC(32, in_bytes);
  if (idata == NULL) {
    printf("Error Allocating Test Data\n");
    return 1;
  }
  memcpy(idata, idata_pre, in_bytes);

  /* { left, right, top, bottom }: all 16 combinations of 0 and 32 */
  struct offsets_t offsets[] = { {  0,  0,  0,  0 },
                                 { 32,  0,  0,  0 },
                                 {  0, 32,  0,  0 },
                                 { 32, 32,  0,  0 },
                                 {  0,  0, 32,  0 },
                                 {  0,  0,  0, 32 },
                                 {  0,  0, 32, 32 },
                                 { 32,  0, 32,  0 },
                                 { 32,  0,  0, 32 },
                                 { 32,  0, 32, 32 },
                                 {  0, 32, 32,  0 },
                                 {  0, 32,  0, 32 },
                                 {  0, 32, 32, 32 },  /* was a second { 0, 0, 32, 32 } */
                                 { 32, 32, 32,  0 },
                                 { 32, 32,  0, 32 },
                                 { 32, 32, 32, 32 },
  };
  const int num_offsets = (int)(sizeof(offsets)/sizeof(offsets[0]));

  for (int i = 0; r==0 && i < num_offsets; i++) {
    char *cdata = (char *)calloc(out_bytes, 1);
    if (cdata == NULL) {
      printf("Error Allocating Test Data\n");
      r = 1;
      break;
    }

    printf("%-20s: H 0/* (%2d,%2d,%2d,%2d) (active %d-bit)  ", VC2DecoderWaveletFilterTypeString[data.wavelet], offsets[i].left, offsets[i].right, offsets[i].top, offsets[i].bottom, data.active_bits);
    if (data.sample_size == 2)
      printf("16-bit ");
    else
      printf("32-bit ");

    /* Use C version to generate comparison value */
    printf("C [ ");
    InplaceTransformFinal ctrans = NULL;
    try {
      ctrans = get_invhtransformfinal_c(data.wavelet, data.active_bits, data.sample_size);
    } catch(...) {
      printf(" NONE ]");
      r = 1;
      free(cdata);
      break;
    }

    ctrans(idata, stride, cdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom);
    if (memcmp(idata, idata_pre, in_bytes) != 0) {
      printf(" BAD  ]\n");
      printf("   c function overwrites input data!\n");
      r = 1;
      free(cdata);
      printf("\n");
      break;
    }

    /*
     * Stop at the first bad sample and leave the test. The output buffer is
     * freed exactly once and never read again after that.
     */
    const char *problem = invhfinal_find_output_error((const uint16_t *)cdata,
                                                      width, height, stride,
                                                      offsets[i].left, offsets[i].right,
                                                      offsets[i].top, offsets[i].bottom,
                                                      data.active_bits);
    if (problem != NULL) {
      printf(" BAD  ]\n");
      printf("%s", problem);
      r = 1;
      free(cdata);
      printf("\n");
      break;
    }
    printf(" GOOD ] ");

    /* Test SSE4_2 version */
    if (HAS_SSE4_2 && data.SSE4_2) {
      printf("SSE4.2 [");
      InplaceTransformFinal trans = get_invhtransformfinal_sse4_2(data.wavelet, data.active_bits, data.sample_size);
      if (trans == ctrans) {
        printf("NONE]");
      } else {
        char *tdata = (char *)calloc(out_bytes, 1);
        if (tdata == NULL) {
          printf("Error Allocating Test Data\n");
          r = 1;
        } else {
          trans(idata, stride, tdata + (offsets[i].top*stride + offsets[i].left)*2, stride, width, height, offsets[i].left, offsets[i].top, width - offsets[i].left - offsets[i].right, height - offsets[i].top - offsets[i].bottom);
          if (memcmp(cdata, tdata, out_bytes)) {
            printf("FAIL]\n");

            /* report where the two outputs first diverge */
            for (int b = 0; b < (int)out_bytes; b++) {
              if (cdata[b] != tdata[b]) {
                printf("\nFirst difference at byte %d, 0x%02x =/= 0x%02x\n\n", b, ((uint8_t *)cdata)[b], ((uint8_t *)tdata)[b]);
                break;
              }
            }

            r = 1;
          } else {
            printf(" OK ] ");
          }
          free(tdata);
        }
      }
    }
    free(cdata);
    printf("\n");
  }

  ALIGNED_FREE(idata);

  return r;
}