示例#1
0
/**
 * Runs the Hough pipeline on @p image and replaces it with a picture of the
 * strongest detected line.
 *
 * Steps, in order: BW(image), Accu(image), Blur(Accu.Accumulator), then
 * Accu.Maximum() is queried for the best (R, Fi) pair.  A white image of the
 * same size is created and every pixel whose rounded normal distance
 *     r = x*cos(Fi-90 deg) + y*sin(Fi-90 deg)
 * equals R is painted black.  Progress markers are printed to std::cout after
 * each stage.
 *
 * @param image  Input image; overwritten with the black-on-white result.
 * @return Always 0.
 */
int AlgorithmHough::DoYourJob(Magick::Image& image)
{
    BW(image);
    std::cout<<"BW"<<std::endl;
    Accu(image);
    std::cout<<"Ac"<<std::endl;
    Blur(Accu.Accumulator);
    std::cout<<"Bl"<<std::endl;

    const HoughResult best=Accu.Maximum();
    std::cout<<"Max"<<std::endl;

    Magick::Image result(image.size(), Magick::Color("white"));

    result.modifyImage();
    Magick::Pixels pixelCache(result);
    Magick::PixelPacket* pixels=pixelCache.set(0,0,image.columns(),image.rows());

    const int width=static_cast<int>(image.columns());
    const int height=static_cast<int>(image.rows());

    // The angle is the same for every pixel, so evaluate the trigonometry once.
    // NOTE(review): 3.14 is kept on purpose - the accumulator presumably uses
    // the same approximation of pi; change both together or not at all.
    const double fi_f=best.Fi-90;
    const double cosFi=cos(fi_f*3.14/180.0);
    const double sinFi=sin(fi_f*3.14/180.0);

    const int black=0;
    const int white=(1<<QuantumDepth)-1;

    // Row-major traversal matches the pixel layout (index = x + y*width).
    for(int y=0; y<height; y++)
        for(int x=0; x<width; x++)
        {
            const double r_=double(x)*cosFi+double(y)*sinFi;

            // Negative distances never belong to the line.  They are still
            // written (as white) because the region was obtained with set(),
            // which does not load the existing pixels - skipping them would
            // leave their content unspecified.
            const bool onLine=(r_>=0) && (round(r_)==best.R);

            Magick::PixelPacket& px=pixels[static_cast<std::size_t>(y)*width+x];
            px.red=px.blue=px.green=(onLine ? black : white);
        }

    pixelCache.sync();

    image=result;

    std::cout<<"R: "<<best.R<<" FI: "<<best.Fi<<" Value: "<<best.Value<<std::endl;
    return 0;
}
示例#2
0
  void ff(TextureQuantizeRAW,format,A)(RESOURCEINFO &texo, RESOURCEINFO &texd, ULONG *texs, ULONG *texr, int level, int l) {
    /* square dimension of this surface-level */
    /* square area of this surface-level */
    const int lv = (1 << l);
    const int av = lv * lv;

    /* ------------------------------------------------------------------------------------------------------- */
    const int NORMALS_SCALEBYLEVEL = ::NORMALS_SCALEBYLEVEL;
    const int  ALPHAS_SCALEBYLEVEL =  ::ALPHAS_SCALEBYLEVEL;
    const float colorgamma       = ::colorgamma;
    const float alphacontrast    = ::alphacontrast;
    const float colorgammainv    = ::colorgammainv;
    const float alphacontrastinv = ::alphacontrastinv;

    int iwidth  = texo.Width;
    int iheight = texo.Height;
    int owidth  = texd.Width;
    int oheight = texd.Height;
    int cwidth  = owidth;
    int cheight = oheight;

    /* get the data back to the CPU */

#if	(TCOMPRESS_CHANNELS(format) == 4)
    /* ABGR -> ARGB */ cwidth = (cwidth +  0) >> 0; /* 1x LONG to 1x LONG */
#elif	(TCOMPRESS_CHANNELS(format) == 3)
    /* -BGR -> -RGB */ cwidth = (cwidth +  1) >> 1; /* 1x LONG to 1x SHORT */
#elif	(TCOMPRESS_CHANNELS(format) == 2)
    /* LA-- -> AL-- */ cwidth = (cwidth +  3) >> 2; /* 1x LONG to 1x CHAR */
#elif	(TCOMPRESS_CHANNELS(format) == 1)
    /* A--- -> A--- */ cwidth = (cwidth + 31) >> 5; /* 8x LONG to 1x CHAR */
#else
#error
#endif

    /* ensure tile ability (bit on overhead for non-4 resolutions) */
    owidth  = (owidth  + (TX - 1)) & (~(TX - 1));
    oheight = (oheight + (TY - 1)) & (~(TY - 1));

    assert((owidth  & (TX - 1)) == 0);
    assert((oheight & (TY - 1)) == 0);

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
    /* get a two-dimensional extend over the whole output (without re-cast to LONG),
     * then get a tile-extend over that one ()
     */
    Concurrency::extent<2> ee(oheight, owidth);
    Concurrency::tiled_extent<TY, TX> te(ee);

    Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs);
    Concurrency::array_view<      unsigned int, 2> dArr(cheight, cwidth, (      unsigned int *)texr);

    Concurrency::parallel_for_each(te /*dArr.extent.tile<TY, TX>(osize)*/, [=](tiled_index<TY, TX> elm) restrict(amp) {
      typedef type accu[DIM];

      /* tile static memory */
//    tile_static UTYPE bTex[2][TY][TX];
      tile_static int   bTex[2][TY][TX];
      tile_static type  fTex[2][TY][TX][DIM];

//    const int y = elm.global[0] - ly;
//    const int x = elm.global[1] - lx;
      const int y = elm.tile[0] * TY;
      const int x = elm.tile[1] * TX;
      const int ly = elm.local[0];
      const int lx = elm.local[1];
#else
    array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs);
    array_view<      unsigned int, 2> dArr(cheight, cwidth, (      unsigned int *)texr, true);

    for (int groupsy = 0; groupsy < (owidth  / TY); groupsy++)
    for (int groupsx = 0; groupsx < (oheight / TX); groupsx++) {
      typedef type accu[DIM];

      /* tile static memory */
//    UTYPE bTex[2][TY][TX];
      int   bTex[2][TY][TX];
      type  fTex[2][TY][TX][DIM];

    for (int tiley = 0; tiley < TY; tiley++)
    for (int tilex = 0; tilex < TX; tilex++)
    {
      const int y = groupsy * TY;
      const int x = groupsx * TX;
      const int ly = tiley;
      const int lx = tilex;
#endif
      /* generate this level's 4x4-block from the original surface */
      {
	const int yl = ((y + ly) << l);
	const int xl = ((x + lx) << l);

	accu tt; tt[0] = tt[1] = tt[2] = tt[3] = tt[4] = tt[5] = tt[6] = tt[7] = 0;

	/* access all pixels this level's 4x4-block represents in
	 * the full dimension original surface (high quality mip-mapping)
	 */
	for (int oy = 0; oy < lv; oy += 1)
	for (int ox = 0; ox < lv; ox += 1) {
	  /* assume seamless tiling: wrap pixels around */
	  const int posx = (xl + ox) % iwidth;
	  const int posy = (yl + oy) % iheight;

	  const ULONG &t = sArr(posy, posx);

	  Accu(tt, t);	// +=
	}

	/* build average of each channel */
	Norm(fTex[0][ly][lx], tt, av, level, l);
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      tile_static accu tr; tr[(ly * TX + lx) & 7] = 0;

      tile_static_memory_fence(elm.barrier);
//    elm.barrier.wait_with_tile_static_memory_fence();
#else
      }

      accu tr = {0};
#endif

      /* runs on only 1 thread per tile (reduction) */
#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      if (elm.local == index<2>(0, 0))
#endif
      {
	/* analyze this level's 4x4-block */
	for (int ly = 0; ly < TY; ly += 1)
	for (int lx = 0; lx < TX; lx += 1) {
	  Look(fTex[0][ly][lx], tr);
	}
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      tile_static_memory_fence(elm.barrier);
//    elm.barrier.wait_with_tile_static_memory_fence();
#else
      for (int tiley = 0; tiley < TY; tiley++)
      for (int tilex = 0; tilex < TX; tilex++)
      {
	const int y = groupsy;
	const int x = groupsx;
	const int ly = tiley;
	const int lx = tilex;
#endif

      /* generate this level's 4x4-block from the original surface */
      {
	/* build average of each channel an join */
	ULONG t;

	Code(fTex[0][ly][lx], tr, (A > 2 ? 4 : (A > 1 ? 10 : (A > 0 ? 5 : 6)))); t =
	Qunt(fTex[0][ly][lx], tr, (A > 2 ? 4 : (A > 1 ? 10 : (A > 0 ? 5 : 6))));

	/* write the result */

	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#if	(TCOMPRESS_CHANNELS(format) == 4)
	/* ABGR -> RGBA */
	bTex[0][ly][lx] = t;
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	(TCOMPRESS_CHANNELS(format) == 3)
	/* -BGR -> RGB- */
	bTex[0][ly][lx] = t;
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	(TCOMPRESS_CHANNELS(format) == 2)
	/* AL-- -> LA-- */
	bTex[0][ly][lx] = t;
#else
#error
#endif
      }

      /* put this level's 4x4-block into the destination surface */
      {
	/* assume seamless tiling: wrap pixels around */
	const int posx = (x + lx) % owidth;
	const int posy = (y + ly) % oheight;

	/* convert unaligned output location to "int"-space output location */
	const int linear = ((posy * owidth) + posx) * 1;
	const int lposx = (linear << 0) % (cwidth << 0);
	const int lposy = (linear << 0) / (cwidth << 0);

	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#if	(TCOMPRESS_CHANNELS(format) <= 4)
	/* ABGR -> ARGB */
	if (sizeof(UTYPE) == 4) {
	  /* every single thread */
	  {
	    int t0 = bTex[0][ly][lx + 0];

	    /* write combining */
	    unsigned int val = (ULONG)t0;

	    /* write out all of an "int" */
	    dArr(lposy, lposx) = val;
	  }
	}
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	(TCOMPRESS_CHANNELS(format) <= 3)
	/* -BGR -> -RGB */
	if (sizeof(UTYPE) == 2) {
	  /* every second thread */
	  if (!(elm.local[1] & 1)) {
	    int t0 = bTex[0][ly][lx + 0];
	    int t1 = bTex[0][ly][lx + 1];

	    /* write combining */
	    unsigned int val = (ULONG)((t1 << 16) + (t0 << 0));

	    /* write out all of an "int" */
	    dArr(lposy, lposx) = val;
	  }
	}
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	(TCOMPRESS_CHANNELS(format) <= 2)
	/* --YX -> XY-- */
	/* LA-- -> AL-- */
	if (sizeof(UTYPE) == 1) {
	  /* every fourth thread */
	  if (!(elm.local[1] & 3)) {
	    int t0 = bTex[0][ly][lx + 0];
	    int t1 = bTex[0][ly][lx + 1];
	    int t2 = bTex[0][ly][lx + 2];
	    int t3 = bTex[0][ly][lx + 3];

	    /* write combining */
	    unsigned int val = (ULONG)((t3 << 24) + (t2 << 16) + (t1 << 8) + (t0 << 0));

	    /* write out all of an "int" */
	    dArr(lposy, lposx) = val;
	  }
	}
#else
#error
#endif
      }

//    dTex += 0;
#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
    });

    dArr.synchronize();
#else
    }}
示例#3
0
/* Builds DXT5-compressed mip-chains for a base texture and its normal map and
 * swaps them in for the originals.
 *
 * Every destination texel of every mip-level is produced by accumulating the
 * (1 << l) x (1 << l) source texels it covers (wrapping around at the edges),
 * normalising, analysing the TY x TX tile, encoding, and finally compressing
 * the tile with squish.  When `LODed` is set, the alpha byte of the base
 * texel is copied into the alpha byte of the source normal texel first.
 *
 * base, norm: in/out; on success the pointed-to textures are released and
 *             replaced by the freshly created compressed textures.
 * minlevel:   forwarded to TextureCalcMip() to determine the level count.
 *
 * Returns false if the dimensions differ, a format conversion fails or the
 * destination textures cannot be created; true otherwise.
 *
 * TX, TY, DIM, UTYPE, type, LODed, pD3DDevice and the Accu/Norm/Look/Code/Join
 * helpers are defined elsewhere in the project.
 */
static bool TextureCompressQDM(LPDIRECT3DTEXTURE *base, LPDIRECT3DTEXTURE *norm, int minlevel) {
  /* start from NULL so a failed creation is reliably detected below */
  LPDIRECT3DTEXTURE baset = NULL;
  LPDIRECT3DTEXTURE normt = NULL;
  RESOURCEINFO based, baseo;
  RESOURCEINFO normd, normo;

  TextureInfoLevel(*base, baseo, 0);
  TextureInfoLevel(*norm, normo, 0);

#if 0
  /* Converts a height map into a normal map. The (x,y,z)
   * components of each normal are mapped to the (r,g,b)
   * channels of the output texture.
   */
  HRESULT D3DXComputeNormalMap(
    __out  LPDIRECT3DTEXTURE pTexture,
    __in   LPDIRECT3DTEXTURE pSrcTexture,
    __in   const PALETTEENTRY *pSrcPalette,
    __in   DWORD Flags,
    __in   DWORD Channel,
    __in   FLOAT Amplitude
    );
#endif

  /* they have to have the same dimension */
  if ((baseo.Width  != normo.Width ) ||
      (baseo.Height != normo.Height))
    return false;

  /* convert to ARGB8 (TODO: support at least the 16bit formats as well)
   *
   * The previous one-line form "a && x.Format = v, !f()" grouped as
   * "((a && x.Format) = v), !f()", i.e. an assignment to an rvalue.
   * NOTE(review): the short-circuit reading below (request the format and
   * convert only when the texture is not already A8B8G8R8) is the assumed
   * intent; the tested and the requested format differ - confirm.
   */
  if (baseo.Format != TEXFMT_A8B8G8R8) {
    baseo.Format = TEXFMT_A8R8G8B8;
    if (!TextureConvert(baseo, base, false))
      return false;
  }
  if (normo.Format != TEXFMT_A8B8G8R8) {
    normo.Format = TEXFMT_A8R8G8B8;
    if (!TextureConvert(normo, norm, true))
      return false;
  }

  /* create the textures */
  int levels = TextureCalcMip(baseo.Width, baseo.Height, minlevel);
  int flags = squish::kColourIterativeClusterFit | squish::kBtc3;

#ifdef DX11
  /* NOTE(review): this path looks unfinished - neither the destination
   * textures nor the source pointers/pitches are ever set up here.
   */
  ULONG bPch = 0, *bases = NULL;
  ULONG nPch = 0, *norms = NULL;

  DWORD basel = 1;
  DWORD norml = 1;
  DWORD level = max(basel, norml);
#else
  /* create the textures */
  pD3DDevice->CreateTexture(baseo.Width, baseo.Height, levels, 0, D3DFMT_DXT5, D3DPOOL_SYSTEMMEM, &baset, NULL);
  pD3DDevice->CreateTexture(normo.Width, normo.Height, levels, 0, D3DFMT_DXT5, D3DPOOL_SYSTEMMEM, &normt, NULL);

  /* bail out (and drop whichever texture did get created) */
  if (!baset || !normt) {
    if (baset) baset->Release();
    if (normt) normt->Release();

    return false;
  }

  /* source surfaces (level 0) and their pitches */
  ULONG bPch, *bases = TextureLock(*base, &bPch, 0);
  ULONG nPch, *norms = TextureLock(*norm, &nPch, 0);

  DWORD basel = baset->GetLevelCount();
  DWORD norml = normt->GetLevelCount();
  DWORD level = max(basel, norml);
#endif

  for (unsigned int l = 0; l < level; l++) {
    /* square dimension of this surface-level */
    /* square area of this surface-level */
    int lv = (1 << l);
    int av = lv * lv;

    TextureInfoLevel(baset, based, l);
    TextureInfoLevel(normt, normd, l);

    /* The destination pitches get their own names: they used to shadow
     * (nPch) or stand in for (sPch) the source pitches, so the source
     * surfaces were indexed with the pitch of the compressed level.
     */
    ULONG dbPch, *baser = TextureLock(baset, l, &dbPch, true);
    ULONG dnPch, *normr = TextureLock(normt, l, &dnPch, true);

    ULONG *sBase = bases;
    ULONG *sNorm = norms;
    /* write cursors, advanced by one compressed block per tile */
    ULONG *dBase = baser;
    ULONG *dNorm = normr;

    /* loop over 4x4-blocks of this level (DXT5) */
    for (unsigned int y = 0; y < based.Height; y += TY) {
      if (!(y & 0x3F)) {
        /* progress hook, currently disabled: */
//      logrf("line processed %d/%d of level %d/%d\r", y, based.Height, l, level);

//      PollProgress();
      }

    for (unsigned int x = 0; x < based.Width; x += TX) {
      UTYPE bBase[2][TY][TX];
      ULONG bNorm[2][TY][TX];
      type  fBase[2][TY][TX][DIM];
      float fNorm[2][TY][TX][DIM];

      /* generate this level's 4x4-block from the original surface */
      for (int ly = 0; ly < TY; ly += 1)
      for (int lx = 0; lx < TX; lx += 1) {
        /* per-texel accumulators and the top-left source coordinate */
        type  bs[DIM] = {0};
        float nn[DIM] = {0.0f};
        int yl = ((y + ly) << l);
        int xl = ((x + lx) << l);

        /* access all pixels this level's 4x4-block represents in
         * the full dimension original surface (high quality mip-mapping)
         */
        for (int oy = 0; oy < lv; oy += 1)
        for (int ox = 0; ox < lv; ox += 1) {
          /* assume seamless tiling: wrap pixels around */
          int posx = (xl + ox) % baseo.Width;
          int posy = (yl + oy) % baseo.Height;

          /* source texels are addressed with the source pitches */
          ULONG &b = sBase[(posy * bPch) + posx];
          ULONG &n = sNorm[(posy * nPch) + posx];

          /* transfer heightmap into the normal-map (overwrite);
           * this writes back into the locked source normal surface
           */
          if (LODed)
            n = (n & 0x00FFFFFF) + (b & 0xFF000000);

          { static const f<TCOMPRESS_RGBH> fmt; Accu(bs, b); }
          { static const f<TCOMPRESS_XYZD> fmt; Accu(nn, n); }
        }

        /* build average of each channel */
        { const int format = TCOMPRESS_RGBH; Norm(fBase[0][ly][lx], bs, av, levels, l); }
        { const int format = TCOMPRESS_XYZD; Norm(fNorm[0][ly][lx], nn, av, levels, l); }
      }

      /* per-tile analysis results */
      type  br[DIM] = {0};
      float rn[DIM] = {0.0f};

      /* analyze this level's 4x4-block */
      for (int ly = 0; ly < TY; ly += 1)
      for (int lx = 0; lx < TX; lx += 1) {
        { const int format = TCOMPRESS_RGBH; Look(fBase[0][ly][lx], br); }
        { const int format = TCOMPRESS_XYZD; Look(fNorm[0][ly][lx], rn); }
      }

      /* encode and pack every texel of the tile */
      for (int ly = 0; ly < TY; ly += 1)
      for (int lx = 0; lx < TX; lx += 1) {
        /* build average of each channel and join */
        UTYPE b;
        ULONG n;

        { const int format = TCOMPRESS_RGBH;
        Code(fBase[0][ly][lx], br, (TCOMPRESS_CHANNELS(format) +
                                   (TCOMPRESS_GREYS   (format) ? 2 : 0)) == 2 ? 8 :
                                   (TCOMPRESS_SWIZZL  (format) ? 6 : 5)); }
        { const int format = TCOMPRESS_XYZD;
        Code(fNorm[0][ly][lx], rn, (TCOMPRESS_CHANNELS(format) +
                                   (TCOMPRESS_GREYS   (format) ? 2 : 0)) == 2 ? 8 :
                                   (TCOMPRESS_SWIZZL  (format) ? 6 : 5)); }
        { const int format = TCOMPRESS_RGBH; b = Join(fBase[0][ly][lx], br); }
        { const int format = TCOMPRESS_XYZD; n = Join(fNorm[0][ly][lx], rn); }

        /* write the result ABGR */
        bBase[0][ly][lx] = b;
        bNorm[0][ly][lx] = n;
      }

      /* compress to DXT5 */
#if 0
      stb_compress_dxt_block((unsigned char *)dBase, (unsigned char *)bBase[0], true, STB_DXT_DITHER | STB_DXT_HIGHQUAL);
      stb_compress_dxt_block((unsigned char *)dNorm, (unsigned char *)bNorm[0], true, STB_DXT_NORMAL | STB_DXT_HIGHQUAL);
#else
      squish::Compress((unsigned char *)bBase[0], dBase, flags + squish::kColourMetricPerceptual);
      squish::Compress((unsigned char *)bNorm[0], dNorm, flags + squish::kColourMetricUniform);
#endif

      /* advance pointer of compressed blocks:
       * one 128-bit block expressed in 32-bit units
       */
      dBase += (128 / 32);
      dNorm += (128 / 32);

#if 0
      for (int ly = 0; ly < TY; ly += 1)
      for (int lx = 0; lx < TX; lx += 1) {
        dBase[((y + ly) * bPch) + (x + lx)] = bBase[0][ly][lx];
        dNorm[((y + ly) * nPch) + (x + lx)] = bNorm[0][ly][lx];
      }
#endif
    }
    }

    TextureUnlock(baset, l);
    TextureUnlock(normt, l);
  }

  TextureUnlock((*base), 0);
  TextureUnlock((*norm), 0);

  /* hand the compressed textures back in place of the originals */
  (*base)->Release();
  (*norm)->Release();

  (*base) = baset;
  (*norm) = normt;

  return true;
}
示例#4
0
  void ff(TextureCompressDXT,format,coding,fiting)(RESOURCEINFO &texo, RESOURCEINFO &texd, ULONG *texs, ULONG *texr, int level, int l, int blocksize, int flags) {
    /* square dimension of this surface-level */
    /* square area of this surface-level */
    const int lv = (1 << l);
    const int av = lv * lv;

    /* ------------------------------------------------------------------------------------------------------- */
    const int NORMALS_SCALEBYLEVEL = ::NORMALS_SCALEBYLEVEL;
    const int  ALPHAS_SCALEBYLEVEL =  ::ALPHAS_SCALEBYLEVEL;
    const float colorgamma       = ::colorgamma;
    const float alphacontrast    = ::alphacontrast;
    const float colorgammainv    = ::colorgammainv;
    const float alphacontrastinv = ::alphacontrastinv;

    int iwidth  = texo.Width;
    int iheight = texo.Height;
    int owidth  = texd.Width;
    int oheight = texd.Height;
    int cwidth  = owidth;
    int cheight = oheight;

    /* get the data back to the CPU */
    cheight = (cheight + 3) / 4;	/* 4x4 LONG ... */
    cwidth  = (cwidth  + 3) / 4;	/* 4x4 LONG ... */
    cwidth *= 2 * blocksize;		/* ... to 2x|4x LONG */

    /* ensure tile ability (bit on overhead for non-4 resolutions) */
    owidth  = (owidth  + (TX - 1)) & (~(TX - 1));
    oheight = (oheight + (TY - 1)) & (~(TY - 1));

    assert((owidth  & (TX - 1)) == 0);
    assert((oheight & (TY - 1)) == 0);

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
    /* constant buffer array */
    Concurrency::array_view<const SingleColourLookup_CCR, 2> lArr(2, 256, (const SingleColourLookup_CCR *)::lookup_34_56_ccr);
    Concurrency::array_view<const   IndexBlockLookup_CCR, 2> yArr(4, 8,   (const   IndexBlockLookup_CCR *)::lookup_c34a57_ccr);

    /* get a two-dimensional extend over the whole output (without re-cast to LONG),
     * then get a tile-extend over that one ()
     */
    Concurrency::extent<2> ee(oheight, owidth);
    Concurrency::tiled_extent<TY, TX> te(ee);

    Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs);
    Concurrency::array_view<      unsigned int, 2> dArr(cheight, cwidth, (      unsigned int *)texr);

    Concurrency::parallel_for_each(te /*dArr.extent.tile<TY, TX>(osize)*/, [=](tiled_index<TY, TX> elm) restrict(amp) {
      typedef type accu[DIM];

      /* tile static memory */
//    tile_static UTYPE bTex[2][TY*TX];
      tile_static type  fTex[2][TY*TX][DIM];
      tile_static int   iTex[2][TY*TX][DIM];

      /* generate this level's 4x4-block from the original surface */
//    const int y = elm.global[0] - ly;
//    const int x = elm.global[1] - lx;
      const int y = elm.tile[0] * TY;
      const int x = elm.tile[1] * TX;
      const int ly = elm.local[0];
      const int lx = elm.local[1];
      const int lxy = ly * TX + lx;
#else
    Concurrency::array_view<const SingleColourLookup_CCR, 2> lArr(2, 256, (const SingleColourLookup_CCR *)::lookup_34_56_ccr);

    Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs);
    Concurrency::array_view<      unsigned int, 2> dArr(cheight, cwidth, (      unsigned int *)texr, true);

    for (int groupsy = 0; groupsy < (owidth  / TY); groupsy++)
    for (int groupsx = 0; groupsx < (oheight / TX); groupsx++) {
      typedef type accu[DIM];

      /* tile static memory */
//    UTYPE bTex[2][TY*TX];
      type  fTex[2][TY*TX][DIM];
      int   iTex[2][TY*TX][DIM];

      for (int tiley = 0; tiley < TY; tiley++)
      for (int tilex = 0; tilex < TX; tilex++)
      {
	const int y = groupsy * TY;
	const int x = groupsx * TX;
	const int ly = tiley;
	const int lx = tilex;
	const int lxy = ly * TX + lx;
#endif

      {
	const int yl = ((y + ly) << l);
	const int xl = ((x + lx) << l);

	accu tt = {0};

	/* access all pixels this level's 4x4-block represents in
	 * the full dimension original surface (high quality mip-mapping)
	 */
	for (int oy = 0; oy < lv; oy += 1) {
	for (int ox = 0; ox < lv; ox += 1) {
	  /* assume seamless tiling: wrap pixels around */
	  const int posx = (xl + ox) % iwidth;
	  const int posy = (yl + oy) % iheight;

	  const ULONG &t = sArr(posy, posx);

	  Accu(tt, t);	// +=
	}
	}

	/* build average of each channel */
	Norm(fTex[0][lxy], tt, av, level, l);
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      tile_static accu tr; tr[lxy & 7] = 0;

      tile_static_memory_fence(elm.barrier);
//    elm.barrier.wait_with_tile_static_memory_fence();
#else
      }

      accu tr = {0};
#endif

      /* runs on only 1 thread per tile (reduction) */
#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      if (elm.local == index<2>(0, 0))
#endif
      {
	/* analyze this level's 4x4-block */
	for (int lxy = 0; lxy < TY*TX; lxy += 1) {
	  Look(fTex[0][lxy], tr);
	}
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      tile_static_memory_fence(elm.barrier);
//    elm.barrier.wait_with_tile_static_memory_fence();
#else
      for (int tiley = 0; tiley < TY; tiley++)
      for (int tilex = 0; tilex < TX; tilex++)
      {
	const int y = groupsy;
	const int x = groupsx;
	const int ly = tiley;
	const int lx = tilex;
	const int lxy = ly * TX + lx;
#endif

      /* generate this level's 4x4-block from the original surface */
      {
	/* build average of each channel an join */
	Code (fTex[0][lxy], tr,
	  (TCOMPRESS_CHANNELS(format) +
	  (TCOMPRESS_GREYS   (format) ? 2 : 0)) == 2 ? 8 :
	  (TCOMPRESS_SWIZZL  (format) ? 6 : 5));
	Range(iTex[0][lxy],
	      fTex[0][lxy]);

#if	(TCOMPRESS_SWIZZL(format))
	/* swizzle ABGR -> AGBR */
        {
	  int swap =        iTex[0][lxy][1];
	  iTex[0][lxy][1] = iTex[0][lxy][2];
	  iTex[0][lxy][2] = swap           ;
	}
#endif

	/* write the result */

	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#if	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 4)
	/* ABGR -> RGBA */
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 3)
	/* -BGR -> RGB- */
	iTex[0][lxy][0] = 0xFF;
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 2)
	/* --YX -> XY-- */
	/* AL-- -> LA-- */
#if	(format == TCOMPRESS_XYz)
	iTex[0][lxy][0] = iTex[0][lxy][2],  // Y
	iTex[1][lxy][0] = iTex[0][lxy][3];  // X
#else
	iTex[0][lxy][0] = iTex[0][lxy][0],  // A
	iTex[1][lxy][0] = iTex[0][lxy][1];  // Z
#endif
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 1)
	  /* -Z-- -> Z--- */
	  /* A--- -> A--- */
	  /* -LLL -> L--- */
#if	(format == TCOMPRESS_a  )
	iTex[0][lxy][0] = iTex[0][lxy][0];  // A
#elif	(format == TCOMPRESS_A  )
	iTex[0][lxy][0] = iTex[0][lxy][0];  // A
#elif	(format == TCOMPRESS_xyZ)
	iTex[0][lxy][0] = iTex[0][lxy][1];  // Z
#else
	iTex[0][lxy][0] = iTex[0][lxy][3];  // X
#endif
#else
#error
#endif
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
      tile_static_memory_fence(elm.barrier);
//    elm.barrier.wait_with_tile_static_memory_fence();

#define local_is(a,b) elm.local == index<2>(a, b)
#else
      }

      for (int tiley = 0; tiley < TY; tiley++)
      for (int tilex = 0; tilex < TX; tilex++)
      {
	const int y = groupsy;
	const int x = groupsx;
	const int ly = tiley;
	const int lx = tilex;
	const int lxy = ly * TX + lx;

#define local_is(a,b) ((ly == a) && (lx == b))
#endif

      /* put this level's 4x4-block into the destination surface */
      {
	/* round down */
	int posx = (x + lx) >> 2;
	int posy = (y + ly) >> 2;

	/* first and second block */
	unsigned int b[2][2];

        /* compress to DXT1/DXT3/DXT5/ATI1/ATI2 */
#define	sflgs	TCOMPRESS_COLOR(format) ? SQUISH_METRIC_PERCEPTUAL : SQUISH_METRIC_UNIFORM,	\
		TCOMPRESS_TRANS(format),							\
		                fiting

	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#if	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 4) ||		\
	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 3)
	/* 1x LONG per block for DXT1, 2x for the others */

#if	(coding == 1)
	{ posx <<= 0;
	  squish::CompressColorBtc1(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr);

	  dArr(posy, posx + 0) = b[1][0];
	  dArr(posy, posx + 1) = b[1][1];
	}
#elif	(coding == 2)
	{ posx <<= 1;
	  squish::CompressAlphaBtc2(elm.barrier, lxy, iTex[0], 0xFFFF, b[0]       , yArr      );
	  squish::CompressColorBtc2(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr);

	  dArr(posy, posx + 0) = b[0][0];
	  dArr(posy, posx + 1) = b[0][1];
	  dArr(posy, posx + 2) = b[1][0];
	  dArr(posy, posx + 3) = b[1][1];
	}
#elif	(coding == 3)
	{ posx <<= 1;
	  squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0]       , yArr      );
	  squish::CompressColorBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr);

	  dArr(posy, posx + 0) = b[0][0];
	  dArr(posy, posx + 1) = b[0][1];
	  dArr(posy, posx + 2) = b[1][0];
	  dArr(posy, posx + 3) = b[1][1];
	}
#else
#error
#endif
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 1)
	/* 1x LONG for ATI1 */

#if	(coding == 4)
	{ posx <<= 0;
	  squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0]       , yArr      );

	  dArr(posy, posx + 0) = b[0][0];
	  dArr(posy, posx + 1) = b[0][1];
	}
#else
#error
#endif
	/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
#elif	((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 2)
	/* 2x LONG for ATI2 */

#if	(coding == 5)
	{ posx <<= 1;
	  squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0]       , yArr      );
	  squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[1], 0xFFFF, b[1]       , yArr      );

	  dArr(posy, posx + 0) = b[0][0];
	  dArr(posy, posx + 1) = b[0][1];
	  dArr(posy, posx + 2) = b[1][0];
	  dArr(posy, posx + 3) = b[1][1];
	}
#else
#error
#endif
#else
#error
#endif

#undef	sflgs

//	elm.barrier.wait();

        /* advance pointer of compressed blocks */
//      wTex += blocksize;
//      dTex += blocksize;
      }

#if	defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG)
//    elm.barrier.wait();

//    dTex += 0;
    });

    dArr.synchronize();
#else
    }}