// Runs the Hough pipeline on `image` and replaces it with a rendering of the
// strongest line that was found.
//
// Steps, in order: BW(image), Accu(image), Blur(Accu.Accumulator) and
// Accu.Maximum(); a short progress tag is printed after each step. The maximum
// (R, Fi, Value) is then drawn into a new white image of the same size: a
// pixel becomes black when its rounded distance term equals max.R, and full
// intensity otherwise.
//
// @param image  input picture; overwritten with the rendered result.
// @return       always 0.
int AlgorithmHough::DoYourJob(Magick::Image& image)
{
    BW(image);
    std::cout<<"BW"<<std::endl;
    Accu(image);
    std::cout<<"Ac"<<std::endl;
    Blur(Accu.Accumulator);
    std::cout<<"Bl"<<std::endl;
    HoughResult max=Accu.Maximum();
    std::cout<<"Max"<<std::endl;

    Magick::Image result(image.size(), Magick::Color("white"));
    result.modifyImage();
    Magick::Pixels pixelCache(result);

    // Dimensions are read once and kept unsigned, so the loop counters and the
    // linear pixel index cannot go negative or be compared signed/unsigned.
    const size_t columns=image.columns();
    const size_t rows=image.rows();
    Magick::PixelPacket* pixels=pixelCache.set(0,0,columns,rows);

    // The angle terms do not depend on the pixel, so they are computed once
    // instead of once per pixel.
    // NOTE(review): 3.14 is kept as the approximation of pi on purpose; the
    // accumulator presumably uses the same constant, and switching only this
    // side to a more precise value could shift the drawn line - confirm.
    const double fi_f=max.Fi-90;
    const double cosFi=cos(fi_f*3.14/180.0);
    const double sinFi=sin(fi_f*3.14/180.0);

    // Pixels are independent of each other, so they are visited row by row,
    // matching the x + y*columns layout of the pixel buffer.
    for(size_t y=0; y<rows; y++)
        for(size_t x=0; x<columns; x++)
        {
            const double r_=double(x)*cosFi+double(y)*sinFi;
            const size_t pixelIndex=x+y*columns;

            if(r_<0) // as above (original comment: "jak wyzej"); negative distances are not drawn
                continue;

            if(round(r_)==max.R)
                pixels[pixelIndex].red=pixels[pixelIndex].blue=pixels[pixelIndex].green=0;
            else
                pixels[pixelIndex].red=pixels[pixelIndex].blue=pixels[pixelIndex].green=(1<<QuantumDepth)-1;
        }

    pixelCache.sync();
    image=result;

    std::cout<<"R: "<<max.R<<" FI: "<<max.Fi<<" Value: "<<max.Value<<std::endl;
    return 0;
}
/*
 * ff(TextureQuantizeRAW,format,A): produces mip level `l` of a surface from the
 * full-size source and writes the quantized (not block-compressed) texels to
 * the output buffer.
 * (`ff` is presumably a name-building macro so this body can be instantiated
 * per `format` / `A` - TODO confirm against its definition.)
 *
 * Parameters:
 *   texo  - description of the source surface; only Width/Height are read here.
 *   texd  - description of the destination level; only Width/Height are read here.
 *   texs  - source texels, viewed as an iheight x iwidth array of unsigned int.
 *   texr  - output buffer, viewed as a cheight x cwidth array of unsigned int;
 *           cwidth is derived from the destination width and
 *           TCOMPRESS_CHANNELS(format).
 *   level - forwarded unchanged to Norm().
 *   l     - level being produced; every output texel covers a
 *           (1 << l) x (1 << l) area of the source.
 *
 * The working width/height are rounded up to multiples of TX / TY first.
 * Flow per TY x TX tile:
 *   1. every tile texel feeds its source area to Accu() (source coordinates
 *      wrap with modulo iwidth / iheight) and stores the Norm() result in fTex;
 *   2. Look() is called for every tile texel with the tile-wide `tr`;
 *   3. Code() and Qunt() are applied to every fTex entry; the Qunt() result is
 *      kept in bTex;
 *   4. the bTex value is written to dArr at the linearized output position.
 *
 * With SQUASH_USE_AMP defined and SQUASH_USE_AMP_DEBUG undefined the tile work
 * runs inside Concurrency::parallel_for_each over a tiled extent; otherwise
 * nested for-loops over tile groups and tile texels walk the same statements.
 *
 * NOTE(review): in the loop-based path `groupsy` is bounded by owidth / TY and
 *   `groupsx` by oheight / TX although groupsy feeds y and groupsx feeds x;
 *   this looks swapped for non-square surfaces - confirm.
 * NOTE(review): in the loop-based path the second tile loop uses
 *   y = groupsy and x = groupsx, while the first uses groupsy * TY and
 *   groupsx * TX - confirm which origin is intended.
 * NOTE(review): the write-out section tests TCOMPRESS_CHANNELS(format) <= 4
 *   first, so its "<= 3" and "<= 2" alternatives are never selected for
 *   formats with up to four channels, and the selected branch only stores when
 *   sizeof(UTYPE) == 4 - confirm.
 * NOTE(review): the text ends after the final "#else }}"; the matching #endif
 *   and the closing brace of the function are not visible here.
 */
void ff(TextureQuantizeRAW,format,A)(RESOURCEINFO &texo, RESOURCEINFO &texd, ULONG *texs, ULONG *texr, int level, int l) { /* square dimension of this surface-level */ /* square area of this surface-level */ const int lv = (1 << l); const int av = lv * lv; /* ------------------------------------------------------------------------------------------------------- */ const int NORMALS_SCALEBYLEVEL = ::NORMALS_SCALEBYLEVEL; const int ALPHAS_SCALEBYLEVEL = ::ALPHAS_SCALEBYLEVEL; const float colorgamma = ::colorgamma; const float alphacontrast = ::alphacontrast; const float colorgammainv = ::colorgammainv; const float alphacontrastinv = ::alphacontrastinv; int iwidth = texo.Width; int iheight = texo.Height; int owidth = texd.Width; int oheight = texd.Height; int cwidth = owidth; int cheight = oheight; /* get the data back to the CPU */ #if (TCOMPRESS_CHANNELS(format) == 4) /* ABGR -> ARGB */ cwidth = (cwidth + 0) >> 0; /* 1x LONG to 1x LONG */ #elif (TCOMPRESS_CHANNELS(format) == 3) /* -BGR -> -RGB */ cwidth = (cwidth + 1) >> 1; /* 1x LONG to 1x SHORT */ #elif (TCOMPRESS_CHANNELS(format) == 2) /* LA-- -> AL-- */ cwidth = (cwidth + 3) >> 2; /* 1x LONG to 1x CHAR */ #elif (TCOMPRESS_CHANNELS(format) == 1) /* A--- -> A--- */ cwidth = (cwidth + 31) >> 5; /* 8x LONG to 1x CHAR */ #else #error #endif /* ensure tile ability (bit on overhead for non-4 resolutions) */ owidth = (owidth + (TX - 1)) & (~(TX - 1)); oheight = (oheight + (TY - 1)) & (~(TY - 1)); assert((owidth & (TX - 1)) == 0); assert((oheight & (TY - 1)) == 0); #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) /* get a two-dimensional extend over the whole output (without re-cast to LONG), * then get a tile-extend over that one () */ Concurrency::extent<2> ee(oheight, owidth); Concurrency::tiled_extent<TY, TX> te(ee); Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs); Concurrency::array_view< unsigned int, 2> dArr(cheight, cwidth, ( unsigned int *)texr); 
/* Tile work follows: the parallel_for_each kernel, or (in the #else part) its nested-loop counterpart. */
Concurrency::parallel_for_each(te /*dArr.extent.tile<TY, TX>(osize)*/, [=](tiled_index<TY, TX> elm) restrict(amp) { typedef type accu[DIM]; /* tile static memory */ // tile_static UTYPE bTex[2][TY][TX]; tile_static int bTex[2][TY][TX]; tile_static type fTex[2][TY][TX][DIM]; // const int y = elm.global[0] - ly; // const int x = elm.global[1] - lx; const int y = elm.tile[0] * TY; const int x = elm.tile[1] * TX; const int ly = elm.local[0]; const int lx = elm.local[1]; #else array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs); array_view< unsigned int, 2> dArr(cheight, cwidth, ( unsigned int *)texr, true); for (int groupsy = 0; groupsy < (owidth / TY); groupsy++) for (int groupsx = 0; groupsx < (oheight / TX); groupsx++) { typedef type accu[DIM]; /* tile static memory */ // UTYPE bTex[2][TY][TX]; int bTex[2][TY][TX]; type fTex[2][TY][TX][DIM]; for (int tiley = 0; tiley < TY; tiley++) for (int tilex = 0; tilex < TX; tilex++) { const int y = groupsy * TY; const int x = groupsx * TX; const int ly = tiley; const int lx = tilex; #endif /* generate this level's 4x4-block from the original surface */ { const int yl = ((y + ly) << l); const int xl = ((x + lx) << l); accu tt; tt[0] = tt[1] = tt[2] = tt[3] = tt[4] = tt[5] = tt[6] = tt[7] = 0; /* access all pixels this level's 4x4-block represents in * the full dimension original surface (high quality mip-mapping) */ for (int oy = 0; oy < lv; oy += 1) for (int ox = 0; ox < lv; ox += 1) { /* assume seamless tiling: wrap pixels around */ const int posx = (xl + ox) % iwidth; const int posy = (yl + oy) % iheight; const ULONG &t = sArr(posy, posx); Accu(tt, t); // += } /* build average of each channel */ Norm(fTex[0][ly][lx], tt, av, level, l); } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) tile_static accu tr; tr[(ly * TX + lx) & 7] = 0; tile_static_memory_fence(elm.barrier); // elm.barrier.wait_with_tile_static_memory_fence(); #else } accu tr = {0}; #endif /* runs on only 1 thread 
per tile (reduction) */ #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) if (elm.local == index<2>(0, 0)) #endif { /* analyze this level's 4x4-block */ for (int ly = 0; ly < TY; ly += 1) for (int lx = 0; lx < TX; lx += 1) { Look(fTex[0][ly][lx], tr); } } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) tile_static_memory_fence(elm.barrier); // elm.barrier.wait_with_tile_static_memory_fence(); #else for (int tiley = 0; tiley < TY; tiley++) for (int tilex = 0; tilex < TX; tilex++) { const int y = groupsy; const int x = groupsx; const int ly = tiley; const int lx = tilex; #endif /* generate this level's 4x4-block from the original surface */ { /* build average of each channel an join */ ULONG t; Code(fTex[0][ly][lx], tr, (A > 2 ? 4 : (A > 1 ? 10 : (A > 0 ? 5 : 6)))); t = Qunt(fTex[0][ly][lx], tr, (A > 2 ? 4 : (A > 1 ? 10 : (A > 0 ? 5 : 6)))); /* write the result */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #if (TCOMPRESS_CHANNELS(format) == 4) /* ABGR -> RGBA */ bTex[0][ly][lx] = t; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif (TCOMPRESS_CHANNELS(format) == 3) /* -BGR -> RGB- */ bTex[0][ly][lx] = t; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif (TCOMPRESS_CHANNELS(format) == 2) /* AL-- -> LA-- */ bTex[0][ly][lx] = t; #else #error #endif } /* put this level's 4x4-block into the destination surface */ { /* assume seamless tiling: wrap pixels around */ const int posx = (x + lx) % owidth; const int posy = (y + ly) % oheight; /* convert unaligned output location to "int"-space output location */ const int linear = ((posy * owidth) + posx) * 1; const int lposx = (linear << 0) % (cwidth << 0); const int lposy = (linear << 0) / (cwidth << 0); /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #if (TCOMPRESS_CHANNELS(format) <= 4) /* ABGR -> ARGB */ if (sizeof(UTYPE) == 4) { /* every single thread */ { int t0 = 
bTex[0][ly][lx + 0]; /* write combining */ unsigned int val = (ULONG)t0; /* write out all of an "int" */ dArr(lposy, lposx) = val; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif (TCOMPRESS_CHANNELS(format) <= 3) /* -BGR -> -RGB */ if (sizeof(UTYPE) == 2) { /* every second thread */ if (!(elm.local[1] & 1)) { int t0 = bTex[0][ly][lx + 0]; int t1 = bTex[0][ly][lx + 1]; /* write combining */ unsigned int val = (ULONG)((t1 << 16) + (t0 << 0)); /* write out all of an "int" */ dArr(lposy, lposx) = val; } } /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif (TCOMPRESS_CHANNELS(format) <= 2) /* --YX -> XY-- */ /* LA-- -> AL-- */ if (sizeof(UTYPE) == 1) { /* every fourth thread */ if (!(elm.local[1] & 3)) { int t0 = bTex[0][ly][lx + 0]; int t1 = bTex[0][ly][lx + 1]; int t2 = bTex[0][ly][lx + 2]; int t3 = bTex[0][ly][lx + 3]; /* write combining */ unsigned int val = (ULONG)((t3 << 24) + (t2 << 16) + (t1 << 8) + (t0 << 0)); /* write out all of an "int" */ dArr(lposy, lposx) = val; } } #else #error #endif } // dTex += 0; #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) }); dArr.synchronize(); #else }}
static bool TextureCompressQDM(LPDIRECT3DTEXTURE *base, LPDIRECT3DTEXTURE *norm, int minlevel) { LPDIRECT3DTEXTURE baset; LPDIRECT3DTEXTURE normt; RESOURCEINFO based, baseo; RESOURCEINFO normd, normo; TextureInfoLevel(*base, baseo, 0); TextureInfoLevel(*norm, normo, 0); #if 0 /* Converts a height map into a normal map. The (x,y,z) * components of each normal are mapped to the (r,g,b) * channels of the output texture. */ HRESULT D3DXComputeNormalMap( __out LPDIRECT3DTEXTURE pTexture, __in LPDIRECT3DTEXTURE pSrcTexture, __in const PALETTEENTRY *pSrcPalette, __in DWORD Flags, __in DWORD Channel, __in FLOAT Amplitude ); #endif /* they have to have the same dimension */ if ((baseo.Width != normo.Width ) || (baseo.Height != normo.Height)) return false; /* convert to ARGB8 (TODO: support at least the 16bit formats as well) */ if ((baseo.Format != TEXFMT_A8B8G8R8) && baseo.Format = TEXFMT_A8R8G8B8, !TextureConvert(baseo, base, false)) return false; if ((normo.Format != TEXFMT_A8B8G8R8) && normo.Format = TEXFMT_A8R8G8B8, !TextureConvert(normo, norm, true)) return false; /* create the textures */ int levels = TextureCalcMip(baseo.Width, baseo.Height, minlevel); int flags = squish::kColourIterativeClusterFit | squish::kBtc3; #ifdef DX11 ULONG *bases; ULONG *norms; DWORD basel = 1; DWORD norml = 1; DWORD level = max(basel, norml); #else /* create the textures */ pD3DDevice->CreateTexture(baseo.Width, baseo.Height, levels, 0, D3DFMT_DXT5, D3DPOOL_SYSTEMMEM, &baset, NULL); pD3DDevice->CreateTexture(normo.Width, normo.Height, levels, 0, D3DFMT_DXT5, D3DPOOL_SYSTEMMEM, &normt, NULL); /* damit */ if (!baset || !normt) { if (baset) baset->Release(); if (normt) normt->Release(); return false; } ULONG bPch, *bases = TextureLock(*base, &bPch, 0); ULONG nPch, *norms = TextureLock(*norm, &nPch, 0); DWORD basel = baset->GetLevelCount(); DWORD norml = normt->GetLevelCount(); DWORD level = max(basel, norml); #endif for (unsigned int l = 0; l < level; l++) { /* square dimension of this 
surface-level */ /* square area of this surface-level */ int lv = (1 << l); int av = lv * lv; TextureInfoLevel(baset, based, l); TextureInfoLevel(normt, normd, l); ULONG sPch, *baser = TextureLock(baset, l, &sPch, true); ULONG nPch, *normr = TextureLock(normt, l, &nPch, true); ULONG *sBase = (ULONG *)bases; ULONG *sNorm = (ULONG *)norms; ULONG *dBase = (ULONG *)baser; ULONG *dNorm = (ULONG *)normr; /* loop over 4x4-blocks of this level (DXT5) */ for (unsigned int y = 0; y < based.Height; y += TY) { if (!(y & 0x3F)) { // logrf("line processed %d/%d of level %d/%d\r", y, based.Height, l, level); // PollProgress(); } for (unsigned int x = 0; x < based.Width; x += TX) { UTYPE bBase[2][TY][TX]; ULONG bNorm[2][TY][TX]; type fBase[2][TY][TX][DIM]; float fNorm[2][TY][TX][DIM]; /* generate this level's 4x4-block from the original surface */ for (int ly = 0; ly < TY; ly += 1) for (int lx = 0; lx < TX; lx += 1) { type bs[DIM] = {0}; int yl = ((y + ly) << l); /*ng ns[DIM] = {0*/ int xl = ((x + lx) << l); float nn[DIM] = {0.0f}; /* access all pixels this level's 4x4-block represents in * the full dimension original surface (high quality mip-mapping) */ for (int oy = 0; oy < lv; oy += 1) for (int ox = 0; ox < lv; ox += 1) { /* assume seamless tiling: wrap pixels around */ int posx = (xl + ox) % baseo.Width; int posy = (yl + oy) % baseo.Height; ULONG &b = sBase[(posy * sPch) + posx]; ULONG &n = sNorm[(posy * nPch) + posx]; /* transfer heightmap into the normal-map (overwrite) */ if (LODed) n = (n & 0x00FFFFFF) + (b & 0xFF000000); { static const f<TCOMPRESS_RGBH> fmt; Accu(bs, b); } { static const f<TCOMPRESS_XYZD> fmt; Accu(nn, n); } // AccuRGBM<ACCUMODE_LINEAR>(bs, b, level, l, colorgamma); // += and max #if defined(NORMALS_INTEGER) // AccuXYZD<ACCUMODE_SCALE >(ns, n, level, l, NORMALS_SCALEBYLEVEL); // += #else // AccuXYZD<ACCUMODE_SCALE >(nn, n, level, l, NORMALS_SCALEBYLEVEL); // += #endif } /* build average of each channel */ { const int format = TCOMPRESS_RGBH; 
Norm(fBase[0][ly][lx], bs, av, levels, l); } { const int format = TCOMPRESS_XYZD; Norm(fNorm[0][ly][lx], nn, av, levels, l); } // NormRGBM<TRGTMODE_CODING_RGB >(fBase[0][ly][lx], bs, av, colorgammainv); #if defined(NORMALS_INTEGER) // NormXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], ns, av); #else // NormXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], nn, av); #endif } type br[DIM] = {0}; /*ng nr[DIM] = {0*/ float rn[DIM] = {0.0f}; /* analyze this level's 4x4-block */ for (int ly = 0; ly < TY; ly += 1) for (int lx = 0; lx < TX; lx += 1) { { const int format = TCOMPRESS_RGBH; Look(fBase[0][ly][lx], br); } { const int format = TCOMPRESS_XYZD; Look(fNorm[0][ly][lx], rn); } // LookRGBH<TRGTMODE_CODING_RGB >(fBase[0][ly][lx], br); #if defined(NORMALS_INTEGER) // LookXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], nr); #else // LookXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], rn); #endif } /* generate this level's 4x4-block from the original surface */ for (int ly = 0; ly < TY; ly += 1) for (int lx = 0; lx < TX; lx += 1) { /* build average of each channel an join */ UTYPE b; ULONG n; { const int format = TCOMPRESS_RGBH; Code(fBase[0][ly][lx], br, (TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS (format) ? 2 : 0)) == 2 ? 8 : (TCOMPRESS_SWIZZL (format) ? 6 : 5)); } { const int format = TCOMPRESS_XYZD; Code(fNorm[0][ly][lx], rn, (TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS (format) ? 2 : 0)) == 2 ? 8 : (TCOMPRESS_SWIZZL (format) ? 6 : 5)); } { const int format = TCOMPRESS_RGBH; b = Join(fBase[0][ly][lx], br); } { const int format = TCOMPRESS_XYZD; n = Join(fNorm[0][ly][lx], rn); } // CodeRGBH<TRGTMODE_CODING_RGB >(fBase[0][ly][lx], br); #if defined(NORMALS_INTEGER) // CodeXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE, TCOMPRESS_SWIZZL(format) ? 6 : 5>(fNorm[0][ly][lx], nr); #else // CodeXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE, TCOMPRESS_SWIZZL(format) ? 
6 : 5>(fNorm[0][ly][lx], rn); #endif // b = JoinRGBH<TRGTMODE_CODING_RGB >(fBase[0][ly][lx], br); #if defined(NORMALS_INTEGER) // n = JoinXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], nr); #else // n = JoinXYZD<TRGTMODE_CODING_DXDYdZt | TRGTNORM_CUBESPACE>(fNorm[0][ly][lx], rn); #endif /* write the result ABGR */ bBase[0][ly][lx] = b; bNorm[0][ly][lx] = n; } /* compress to DXT5 */ #if 0 stb_compress_dxt_block((unsigned char *)dBase, (unsigned char *)bBase[0], true, STB_DXT_DITHER | STB_DXT_HIGHQUAL); stb_compress_dxt_block((unsigned char *)dNorm, (unsigned char *)bNorm[0], true, STB_DXT_NORMAL | STB_DXT_HIGHQUAL); #else squish::Compress((unsigned char *)bBase[0], dBase, flags + squish::kColourMetricPerceptual); squish::Compress((unsigned char *)bNorm[0], dNorm, flags + squish::kColourMetricUniform); #endif /* advance pointer of compressed blocks */ dBase += (128 / 32); dNorm += (128 / 32); #if 0 for (int ly = 0; ly < TY; ly += 1) for (int lx = 0; lx < TX; lx += 1) { dBase[((y + ly) * bPch) + (x + lx)] = bBase[0][ly][lx]; dNorm[((y + ly) * nPch) + (x + lx)] = bNorm[0][ly][lx]; } #endif } } TextureUnlock(baset, l); TextureUnlock(normt, l); } TextureUnlock((*base), 0); TextureUnlock((*norm), 0); (*base)->Release(); (*norm)->Release(); (*base) = baset; (*norm) = normt; return true; }
/*
 * ff(TextureCompressDXT,format,coding,fiting): produces mip level `l` of a
 * surface from the full-size source and writes it block-compressed to the
 * output buffer.
 * (`ff` is presumably a name-building macro so this body can be instantiated
 * per `format` / `coding` / `fiting` - TODO confirm against its definition.)
 *
 * Parameters:
 *   texo      - description of the source surface; only Width/Height are read here.
 *   texd      - description of the destination level; only Width/Height are read here.
 *   texs      - source texels, viewed as an iheight x iwidth array of unsigned int.
 *   texr      - output buffer, viewed as a cheight x cwidth array of unsigned
 *               int, where cheight = ceil(height / 4) and
 *               cwidth = ceil(width / 4) * 2 * blocksize.
 *   level     - forwarded unchanged to Norm().
 *   l         - level being produced; every output texel covers a
 *               (1 << l) x (1 << l) area of the source.
 *   blocksize - only used for the cwidth computation above.
 *   flags     - not referenced in the visible body.
 *
 * The working width/height are rounded up to multiples of TX / TY first.
 * Flow per TY x TX tile:
 *   1. every tile texel feeds its source area to Accu() (source coordinates
 *      wrap with modulo iwidth / iheight) and stores the Norm() result in fTex;
 *   2. Look() is called for every tile texel with the tile-wide `tr`;
 *   3. Code() and Range() fill iTex from fTex; depending on `format` channels
 *      are swapped, forced to 0xFF or moved into slot 0 of iTex[0] / iTex[1];
 *   4. the squish:: routine(s) selected by `coding` compress the tile and the
 *      resulting unsigned ints are stored in dArr:
 *        coding 1 - CompressColorBtc1, two values;
 *        coding 2 - CompressAlphaBtc2 + CompressColorBtc2, four values;
 *        coding 3 - CompressAlphaBtc3 + CompressColorBtc3, four values;
 *        coding 4 - CompressAlphaBtc3, two values;
 *        coding 5 - CompressAlphaBtc3 on iTex[0] and on iTex[1], four values.
 *
 * With SQUASH_USE_AMP defined and SQUASH_USE_AMP_DEBUG undefined the tile work
 * runs inside Concurrency::parallel_for_each over a tiled extent; otherwise
 * nested for-loops over tile groups and tile texels walk the same statements.
 *
 * NOTE(review): in the loop-based path `groupsy` is bounded by owidth / TY and
 *   `groupsx` by oheight / TX although groupsy feeds y and groupsx feeds x;
 *   this looks swapped for non-square surfaces - confirm.
 * NOTE(review): in the loop-based path the later tile loops use y = groupsy and
 *   x = groupsx, while the first uses groupsy * TY and groupsx * TX - confirm.
 * NOTE(review): the compression calls use elm.barrier and yArr, which the
 *   visible loop-based path does not declare - confirm that path still builds.
 * NOTE(review): the text ends after the final "#else }}"; the matching #endif
 *   and the closing brace of the function are not visible here.
 */
void ff(TextureCompressDXT,format,coding,fiting)(RESOURCEINFO &texo, RESOURCEINFO &texd, ULONG *texs, ULONG *texr, int level, int l, int blocksize, int flags) { /* square dimension of this surface-level */ /* square area of this surface-level */ const int lv = (1 << l); const int av = lv * lv; /* ------------------------------------------------------------------------------------------------------- */ const int NORMALS_SCALEBYLEVEL = ::NORMALS_SCALEBYLEVEL; const int ALPHAS_SCALEBYLEVEL = ::ALPHAS_SCALEBYLEVEL; const float colorgamma = ::colorgamma; const float alphacontrast = ::alphacontrast; const float colorgammainv = ::colorgammainv; const float alphacontrastinv = ::alphacontrastinv; int iwidth = texo.Width; int iheight = texo.Height; int owidth = texd.Width; int oheight = texd.Height; int cwidth = owidth; int cheight = oheight; /* get the data back to the CPU */ cheight = (cheight + 3) / 4; /* 4x4 LONG ... */ cwidth = (cwidth + 3) / 4; /* 4x4 LONG ... */ cwidth *= 2 * blocksize; /* ... 
to 2x|4x LONG */ /* ensure tile ability (bit on overhead for non-4 resolutions) */ owidth = (owidth + (TX - 1)) & (~(TX - 1)); oheight = (oheight + (TY - 1)) & (~(TY - 1)); assert((owidth & (TX - 1)) == 0); assert((oheight & (TY - 1)) == 0); #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) /* constant buffer array */ Concurrency::array_view<const SingleColourLookup_CCR, 2> lArr(2, 256, (const SingleColourLookup_CCR *)::lookup_34_56_ccr); Concurrency::array_view<const IndexBlockLookup_CCR, 2> yArr(4, 8, (const IndexBlockLookup_CCR *)::lookup_c34a57_ccr); /* get a two-dimensional extend over the whole output (without re-cast to LONG), * then get a tile-extend over that one () */ Concurrency::extent<2> ee(oheight, owidth); Concurrency::tiled_extent<TY, TX> te(ee); Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs); Concurrency::array_view< unsigned int, 2> dArr(cheight, cwidth, ( unsigned int *)texr); Concurrency::parallel_for_each(te /*dArr.extent.tile<TY, TX>(osize)*/, [=](tiled_index<TY, TX> elm) restrict(amp) { typedef type accu[DIM]; /* tile static memory */ // tile_static UTYPE bTex[2][TY*TX]; tile_static type fTex[2][TY*TX][DIM]; tile_static int iTex[2][TY*TX][DIM]; /* generate this level's 4x4-block from the original surface */ // const int y = elm.global[0] - ly; // const int x = elm.global[1] - lx; const int y = elm.tile[0] * TY; const int x = elm.tile[1] * TX; const int ly = elm.local[0]; const int lx = elm.local[1]; const int lxy = ly * TX + lx; #else Concurrency::array_view<const SingleColourLookup_CCR, 2> lArr(2, 256, (const SingleColourLookup_CCR *)::lookup_34_56_ccr); Concurrency::array_view<const unsigned int, 2> sArr(iheight, iwidth, (const unsigned int *)texs); Concurrency::array_view< unsigned int, 2> dArr(cheight, cwidth, ( unsigned int *)texr, true); for (int groupsy = 0; groupsy < (owidth / TY); groupsy++) for (int groupsx = 0; groupsx < (oheight / TX); groupsx++) { typedef type 
accu[DIM]; /* tile static memory */ // UTYPE bTex[2][TY*TX]; type fTex[2][TY*TX][DIM]; int iTex[2][TY*TX][DIM]; for (int tiley = 0; tiley < TY; tiley++) for (int tilex = 0; tilex < TX; tilex++) { const int y = groupsy * TY; const int x = groupsx * TX; const int ly = tiley; const int lx = tilex; const int lxy = ly * TX + lx; #endif { const int yl = ((y + ly) << l); const int xl = ((x + lx) << l); accu tt = {0}; /* access all pixels this level's 4x4-block represents in * the full dimension original surface (high quality mip-mapping) */ for (int oy = 0; oy < lv; oy += 1) { for (int ox = 0; ox < lv; ox += 1) { /* assume seamless tiling: wrap pixels around */ const int posx = (xl + ox) % iwidth; const int posy = (yl + oy) % iheight; const ULONG &t = sArr(posy, posx); Accu(tt, t); // += } } /* build average of each channel */ Norm(fTex[0][lxy], tt, av, level, l); } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) tile_static accu tr; tr[lxy & 7] = 0; tile_static_memory_fence(elm.barrier); // elm.barrier.wait_with_tile_static_memory_fence(); #else } accu tr = {0}; #endif /* runs on only 1 thread per tile (reduction) */ #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) if (elm.local == index<2>(0, 0)) #endif { /* analyze this level's 4x4-block */ for (int lxy = 0; lxy < TY*TX; lxy += 1) { Look(fTex[0][lxy], tr); } } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) tile_static_memory_fence(elm.barrier); // elm.barrier.wait_with_tile_static_memory_fence(); #else for (int tiley = 0; tiley < TY; tiley++) for (int tilex = 0; tilex < TX; tilex++) { const int y = groupsy; const int x = groupsx; const int ly = tiley; const int lx = tilex; const int lxy = ly * TX + lx; #endif /* generate this level's 4x4-block from the original surface */ { /* build average of each channel an join */ Code (fTex[0][lxy], tr, (TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS (format) ? 2 : 0)) == 2 ? 8 : (TCOMPRESS_SWIZZL (format) ? 
6 : 5)); Range(iTex[0][lxy], fTex[0][lxy]); #if (TCOMPRESS_SWIZZL(format)) /* swizzle ABGR -> AGBR */ { int swap = iTex[0][lxy][1]; iTex[0][lxy][1] = iTex[0][lxy][2]; iTex[0][lxy][2] = swap ; } #endif /* write the result */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #if ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 4) /* ABGR -> RGBA */ /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 3) /* -BGR -> RGB- */ iTex[0][lxy][0] = 0xFF; /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 2) /* --YX -> XY-- */ /* AL-- -> LA-- */ #if (format == TCOMPRESS_XYz) iTex[0][lxy][0] = iTex[0][lxy][2], // Y iTex[1][lxy][0] = iTex[0][lxy][3]; // X #else iTex[0][lxy][0] = iTex[0][lxy][0], // A iTex[1][lxy][0] = iTex[0][lxy][1]; // Z #endif /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 
2 : 0)) == 1) /* -Z-- -> Z--- */ /* A--- -> A--- */ /* -LLL -> L--- */ #if (format == TCOMPRESS_a ) iTex[0][lxy][0] = iTex[0][lxy][0]; // A #elif (format == TCOMPRESS_A ) iTex[0][lxy][0] = iTex[0][lxy][0]; // A #elif (format == TCOMPRESS_xyZ) iTex[0][lxy][0] = iTex[0][lxy][1]; // Z #else iTex[0][lxy][0] = iTex[0][lxy][3]; // X #endif #else #error #endif } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) tile_static_memory_fence(elm.barrier); // elm.barrier.wait_with_tile_static_memory_fence(); #define local_is(a,b) elm.local == index<2>(a, b) #else } for (int tiley = 0; tiley < TY; tiley++) for (int tilex = 0; tilex < TX; tilex++) { const int y = groupsy; const int x = groupsx; const int ly = tiley; const int lx = tilex; const int lxy = ly * TX + lx; #define local_is(a,b) ((ly == a) && (lx == b)) #endif /* put this level's 4x4-block into the destination surface */ { /* round down */ int posx = (x + lx) >> 2; int posy = (y + ly) >> 2; /* first and second block */ unsigned int b[2][2]; /* compress to DXT1/DXT3/DXT5/ATI1/ATI2 */ #define sflgs TCOMPRESS_COLOR(format) ? SQUISH_METRIC_PERCEPTUAL : SQUISH_METRIC_UNIFORM, \ TCOMPRESS_TRANS(format), \ fiting /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #if ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 4) || \ ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 
2 : 0)) == 3) /* 1x LONG per block for DXT1, 2x for the others */ #if (coding == 1) { posx <<= 0; squish::CompressColorBtc1(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr); dArr(posy, posx + 0) = b[1][0]; dArr(posy, posx + 1) = b[1][1]; } #elif (coding == 2) { posx <<= 1; squish::CompressAlphaBtc2(elm.barrier, lxy, iTex[0], 0xFFFF, b[0] , yArr ); squish::CompressColorBtc2(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr); dArr(posy, posx + 0) = b[0][0]; dArr(posy, posx + 1) = b[0][1]; dArr(posy, posx + 2) = b[1][0]; dArr(posy, posx + 3) = b[1][1]; } #elif (coding == 3) { posx <<= 1; squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0] , yArr ); squish::CompressColorBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[1], sflgs, yArr, lArr); dArr(posy, posx + 0) = b[0][0]; dArr(posy, posx + 1) = b[0][1]; dArr(posy, posx + 2) = b[1][0]; dArr(posy, posx + 3) = b[1][1]; } #else #error #endif /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 2 : 0)) == 1) /* 1x LONG for ATI1 */ #if (coding == 4) { posx <<= 0; squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0] , yArr ); dArr(posy, posx + 0) = b[0][0]; dArr(posy, posx + 1) = b[0][1]; } #else #error #endif /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ #elif ((TCOMPRESS_CHANNELS(format) + (TCOMPRESS_GREYS(format) ? 
2 : 0)) == 2) /* 2x LONG for ATI2 */ #if (coding == 5) { posx <<= 1; squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[0], 0xFFFF, b[0] , yArr ); squish::CompressAlphaBtc3(elm.barrier, lxy, iTex[1], 0xFFFF, b[1] , yArr ); dArr(posy, posx + 0) = b[0][0]; dArr(posy, posx + 1) = b[0][1]; dArr(posy, posx + 2) = b[1][0]; dArr(posy, posx + 3) = b[1][1]; } #else #error #endif #else #error #endif #undef sflgs // elm.barrier.wait(); /* advance pointer of compressed blocks */ // wTex += blocksize; // dTex += blocksize; } #if defined(SQUASH_USE_AMP) && !defined(SQUASH_USE_AMP_DEBUG) // elm.barrier.wait(); // dTex += 0; }); dArr.synchronize(); #else }}