///////////////////////////////////////////////transforms///////////////////////////////////////////////////////////////////// void mBior53::transrows(char** dest, char** sour, unsigned int w, unsigned int h) const { char srck0[8] = {0, 0, 0, 0, 0, 0, 0, 0}; //int n; float s, d; unsigned int w2 = w / 2; const vec1D& tH = gettH(); const vec1D& tG = gettG(); for (unsigned int y = 0; y < h; y++) { //k=0 unsigned int k = 0; srck0[0] = sour[y][2]; srck0[1] = sour[y][1]; srck0[2] = sour[y][0]; srck0[3] = sour[y][1]; s = conv(tH.data(), srck0); s += tH[2] * (float)sour[y][2*k + 2]; d = conv(tG.data(), &sour[y][2*k]); dest[y][k] = mmxround(s); dest[y][k+w2] = mmxroundTH(d); //k=1, k<w2-1 for (k = 1; k < w2 - 1; k++) { s = conv(tH.data(), &sour[y][2*k - 2]); s += tH[2] * (float)sour[y][2*k + 2]; d = conv(tG.data(), &sour[y][2*k]); dest[y][k] = mmxround(s); dest[y][k+w2] = mmxroundTH(d); } //k=w2-1 k = w2 - 1; srck0[0] = sour[y][2*k]; srck0[1] = sour[y][2*k+1]; srck0[2] = sour[y][2*k]; srck0[3] = sour[y][2*k-1]; s = conv(tH.data(), &sour[y][2*k - 2]); s += tH[2] * (float)sour[y][2*k]; d = conv(tG.data(), srck0); dest[y][k] = mmxround(s); dest[y][k+w2] = mmxroundTH(d); } }
void FWT2D::transcols(char** dest, char** sour, unsigned int w, unsigned int h) const { int n; float s, d; unsigned int h2 = h / 2; const vec1D& tH = gettH(); const vec1D& tG = gettG(); for (unsigned int x = 0; x < w; x++) { for (unsigned int k = 0; k < h2; k++) { s = 0.0f; d = 0.0f; for (int m = tH.first(); m <= tH.last(); m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); s += tH[m] * float(sour[n][x]); } for (int m = tG.first(); m <= tG.last(); m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); d += tG[m] * float(sour[n][x]); } if (x < w / 2) dest[k][x] = mmxround(s); else dest[k][x] = mmxroundTH(s); //is this needed? hi band were TH'ed on transrows dest[k+h2][x] = mmxroundTH(d); //is this needed? hi band were TH'ed on transrows on x>w/2 } } }
///////////////////////////////////////////////transforms///////////////////////////////////////////////////////////////////// void FWT2D::transrows(char** dest, char** sour, unsigned int w, unsigned int h) const { int n; float s, d; unsigned int w2 = w / 2; const vec1D& tH = gettH(); const vec1D& tG = gettG(); for (unsigned int y = 0; y < h; y++) { for (unsigned int k = 0; k < w2; k++) { s = 0.0f; d = 0.0f; for (int m = tH.first(); m <= tH.last(); m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)w) n -= 2 * (1 + n - w); s += tH[m] * float(sour[y][n]); } for (int m = tG.first(); m <= tG.last(); m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)w) n -= 2 * (1 + n - w); d += tG[m] * float(sour[y][n]); } dest[y][k] = mmxround(s); dest[y][k+w2] = mmxroundTH(d); } } }
void mBior53::transcols(char** dest, char** sour, unsigned int w, unsigned int h) const { float fz = 0.0f; int n; float s, d; __m128 ms, md; unsigned int h2 = h / 2; const vec1D& tH = gettH(); const vec1D& tG = gettG(); for (unsigned int x = 0; x < w / 4; x++) { //x<w/4 x = 4*x for (unsigned int k = 0; k < h2; k++) { ms = _mm_load_ss(&fz); md = ms; for (int m = -2; m <= 2; m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); ms = _mm_add_ps(ms, _mm_mul_ps(_mm_load_ps1(tH.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x])))); } for (int m = 0; m <= 2; m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); md = _mm_add_ps(md, _mm_mul_ps(_mm_load_ps1(tG.data(m)), _mm_cvtpi8_ps(*(__m64 *)(&sour[n][4*x])))); } if (4*x < w / 2) { if ((w / 2) - (4*x) >= 4) mmxround4(&dest[k][4*x], ms); else mmxround4TH(&dest[k][4*x], ms, (w / 2) - (4*x)); //skip first from LL part 10/2-4=1 [lo] o o o o * | * * * o o [hi] } else mmxround4TH(&dest[k][4*x], ms); mmxround4TH(&dest[k+h2][4*x], md); } } _mm_empty(); //odd remainder for (unsigned int x = w - (w % 4); x < w; x++) { for (unsigned int k = 0; k < h2; k++) { s = 0; d = 0; for (int m = -2; m <= 2; m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); s += tH[m] * float(sour[n][x]); } for (int m = 0; m <= 2; m++) { n = 2 * k + m; if (n < 0) n = 0 - n; if (n >= (int)h) n -= 2 * (1 + n - h); d += tG[m] * float(sour[n][x]); } if (x < w / 2) dest[k][x] = mmxround(s); else dest[k][x] = mmxroundTH(s); //is this needed? hi band were TH'ed on transrows dest[k+h2][x] = mmxroundTH(d); //is this needed? hi band were TH'ed on transrows on x>w/2 } } }