void mp_shiftr1(digit_t* x, const unsigned int nwords)
{ // Multiprecision right shift by one, performed in place.
  // Input/output: x, an array of nwords digits. Each pair (x[i+1], x[i]) is handed to SHIFTR
  //               and the last digit x[nwords-1] is shifted on its own, so x[0] is treated
  //               as the least significant digit.
  // A zero-length operand (nwords == 0) is left untouched.
    unsigned int i;

    if (nwords == 0) {
        // Without this guard the unsigned expression nwords-1 wraps around to UINT_MAX:
        // the loop would run far past the end of x and x[nwords-1] would be out of bounds.
        return;
    }

    for (i = 0; i < nwords-1; i++) {
        // SHIFTR is defined elsewhere; presumably it writes to x[i] the value of x[i] shifted
        // right by one, with the low bit of x[i+1] moved into the vacated top bit.
        SHIFTR(x[i+1], x[i], 1, x[i], RADIX);
    }
    x[nwords-1] >>= 1;    // Top digit: there is no higher digit to take a bit from.
}
void fdct_int32( short *const In ) { short *pIn; int i; pIn = In; for(i=8; i>0; --i) { int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill; // even LOAD_BUTF(mm1,mm6, 1, 6, mm0, pIn); LOAD_BUTF(mm2,mm5, 2, 5, mm0, pIn); LOAD_BUTF(mm3,mm4, 3, 4, mm0, pIn); LOAD_BUTF(mm0,mm7, 0, 7, Spill, pIn); BUTF(mm1, mm2, Spill); BUTF(mm0, mm3, Spill); ROTATE(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, Spill, FIX-FPASS, HALF(FIX-FPASS)); pIn[2] = mm3; pIn[6] = mm2; BUTF(mm0, mm1, Spill); pIn[0] = SHIFTL(mm0, FPASS); pIn[4] = SHIFTL(mm1, FPASS); // odd mm3 = mm5 + mm7; mm2 = mm4 + mm6; ROTATE(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm0, FIX-FPASS, HALF(FIX-FPASS)); ROTATE(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm0, FIX-FPASS, HALF(FIX-FPASS)); mm7 += mm3; mm4 += mm2; pIn[1] = mm7; pIn[7] = mm4; ROTATE(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm0, FIX-FPASS, HALF(FIX-FPASS)); mm5 += mm3; mm6 += mm2; pIn[3] = mm6; pIn[5] = mm5; pIn += 8; } pIn = In; for(i=8; i>0; --i) { int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill; // even LOAD_BUTF(mm1,mm6, 1*8, 6*8, mm0, pIn); LOAD_BUTF(mm2,mm5, 2*8, 5*8, mm0, pIn); BUTF(mm1, mm2, mm0); LOAD_BUTF(mm3,mm4, 3*8, 4*8, mm0, pIn); LOAD_BUTF(mm0,mm7, 0*8, 7*8, Spill, pIn); BUTF(mm0, mm3, Spill); ROTATE(mm3, mm2, ROT6_C, ROT6_SmC, -ROT6_SpC, Spill, 0, HALF(FIX+FPASS+3)); pIn[2*8] = (int16_t)SHIFTR(mm3,FIX+FPASS+3); pIn[6*8] = (int16_t)SHIFTR(mm2,FIX+FPASS+3); mm0 += HALF(FPASS+3) - 1; BUTF(mm0, mm1, Spill); pIn[0*8] = (int16_t)SHIFTR(mm0, FPASS+3); pIn[4*8] = (int16_t)SHIFTR(mm1, FPASS+3); // odd mm3 = mm5 + mm7; mm2 = mm4 + mm6; ROTATE(mm2, mm3, ROT17_C, -ROT17_SpC, -ROT17_SmC, mm0, 0, HALF(FIX+FPASS+3)); ROTATE2(mm4, mm7, -ROT37_C, ROT37_SpC, ROT37_SmC, mm0); mm7 += mm3; mm4 += mm2; pIn[7*8] = (int16_t)SHIFTR(mm4,FIX+FPASS+3); pIn[1*8] = (int16_t)SHIFTR(mm7,FIX+FPASS+3); ROTATE2(mm5, mm6, -ROT13_C, ROT13_SmC, ROT13_SpC, mm0); mm5 += mm3; mm6 += mm2; pIn[5*8] = (int16_t)SHIFTR(mm5,FIX+FPASS+3); pIn[3*8] = (int16_t)SHIFTR(mm6,FIX+FPASS+3); pIn++; } }