/**
 * Zero the end of a buffer in 32-byte chunks using SH4 FPU stores.
 *
 * Stores run downward from dst + size, 32 bytes per loop iteration, for
 * size / 32 iterations. Consequently the lowest size % 32 bytes at dst are
 * left untouched when size is not a multiple of 32.
 *
 * NOTE(review): the name suggests dst (and dst + size) must be 8-byte
 * aligned for the 64-bit stores -- confirm against the callers.
 * NOTE(review): the asm writes fr0/fr1 and the T bit (via dt) without
 * listing them as clobbers; consider adding "fr0", "fr1", "t" once checked
 * against the toolchain in use.
 *
 * @param dst   start of the region to clear
 * @param size  length of the region in bytes; values below 32 clear nothing
 */
static void memzero_align8(void *dst,size_t size)
{
    int fpscr;

    /*
     * The loop decrements its counter before testing it (dt, then bf.s), so
     * a chunk count of zero would wrap around and store far below dst.
     * Bail out before touching the FPU state when there is no whole chunk.
     */
    if (size < 32)
        return;

    fp_single_enter(fpscr);
    dst = (char *)dst + size;   /* stores pre-decrement, so start at the end */
    size /= 32;                 /* number of 32-byte chunks, >= 1 here */
    __asm__ volatile (
    " fldi0 fr0\n"              /* fr0 = 0.0f */
    " fldi0 fr1\n"              /* fr1 = 0.0f, so the dr0 pair is all zero bits */
    " fschg\n"                  /* double */
    "1: \n"
    " dt %1\n"                  /* --size; T is set when it reaches zero */
    " fmov dr0,@-%0\n"
    " fmov dr0,@-%0\n"
    " fmov dr0,@-%0\n"
    " bf.s 1b\n"                /* loop while T is clear ... */
    " fmov dr0,@-%0\n"          /* ... fourth store sits in the delay slot */
    " fschg"                    /* back to single */
    : "+r"(dst),"+r"(size) :: "memory" );
    fp_single_leave(fpscr);
}
/**
 * 8x8 transform of a coefficient block, done in place (going by its name,
 * an inverse DCT for SH4).
 *
 * Works in two passes around a local 8x8 float buffer:
 *   - row pass:    reads block[], fills tblock[];
 *   - column pass: reads tblock[], writes DESCALE(..., 3) results back
 *                  over block[].
 * Each pass treats the even-indexed and odd-indexed inputs separately,
 * loading even_table or odd_table with load_matrix() before the matching
 * ftrv() calls, then combines the two halves as sums and differences.
 *
 * NOTE(review): DEFREG, fr0..fr3, load_matrix(), ftrv(), DESCALE(),
 * fp_single_enter()/fp_single_leave() and both tables are defined outside
 * this block. fr0..fr3 are presumably introduced by DEFREG, and ftrv()
 * presumably transforms fr0..fr3 in place with the most recently loaded
 * matrix -- confirm against their definitions. Statement order around
 * ftrv() should be treated as significant.
 *
 * @param block  64 coefficients laid out as 8 rows of 8; overwritten with
 *               the result.
 */
void idct_sh4(DCTELEM *block)
{
    DEFREG;
    int i;
    float tblock[8*8],*fblock;
    int ofs1,ofs2,ofs3;
    int fpscr;

    /* presumably puts the FPU into the mode the code below needs and keeps
       the previous state in fpscr -- TODO confirm */
    fp_single_enter(fpscr);

    /* row */

    /* even part */
    load_matrix(even_table);
    /* Stores below pre-decrement, so point just past slots 0..3 of row 0. */
    fblock = tblock+4;
    i = 8;
    do {
        /* even-indexed inputs of the current row */
        fr0 = block[0];
        fr1 = block[2];
        fr2 = block[4];
        fr3 = block[6];
        block+=8;
        ftrv();
        /* park the four results in slots 3..0 of the matching tblock row */
        *--fblock = fr3;
        *--fblock = fr2;
        *--fblock = fr1;
        *--fblock = fr0;
        /* from slot 0 of this row to slot 4 of the next one */
        fblock+=8+4;
    } while(--i);
    /* rewind both pointers to the start of their buffers */
    block-=8*8;
    fblock-=8*8+4;

    load_matrix(odd_table);
    i = 8;
    do {
        float t0,t1,t2,t3;
        /* odd-indexed inputs of the current row */
        fr0 = block[1];
        fr1 = block[3];
        fr2 = block[5];
        fr3 = block[7];
        block+=8;
        ftrv();
        /* fetch the even-part results stored in slots 0..3 by the loop above */
        t0 = *fblock++;
        t1 = *fblock++;
        t2 = *fblock++;
        t3 = *fblock++;
        /* move to one past slot 7, then fill the row from the top down */
        fblock+=4;
        *--fblock = t0 - fr0;   /* slot 7 */
        *--fblock = t1 - fr1;   /* slot 6 */
        *--fblock = t2 - fr2;   /* slot 5 */
        *--fblock = t3 - fr3;   /* slot 4 */
        *--fblock = t3 + fr3;   /* slot 3 */
        *--fblock = t2 + fr2;   /* slot 2 */
        *--fblock = t1 + fr1;   /* slot 1 */
        *--fblock = t0 + fr0;   /* slot 0 */
        fblock+=8;              /* next row */
    } while(--i);
    /* rewind both pointers again for the column pass */
    block-=8*8;
    fblock-=8*8;

    /* col */

    /* even part */
    load_matrix(even_table);
    /* byte offsets from fblock to the same column in rows 2, 4 and 6 */
    ofs1 = sizeof(float)*2*8;
    ofs2 = sizeof(float)*4*8;
    ofs3 = sizeof(float)*6*8;
    i = 8;

/* Float located ofs BYTES past fblock. The macro stays defined after this
   function, and its arguments are not parenthesized. */
#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)

    do {
        /* rows 0, 2, 4, 6 of the current column, transformed in place */
        fr0 = OA(fblock, 0);
        fr1 = OA(fblock,ofs1);
        fr2 = OA(fblock,ofs2);
        fr3 = OA(fblock,ofs3);
        ftrv();
        OA(fblock,0 ) = fr0;
        OA(fblock,ofs1) = fr1;
        OA(fblock,ofs2) = fr2;
        OA(fblock,ofs3) = fr3;
        fblock++;               /* next column */
    } while(--i);
    fblock-=8;                  /* back to column 0 */

    load_matrix(odd_table);
    i=8;
    do {
        float t0,t1,t2,t3;
        /* even-part results written by the previous loop */
        t0 = OA(fblock, 0); /* [8*0] */
        t1 = OA(fblock,ofs1); /* [8*2] */
        t2 = OA(fblock,ofs2); /* [8*4] */
        t3 = OA(fblock,ofs3); /* [8*6] */
        /* one row down, so the same offsets now address rows 1, 3, 5, 7 */
        fblock+=8;
        fr0 = OA(fblock, 0); /* [8*1] */
        fr1 = OA(fblock,ofs1); /* [8*3] */
        fr2 = OA(fblock,ofs2); /* [8*5] */
        fr3 = OA(fblock,ofs3); /* [8*7] */
        /* back up one row and step to the next column */
        fblock+=-8+1;
        ftrv();
        /* output rows k and 7-k of this column get the sum and the difference */
        block[8*0] = DESCALE(t0 + fr0,3);
        block[8*7] = DESCALE(t0 - fr0,3);
        block[8*1] = DESCALE(t1 + fr1,3);
        block[8*6] = DESCALE(t1 - fr1,3);
        block[8*2] = DESCALE(t2 + fr2,3);
        block[8*5] = DESCALE(t2 - fr2,3);
        block[8*3] = DESCALE(t3 + fr3,3);
        block[8*4] = DESCALE(t3 - fr3,3);
        block++;                /* next output column */
    } while(--i);

    /* counterpart of fp_single_enter() above; presumably restores the FPU
       state kept in fpscr -- TODO confirm */
    fp_single_leave(fpscr);
}