/* Compile-coverage exercise for the AltiVec builtins: each call below
   checks that the builtin accepts correctly-typed arguments.  The result
   is assigned only so the statement is well-formed; values are unused.
   NOTE(review): all operands (z, x, y, c, d, s, t, i, j, f, g, uc, pi,
   int1, int2) are presumably file-scope globals declared outside this
   chunk — verify against the rest of the file.  The repeated
   vec_any_ge lines appear intentional (per-type coverage). */
void b()
{
  z = vec_add (x, y);

  /* Make sure the predicates accept correct argument types.  */
  int1 = vec_all_in (f, g);
  int1 = vec_all_ge (f, g);
  int1 = vec_all_eq (c, d);
  int1 = vec_all_ne (s, t);
  int1 = vec_any_eq (i, j);
  int1 = vec_any_ge (f, g);
  int1 = vec_all_ngt (f, g);
  int1 = vec_any_ge (c, d);
  int1 = vec_any_ge (s, t);
  int1 = vec_any_ge (i, j);
  int1 = vec_any_ge (c, d);
  int1 = vec_any_ge (s, t);
  int1 = vec_any_ge (i, j);

  /* VSCR access and data-stream control.  */
  vec_mtvscr (i);
  vec_dssall ();
  s = (vector signed short) vec_mfvscr ();
  vec_dss (3);

  /* Data-stream touch variants (address, control word, stream id).  */
  vec_dst (pi, int1 + int2, 3);
  vec_dstst (pi, int1 + int2, 3);
  vec_dststt (pi, int1 + int2, 3);
  vec_dstt (pi, int1 + int2, 3);

  /* Permute-vector generation for unaligned access.  */
  uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi);
  uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi);

  /* Element and full-vector loads.  */
  c = vec_lde (int1, (signed char *) pi);
  s = vec_lde (int1, (signed short *) pi);
  i = vec_lde (int1, (signed int *) pi);
  i = vec_ldl (int1, pi);
  i = vec_ld (int1, pi);

  /* Full-vector and element stores.  */
  vec_st (i, int2, pi);
  vec_ste (c, int2, (signed char *) pi);
  vec_ste (s, int2, (signed short *) pi);
  vec_ste (i, int2, (signed int *) pi);
  vec_stl (i, int2, pi);
}
/* * add prediction and prediction error, saturate to 0...255 * pred % 8 == 0 * cur % 8 == 0 * lx % 16 == 0 * blk % 16 == 0 */ void add_pred_altivec(ADD_PRED_PDECL) { #ifdef ALTIVEC_DST unsigned int dst; #endif uint8_t *pCA, *pCB, *pPA, *pPB; int16_t *pBA, *pBB; vector unsigned char zero; vector unsigned char predA, predB, curA, curB; vector signed short blkA, blkB; #ifdef ALTIVEC_VERIFY if (NOT_VECTOR_ALIGNED(lx)) mjpeg_error_exit1("add_pred: lx %% 16 != 0, (%d)", lx); if (NOT_VECTOR_ALIGNED(blk)) mjpeg_error_exit1("add_pred: blk %% 16 != 0, (%d)", blk); #ifdef ALTIVEC_DST if (lx & (~0xffff) != 0) mjpeg_error_exit1("add_pred: lx=%d > vec_dst range", lx); #endif if (((unsigned long)pred & 0xf) != ((unsigned long)cur & 0xf)) mjpeg_error_exit1("add_pred: (pred(0x%X) %% 16) != (cur(0x%X) %% 16)", pred, cur); if ((((unsigned long)pred) & 0x7) != 0) mjpeg_error_exit1("add_pred: pred %% 8 != 0, (0x%X)", pred); if ((((unsigned long)cur) & 0x7) != 0) mjpeg_error_exit1("add_pred: cur %% 8 != 0, (0x%X)", cur); #endif /* MACROS expand differently depending on input */ #define ABBA(symbol,ab) _ABBA(ABBA_##ab,symbol) /* {{{ */ #define _ABBA(abba_ab,symbol) abba_ab(symbol) #define ABBA_A(symbol) symbol##B #define ABBA_B(symbol) symbol##A /* }}} */ #define HLLH(symbol,hl) _HLLH(HLLH_##hl,symbol) /* {{{ */ #define _HLLH(hllh_hl,symbol) hllh_hl(symbol) #define HLLH_h(symbol) symbol##l #define HLLH_l(symbol) symbol##h /* }}} */ #define PACKSU(hl,st,ld) _PACKSU(PACKSU_##hl,st,ld) /* {{{ */ #define _PACKSU(psu,st,ld) psu(st,ld) #define PACKSU_h(st,ld) vec_packsu(st,ld) #define PACKSU_l(st,ld) vec_packsu(ld,st) /* }}} */ #define PERFORM_ITERATION(hl,ab,iter) /* iter {{{ */ \ pred##ab = vec_merge##hl(zero, pred##ab); \ cur##ab = HLLH(vec_merge,hl)(zero, cur##ab); \ blk##ab = vec_add(blk##ab, vs16(pred##ab)); \ blk##ab = vec_max(blk##ab, vs16(zero)); \ cur##ab = PACKSU(hl, vu16(blk##ab), vu16(cur##ab)); \ vec_st(cur##ab, 0, pC##ab); \ /* }}} */ #define PREPARE_ITERATION(hl,ab,iter) /* 
iter {{{ */ \ pP##ab = ABBA(pP,ab) + lx; \ pC##ab = ABBA(pC,ab) + lx; \ pB##ab = ABBA(pB,ab) + 8; \ pred##ab = vec_ld(0, pP##ab); \ cur##ab = vec_ld(0, pC##ab); \ blk##ab = vec_ld(0, pB##ab); \ /* }}} */ #define NO_RESCHEDULE asm volatile ("") AMBER_START; pPA = pred; pCA = cur; pBA = blk; #ifdef ALTIVEC_DST dst = 0x01080000 | lx; vec_dst(pPA, dst, 0); vec_dst(pCA, dst, 1); dst = 0x01080010; vec_dst(pBA, dst, 2); #endif predA = vec_ld(0, pPA); curA = vec_ld(0, pCA); NO_RESCHEDULE; pPB = pPA + lx; NO_RESCHEDULE; blkA = vec_ld(0, pBA); NO_RESCHEDULE; pCB = pCA + lx; NO_RESCHEDULE; predB = vec_ld(0, pPB); NO_RESCHEDULE; pBB = pBA + 8; NO_RESCHEDULE; curB = vec_ld(0, pCB); NO_RESCHEDULE; zero = vec_splat_u8(0); NO_RESCHEDULE; blkB = vec_ld(0, pBB); if (VECTOR_ALIGNED(pPA)) { PERFORM_ITERATION(h,A,0); PREPARE_ITERATION(h,A,2); /* prepare next A iteration */ PERFORM_ITERATION(h,B,1); PREPARE_ITERATION(h,B,3); /* prepare next B iteration */ PERFORM_ITERATION(h,A,2); PREPARE_ITERATION(h,A,4); PERFORM_ITERATION(h,B,3); PREPARE_ITERATION(h,B,5); PERFORM_ITERATION(h,A,4); PREPARE_ITERATION(h,A,6); PERFORM_ITERATION(h,B,5); PREPARE_ITERATION(h,B,7); PERFORM_ITERATION(h,A,6); PERFORM_ITERATION(h,B,7); } else { PERFORM_ITERATION(l,A,0); PREPARE_ITERATION(l,A,2); /* prepare next A iteration */ PERFORM_ITERATION(l,B,1); PREPARE_ITERATION(l,B,3); /* prepare next B iteration */ PERFORM_ITERATION(l,A,2); PREPARE_ITERATION(l,A,4); PERFORM_ITERATION(l,B,3); PREPARE_ITERATION(l,B,5); PERFORM_ITERATION(l,A,4); PREPARE_ITERATION(l,A,6); PERFORM_ITERATION(l,B,5); PREPARE_ITERATION(l,B,7); PERFORM_ITERATION(l,A,6); PERFORM_ITERATION(l,B,7); } #ifdef ALTIVEC_DST vec_dssall(); #endif AMBER_STOP; }
/* * subtract prediction from block data * pred % 8 == 0 * cur % 8 == 0 * lx % 16 == 0 * blk % 16 == 0 */ void sub_pred_altivec(SUB_PRED_PDECL) { unsigned int dst; uint8_t *pCA, *pCB, *pPA, *pPB; int16_t *pBA, *pBB; vector unsigned char zero; vector unsigned char predA, predB, curA, curB; vector signed short blkA, blkB; #ifdef ALTIVEC_VERIFY #ifdef ALTIVEC_DST if (lx & (~0xffff) != 0) mjpeg_error_exit1("sub_pred: lx > vec_dst range", lx); #endif if (NOT_VECTOR_ALIGNED(lx)) mjpeg_error_exit1("sub_pred: lx %% 16 != 0, (%d)", lx); if (NOT_VECTOR_ALIGNED(blk)) mjpeg_error_exit1("sub_pred: blk %% 16 != 0, (%d)", blk); if (((unsigned long)pred & 0xf) != ((unsigned long)cur & 0xf)) mjpeg_error_exit1("sub_pred: (pred(0x%X) %% 16) != (cur(0x%X) %% 16)", pred, cur); if ((((unsigned long)pred) & 0x7) != 0) mjpeg_error_exit1("sub_pred: pred %% 8 != 0, (0x%X)", pred); if ((((unsigned long)cur) & 0x7) != 0) mjpeg_error_exit1("sub_pred: cur %% 8 != 0, (0x%X)", cur); #endif /* A->B, B->A expand differently depending on input */ #define ABBA(symbol,ab) _ABBA(ABBA_##ab,symbol) /* {{{ */ #define _ABBA(abba_ab,symbol) abba_ab(symbol) #define ABBA_A(symbol) symbol##B #define ABBA_B(symbol) symbol##A /* }}} */ #define PERFORM_ITERATION(hl,ab,iter) /* iter {{{ */ \ pred##ab = vec_merge##hl(zero, pred##ab); \ cur##ab = vec_merge##hl(zero, cur##ab); \ blk##ab = vec_sub(vs16(cur##ab), vs16(pred##ab)); \ vec_st(blk##ab, 0, (signed short*)pB##ab); \ /* }}} */ #define PREPARE_ITERATION(hl,ab,iter) /* iter {{{ */ \ pP##ab = ABBA(pP,ab) + lx; \ pC##ab = ABBA(pC,ab) + lx; \ pB##ab = ABBA(pB,ab) + 8; \ pred##ab = vec_ld(0, pP##ab); \ cur##ab = vec_ld(0, pC##ab); \ /* }}} */ #define NO_RESCHEDULE asm volatile ("") AMBER_START; pPA = pred; pCA = cur; pBA = blk; #ifdef ALTIVEC_DST dst = 0x01080000 | lx; vec_dst(pPA, dst, 0); vec_dst(pCA, dst, 1); dst = 0x01080010; vec_dstst(pBA, dst, 2); #endif pPB = pPA + lx; NO_RESCHEDULE; predA = vec_ld(0, pPA); NO_RESCHEDULE; pCB = pCA + lx; NO_RESCHEDULE; curA = 
vec_ld(0, pCA); NO_RESCHEDULE; pBB = pBA + 8; NO_RESCHEDULE; predB = vec_ld(0, pPB); NO_RESCHEDULE; zero = vec_splat_u8(0); NO_RESCHEDULE; curB = vec_ld(0, pCB); if (VECTOR_ALIGNED(pPA)) { PERFORM_ITERATION(h,A,0); PREPARE_ITERATION(h,A,2); /* prepare next A iteration */ PERFORM_ITERATION(h,B,1); PREPARE_ITERATION(h,B,3); /* prepare next B iteration */ PERFORM_ITERATION(h,A,2); PREPARE_ITERATION(h,A,4); PERFORM_ITERATION(h,B,3); PREPARE_ITERATION(h,B,5); PERFORM_ITERATION(h,A,4); PREPARE_ITERATION(h,A,6); PERFORM_ITERATION(h,B,5); PREPARE_ITERATION(h,B,7); PERFORM_ITERATION(h,A,6); PERFORM_ITERATION(h,B,7); } else { PERFORM_ITERATION(l,A,0); PREPARE_ITERATION(l,A,2); /* prepare next A iteration */ PERFORM_ITERATION(l,B,1); PREPARE_ITERATION(l,B,3); /* prepare next B iteration */ PERFORM_ITERATION(l,A,2); PREPARE_ITERATION(l,A,4); PERFORM_ITERATION(l,B,3); PREPARE_ITERATION(l,B,5); PERFORM_ITERATION(l,A,4); PREPARE_ITERATION(l,A,6); PERFORM_ITERATION(l,B,5); PREPARE_ITERATION(l,B,7); PERFORM_ITERATION(l,A,6); PERFORM_ITERATION(l,B,7); } #ifdef ALTIVEC_DST vec_dssall(); #endif AMBER_STOP; }