/*
 * sad16_altivec - sum of absolute differences over 16 rows, with early exit.
 *
 * cur      : current block; must be 16-byte aligned.
 * ref      : reference block; may be unaligned (a permute vector is derived
 *            from its address to realign the loads).
 * stride   : distance between rows in bytes; must be a multiple of 16.
 * best_sad : best SAD found so far; once the running total reaches it the
 *            function stops early.
 *
 * Returns the accumulated sum.  When the early exit is taken the value is a
 * partial sum that is already >= best_sad.
 *
 * NOTE(review): SAD16(), ZERODEF and ZEROVEC are macros defined elsewhere.
 * SAD16() presumably handles one 16-byte row, accumulates into `sad`, uses
 * the t1..t4 temporaries and advances cur/ref by `stride` -- confirm against
 * the macro definition.
 */
unsigned long sad16_altivec(const vector unsigned char *cur,
                            const vector unsigned char *ref,
                            unsigned long stride,
                            const unsigned long best_sad)
{
	vector unsigned char perm;
	/* not referenced directly here; presumably scratch for SAD16() */
	vector unsigned char t1, t2, t3, t4;
	vector unsigned int sad;
	vector signed int sumdiffs, best_vec;
	int best32;
	int result;
	ZERODEF;

#ifdef DEBUG
	/* ref is deliberately not checked: it is allowed to be unaligned */
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n",
		        (unsigned long) cur);
	if (stride & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, stride: %lx\n",
		        stride);
#endif

	/* initialization */
	sad = (vector unsigned int) (ZEROVEC);
	stride >>= 4;	/* bytes -> units of 16-byte vectors */
	perm = vec_lvsl(0, (unsigned char *) ref);

	/*
	 * The threshold is compared as a signed 32-bit element, so clamp it to
	 * INT_MAX and write exactly one 32-bit element.  Writing through an
	 * `unsigned long *` would spill over two elements where long is 64 bits.
	 * Only element 0 is written; the splat then fills every element from it.
	 */
	best32 = (best_sad > 0x7fffffffUL) ? 0x7fffffff : (int) best_sad;
	*((int *) &best_vec) = best32;
	best_vec = vec_splat(best_vec, 0);

	/* perform sum of differences between current and previous */
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	/*
	 * Temp sum for exit.  vec_sums leaves the total in element 3 and zeroes
	 * elements 0-2, so the test must be "any element >= best": an "all"
	 * test could never succeed for a positive threshold.
	 */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
	if (vec_any_ge(sumdiffs, best_vec))
		goto bail;

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
	if (vec_any_ge(sumdiffs, best_vec))
		goto bail;

	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);

bail:
	/*
	 * Copy the vector sum into a scalar.  The splat makes every element
	 * equal, so whichever element vec_ste selects from the address holds the
	 * total.  The store is 32 bits wide, hence the 32-bit temporary; it is
	 * zero-extended to the return type.
	 */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, &result);
	return (unsigned long) (unsigned int) result;
}
/*
 * sad16_altivec_c - sum of absolute differences over 16 rows.
 *
 * cur      : current block; the DEBUG check expects 16-byte alignment.
 * ref      : reference block; a permute vector is derived from its address,
 *            so it is presumably allowed to be unaligned.
 * stride   : distance between rows in bytes; the DEBUG check expects a
 *            multiple of 16.
 * best_sad : threshold, splatted into best_vec.
 *
 * Returns element 3 of the `sumdiffs` accumulator.
 *
 * NOTE(review): SAD16() is a macro defined elsewhere.  Nothing in this
 * function body reads best_vec, updates sumdiffs or jumps to `bail`, so the
 * macro presumably does all three (accumulating into sumdiffs and branching
 * to `bail` once the threshold is reached) -- confirm against its definition.
 */
uint32_t sad16_altivec_c(vector unsigned char *cur,
                         vector unsigned char *ref,
                         uint32_t stride,
                         const uint32_t best_sad)
{
	vector unsigned char perm;
	/* not referenced directly here; presumably scratch for SAD16() */
	vector unsigned char t1, t2;
	vector unsigned int sad;
	vector unsigned int sumdiffs;
	vector unsigned int best_vec;
	uint32_t result;

#ifdef DEBUG
	/* print alignment errors if DEBUG is on */
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n",
		        (unsigned long) cur);
	if (stride & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n",
		        (unsigned long) stride);
#endif

	/* initialization: both accumulators start at zero */
	sad = vec_splat_u32(0);
	sumdiffs = sad;
	stride >>= 4;	/* bytes -> units of 16-byte vectors */
	perm = vec_lvsl(0, (unsigned char *) ref);

	/* only element 0 is written; the splat then fills every element from it */
	*((uint32_t *) &best_vec) = best_sad;
	best_vec = vec_splat(best_vec, 0);

	/* perform sum of differences between current and previous */
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

	SAD16();
	SAD16();
	SAD16();
	SAD16();

bail:
	/*
	 * Copy the vector sum into a scalar.  The splat makes every element
	 * equal, so whichever element vec_ste selects from the address holds
	 * element 3 of the accumulator.
	 */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (uint32_t *) &result);
	return result;
}