/*
 * dev16_altivec
 *
 * Runs the MEAN16 macro sixteen times to build the `mean` accumulator,
 * reduces it to a single per-byte value replicated across `mn`, runs the
 * DEV16 macro sixteen times to build the `dev` accumulator, and returns the
 * horizontal sum of `dev`.
 *
 * MEAN16, DEV16, ZERODEF and ZEROVEC are defined outside this function;
 * presumably MEAN16(i) loads one 16-byte row into c<i> and advances `cur`,
 * and DEV16(i) accumulates |c<i> - mn| into `dev` -- confirm against the
 * macro definitions.
 *
 * cur:    pointer to the first vector to process.
 * stride: distance between rows; it is divided by 16 below, so it is
 *         presumably given in bytes and used in units of 16-byte vectors.
 *
 * Returns the 32-bit sum held in the last element after vec_sums, widened to
 * unsigned long.
 */
unsigned long
dev16_altivec(const vector unsigned char *cur, unsigned long stride)
{
    /*
     * None of these locals is referenced by name in this body; they are
     * presumably the scratch registers the MEAN16/DEV16 expansions use, so
     * their names must stay exactly as they are.
     */
    vector unsigned char t2, t3, t4, mn;
    vector unsigned int mean, dev;
    vector signed int sumdiffs;
    vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7,
                         c8, c9, c10, c11, c12, c13, c14, c15;
    /* 32-bit landing slot for vec_ste; see the note at the store below. */
    int sum32;
    ZERODEF;

    mean = (vector unsigned int) (ZEROVEC);
    dev = (vector unsigned int) (ZEROVEC);
    stride >>= 4;

    MEAN16(0);
    MEAN16(1);
    MEAN16(2);
    MEAN16(3);
    MEAN16(4);
    MEAN16(5);
    MEAN16(6);
    MEAN16(7);
    MEAN16(8);
    MEAN16(9);
    MEAN16(10);
    MEAN16(11);
    MEAN16(12);
    MEAN16(13);
    MEAN16(14);
    MEAN16(15);

    /* fold the four partial sums of `mean` into the last element */
    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);

    /*
     * Replicate byte 14 of the summed vector into every byte of `mn`.
     * Assuming big-endian element order, byte 14 holds bits 8..15 of the
     * total, i.e. total / 256 -- the mean if 16 x 16 bytes were summed.
     */
    mn = vec_perm((vector unsigned char) sumdiffs,
                  (vector unsigned char) sumdiffs,
                  (vector unsigned char) (14, 14, 14, 14, 14, 14, 14, 14,
                                          14, 14, 14, 14, 14, 14, 14, 14));

    DEV16(0);
    DEV16(1);
    DEV16(2);
    DEV16(3);
    DEV16(4);
    DEV16(5);
    DEV16(6);
    DEV16(7);
    DEV16(8);
    DEV16(9);
    DEV16(10);
    DEV16(11);
    DEV16(12);
    DEV16(13);
    DEV16(14);
    DEV16(15);

    /* sum all parts of difference into one 32 bit quantity */
    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);

    /*
     * Copy the vector sum into a scalar.  The splat makes every element
     * equal, so whichever element vec_ste selects from the (unaligned)
     * destination address carries the sum.
     *
     * vec_ste writes exactly one 32-bit element, so the destination must be
     * a 32-bit object.  Storing through an int * into an unsigned long would
     * leave the remaining bytes indeterminate wherever unsigned long is
     * wider than int.
     */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &sum32);

    /* widen via unsigned int so the bit pattern is zero-extended */
    return (unsigned long) (unsigned int) sum32;
}
/*
 * dev16_altivec_c
 *
 * Runs the MEAN16 macro sixteen times starting at `cur` to build the `mean`
 * accumulator, reduces it to a single per-byte value replicated across `mn`,
 * rewinds to `cur`, runs the DEV16 macro sixteen times to build the `dev`
 * accumulator, and returns the horizontal sum of `dev`.
 *
 * MEAN16 and DEV16 are defined outside this function; presumably each
 * invocation consumes the row at `ptr` and advances `ptr` by `stride` --
 * confirm against the macro definitions.
 *
 * cur:    pointer to the first vector to process.
 * stride: distance between rows; it is divided by 16 below, so it is
 *         presumably given in bytes and used in units of 16-byte vectors.
 *
 * Returns the 32-bit sum held in the last element after vec_sums.
 */
uint32_t
dev16_altivec_c(vector unsigned char *cur, uint32_t stride)
{
    /*
     * t2 and t3 are not referenced by name in this body; they are presumably
     * scratch registers for the DEV16 expansion, so their names must stay.
     */
    vector unsigned char t2, t3, mn;
    vector unsigned int mean, dev;
    vector unsigned int sumdiffs;
    vector unsigned char *ptr;
    uint32_t result;

#ifdef DEBUG
    /*
     * print alignment errors if DEBUG is on
     *
     * The arguments are cast to unsigned long so that they match the
     * %lx / %lu conversions exactly; passing a uint32_t to %lu is a
     * format/argument mismatch.
     *
     * NOTE(review): the 0x7 mask only detects pointers that are not 8-byte
     * aligned, while the stride check uses 0xf; vector loads normally want
     * 16-byte alignment -- confirm whether this mask should be 0xf.
     */
    if (((unsigned long) cur) & 0x7)
        fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n",
                (unsigned long) cur);
    if (stride & 0xf)
        fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n",
                (unsigned long) stride);
#endif

    dev = mean = vec_splat_u32(0);
    stride >>= 4;

    /* set pointer to iterate through cur */
    ptr = cur;

    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();
    MEAN16();

    /* Add all together in sumdiffs */
    sumdiffs = (vector unsigned int) vec_sums((vector signed int) mean,
                                              vec_splat_s32(0));

    /*
     * divide by 16 * 16
     *
     * Replicating byte 14 of the summed vector into every byte of `mn`
     * picks, assuming big-endian element order, bits 8..15 of the total,
     * i.e. total / 256.
     */
    mn = vec_perm((vector unsigned char) sumdiffs,
                  (vector unsigned char) sumdiffs,
                  vec_splat_u8(14));

    /* set pointer to iterate through cur */
    ptr = cur;

    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();
    DEV16();

    /* sum all parts of difference into one 32 bit quantity */
    sumdiffs = (vector unsigned int) vec_sums((vector signed int) dev,
                                              vec_splat_s32(0));

    /*
     * copy vector sum into unaligned result: after the splat every element
     * is equal, so whichever element vec_ste selects from the destination
     * address carries the sum
     */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, (uint32_t*) &result);

    return result;
}