// Horizontal sum of all lanes of `a`, clamped to the ushort range.
//
// The reduction is done in two stages: the vector is first folded onto itself
// with a saturating 16-bit add, then the surviving partial sums are widened to
// 32-bit words and added across the vector.
inline ushort v_reduce_sum(const v_uint16x8& a)
{
    // vec_sld(a.val, a.val, 8) rotates the register by 8 bytes, so vec_adds
    // pairs every lane with the lane half a vector away (saturating add).
    // vec_unpackhu is a project helper; presumably it zero-extends one half of
    // the folded vector to 32-bit words -- confirm against its definition.
    const vec_int4 partial = vec_int4_c(vec_unpackhu(
        vec_adds(a.val, vec_sld(a.val, a.val, 8))));

    // vec_sums adds the four words (plus word 3 of the second operand,
    // presumably zero here) and leaves the result in word 3.
    const vec_int4 total = vec_sums(partial, vec_int4_z);
    const int sum = vec_extract(total, 3);

    // Clamp the 32-bit total to the return type.
    return saturate_cast<ushort>(sum);
}
// Writes `vec` through vsx_stf at the position described by offset `o` and
// base pointer `p`.
//
// vsx_stf is invoked in its int form here: the vector is reinterpreted as
// vec_int4 and the destination as int*. The bits handed to vsx_stf are the
// same; only the types seen by vsx_stf change.
VSX_FINLINE(void) vsx_st2(const vec_dword2& vec, long o, int64* p)
{
    const vec_int4 words = vec_int4_c(vec);
    int* dst = reinterpret_cast<int*>(p);

    // VSX_OFFSET must be given the original int64 pointer rather than `dst`,
    // exactly as the offset was computed before (the macro may depend on the
    // pointee type -- verify against its definition).
    vsx_stf(words, VSX_OFFSET(o, p), dst);
}