/*
 * Horizontal 2:1 downsampling (no vertical reduction) using AltiVec.
 *
 * For each of the first v_samp_factor rows, every pair of adjacent input
 * samples is summed, a bias is added, and the result is shifted right by
 * one bit to produce a single output sample.  Each inner-loop iteration
 * consumes up to 32 input bytes and always stores one full 16-byte vector
 * to the output row.
 *
 * image_width, max_v_samp_factor:
 *     forwarded unchanged to expand_right_edge().
 * v_samp_factor:
 *     number of rows of input_data/output_data that are processed.
 * width_blocks:
 *     output width in blocks; the output column count is
 *     width_blocks * DCTSIZE.
 * input_data, output_data:
 *     row-pointer arrays for the source and destination samples.
 *
 * NOTE(review): expand_right_edge() is defined outside this function; it
 * presumably pads each input row out to output_cols * 2 columns so the
 * vector loads below only see valid data -- confirm against its definition.
 */
void
jsimd_h2v1_downsample_altivec (JDIMENSION image_width,
                               int max_v_samp_factor,
                               JDIMENSION v_samp_factor,
                               JDIMENSION width_blocks,
                               JSAMPARRAY input_data,
                               JSAMPARRAY output_data)
{
  int row, cols_left;
  JDIMENSION output_cols = width_blocks * DCTSIZE;
  JSAMPROW src, dst;

  __vector unsigned char px_lo, px_hi, packed;
  __vector unsigned short lo_even, lo_odd, hi_even, hi_odd, sum_lo, sum_hi;

  /* Constants */
  /* NOTE(review): __4X2(0, 1) presumably expands to the alternating
   * 0,1,0,1,... bias pattern -- confirm against the macro definition.
   */
  __vector unsigned short pw_bias = { __4X2(0, 1) },
    pw_one = { __8X(1) };
  /* Permutation that gathers the even-indexed bytes into the first eight
   * lanes and the odd-indexed bytes into the last eight lanes.
   */
  __vector unsigned char even_odd_index =
    {  0,  2,  4,  6,  8, 10, 12, 14,  1,  3,  5,  7,  9, 11, 13, 15 },
    /* pb_zero is not referenced by name below; it is presumably picked up
     * by the VEC_UNPACKHU()/VEC_UNPACKLU() macros, so its name must stay.
     */
    pb_zero = { __16X(0) };

  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (row = 0; row < v_samp_factor; row++) {
    src = input_data[row];
    dst = output_data[row];
    cols_left = output_cols;

    while (cols_left > 0) {
      /* First 16 input bytes -> lower 8 output samples. */
      px_lo = vec_ld(0, src);
      px_lo = vec_perm(px_lo, px_lo, even_odd_index);
      lo_even = (__vector unsigned short)VEC_UNPACKHU(px_lo);
      lo_odd = (__vector unsigned short)VEC_UNPACKLU(px_lo);
      sum_lo = vec_sr(vec_add(vec_add(lo_even, lo_odd), pw_bias), pw_one);

      /* Upper 8 output samples default to zero; the second 16 input bytes
       * are only loaded when more than 8 output columns remain.
       */
      sum_hi = vec_splat_u16(0);
      if (cols_left > 8) {
        px_hi = vec_ld(16, src);
        px_hi = vec_perm(px_hi, px_hi, even_odd_index);
        hi_even = (__vector unsigned short)VEC_UNPACKHU(px_hi);
        hi_odd = (__vector unsigned short)VEC_UNPACKLU(px_hi);
        sum_hi = vec_sr(vec_add(vec_add(hi_even, hi_odd), pw_bias), pw_one);
      }

      /* Narrow both halves back to bytes and write 16 output samples. */
      packed = vec_pack(sum_lo, sum_hi);
      vec_st(packed, 0, dst);

      cols_left -= 16;
      src += 32;
      dst += 16;
    }
  }
}
/*
 * Load eight rows of samples, widen them to 16-bit elements, subtract
 * CENTERJSAMPLE from every element, and store the eight resulting vectors
 * to workspace at consecutive 16-byte offsets (0, 16, ..., 112).
 *
 * sample_data: row-pointer array of input samples.
 * start_col:   column offset of the data within each row.
 * workspace:   destination buffer; 8 * 16 = 128 bytes are written.
 *
 * NOTE(review): LOAD_ROW() and VEC_UNPACKHU() are macros defined outside
 * this function.  LOAD_ROW(n) presumably loads sample_data[n] + start_col
 * into in<n> using elemptr as scratch, and VEC_UNPACKHU() presumably
 * zero-extends 8 bytes to 16-bit lanes using pb_zero -- confirm against
 * their definitions.  The locals they may reference by name (elemptr,
 * in0..in7, pb_zero) must therefore keep their names.
 */
void
jsimd_convsamp_altivec (JSAMPARRAY sample_data, JDIMENSION start_col,
                        DCTELEM * workspace)
{
  JSAMPROW elemptr;

  __vector unsigned char in0, in1, in2, in3, in4, in5, in6, in7;
  __vector short out0, out1, out2, out3, out4, out5, out6, out7;

  /* Constants */
  __vector short pw_centerjsamp = { __8X(CENTERJSAMPLE) };
  __vector unsigned char pb_zero = { __16X(0) };

  /* All eight rows are loaded before anything is written to workspace. */
  LOAD_ROW(0);
  LOAD_ROW(1);
  LOAD_ROW(2);
  LOAD_ROW(3);
  LOAD_ROW(4);
  LOAD_ROW(5);
  LOAD_ROW(6);
  LOAD_ROW(7);

  /* Per row: widen, subtract CENTERJSAMPLE, then store. */
  out0 = vec_sub((__vector short)VEC_UNPACKHU(in0), pw_centerjsamp);
  vec_st(out0, 0, workspace);

  out1 = vec_sub((__vector short)VEC_UNPACKHU(in1), pw_centerjsamp);
  vec_st(out1, 16, workspace);

  out2 = vec_sub((__vector short)VEC_UNPACKHU(in2), pw_centerjsamp);
  vec_st(out2, 32, workspace);

  out3 = vec_sub((__vector short)VEC_UNPACKHU(in3), pw_centerjsamp);
  vec_st(out3, 48, workspace);

  out4 = vec_sub((__vector short)VEC_UNPACKHU(in4), pw_centerjsamp);
  vec_st(out4, 64, workspace);

  out5 = vec_sub((__vector short)VEC_UNPACKHU(in5), pw_centerjsamp);
  vec_st(out5, 80, workspace);

  out6 = vec_sub((__vector short)VEC_UNPACKHU(in6), pw_centerjsamp);
  vec_st(out6, 96, workspace);

  out7 = vec_sub((__vector short)VEC_UNPACKHU(in7), pw_centerjsamp);
  vec_st(out7, 112, workspace);
}