/*
 * Downsample one component by 2:1 horizontally and 1:1 vertically.
 * Every output sample is the average of two horizontally adjacent input
 * samples, with an alternating rounding term so that rounding does not
 * always go in the same direction.
 *
 * cinfo       - supplies max_v_samp_factor (number of rows processed) and
 *               image_width (number of valid input columns).
 * compptr     - supplies width_in_blocks and DCT_h_scaled_size; their product
 *               is the number of output columns.
 * input_data  - source rows; modified by the right-edge padding call below.
 * output_data - destination rows, one per input row.
 */
h2v1_downsample( j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data )
{
	const JDIMENSION out_width = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
	int row;

	/* Pad the input out to two samples per output column so the plain loop
	 * below can produce every output sample without special-casing the edge. */
	expand_right_edge( input_data, cinfo->max_v_samp_factor, cinfo->image_width, out_width * 2 );

	for( row = 0; row < cinfo->max_v_samp_factor; row++ ) {
		JSAMPROW dst = output_data[row];
		JSAMPROW src = input_data[row];
		JDIMENSION col;

		for( col = 0; col < out_width; col++ ) {
			/* rounding term runs 0,1,0,1,... across the row */
			const int round_up = ( int )( col & 1 );

			dst[col] = ( JSAMPLE )( ( GETJSAMPLE( src[0] ) + GETJSAMPLE( src[1] ) + round_up ) >> 1 );
			src += 2;
		}
	}
}
/*
 * "Downsample" a component that needs no resolution change: the rows are
 * copied unchanged and then padded on the right.
 *
 * cinfo       - supplies max_v_samp_factor (rows to copy) and image_width
 *               (columns to copy).
 * compptr     - supplies width_in_blocks and DCT_h_scaled_size; their product
 *               is the width the output rows are padded to.
 * input_data  - source rows (read only here).
 * output_data - destination rows.
 */
fullsize_downsample( j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data )
{
	const JDIMENSION padded_width = compptr->width_in_blocks * compptr->DCT_h_scaled_size;

	/* Straight copy of the sample rows, starting at row 0 on both sides. */
	jcopy_sample_rows( input_data, 0, output_data, 0, cinfo->max_v_samp_factor, cinfo->image_width );

	/* Pad the copied rows (not the input) out to the full output width. */
	expand_right_edge( output_data, cinfo->max_v_samp_factor, cinfo->image_width, padded_width );
}
/*
 * AltiVec implementation of 2:1 horizontal, 1:1 vertical downsampling.
 * Each pass of the inner loop consumes 32 input samples and stores one
 * 16-byte vector of output samples.
 *
 * image_width       - number of valid input columns (passed to the padding call).
 * max_v_samp_factor - number of input rows that get right-edge padding.
 * v_samp_factor     - number of rows actually downsampled.
 * width_blocks      - output width in blocks; multiplied by DCTSIZE to get
 *                     the number of output columns.
 * input_data        - source rows; modified by the padding call.
 * output_data       - destination rows.
 */
void jsimd_h2v1_downsample_altivec (JDIMENSION image_width, int max_v_samp_factor, JDIMENSION v_samp_factor, JDIMENSION width_blocks, JSAMPARRAY input_data, JSAMPARRAY output_data)
{
  const JDIMENSION output_cols = width_blocks * DCTSIZE;
  int row, cols_left;
  JSAMPROW src, dst;

  __vector unsigned char this0, next0, out;
  __vector unsigned short this0e, this0o, next0e, next0o, outl, outh;

  /* Constants */
  /* rounding term; presumably expands to the pair 0,1 repeated four times */
  __vector unsigned short pw_bias = { __4X2(0, 1) };
  /* shift count of 1 in every lane (divide by two) */
  __vector unsigned short pw_one = { __8X(1) };
  /* permute control: even-indexed bytes first, then odd-indexed bytes */
  __vector unsigned char even_odd_index =
    { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 };
  /* NOTE(review): not referenced by name below; presumably consumed inside
   * the VEC_UNPACKHU / VEC_UNPACKLU macros -- confirm before removing. */
  __vector unsigned char pb_zero = { __16X(0) };

  /* Pad the input to two samples per output column before averaging. */
  expand_right_edge(input_data, max_v_samp_factor, image_width,
                    output_cols * 2);

  for (row = 0; row < v_samp_factor; row++) {
    dst = output_data[row];
    src = input_data[row];
    cols_left = output_cols;

    while (cols_left > 0) {
      /* First 16 input bytes -> 8 output samples (low half of the store). */
      this0 = vec_ld(0, src);
      this0 = vec_perm(this0, this0, even_odd_index);
      this0e = (__vector unsigned short)VEC_UNPACKHU(this0);
      this0o = (__vector unsigned short)VEC_UNPACKLU(this0);
      outl = vec_sr(vec_add(vec_add(this0e, this0o), pw_bias), pw_one);

      if (cols_left <= 8) {
        /* No more than 8 output columns remain: zero the high half. */
        outh = vec_splat_u16(0);
      } else {
        /* Second 16 input bytes -> 8 more output samples. */
        next0 = vec_ld(16, src);
        next0 = vec_perm(next0, next0, even_odd_index);
        next0e = (__vector unsigned short)VEC_UNPACKHU(next0);
        next0o = (__vector unsigned short)VEC_UNPACKLU(next0);
        outh = vec_sr(vec_add(vec_add(next0e, next0o), pw_bias), pw_one);
      }

      /* Narrow the two 16-bit halves back to bytes and store 16 samples. */
      out = vec_pack(outl, outh);
      vec_st(out, 0, dst);

      cols_left -= 16;
      src += 32;
      dst += 16;
    }
  }
}
/*
 * Downsample one component by an integral factor in each direction using a
 * box filter: every output sample is the rounded mean of an
 * h_expand x v_expand rectangle of input samples.
 *
 * cinfo       - supplies max_h_samp_factor / max_v_samp_factor and image_width.
 * compptr     - supplies this component's h_samp_factor / v_samp_factor and
 *               width_in_blocks (output width is width_in_blocks * DCTSIZE).
 * input_data  - source rows; modified by the right-edge padding call below.
 * output_data - destination rows, compptr->v_samp_factor of them.
 */
METHODDEF void int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data)
{
    const int box_w = cinfo->max_h_samp_factor / compptr->h_samp_factor;
    const int box_h = cinfo->max_v_samp_factor / compptr->v_samp_factor;
    const int box_area = box_w * box_h;
    const int half_area = box_area / 2;    /* added before dividing, to round */
    const JDIMENSION out_width = compptr->width_in_blocks * DCTSIZE;
    int out_row, src_row;

    /* Pad the input to box_w samples per output column so the plain loop
     * below can generate every output sample. */
    expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, out_width * box_w);

    for (out_row = 0, src_row = 0; out_row < compptr->v_samp_factor; out_row++, src_row += box_h) {
        JSAMPROW dst = output_data[out_row];
        JDIMENSION out_col;
        JDIMENSION src_col = 0;    /* always out_col * box_w */

        for (out_col = 0; out_col < out_width; out_col++) {
            INT32 total = 0;
            int dy, dx;

            /* Sum the box_w x box_h rectangle that maps to this output sample. */
            for (dy = 0; dy < box_h; dy++) {
                JSAMPROW src = input_data[src_row + dy] + src_col;

                for (dx = 0; dx < box_w; dx++) {
                    total += (INT32) GETJSAMPLE(src[dx]);
                }
            }

            dst[out_col] = (JSAMPLE)((total + half_area) / box_area);
            src_col += box_w;
        }
    }
}
/*
 * Downsample one component by 2:1 horizontally and 2:1 vertically.
 * Every output sample is the average of a 2x2 square of input samples, with
 * an alternating rounding term.
 *
 * cinfo       - supplies max_v_samp_factor and image_width for edge padding.
 * compptr     - supplies v_samp_factor (output rows) and width_in_blocks
 *               (output width is width_in_blocks * DCTSIZE).
 * input_data  - source rows, two per output row; modified by the padding call.
 * output_data - destination rows.
 */
METHODDEF void h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data)
{
    const JDIMENSION out_width = compptr->width_in_blocks * DCTSIZE;
    int out_row, src_row;

    /* Pad the input to two samples per output column so the plain loop
     * below can generate every output sample. */
    expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, out_width * 2);

    for (out_row = 0, src_row = 0; out_row < compptr->v_samp_factor; out_row++, src_row += 2) {
        JSAMPROW dst = output_data[out_row];
        JSAMPROW upper = input_data[src_row];
        JSAMPROW lower = input_data[src_row + 1];
        JDIMENSION col;

        for (col = 0; col < out_width; col++) {
            /* rounding term runs 1,2,1,2,... across the row */
            const int round_up = 1 + (int)(col & 1);

            dst[col] = (JSAMPLE)((GETJSAMPLE(upper[0]) + GETJSAMPLE(upper[1]) +
                                  GETJSAMPLE(lower[0]) + GETJSAMPLE(lower[1]) +
                                  round_up) >> 2);
            upper += 2;
            lower += 2;
        }
    }
}
/*
 * Full-resolution "downsample" with smoothing: the output has the same
 * resolution as the input, but each sample is blended with its eight
 * neighbours.
 *
 * With SF = smoothing_factor / 1024, each neighbour contributes SF and the
 * centre sample contributes (1 - 8*SF).  Both weights are held scaled by
 * 2^16 so the blend can be done in integer arithmetic.
 *
 * cinfo       - supplies max_v_samp_factor, image_width and smoothing_factor.
 * compptr     - supplies width_in_blocks and DCT_h_scaled_size; their product
 *               is the number of output columns.
 * input_data  - source rows; rows -1 and max_v_samp_factor are also read as
 *               context, and all of them are padded on the right below.
 * output_data - destination rows, one per input row.
 *
 * NOTE(review): the middle-column count is output_cols - 2, which presumes
 * at least two output columns (JDIMENSION is presumably unsigned, so a
 * smaller value would wrap) -- confirm against callers.
 */
fullsize_smooth_downsample( j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data )
{
	const JDIMENSION out_width = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
	INT32 self_weight, neigh_weight;
	int row;

	/* Pad the working rows plus one context row above and one below. */
	expand_right_edge( input_data - 1, cinfo->max_v_samp_factor + 2, cinfo->image_width, out_width );

	self_weight = 65536L - cinfo->smoothing_factor * 512L;	/* (1 - 8*SF) * 2^16 */
	neigh_weight = cinfo->smoothing_factor * 64;		/* SF * 2^16 */

	for( row = 0; row < cinfo->max_v_samp_factor; row++ ) {
		JSAMPROW dst = output_data[row];
		JSAMPROW mid = input_data[row];
		JSAMPROW above = input_data[row - 1];
		JSAMPROW below = input_data[row + 1];
		JDIMENSION col, middle_left;
		INT32 center, ring;
		/* three-sample vertical sums for the column to the left, the
		 * current column, and the column to the right */
		int left_sum, here_sum, right_sum;

		/* First column: there is no column to the left, so the current
		 * column's vertical sum stands in for it. */
		here_sum = GETJSAMPLE( above[0] ) + GETJSAMPLE( below[0] ) + GETJSAMPLE( mid[0] );
		center = GETJSAMPLE( mid[0] );
		right_sum = GETJSAMPLE( above[1] ) + GETJSAMPLE( below[1] ) + GETJSAMPLE( mid[1] );
		ring = here_sum + ( here_sum - center ) + right_sum;
		center = center * self_weight + ring * neigh_weight;
		dst[0] = ( JSAMPLE )( ( center + 32768 ) >> 16 );
		left_sum = here_sum;
		here_sum = right_sum;

		/* Middle columns: slide the three vertical sums along the row. */
		col = 1;
		for( middle_left = out_width - 2; middle_left > 0; middle_left-- ) {
			center = GETJSAMPLE( mid[col] );
			right_sum = GETJSAMPLE( above[col + 1] ) + GETJSAMPLE( below[col + 1] ) + GETJSAMPLE( mid[col + 1] );
			ring = left_sum + ( here_sum - center ) + right_sum;
			center = center * self_weight + ring * neigh_weight;
			dst[col] = ( JSAMPLE )( ( center + 32768 ) >> 16 );	/* round and descale */
			left_sum = here_sum;
			here_sum = right_sum;
			col++;
		}

		/* Last column: there is no column to the right, so the current
		 * column's vertical sum stands in for it. */
		center = GETJSAMPLE( mid[col] );
		ring = left_sum + ( here_sum - center ) + here_sum;
		center = center * self_weight + ring * neigh_weight;
		dst[col] = ( JSAMPLE )( ( center + 32768 ) >> 16 );
	}
}
/*
 * Downsample one component by 2:1 horizontally and 2:1 vertically, with
 * smoothing folded into the same pass.
 *
 * The individual smoothed input samples are never formed.  With
 * SF = smoothing_factor / 1024, the output is computed directly as a
 * weighted sum over the 2x2 cell and the ring of samples around it:
 *   - each of the four member samples carries weight (1 - 5*SF) / 4;
 *   - each of the four corner-adjacent neighbours carries weight SF / 4;
 *   - each of the eight edge-adjacent neighbours carries weight SF / 2,
 *     which is why the edge sum is doubled before the corners are added.
 * The weights are held scaled by 2^16 for integer arithmetic.
 *
 * cinfo       - supplies max_v_samp_factor, image_width and smoothing_factor.
 * compptr     - supplies width_in_blocks and DCT_h_scaled_size; their product
 *               is the number of output columns.
 * input_data  - source rows, two per output row; rows -1 and
 *               max_v_samp_factor are also read as context, and all of them
 *               are padded on the right below.
 * output_data - destination rows.
 *
 * NOTE(review): the middle-column count is output_cols - 2, which presumes
 * at least two output columns (JDIMENSION is presumably unsigned, so a
 * smaller value would wrap) -- confirm against callers.
 */
h2v2_smooth_downsample( j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data )
{
	const JDIMENSION out_width = compptr->width_in_blocks * compptr->DCT_h_scaled_size;
	INT32 member_weight, neigh_weight;
	int in_row, out_row;

	/* Pad the working rows plus one context row above and one below, out to
	 * two input samples per output column. */
	expand_right_edge( input_data - 1, cinfo->max_v_samp_factor + 2, cinfo->image_width, out_width * 2 );

	member_weight = 16384 - cinfo->smoothing_factor * 80;	/* (1 - 5*SF) / 4 * 2^16 */
	neigh_weight = cinfo->smoothing_factor * 16;		/* SF / 4 * 2^16 */

	for( in_row = 0, out_row = 0; in_row < cinfo->max_v_samp_factor; in_row += 2, out_row++ ) {
		JSAMPROW dst = output_data[out_row];
		JSAMPROW top = input_data[in_row];
		JSAMPROW bot = input_data[in_row + 1];
		JSAMPROW above = input_data[in_row - 1];
		JSAMPROW below = input_data[in_row + 2];
		JDIMENSION x;		/* input column where the current 2x2 cell starts */
		JDIMENSION out_col;
		JDIMENSION middle_left;
		INT32 members, neighbors;

		/* First column: column -1 does not exist, so column 0 is used in
		 * its place for the left-hand neighbours. */
		members = GETJSAMPLE( top[0] ) + GETJSAMPLE( top[1] ) +
			  GETJSAMPLE( bot[0] ) + GETJSAMPLE( bot[1] );
		neighbors = GETJSAMPLE( above[0] ) + GETJSAMPLE( above[1] ) +
			    GETJSAMPLE( below[0] ) + GETJSAMPLE( below[1] ) +
			    GETJSAMPLE( top[0] ) + GETJSAMPLE( top[2] ) +
			    GETJSAMPLE( bot[0] ) + GETJSAMPLE( bot[2] );
		neighbors *= 2;
		neighbors += GETJSAMPLE( above[0] ) + GETJSAMPLE( above[2] ) +
			     GETJSAMPLE( below[0] ) + GETJSAMPLE( below[2] );
		members = members * member_weight + neighbors * neigh_weight;
		dst[0] = ( JSAMPLE )( ( members + 32768 ) >> 16 );

		/* Middle columns: full ring of neighbours on both sides. */
		x = 2;
		out_col = 1;
		for( middle_left = out_width - 2; middle_left > 0; middle_left-- ) {
			/* the four samples that map directly onto this output sample */
			members = GETJSAMPLE( top[x] ) + GETJSAMPLE( top[x + 1] ) +
				  GETJSAMPLE( bot[x] ) + GETJSAMPLE( bot[x + 1] );
			/* the eight edge-adjacent neighbours */
			neighbors = GETJSAMPLE( above[x] ) + GETJSAMPLE( above[x + 1] ) +
				    GETJSAMPLE( below[x] ) + GETJSAMPLE( below[x + 1] ) +
				    GETJSAMPLE( top[x - 1] ) + GETJSAMPLE( top[x + 2] ) +
				    GETJSAMPLE( bot[x - 1] ) + GETJSAMPLE( bot[x + 2] );
			/* edge neighbours weigh twice as much as corner neighbours */
			neighbors *= 2;
			/* the four corner-adjacent neighbours */
			neighbors += GETJSAMPLE( above[x - 1] ) + GETJSAMPLE( above[x + 2] ) +
				     GETJSAMPLE( below[x - 1] ) + GETJSAMPLE( below[x + 2] );
			/* weighted total is scaled by 2^16: round, descale, store */
			members = members * member_weight + neighbors * neigh_weight;
			dst[out_col] = ( JSAMPLE )( ( members + 32768 ) >> 16 );
			x += 2;
			out_col++;
		}

		/* Last column: the cell's own right-hand column (x + 1) is used in
		 * place of the missing column x + 2. */
		members = GETJSAMPLE( top[x] ) + GETJSAMPLE( top[x + 1] ) +
			  GETJSAMPLE( bot[x] ) + GETJSAMPLE( bot[x + 1] );
		neighbors = GETJSAMPLE( above[x] ) + GETJSAMPLE( above[x + 1] ) +
			    GETJSAMPLE( below[x] ) + GETJSAMPLE( below[x + 1] ) +
			    GETJSAMPLE( top[x - 1] ) + GETJSAMPLE( top[x + 1] ) +
			    GETJSAMPLE( bot[x - 1] ) + GETJSAMPLE( bot[x + 1] );
		neighbors *= 2;
		neighbors += GETJSAMPLE( above[x - 1] ) + GETJSAMPLE( above[x + 1] ) +
			     GETJSAMPLE( below[x - 1] ) + GETJSAMPLE( below[x + 1] );
		members = members * member_weight + neighbors * neigh_weight;
		dst[out_col] = ( JSAMPLE )( ( members + 32768 ) >> 16 );
	}
}