示例#1
0
/* Generate code for a section of the mask. first is the index we start
 * at, we set last to the index of the last one we use before we run 
 * out of intermediates / constants / parameters / sources or mask
 * coefficients.
 *
 * 0 for success, -1 on error.
 */
static int
vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
{
	VipsVector *v;
	int i;

#ifdef DEBUG_COMPILE
	printf( "starting pass %d\n", pass->first ); 
#endif /*DEBUG_COMPILE*/

	pass->vector = v = vips_vector_new( "reducev", 1 );

	/* We have two destinations: the final output image (8-bit) and the
	 * intermediate buffer if this is not the final pass (16-bit).
	 */
	pass->d2 = vips_vector_destination( v, "d2", 2 );

	/* "r" is the array of sums from the previous pass (if any).
	 */
	pass->r = vips_vector_source_name( v, "r", 2 );

	/* The value we fetch from the image, the accumulated sum.
	 */
	TEMP( "value", 2 );
	TEMP( "sum", 2 );

	/* Init the sum. If this is the first pass, it's a constant. If this
	 * is a later pass, we have to init the sum from the result 
	 * of the previous pass. 
	 */
	if( first ) {
		char c0[256];

		CONST( c0, 0, 2 );
		ASM2( "loadpw", "sum", c0 );
	}
	else 
		ASM2( "loadw", "sum", "r" );

	for( i = pass->first; i < reducev->n_point; i++ ) {
		char source[256];
		char coeff[256];

		SCANLINE( source, i, 1 );

		/* This mask coefficient.
		 */
		vips_snprintf( coeff, 256, "p%d", i );
		pass->p[pass->n_param] = PARAM( coeff, 2 );
		pass->n_param += 1;
		if( pass->n_param >= MAX_PARAM )
			return( -1 );

		/* Mask coefficients are 2.6 bits fixed point. We need to hold
		 * about -0.5 to 1.0, so -2 to +1.999 is as close as we can
		 * get. 
		 *
		 * We need a signed multiply, so the image pixel needs to
		 * become a signed 16-bit value. We know only the bottom 8 bits
		 * of the image and coefficient are interesting, so we can take
		 * the bottom bits of a 16x16->32 multiply. 
		 *
		 * We accumulate the signed 16-bit result in sum.
		 */
		ASM2( "convubw", "value", source );
		ASM3( "mullw", "value", "value", coeff );
		ASM3( "addssw", "sum", "sum", "value" );

		/* We've used this coeff.
		 */
		pass->last = i;

		if( vips_vector_full( v ) )
			break;

		/* orc 0.4.24 and earlier hate more than about five lines at
		 * once :( 
		 */
		if( i - pass->first > 3 )
			break;
	}

	/* If this is the end of the mask, we write the 8-bit result to the
	 * image, otherwise write the 16-bit intermediate to our temp buffer. 
	 */
	if( pass->last >= reducev->n_point - 1 ) {
		char c32[256];
		char c6[256];
		char c0[256];
		char c255[256];

		CONST( c32, 32, 2 );
		ASM3( "addw", "sum", "sum", c32 );
		CONST( c6, 6, 2 );
		ASM3( "shrsw", "sum", "sum", c6 );

		/* You'd think "convsuswb", convert signed 16-bit to unsigned
		 * 8-bit with saturation, would be quicker, but it's a lot
		 * slower.
		 */
		CONST( c0, 0, 2 );
		ASM3( "maxsw", "sum", c0, "sum" ); 
		CONST( c255, 255, 2 );
		ASM3( "minsw", "sum", c255, "sum" ); 

		ASM2( "convwb", "d1", "sum" );
	}
	else 
		ASM2( "copyw", "d2", "sum" );

	if( !vips_vector_compile( v ) ) 
		return( -1 );

#ifdef DEBUG_COMPILE
	printf( "done coeffs %d to %d\n", pass->first, pass->last );
	vips_vector_print( v );
#endif /*DEBUG_COMPILE*/

	return( 0 );
}
示例#2
0
/* Generate code for a section of the mask. first is the index we start
 * at, we set last to the index of the last one we use before we run 
 * out of intermediates / constants / parameters / sources or mask
 * coefficients.
 *
 * 0 for success, -1 on error.
 */
static int
pass_compile_section( Pass *pass, Morph *morph, gboolean first_pass )
{
	INTMASK *mask = morph->mask;
	const int n_mask = mask->xsize * mask->ysize; 

	VipsVector *v;
	char offset[256];
	char source[256];
	char zero[256];
	char one[256];
	int i;

	pass->vector = v = vips_vector_new( "morph", 1 );

	/* The value we fetch from the image, the accumulated sum.
	 */
	TEMP( "value", 1 );
	TEMP( "sum", 1 );

	CONST( zero, 0, 1 );
	CONST( one, 255, 1 );

	/* Init the sum. If this is the first pass, it's a constant. If this
	 * is a later pass, we have to init the sum from the result 
	 * of the previous pass. 
	 */
	if( first_pass ) {
		if( morph->op == DILATE )
			ASM2( "copyb", "sum", zero );
		else
			ASM2( "copyb", "sum", one );
	}
	else {
		/* "r" is the result of the previous pass. 
		 */
		pass->r = vips_vector_source_name( v, "r", 1 );
		ASM2( "loadb", "sum", "r" );
	}

	for( i = pass->first; i < n_mask; i++ ) {
		int x = i % mask->xsize;
		int y = i / mask->xsize;

		/* Exclude don't-care elements.
		 */
		if( mask->coeff[i] == 128 )
			continue;

		/* The source. sl0 is the first scanline in the mask.
		 */
		SCANLINE( source, y, 1 );

		/* The offset, only for non-first-columns though.
		 */
		if( x > 0 ) {
			CONST( offset, morph->in->Bands * x, 1 );
			ASM3( "loadoffb", "value", source, offset );
		}
		else
			ASM2( "loadb", "value", source );

		/* Join to our sum. If the mask element is zero, we have to
		 * add an extra negate.
		 */
		if( morph->op == DILATE ) {
			if( !mask->coeff[i] ) 
				ASM3( "xorb", "value", "value", one );
			ASM3( "orb", "sum", "sum", "value" );
		}
		else {
			if( !mask->coeff[i] ) 
				ASM3( "andnb", "sum", "sum", "value" );
			else
				ASM3( "andb", "sum", "sum", "value" );
		}

		if( vips_vector_full( v ) )
			break;
	}

	pass->last = i;

	ASM2( "copyb", "d1", "sum" );

	if( !vips_vector_compile( v ) ) 
		return( -1 );

#ifdef DEBUG
	printf( "done matrix coeffs %d to %d\n", pass->first, pass->last );
	vips_vector_print( v );
#endif /*DEBUG*/

	return( 0 );
}