/* Build the vector program for one pass over the vertical reduce mask.
 *
 * Coefficients are consumed starting at index pass->first, and pass->last
 * is set to the index of the final coefficient this pass used. We stop
 * when the mask is exhausted, when the vector program reports that it is
 * full, or after five coefficients. @first is TRUE for the first pass,
 * which starts its sum from zero; later passes start from "r", the sums
 * written by the previous pass.
 *
 * Returns 0 on success, -1 if we run out of parameter slots or the
 * program fails to compile.
 */
static int
vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
{
	VipsVector *v;
	int i;

#ifdef DEBUG_COMPILE
	printf( "starting pass %d\n", pass->first );
#endif /*DEBUG_COMPILE*/

	v = vips_vector_new( "reducev", 1 );
	pass->vector = v;

	/* "d2" is the 16-bit intermediate buffer, written by every pass
	 * except the final one. The final pass writes the 8-bit image
	 * through "d1" instead.
	 */
	pass->d2 = vips_vector_destination( v, "d2", 2 );

	/* "r" holds the sums left behind by the previous pass, if there was
	 * one.
	 */
	pass->r = vips_vector_source_name( v, "r", 2 );

	/* Working registers: the current pixel and the running total.
	 */
	TEMP( "value", 2 );
	TEMP( "sum", 2 );

	/* Seed the running total: carry on from the previous pass, or start
	 * from a zero constant on the first pass.
	 */
	if( !first ) {
		ASM2( "loadw", "sum", "r" );
	}
	else {
		char zero[256];

		CONST( zero, 0, 2 );
		ASM2( "loadpw", "sum", zero );
	}

	for( i = pass->first; i < reducev->n_point; i++ ) {
		char scanline[256];
		char param[256];

		SCANLINE( scanline, i, 1 );

		/* Register the parameter carrying this mask coefficient.
		 */
		vips_snprintf( param, sizeof( param ), "p%d", i );
		pass->p[pass->n_param] = PARAM( param, 2 );
		pass->n_param += 1;
		if( pass->n_param >= MAX_PARAM ) {
			return( -1 );
		}

		/* Coefficients are 2.6 fixed point: we need roughly -0.5 to
		 * 1.0, and -2 to +1.999 is the nearest fit.
		 *
		 * The multiply must be signed, so widen the pixel to a
		 * signed 16-bit value first. Only the low 8 bits of pixel
		 * and coefficient carry information, so the low half of the
		 * 16x16->32 product is enough.
		 *
		 * The signed 16-bit product is accumulated in sum.
		 */
		ASM2( "convubw", "value", scanline );
		ASM3( "mullw", "value", "value", param );
		ASM3( "addssw", "sum", "sum", "value" );

		/* Record that this coefficient has been consumed.
		 */
		pass->last = i;

		/* Stop if the program is full. Also stop after five
		 * coefficients: orc 0.4.24 and earlier hate more than about
		 * five lines at once :(
		 */
		if( vips_vector_full( v ) ||
			i - pass->first > 3 ) {
			break;
		}
	}

	if( pass->last < reducev->n_point - 1 ) {
		/* More of the mask remains: hand the 16-bit sum on to the
		 * next pass via the temp buffer.
		 */
		ASM2( "copyw", "d2", "sum" );
	}
	else {
		/* End of the mask: round, scale back from fixed point, clip
		 * and write the 8-bit result to the image.
		 */
		char round[256];
		char shift[256];
		char lower[256];
		char upper[256];

		CONST( round, 32, 2 );
		ASM3( "addw", "sum", "sum", round );
		CONST( shift, 6, 2 );
		ASM3( "shrsw", "sum", "sum", shift );

		/* "convsuswb" (signed 16-bit to unsigned 8-bit with
		 * saturation) looks like the obvious choice, but it's a lot
		 * slower than clipping by hand.
		 */
		CONST( lower, 0, 2 );
		ASM3( "maxsw", "sum", lower, "sum" );
		CONST( upper, 255, 2 );
		ASM3( "minsw", "sum", upper, "sum" );

		ASM2( "convwb", "d1", "sum" );
	}

	if( !vips_vector_compile( v ) ) {
		return( -1 );
	}

#ifdef DEBUG_COMPILE
	printf( "done coeffs %d to %d\n", pass->first, pass->last );
	vips_vector_print( v );
#endif /*DEBUG_COMPILE*/

	return( 0 );
}
/* Build the vector program for one section of the morphology mask.
 *
 * Mask elements are consumed starting at index pass->first. Elements equal
 * to 128 are don't-care and generate no code. We stop when the mask is
 * exhausted or when the vector program reports that it is full.
 *
 * On return, pass->last is the index of the last element examined if we
 * stopped because the program was full, or the total number of mask
 * elements if we ran off the end of the mask.
 *
 * @first_pass is TRUE for the first pass, which seeds the sum with a
 * constant (0 for DILATE, 255 otherwise); later passes seed it from "r",
 * the result of the previous pass.
 *
 * Returns 0 on success, -1 if the program fails to compile.
 */
static int
pass_compile_section( Pass *pass, Morph *morph, gboolean first_pass )
{
	INTMASK *mask = morph->mask;
	const int n_mask = mask->xsize * mask->ysize;

	VipsVector *v;
	char zero[256];
	char one[256];
	int i;

	pass->vector = v = vips_vector_new( "morph", 1 );

	/* The value we fetch from the image, the accumulated sum.
	 */
	TEMP( "value", 1 );
	TEMP( "sum", 1 );

	CONST( zero, 0, 1 );
	CONST( one, 255, 1 );

	/* Init the sum. If this is the first pass, it's a constant. If this
	 * is a later pass, we have to init the sum from the result
	 * of the previous pass.
	 */
	if( first_pass ) {
		if( morph->op == DILATE ) {
			ASM2( "copyb", "sum", zero );
		}
		else {
			ASM2( "copyb", "sum", one );
		}
	}
	else {
		/* "r" is the result of the previous pass.
		 */
		pass->r = vips_vector_source_name( v, "r", 1 );
		ASM2( "loadb", "sum", "r" );
	}

	for( i = pass->first; i < n_mask; i++ ) {
		const int x = i % mask->xsize;
		const int y = i / mask->xsize;

		char source[256];

		/* Exclude don't-care elements.
		 */
		if( mask->coeff[i] == 128 ) {
			continue;
		}

		/* The source. sl0 is the first scanline in the mask.
		 */
		SCANLINE( source, y, 1 );

		/* The offset, only for non-first-columns though.
		 *
		 * NOTE(review): the offset is registered with the same
		 * size argument (1) as the byte constants above -- confirm
		 * that Bands * x always fits.
		 */
		if( x > 0 ) {
			char offset[256];

			CONST( offset, morph->in->Bands * x, 1 );
			ASM3( "loadoffb", "value", source, offset );
		}
		else {
			ASM2( "loadb", "value", source );
		}

		/* A zero mask element matches an unset pixel, so negate the
		 * pixel before joining it to the sum.
		 *
		 * We always negate explicitly with xorb. "andnb" looks like
		 * it would save an instruction for erode, but Orc defines
		 * it as (~a) & b, so "andnb sum, sum, value" would negate
		 * the sum rather than the pixel.
		 */
		if( !mask->coeff[i] ) {
			ASM3( "xorb", "value", "value", one );
		}

		/* Join to our sum: OR for dilate, AND for erode.
		 */
		if( morph->op == DILATE ) {
			ASM3( "orb", "sum", "sum", "value" );
		}
		else {
			ASM3( "andb", "sum", "sum", "value" );
		}

		if( vips_vector_full( v ) ) {
			break;
		}
	}

	pass->last = i;

	ASM2( "copyb", "d1", "sum" );

	if( !vips_vector_compile( v ) ) {
		return( -1 );
	}

#ifdef DEBUG
	printf( "done matrix coeffs %d to %d\n", pass->first, pass->last );
	vips_vector_print( v );
#endif /*DEBUG*/

	return( 0 );
}