VipsVector * vips_vector_new( const char *name, int dsize ) { static int vector_number = 0; VipsVector *vector; int i; if( !(vector = VIPS_NEW( NULL, VipsVector )) ) return( NULL ); vector->name = name; vector->unique_name = g_strdup_printf( "p[%d]", vector_number++ ); vector->n_temp = 0; vector->n_scanline = 0; vector->n_source = 0; vector->n_destination = 0; vector->n_constant = 0; vector->n_parameter = 0; vector->n_instruction = 0; for( i = 0; i < VIPS_VECTOR_SOURCE_MAX; i++ ) { vector->s[i] = -1; vector->sl[i] = -1; } vector->d1 = -1; vector->compiled = FALSE; #ifdef HAVE_ORC vector->program = orc_program_new(); #ifdef DEBUG_TRACE printf( "%s = orc_program_new();\n", vector->unique_name ); #endif /*DEBUG_TRACE*/ #endif /*HAVE_ORC*/ /* We always make d1, our callers make either a single point source, or * for area ops, a set of scanlines. * * Don't check error return. orc uses 0 to mean error, but the first * var you create will have id 0 :-( The first var is unlikely to fail * anyway. */ vector->d1 = vips_vector_destination( vector, "d1", dsize ); return( vector ); }
/* Generate code for a section of the mask.
 *
 * Coefficients are consumed starting at index pass->first. pass->last is
 * set to the index of the last coefficient used before we run out of
 * intermediates / constants / parameters / sources or mask coefficients.
 *
 * @first is a flag, not an index: when TRUE the running sum starts from the
 * constant 0, otherwise it is loaded from "r", the array of sums written by
 * the previous pass.
 *
 * The new vector is stored in pass->vector; this function does not free it
 * on the error paths.
 *
 * 0 for success, -1 on error.
 */
static int
vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
{
	/* The TEMP / CONST / PARAM / SCANLINE / ASM2 / ASM3 macros take no
	 * vector argument, so presumably they refer to this local by name:
	 * don't rename it.
	 */
	VipsVector *v;
	int i;

#ifdef DEBUG_COMPILE
	printf( "starting pass %d\n", pass->first );
#endif /*DEBUG_COMPILE*/

	/* vips_vector_new() returns NULL if it cannot allocate. Stop here
	 * rather than passing a NULL vector to every call below.
	 */
	pass->vector = v = vips_vector_new( "reducev", 1 );
	if( !v )
		return( -1 );

	/* We have two destinations: the final output image (8-bit) and the
	 * intermediate buffer if this is not the final pass (16-bit).
	 */
	pass->d2 = vips_vector_destination( v, "d2", 2 );

	/* "r" is the array of sums from the previous pass (if any).
	 */
	pass->r = vips_vector_source_name( v, "r", 2 );

	/* The value we fetch from the image, the accumulated sum.
	 */
	TEMP( "value", 2 );
	TEMP( "sum", 2 );

	/* Init the sum. If this is the first pass, it's a constant. If this
	 * is a later pass, we have to init the sum from the result
	 * of the previous pass.
	 */
	if( first ) {
		char c0[256];

		CONST( c0, 0, 2 );
		ASM2( "loadpw", "sum", c0 );
	}
	else
		ASM2( "loadw", "sum", "r" );

	for( i = pass->first; i < reducev->n_point; i++ ) {
		char source[256];
		char coeff[256];

		SCANLINE( source, i, 1 );

		/* This mask coefficient.
		 */
		vips_snprintf( coeff, 256, "p%d", i );
		pass->p[pass->n_param] = PARAM( coeff, 2 );
		pass->n_param += 1;
		if( pass->n_param >= MAX_PARAM )
			return( -1 );

		/* Mask coefficients are 2.6 bits fixed point. We need to hold
		 * about -0.5 to 1.0, so -2 to +1.999 is as close as we can
		 * get.
		 *
		 * We need a signed multiply, so the image pixel needs to
		 * become a signed 16-bit value. We know only the bottom 8 bits
		 * of the image and coefficient are interesting, so we can take
		 * the bottom bits of a 16x16->32 multiply.
		 *
		 * We accumulate the signed 16-bit result in sum.
		 */
		ASM2( "convubw", "value", source );
		ASM3( "mullw", "value", "value", coeff );
		ASM3( "addssw", "sum", "sum", "value" );

		/* We've used this coeff.
		 */
		pass->last = i;

		if( vips_vector_full( v ) )
			break;

		/* orc 0.4.24 and earlier hate more than about five lines at
		 * once :(
		 */
		if( i - pass->first > 3 )
			break;
	}

	/* If this is the end of the mask, we write the 8-bit result to the
	 * image, otherwise write the 16-bit intermediate to our temp buffer.
	 */
	if( pass->last >= reducev->n_point - 1 ) {
		char c32[256];
		char c6[256];
		char c0[256];
		char c255[256];

		/* Add half of 1 << 6, then shift the six fractional bits
		 * away: round to nearest.
		 */
		CONST( c32, 32, 2 );
		ASM3( "addw", "sum", "sum", c32 );
		CONST( c6, 6, 2 );
		ASM3( "shrsw", "sum", "sum", c6 );

		/* You'd think "convsuswb", convert signed 16-bit to unsigned
		 * 8-bit with saturation, would be quicker, but it's a lot
		 * slower.
		 *
		 * Clip to 0 - 255 by hand instead.
		 */
		CONST( c0, 0, 2 );
		ASM3( "maxsw", "sum", c0, "sum" );
		CONST( c255, 255, 2 );
		ASM3( "minsw", "sum", c255, "sum" );

		ASM2( "convwb", "d1", "sum" );
	}
	else
		ASM2( "copyw", "d2", "sum" );

	if( !vips_vector_compile( v ) )
		return( -1 );

#ifdef DEBUG_COMPILE
	printf( "done coeffs %d to %d\n", pass->first, pass->last );
	vips_vector_print( v );
#endif /*DEBUG_COMPILE*/

	return( 0 );
}