// TODO(egouriou): Take advantage of periods in the convolution. // Practical resizing filters are periodic outside of the border area. // For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the // source become p pixels in the destination) will have a period of p. // A nice consequence is a period of 1 when downscaling by an integral // factor. Downscaling from typical display resolutions is also bound // to produce interesting periods as those are chosen to have multiple // small factors. // Small periods reduce computational load and improve cache usage if // the coefficients can be shared. For periods of 1 we can consider // loading the factors only once outside the borders. void SkResizeFilter::computeFilters(int srcSize, float destSubsetLo, float destSubsetSize, float scale, SkConvolutionFilter1D* output, const SkConvolutionProcs& convolveProcs) { float destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi) // When we're doing a magnification, the scale will be larger than one. This // means the destination pixels are much smaller than the source pixels, and // that the range covered by the filter won't necessarily cover any source // pixel boundaries. Therefore, we use these clamped values (max of 1) for // some computations. float clampedScale = SkTMin(1.0f, scale); // This is how many source pixels from the center we need to count // to support the filtering function. float srcSupport = fBitmapFilter->width() / clampedScale; // Speed up the divisions below by turning them into multiplies. float invScale = 1.0f / scale; SkTArray<float> filterValues(64); SkTArray<short> fixedFilterValues(64); // Loop over all pixels in the output range. We will generate one set of // filter values for each one. Those values will tell us how to blend the // source pixels to compute the destination pixel. for (int destSubsetI = SkScalarFloorToInt(destSubsetLo); destSubsetI < SkScalarCeilToInt(destSubsetHi); destSubsetI++) { // Reset the arrays. We don't declare them inside so they can re-use the // same malloc-ed buffer. filterValues.reset(); fixedFilterValues.reset(); // This is the pixel in the source directly under the pixel in the dest. // Note that we base computations on the "center" of the pixels. To see // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x // downscale should "cover" the pixels around the pixel with *its center* // at coordinates (2.5, 2.5) in the source, not those around (0, 0). // Hence we need to scale coordinates (0.5, 0.5), not (0, 0). float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale; // Compute the (inclusive) range of source pixels the filter covers. int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport)); int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport)); // Compute the unnormalized filter value at each location of the source // it covers. float filterSum = 0.0f; // Sub of the filter values for normalizing. for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd; curFilterPixel++) { // Distance from the center of the filter, this is the filter coordinate // in source space. We also need to consider the center of the pixel // when comparing distance against 'srcPixel'. In the 5x downscale // example used above the distance from the center of the filter to // the pixel with coordinates (2, 2) should be 0, because its center // is at (2.5, 2.5). float srcFilterDist = ((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel); // Since the filter really exists in dest space, map it there. float destFilterDist = srcFilterDist * clampedScale; // Compute the filter value at that location. float filterValue = fBitmapFilter->evaluate(destFilterDist); filterValues.push_back(filterValue); filterSum += filterValue; } SkASSERT(!filterValues.empty()); // The filter must be normalized so that we don't affect the brightness of // the image. Convert to normalized fixed point. short fixedSum = 0; for (int i = 0; i < filterValues.count(); i++) { short curFixed = output->FloatToFixed(filterValues[i] / filterSum); fixedSum += curFixed; fixedFilterValues.push_back(curFixed); } // The conversion to fixed point will leave some rounding errors, which // we add back in to avoid affecting the brightness of the image. We // arbitrarily add this to the center of the filter array (this won't always // be the center of the filter function since it could get clipped on the // edges, but it doesn't matter enough to worry about that case). short leftovers = output->FloatToFixed(1.0f) - fixedSum; fixedFilterValues[fixedFilterValues.count() / 2] += leftovers; // Now it's ready to go. output->AddFilter(srcBegin, &fixedFilterValues[0], static_cast<int>(fixedFilterValues.count())); } if (convolveProcs.fApplySIMDPadding) { convolveProcs.fApplySIMDPadding( output ); } }
// TODO(egouriou): Take advantage of periods in the convolution. // Practical resizing filters are periodic outside of the border area. // For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the // source become p pixels in the destination) will have a period of p. // A nice consequence is a period of 1 when downscaling by an integral // factor. Downscaling from typical display resolutions is also bound // to produce interesting periods as those are chosen to have multiple // small factors. // Small periods reduce computational load and improve cache usage if // the coefficients can be shared. For periods of 1 we can consider // loading the factors only once outside the borders. void SkResizeFilter::computeFilters(int srcSize, float destSubsetLo, float destSubsetSize, float scale, SkConvolutionFilter1D* output, const SkConvolutionProcs& convolveProcs) { float destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi) // When we're doing a magnification, the scale will be larger than one. This // means the destination pixels are much smaller than the source pixels, and // that the range covered by the filter won't necessarily cover any source // pixel boundaries. Therefore, we use these clamped values (max of 1) for // some computations. float clampedScale = SkTMin(1.0f, scale); // This is how many source pixels from the center we need to count // to support the filtering function. float srcSupport = fBitmapFilter->width() / clampedScale; float invScale = 1.0f / scale; SkSTArray<64, float, true> filterValuesArray; SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray; // Loop over all pixels in the output range. We will generate one set of // filter values for each one. Those values will tell us how to blend the // source pixels to compute the destination pixel. // This is the pixel in the source directly under the pixel in the dest. // Note that we base computations on the "center" of the pixels. To see // why, observe that the destination pixel at coordinates (0, 0) in a 5.0x // downscale should "cover" the pixels around the pixel with *its center* // at coordinates (2.5, 2.5) in the source, not those around (0, 0). // Hence we need to scale coordinates (0.5, 0.5), not (0, 0). destSubsetLo = SkScalarFloorToScalar(destSubsetLo); destSubsetHi = SkScalarCeilToScalar(destSubsetHi); float srcPixel = (destSubsetLo + 0.5f) * invScale; int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo); output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2)); for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++) { // Compute the (inclusive) range of source pixels the filter covers. float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport)); float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport)); // Compute the unnormalized filter value at each location of the source // it covers. // Sum of the filter values for normalizing. // Distance from the center of the filter, this is the filter coordinate // in source space. We also need to consider the center of the pixel // when comparing distance against 'srcPixel'. In the 5x downscale // example used above the distance from the center of the filter to // the pixel with coordinates (2, 2) should be 0, because its center // is at (2.5, 2.5). float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale; int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1; if (filterCount <= 0) { // true when srcSize is equal to srcPixel - srcSupport; this may be a bug return; } filterValuesArray.reset(filterCount); float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount, filterValuesArray.begin()); // The filter must be normalized so that we don't affect the brightness of // the image. Convert to normalized fixed point. int fixedSum = 0; fixedFilterValuesArray.reset(filterCount); const float* filterValues = filterValuesArray.begin(); SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin(); float invFilterSum = 1 / filterSum; for (int fixedI = 0; fixedI < filterCount; fixedI++) { int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum); fixedSum += curFixed; fixedFilterValues[fixedI] = SkToS16(curFixed); } SkASSERT(fixedSum <= 0x7FFF); // The conversion to fixed point will leave some rounding errors, which // we add back in to avoid affecting the brightness of the image. We // arbitrarily add this to the center of the filter array (this won't always // be the center of the filter function since it could get clipped on the // edges, but it doesn't matter enough to worry about that case). int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum; fixedFilterValues[filterCount / 2] += leftovers; // Now it's ready to go. output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount); } if (convolveProcs.fApplySIMDPadding) { convolveProcs.fApplySIMDPadding(output); } }
void BGRAConvolve2D(const unsigned char* sourceData, int sourceByteRowStride, bool sourceHasAlpha, const SkConvolutionFilter1D& filterX, const SkConvolutionFilter1D& filterY, int outputByteRowStride, unsigned char* output, const SkConvolutionProcs& convolveProcs, bool useSimdIfPossible) { int maxYFilterSize = filterY.maxFilter(); // The next row in the input that we will generate a horizontally // convolved row for. If the filter doesn't start at the beginning of the // image (this is the case when we are only resizing a subset), then we // don't want to generate any output rows before that. Compute the starting // row for convolution as the first pixel for the first vertical filter. int filterOffset, filterLength; const SkConvolutionFilter1D::ConvolutionFixed* filterValues = filterY.FilterForValue(0, &filterOffset, &filterLength); int nextXRow = filterOffset; // We loop over each row in the input doing a horizontal convolution. This // will result in a horizontally convolved image. We write the results into // a circular buffer of convolved rows and do vertical convolution as rows // are available. This prevents us from having to store the entire // intermediate image and helps cache coherency. // We will need four extra rows to allow horizontal convolution could be done // simultaneously. We also pad each row in row buffer to be aligned-up to // 16 bytes. // TODO(jiesun): We do not use aligned load from row buffer in vertical // convolution pass yet. Somehow Windows does not like it. int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; int rowBufferHeight = maxYFilterSize + (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); CircularRowBuffer rowBuffer(rowBufferWidth, rowBufferHeight, filterOffset); // Loop over every possible output row, processing just enough horizontal // convolutions to run each subsequent vertical convolution. SkASSERT(outputByteRowStride >= filterX.numValues() * 4); int numOutputRows = filterY.numValues(); // We need to check which is the last line to convolve before we advance 4 // lines in one iteration. int lastFilterOffset, lastFilterLength; // SSE2 can access up to 3 extra pixels past the end of the // buffer. At the bottom of the image, we have to be careful // not to access data past the end of the buffer. Normally // we fall back to the C++ implementation for the last row. // If the last row is less than 3 pixels wide, we may have to fall // back to the C++ version for more rows. Compute how many // rows we need to avoid the SSE implementation for here. filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, &lastFilterLength); int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / (lastFilterOffset + lastFilterLength); filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, &lastFilterLength); for (int outY = 0; outY < numOutputRows; outY++) { filterValues = filterY.FilterForValue(outY, &filterOffset, &filterLength); // Generate output rows until we have enough to run the current filter. while (nextXRow < filterOffset + filterLength) { if (convolveProcs.fConvolve4RowsHorizontally && nextXRow + 3 < lastFilterOffset + lastFilterLength - avoidSimdRows) { const unsigned char* src[4]; unsigned char* outRow[4]; for (int i = 0; i < 4; ++i) { src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride]; outRow[i] = rowBuffer.advanceRow(); } convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4*rowBufferWidth); nextXRow += 4; } else { // Check if we need to avoid SSE2 for this row. if (convolveProcs.fConvolveHorizontally && nextXRow < lastFilterOffset + lastFilterLength - avoidSimdRows) { convolveProcs.fConvolveHorizontally( &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX, rowBuffer.advanceRow(), sourceHasAlpha); } else { if (sourceHasAlpha) { ConvolveHorizontallyAlpha( &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX, rowBuffer.advanceRow()); } else { ConvolveHorizontallyNoAlpha( &sourceData[(uint64_t)nextXRow * sourceByteRowStride], filterX, rowBuffer.advanceRow()); } } nextXRow++; } } // Compute where in the output image this row of final data will go. unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride]; // Get the list of rows that the circular buffer has, in order. int firstRowInCircularBuffer; unsigned char* const* rowsToConvolve = rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); // Now compute the start of the subset of those rows that the filter // needs. unsigned char* const* firstRowForFilter = &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; if (convolveProcs.fConvolveVertically) { convolveProcs.fConvolveVertically(filterValues, filterLength, firstRowForFilter, filterX.numValues(), curOutputRow, sourceHasAlpha); } else { ConvolveVertically(filterValues, filterLength, firstRowForFilter, filterX.numValues(), curOutputRow, sourceHasAlpha); } } }