void SkMatrixConvolutionImageFilter::filterPixels(const SkBitmap& src, SkBitmap* result, const SkIRect& rect) {
    for (int y = rect.fTop; y < rect.fBottom; ++y) {
        SkPMColor* dptr = result->getAddr32(rect.fLeft, y);
        for (int x = rect.fLeft; x < rect.fRight; ++x) {
            SkScalar sumA = 0, sumR = 0, sumG = 0, sumB = 0;
            for (int cy = 0; cy < fKernelSize.fHeight; cy++) {
                for (int cx = 0; cx < fKernelSize.fWidth; cx++) {
                    SkPMColor s = PixelFetcher::fetch(src, x + cx - fTarget.fX, y + cy - fTarget.fY);
                    SkScalar k = fKernel[cy * fKernelSize.fWidth + cx];
                    if (convolveAlpha) {
                        sumA += SkScalarMul(SkIntToScalar(SkGetPackedA32(s)), k);
                    }
                    sumR += SkScalarMul(SkIntToScalar(SkGetPackedR32(s)), k);
                    sumG += SkScalarMul(SkIntToScalar(SkGetPackedG32(s)), k);
                    sumB += SkScalarMul(SkIntToScalar(SkGetPackedB32(s)), k);
                }
            }
            int a = convolveAlpha
                    ? SkClampMax(SkScalarFloorToInt(SkScalarMul(sumA, fGain) + fBias), 255)
                    : 255;
            int r = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumR, fGain) + fBias), a);
            int g = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumG, fGain) + fBias), a);
            int b = SkClampMax(SkScalarFloorToInt(SkScalarMul(sumB, fGain) + fBias), a);
            if (!convolveAlpha) {
                a = SkGetPackedA32(PixelFetcher::fetch(src, x, y));
                *dptr++ = SkPreMultiplyARGB(a, r, g, b);
            } else {
                *dptr++ = SkPackARGB32(a, r, g, b);
            }
        }
    }
}
Exemple #2
0
/**
 *  Fallback used by apply_kernel_interp() when the kernel is wider than the
 *  source image (2*rx > sw).
 *
 *  Every output pixel blends two box sums read from the summed-area table
 *  'sum' (row stride sw + 1): an outer box whose area divisor is
 *  (2*rx + 1)*(2*ry + 1), weighted by outerWeight, and an inner box whose
 *  area divisor is (2*rx - 1)*(2*ry - 1), weighted by 255 - outerWeight.
 *  All four edges of both boxes are clamped for every pixel, so no
 *  assumption is made about which edges fall outside the source.
 *
 *  Writes (sw + 2*rx) * (sh + 2*ry) bytes to 'dst', row by row.
 */
static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
                                  const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
    SkASSERT(2*rx > sw);

    int innerWeight = 255 - outerWeight;

    // Bump whichever weight is >= 128 by one.
    outerWeight += outerWeight >> 7;
    innerWeight += innerWeight >> 7;

    // 16.16 reciprocal-area scales; together with the 8-bit weight they are
    // undone by the final >> 24.
    const uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
    const uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));

    const int stride = sw + 1;
    const int dstWidth = sw + 2*rx;
    const int dstHeight = sh + 2*ry;

    for (int row = 0; row < dstHeight; ++row) {
        // Unclamped vertical extent of the outer box for this output row.
        const int top = row - 2*ry;
        const int bottom = row + 1;

        const int outerTop = SkClampPos(top) * stride;
        const int outerBottom = SkFastMin32(bottom, sh) * stride;
        const int innerTop = SkClampPos(top + 1) * stride;
        const int innerBottom = SkClampMax(bottom - 1, sh) * stride;

        for (int col = 0; col < dstWidth; ++col) {
            // Unclamped horizontal extent of the outer box for this column.
            const int left = col - 2*rx;
            const int right = col + 1;

            const int outerLeft = SkClampPos(left);
            const int outerRight = SkFastMin32(right, sw);
            const int innerLeft = SkClampPos(left + 1);
            const int innerRight = SkClampMax(right - 1, sw);

            const uint32_t outerSum = sum[outerLeft+outerTop] + sum[outerRight+outerBottom]
                                      - sum[outerRight+outerTop] - sum[outerLeft+outerBottom];
            const uint32_t innerSum = sum[innerLeft+innerTop] + sum[innerRight+innerBottom]
                                      - sum[innerRight+innerTop] - sum[innerLeft+innerBottom];

            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);
        }
    }
}
Exemple #3
0
// Reference implementation: directly convolve a step function with a Gaussian.
// For each i in [0, resultCount) the products
//     gaussian(j, gaussianSigma) * step(i - j, stepMin, stepMax)
// are summed over the integers j in [-R, R), where R = ceil(10 * gaussianSigma).
// The sum is rounded to the nearest int, pinned to be non-negative and at most
// 255, and stored in result[i]. Only the right half of the convolution is
// produced.
static void brute_force_1d(SkScalar stepMin, SkScalar stepMax,
                           SkScalar gaussianSigma,
                           int* result, int resultCount) {
    const int reach = SkScalarCeilToInt(10 * gaussianSigma);

    for (int i = 0; i < resultCount; ++i) {
        SkScalar acc = 0.0f;
        int j = -reach;
        while (j < reach) {
            acc += gaussian(j, gaussianSigma) * step(i-j, stepMin, stepMax);
            ++j;
        }

        const int rounded = int(acc + 0.5f);
        const int nonNegative = SkClampPos(rounded);
        result[i] = SkClampMax(nonNegative, 255);
    }
}
bool SkAnimatorScript::EvalRGB(const char* function, size_t len, SkTDArray<SkScriptValue>& params,
        void* eng, SkScriptValue* value) {
    if (SK_LITERAL_STR_EQUAL("rgb", function, len) == false)
        return false;
    if (params.count() != 3)
        return false;
    SkScriptEngine* engine = (SkScriptEngine*) eng;
    unsigned result = 0xFF000000;
    int shift = 16;
    for (SkScriptValue* valuePtr = params.begin(); valuePtr < params.end(); valuePtr++) {
        engine->convertTo(SkType_Int, valuePtr);
        result |= SkClampMax(valuePtr->fOperand.fS32, 255) << shift;
        shift -= 8;
    }
    value->fOperand.fS32 = result;
    value->fType = SkType_Int;
    return true;
}
Exemple #5
0
// One-stop-shop shader for,
//   - nearest-neighbor sampling (_nofilter_),
//   - clamp tiling in X and Y both (Clamp_),
//   - with at most a scale and translate matrix (_DX_),
//   - and no extra alpha applied (_opaque_),
//   - sampling from 8888 (_S32_) and drawing to 8888 (_S32_).
static void Clamp_S32_opaque_D32_nofilter_DX_shaderproc(const void* sIn, int x, int y,
                                                        SkPMColor* dst, int count) {
    const SkBitmapProcState& s = *static_cast<const SkBitmapProcState*>(sIn);
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);
    SkASSERT(s.fAlphaScale == 256);

    const unsigned maxX = s.fPixmap.width() - 1;
    SkFractionalInt fx;
    int dstY;
    {
        const SkBitmapProcStateAutoMapper mapper(s, x, y);
        const unsigned maxY = s.fPixmap.height() - 1;
        dstY = SkClampMax(mapper.intY(), maxY);
        fx = mapper.fractionalIntX();
    }

    const SkPMColor* src = s.fPixmap.addr32(0, dstY);
    const SkFractionalInt dx = s.fInvSxFractionalInt;

    // Check if we're safely inside [0...maxX] so no need to clamp each computed index.
    //
    if ((uint64_t)SkFractionalIntToInt(fx) <= maxX &&
        (uint64_t)SkFractionalIntToInt(fx + dx * (count - 1)) <= maxX)
    {
        int count4 = count >> 2;
        for (int i = 0; i < count4; ++i) {
            SkPMColor src0 = src[SkFractionalIntToInt(fx)]; fx += dx;
            SkPMColor src1 = src[SkFractionalIntToInt(fx)]; fx += dx;
            SkPMColor src2 = src[SkFractionalIntToInt(fx)]; fx += dx;
            SkPMColor src3 = src[SkFractionalIntToInt(fx)]; fx += dx;
            dst[0] = src0;
            dst[1] = src1;
            dst[2] = src2;
            dst[3] = src3;
            dst += 4;
        }
        for (int i = (count4 << 2); i < count; ++i) {
            unsigned index = SkFractionalIntToInt(fx);
            SkASSERT(index <= maxX);
            *dst++ = src[index];
            fx += dx;
        }
    } else {
// Builds a SCALE x SCALE kIndex_8 bitmap.
//
// An N32 bitmap of the same size is cleared to transparent and drawn into by
// color_wheel_native(). A nine-entry colour table is then created from the
// premultiplied forms of the colours listed below, and every N32 pixel is
// translated into a table index via find(), clamped to the last table entry.
// NOTE(review): find() is defined elsewhere; presumably it returns the index
// of the matching/closest table entry -- confirm.
static SkBitmap indexed_bitmap() {
    SkBitmap source;
    source.allocN32Pixels(SCALE, SCALE);
    source.eraseColor(SK_ColorTRANSPARENT);

    SkCanvas canvas(source);
    color_wheel_native(&canvas);

    const SkColor palette[] = {
            SK_ColorTRANSPARENT,
            SK_ColorWHITE,
            SK_ColorBLACK,
            SK_ColorRED,
            SK_ColorGREEN,
            SK_ColorBLUE,
            SK_ColorCYAN,
            SK_ColorMAGENTA,
            SK_ColorYELLOW,
    };

    // The colour table stores premultiplied colours.
    SkPMColor premul[SK_ARRAY_COUNT(palette)];
    size_t entry = 0;
    while (entry < SK_ARRAY_COUNT(palette)) {
        premul[entry] = premultiply_color(palette[entry]);
        ++entry;
    }

    SkBitmap indexed;
    SkAutoTUnref<SkColorTable> table(new SkColorTable(premul, SK_ARRAY_COUNT(premul)));
    SkImageInfo info = SkImageInfo::Make(SCALE, SCALE, kIndex_8_SkColorType,
                                         kPremul_SkAlphaType);
    indexed.allocPixels(info, nullptr, table);

    // Both pixel buffers stay locked while they are addressed directly.
    SkAutoLockPixels lockSource(source);
    SkAutoLockPixels lockIndexed(indexed);
    for (int row = 0; row < SCALE; ++row) {
        for (int col = 0; col < SCALE; ++col) {
            const SkPMColor pixel = *source.getAddr32(col, row);
            const int match = find(premul, SK_ARRAY_COUNT(premul), pixel);
            *indexed.getAddr8(col, row) = SkClampMax(match, SK_ARRAY_COUNT(premul) - 1);
        }
    }
    return indexed;
}
        SkScalar fy, SkScalar dy,
        SkScalar b, SkScalar db,
        SkScalar fSr2D2, SkScalar foura, SkScalar fOneOverTwoA, bool posRoot,
        SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
        int count);

// Shades 'count' pixels of a two-point radial gradient span.
// For each pixel, two_point_radial() yields a fixed-point t from the current
// (b, fx, fy) and the constant terms fSr2D2, foura, fOneOverTwoA and posRoot.
// t is clamped with SkClampMax(t, 0xFFFF), shifted down by
// SkGradientShaderBase::kCache32Shift and used to index 'cache'; the looked-up
// colour is written to dstC. fx, fy and b then advance by dx, dy and db.
void shadeSpan_twopoint_clamp(SkScalar fx, SkScalar dx,
        SkScalar fy, SkScalar dy,
        SkScalar b, SkScalar db,
        SkScalar fSr2D2, SkScalar foura, SkScalar fOneOverTwoA, bool posRoot,
        SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
        int count) {
    while (count > 0) {
        const SkFixed t = two_point_radial(b, fx, fy, fSr2D2, foura,
                                           fOneOverTwoA, posRoot);
        const SkFixed clamped = SkClampMax(t, 0xFFFF);
        SkASSERT(clamped <= 0xFFFF);

        *dstC = cache[clamped >> SkGradientShaderBase::kCache32Shift];
        ++dstC;

        // Step the per-pixel terms to the next pixel of the span.
        fx += dx;
        fy += dy;
        b += db;
        --count;
    }
}
void shadeSpan_twopoint_mirror(SkScalar fx, SkScalar dx,
        SkScalar fy, SkScalar dy,
        SkScalar b, SkScalar db,
        SkScalar fSr2D2, SkScalar foura, SkScalar fOneOverTwoA, bool posRoot,
        SkPMColor* SK_RESTRICT dstC, const SkPMColor* SK_RESTRICT cache,
        int count) {
    for (; count > 0; --count) {
        SkFixed t = two_point_radial(b, fx, fy, fSr2D2, foura,
 // Returns the 32-bit pixel of 'src' at (x, y) after clamping each coordinate
 // with SkClampMax to the last valid column / row of the bitmap.
 static inline SkPMColor fetch(const SkBitmap& src, int x, int y) {
     const int col = SkClampMax(x, src.width() - 1);
     const int row = SkClampMax(y, src.height() - 1);
     return *src.getAddr32(col, row);
 }
template <typename Color, typename ColorPacker>
void highQualityFilter(ColorPacker pack, const SkBitmapProcState& s, int x, int y, Color* SK_RESTRICT colors, int count) {
    const int maxX = s.fBitmap->width();
    const int maxY = s.fBitmap->height();

    while (count-- > 0) {
        SkPoint srcPt;
        s.fInvProc(s.fInvMatrix, x + 0.5f,
                   y + 0.5f, &srcPt);
        srcPt.fX -= SK_ScalarHalf;
        srcPt.fY -= SK_ScalarHalf;

        SkScalar weight = 0;
        SkScalar fr = 0, fg = 0, fb = 0, fa = 0;

        int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
        int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY);
        int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
        int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);

        for (int srcY = y0; srcY < y1; srcY++) {
            SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));

            for (int srcX = x0; srcX < x1 ; srcX++) {
                SkScalar xWeight = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));

                SkScalar combined_weight = SkScalarMul(xWeight, yWeight);

                SkPMColor c = *s.fBitmap->getAddr32(srcX, srcY);
                fr += combined_weight * SkGetPackedR32(c);
                fg += combined_weight * SkGetPackedG32(c);
/**
 *  Reference Gaussian blur of an A8 mask, implemented as two separable
 *  floating-point passes over zero-padded copies of the image.
 *
 *  @param dst              Receives the blurred mask. Its bounds are src's
 *                          bounds outset by the blur padding, except for
 *                          kInner_Style where bounds and rowBytes mirror src.
 *                          fImage is left NULL when src has no image or when
 *                          the function fails.
 *  @param src              Mask to blur; must be kA8_Format.
 *  @param provided_radius  Blur radius; scaled by kBlurRadiusFudgeFactor and
 *                          halved to obtain the standard deviation used here.
 *  @param style            kNormal_Style keeps the raw blur; kInner_Style
 *                          merges the blur with src into a src-sized image;
 *                          any other style is post-processed by
 *                          clamp_with_orig().
 *  @param margin           If non-NULL, receives the padding added on each
 *                          side of src.
 *  @return false if src is not A8 or an image size cannot be computed,
 *          true otherwise.
 */
bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius,
                            Style style, SkIPoint* margin) {

    if (src.fFormat != SkMask::kA8_Format) {
        return false;
    }

    float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
    float stddev = SkScalarToFloat(radius) /2.0f;
    float variance = stddev * stddev;

    int windowSize = SkScalarCeil(stddev*4);
    // round window size up to nearest odd number
    windowSize |= 1;

    SkAutoTMalloc<float> gaussWindow(windowSize);

    int halfWindow = windowSize >> 1;

    gaussWindow[halfWindow] = 1;

    // Fill the window symmetrically around its centre tap.
    // NOTE(review): the exponent divides by variance, not 2*variance as in the
    // textbook Gaussian; left unchanged -- confirm this is intended.
    float windowSum = 1;
    for (int x = 1 ; x <= halfWindow ; ++x) {
        float gaussian = expf(-x*x / variance);
        gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
        windowSum += 2*gaussian;
    }

    // leave the filter un-normalized for now; we will divide by the normalization
    // sum later;

    int pad = halfWindow;
    if (margin) {
        margin->set( pad, pad );
    }

    dst->fBounds = src.fBounds;
    dst->fBounds.outset(pad, pad);

    dst->fRowBytes = dst->fBounds.width();
    dst->fFormat = SkMask::kA8_Format;
    dst->fImage = NULL;

    if (src.fImage) {

        size_t dstSize = dst->computeImageSize();
        if (0 == dstSize) {
            return false;   // too big to allocate, abort
        }

        int             srcWidth = src.fBounds.width();
        int             srcHeight = src.fBounds.height();
        int             dstWidth = dst->fBounds.width();

        const uint8_t*  srcPixels = src.fImage;
        uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
        // Frees dstPixels on every early return; detached once ownership has
        // been settled at the end of this scope.
        SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);

        // do the actual blur.  First, make a padded copy of the source.
        // use double pad so we never have to check if we're outside anything

        int padWidth = srcWidth + 4*pad;
        int padHeight = srcHeight;
        int padSize = padWidth * padHeight;

        SkAutoTMalloc<uint8_t> padPixels(padSize);
        memset(padPixels, 0, padSize);

        for (int y = 0 ; y < srcHeight; ++y) {
            uint8_t* padptr = padPixels + y * padWidth + 2*pad;
            // Step through the source by its row stride, which need not equal
            // its width (the style post-processing below also uses fRowBytes).
            const uint8_t* srcptr = srcPixels + y * src.fRowBytes;
            memcpy(padptr, srcptr, srcWidth);
        }

        // blur in X, transposing the result into a temporary floating point buffer.
        // also double-pad the intermediate result so that the second blur doesn't
        // have to do extra conditionals.

        int tmpWidth = padHeight + 4*pad;
        int tmpHeight = padWidth - 2*pad;
        int tmpSize = tmpWidth * tmpHeight;

        SkAutoTMalloc<float> tmpImage(tmpSize);
        memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));

        for (int y = 0 ; y < padHeight ; ++y) {
            uint8_t *srcScanline = padPixels + y*padWidth;
            for (int x = pad ; x < padWidth - pad ; ++x) {
                float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
                uint8_t *windowCenter = srcScanline + x;
                for (int i = -pad ; i <= pad ; ++i) {
                    *outPixel += gaussWindow[pad+i]*windowCenter[i];
                }
                *outPixel /= windowSum;
            }
        }

        // blur in Y; now filling in the actual desired destination.  We have to do
        // the transpose again; these transposes guarantee that we read memory in
        // linear order.

        for (int y = 0 ; y < tmpHeight ; ++y) {
            float *srcScanline = tmpImage + y*tmpWidth;
            for (int x = pad ; x < tmpWidth - pad ; ++x) {
                float *windowCenter = srcScanline + x;
                float finalValue = 0;
                for (int i = -pad ; i <= pad ; ++i) {
                    finalValue += gaussWindow[pad+i]*windowCenter[i];
                }
                finalValue /= windowSum;
                uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
                int integerPixel = int(finalValue + 0.5f);
                *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
            }
        }

        dst->fImage = dstPixels;
        // if need be, alloc the "real" dst (same size as src) and copy/merge
        // the blur into it (applying the src)
        if (style == kInner_Style) {
            // now we allocate the "real" dst, mirror the size of src
            size_t srcSize = src.computeImageSize();
            if (0 == srcSize) {
                // autoCall is about to free dstPixels; do not leave dst
                // pointing at the released buffer.
                dst->fImage = NULL;
                return false;   // too big to allocate, abort
            }
            dst->fImage = SkMask::AllocImage(srcSize);
            merge_src_with_blur(dst->fImage, src.fRowBytes,
                srcPixels, src.fRowBytes,
                dstPixels + pad*dst->fRowBytes + pad,
                dst->fRowBytes, srcWidth, srcHeight);
            SkMask::FreeImage(dstPixels);
        } else if (style != kNormal_Style) {
            clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
                dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
        }
        // dstPixels is now either owned by dst or already freed above.
        (void)autoCall.detach();
    }

    if (style == kInner_Style) {
        dst->fBounds = src.fBounds; // restore trimmed bounds
        dst->fRowBytes = src.fRowBytes;
    }

    return true;
}
}

static void Clamp_S32_D32_nofilter_trans_shaderproc(const void* sIn,
                                                    int x, int y,
                                                    SkPMColor* SK_RESTRICT colors,
                                                    int count) {
    const SkBitmapProcState& s = *static_cast<const SkBitmapProcState*>(sIn);
    SkASSERT(((s.fInvType & ~SkMatrix::kTranslate_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);
    SkASSERT(count > 0 && colors != nullptr);
    SkASSERT(kNone_SkFilterQuality == s.fFilterQuality);

    const int maxX = s.fPixmap.width() - 1;
    const int maxY = s.fPixmap.height() - 1;
    int ix = s.fFilterOneX + x;
    int iy = SkClampMax(s.fFilterOneY + y, maxY);
    const SkPMColor* row = s.fPixmap.addr32(0, iy);

    // clamp to the left
    if (ix < 0) {
        int n = SkMin32(-ix, count);
        sk_memset32(colors, row[0], n);
        count -= n;
        if (0 == count) {
            return;
        }
        colors += n;
        SkASSERT(-ix == n);
        ix = 0;
    }
    // copy the middle
Exemple #12
0
/**
 *  Writes a (sw + 2*rx) x (sh + 2*ry) blurred image to dst, where every output
 *  pixel blends two box sums read from the summed-area buffer sum[]: an outer
 *  box scaled by 1/((2*rx + 1)*(2*ry + 1)) and weighted by outerWeight, and an
 *  inner box scaled by 1/((2*rx - 1)*(2*ry - 1)) and weighted by
 *  255 - outerWeight.
 *
 *  sw and sh are the width and height of the src. Since the sum buffer
 *  matches that, but has an extra row and col at the beginning (with zeros),
 *  we can just use sw and sh as our "max" values for pinning coordinates
 *  when sampling into sum[][]
 *
 *  The inner loop is conceptually simple; we break it into several variants
 *  to improve performance. Here's the original version:
        for (int x = 0; x < dw; ++x) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                               - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                               - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }
 *  The sections are:
 *     left-hand section, where prev_x is clamped to 0
 *     center section, where neither prev_x nor next_x is clamped
 *     right-hand section, where next_x is clamped to sw
 *  When UNROLL_KERNEL_LOOP is enabled, the center section is unrolled four
 *  pixels at a time for additional speedup.
 *
 *  If the kernel is wider than the source (2*rx > sw) the work is delegated to
 *  kernel_interp_clamped(), which clamps every coordinate instead.
 *
 *  @param dst          Output pixels, written sequentially row by row.
 *  @param rx, ry       Blur radii; both must be > 0.
 *  @param sum          Summed-area buffer with row stride sw + 1.
 *  @param sw, sh       Width and height of the source image.
 *  @param outerWeight  Weight (0..255) given to the outer box.
*/
static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
                                const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
    SkASSERT(rx > 0 && ry > 0);
    SkASSERT(outerWeight <= 255);

    if (2*rx > sw) {
        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
        return;
    }

    int innerWeight = 255 - outerWeight;

    // round these guys up if they're bigger than 127
    // (exactly one of the two is >= 128, so afterwards they add up to 256)
    outerWeight += outerWeight >> 7;
    innerWeight += innerWeight >> 7;

    // weight/area as 16.16 fixed point; the final >> 24 removes these 16 bits
    // plus the 8 bits of weight.
    uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    // Unclamped top/bottom rows of the outer box for output row 0.
    int prev_y = -2*ry;
    int next_y = 1;

    // The left and right sections are 2*rx pixels wide each, so the center
    // section must not have negative width.
    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; ++y) {
        // Row offsets into sum[] for the outer (py, ny) and inner (ipy, iny)
        // boxes, pinned to [0, sh].
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        // Left-hand section: the left edge of both boxes is pinned to column 0.
        for (; x < 2*rx; ++x) {
            SkASSERT(prev_x < 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            int ipx = 0;
            int inx = next_x - 1;

            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                                - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                                - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // Center section: no clamping is needed, so the eight corner indices
        // (i0-i3 outer box, i4-i7 inner box) are computed once and then just
        // incremented per pixel.
        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;
        int i4 = prev_x + 1 + ipy;
        int i5 = next_x - 1 + iny;
        int i6 = next_x - 1 + ipy;
        int i7 = prev_x + 1 + iny;

#if UNROLL_KERNEL_LOOP
        // Four center pixels per iteration; the loop below picks up the rest.
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        // Remaining center pixels, one at a time.
        for (; x < dw - 2*rx; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // Right-hand section: the right edge of both boxes is pinned to sw.
        for (; x < dw; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            int ipx = prev_x + 1;
            int inx = sw;

            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                                - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                                - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outerSum * outerScale
                             + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
// Resamples the input bitmap with a bicubic kernel, scaling it by fScale.
//
// If this filter has an input filter, it is run first and its output becomes
// the source; otherwise 'source' is used directly. Only ARGB_8888 sources with
// accessible pixels are supported. 'result' is allocated at the rounded-out
// scaled size. Every destination pixel is mapped back into source space
// (including a half-pixel shift), and the surrounding 4x4 neighbourhood --
// with coordinates clamped to the source bounds -- is blended with
// cubicBlend(): first along x for each of the four rows, then along y.
//
// Returns false if the input filter fails, the source config is unsupported,
// or pixels are unavailable for the source or the result.
bool SkBicubicImageFilter::onFilterImage(Proxy* proxy,
                                         const SkBitmap& source,
                                         const SkMatrix& matrix,
                                         SkBitmap* result,
                                         SkIPoint* loc) {
    SkBitmap src = source;
    if (getInput(0) && !getInput(0)->filterImage(proxy, source, matrix, &src, loc)) {
        return false;
    }

    if (src.config() != SkBitmap::kARGB_8888_Config) {
        return false;
    }

    SkAutoLockPixels alp(src);
    if (!src.getPixels()) {
        return false;
    }

    const SkScalar scaledWidth = SkScalarMul(SkIntToScalar(src.width()), fScale.fWidth);
    const SkScalar scaledHeight = SkScalarMul(SkIntToScalar(src.height()), fScale.fHeight);
    SkRect dstRect = SkRect::MakeWH(scaledWidth, scaledHeight);
    SkIRect dstIRect;
    dstRect.roundOut(&dstIRect);

    result->setConfig(src.config(), dstIRect.width(), dstIRect.height());
    result->allocPixels();
    if (!result->getPixels()) {
        return false;
    }

    // Destination -> source mapping, shifted by half a pixel.
    SkRect srcRect;
    src.getBounds(&srcRect);
    SkMatrix inverse;
    inverse.setRectToRect(dstRect, srcRect, SkMatrix::kFill_ScaleToFit);
    inverse.postTranslate(SkFloatToScalar(-0.5f), SkFloatToScalar(-0.5f));

    const int lastCol = src.width() - 1;
    const int lastRow = src.height() - 1;

    for (int y = dstIRect.fTop; y < dstIRect.fBottom; ++y) {
        SkPMColor* dptr = result->getAddr32(dstIRect.fLeft, y);
        for (int x = dstIRect.fLeft; x < dstIRect.fRight; ++x) {
            SkPoint srcPt, dstPt = SkPoint::Make(SkIntToScalar(x), SkIntToScalar(y));
            inverse.mapPoints(&srcPt, &dstPt, 1);

            const SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
            const SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
            const int sx = SkScalarFloorToInt(srcPt.fX);
            const int sy = SkScalarFloorToInt(srcPt.fY);

            // Clamped coordinates of the 4x4 taps: offsets -1, 0, +1, +2
            // around the floored source position.
            int cols[4];
            int rows[4];
            for (int tap = 0; tap < 4; ++tap) {
                cols[tap] = SkClampMax(sx - 1 + tap, lastCol);
                rows[tap] = SkClampMax(sy - 1 + tap, lastRow);
            }

            // Horizontal pass: one blended colour per tap row.
            SkPMColor blendedRow[4];
            for (int r = 0; r < 4; ++r) {
                const SkPMColor p0 = *src.getAddr32(cols[0], rows[r]);
                const SkPMColor p1 = *src.getAddr32(cols[1], rows[r]);
                const SkPMColor p2 = *src.getAddr32(cols[2], rows[r]);
                const SkPMColor p3 = *src.getAddr32(cols[3], rows[r]);
                blendedRow[r] = cubicBlend(fCoefficients, fractx, p0, p1, p2, p3);
            }

            // Vertical pass across the four row results.
            *dptr++ = cubicBlend(fCoefficients, fracty,
                                 blendedRow[0], blendedRow[1], blendedRow[2], blendedRow[3]);
        }
    }
    return true;
}