// Darken blend for a 16-bit (565) dst when the 32-bit src is known to be
// opaque (alpha == 255): each channel of the result is the minimum of the
// corresponding src and dst channels, with src first packed down to 565.
static uint16_t darken_modeproc16_255(SkPMColor src, uint16_t dst) {
    SkASSERT(require_255(src));

    const unsigned minR = SkFastMin32(SkPacked32ToR16(src), SkGetPackedR16(dst));
    const unsigned minG = SkFastMin32(SkPacked32ToG16(src), SkGetPackedG16(dst));
    const unsigned minB = SkFastMin32(SkPacked32ToB16(src), SkGetPackedB16(dst));

    return SkPackRGB16(minR, minG, minB);
}
/**
 *  This is the path for apply_kernel_interp() to be taken when the kernel
 *  is wider than the source image: every x position may need clamping, so
 *  each sample pins its coordinates into the summed-area table.  The result
 *  interpolates between an outer (2*rx+1 x 2*ry+1) and an inner
 *  (2*rx-1 x 2*ry-1) box average, weighted by outerWeight/innerWeight.
 */
static void kernel_interp_clamped(uint8_t dst[], int rx, int ry,
                                  const uint32_t sum[], int sw, int sh,
                                  U8CPU outerWeight) {
    SkASSERT(2*rx > sw);

    int innerWeight = 255 - outerWeight;

    // round these guys up if they're bigger than 127
    outerWeight += outerWeight >> 7;
    innerWeight += innerWeight >> 7;

    // 16.16-ish fixed-point reciprocals of the two box areas, each
    // pre-multiplied by its interpolation weight; a box sum times the scale,
    // shifted down by 24, yields the weighted average.
    uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;   // sum[] has one extra leading column (zeros)

    // dst is the src expanded by the kernel radius on every side
    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    // prev/next track the top/bottom edges of the sliding box (pre-clamp)
    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; ++y) {
        // row offsets into sum[] for the outer box (py/ny) and the
        // one-pixel-smaller inner box (ipy/iny), pinned to the table bounds
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; ++x) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            int ipx = SkClampPos(prev_x + 1);
            int inx = SkClampMax(next_x - 1, sw);

            // standard summed-area-table box evaluation for both boxes
            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                              - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                              - sum[inx+ipy] - sum[ipx+iny];
            // blend the two weighted averages and drop the fixed-point bits
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
// Lighten xfermode for premultiplied 32-bit pixels.  The result alpha is the
// standard src-over alpha (sa + da - sa*da); each color channel comes from
// lighten_p() and is then pinned to the result alpha so the returned pixel
// remains a valid premultiplied color.
static SkPMColor lighten_modeproc(SkPMColor src, SkPMColor dst) {
    const unsigned srcA = SkGetPackedA32(src);
    const unsigned dstA = SkGetPackedA32(dst);
    const unsigned srcScale = SkAlpha255To256(255 - srcA);
    const unsigned dstScale = SkAlpha255To256(255 - dstA);

    const unsigned outA = srcA + dstA - SkAlphaMulAlpha(srcA, dstA);
    const unsigned outR = lighten_p(SkGetPackedR32(src), SkGetPackedR32(dst),
                                    srcScale, dstScale);
    const unsigned outG = lighten_p(SkGetPackedG32(src), SkGetPackedG32(dst),
                                    srcScale, dstScale);
    const unsigned outB = lighten_p(SkGetPackedB32(src), SkGetPackedB32(dst),
                                    srcScale, dstScale);

    return SkPackARGB32(outA,
                        SkFastMin32(outR, outA),
                        SkFastMin32(outG, outA),
                        SkFastMin32(outB, outA));
}
/**
 *  This is the path for apply_kernel() to be taken when the kernel
 *  is wider than the source image: every x position may need clamping, so
 *  each sample pins its coordinates into the summed-area table.
 */
static void kernel_clamped(uint8_t dst[], int rx, int ry,
                           const uint32_t sum[], int sw, int sh) {
    SkASSERT(2*rx > sw);

    // 8.24 fixed-point reciprocal of the kernel area: a box sum times scale,
    // shifted down by 24, yields the box average.
    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;   // sum[] has one extra leading column (zeros)

    // dst is the src expanded by the kernel radius on every side
    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    // prev/next track the top/bottom edges of the sliding box (pre-clamp)
    int prev_y = -2*ry;
    int next_y = 1;

    for (int y = 0; y < dh; ++y) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;

        for (int x = 0; x < dw; ++x) {
            int px = SkClampPos(prev_x);
            int nx = SkFastMin32(next_x, sw);

            // TODO: should we be adding 1/2 (1 << 23) to round to the
            // nearest integer here?
            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
/**
 *  Fallback path for apply_kernel(), taken when the kernel is wider than
 *  the source image, so every x position needs clamped sampling into the
 *  summed-area table.
 */
static void kernel_clamped(uint8_t dst[], int rx, int ry,
                           const uint32_t sum[], int sw, int sh) {
    SkASSERT(2*rx > sw);

    // fixed-point reciprocal of the kernel area (8.24)
    const uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));
    const int sumStride = sw + 1;
    const int dstW = sw + 2*rx;
    const int dstH = sh + 2*ry;

    int topY = -2*ry;   // unclamped top edge of the sliding box
    int botY = 1;       // unclamped bottom edge of the sliding box

    for (int y = 0; y < dstH; ++y) {
        const int rowT = SkClampPos(topY) * sumStride;
        const int rowB = SkFastMin32(botY, sh) * sumStride;

        int leftX = -2*rx;
        int rightX = 1;

        for (int x = 0; x < dstW; ++x) {
            const int colL = SkClampPos(leftX);
            const int colR = SkFastMin32(rightX, sw);

            const uint32_t boxSum = sum[colL + rowT] + sum[colR + rowB]
                                  - sum[colR + rowT] - sum[colL + rowB];
            *dst++ = SkToU8(boxSum * scale >> 24);

            ++leftX;
            ++rightX;
        }

        ++topY;
        ++botY;
    }
}
// Renders the glyph's image into origGlyph.fImage (caller-allocated).
// Depending on the context's config this either asks the font backend to
// generate the image directly, or builds a path and rasterizes/draws it;
// an optional mask filter and gamma table are then applied in place.
void SkScalerContext::getImage(const SkGlyph& origGlyph) {
    const SkGlyph* glyph = &origGlyph;
    SkGlyph tmpGlyph;

    if (fMaskFilter) {  // restore the prefilter bounds
        tmpGlyph.init(origGlyph.fID);

        // need the original bounds, sans our maskfilter
        SkMaskFilter* mf = fMaskFilter;
        fMaskFilter = NULL;             // temp disable
        this->getMetrics(&tmpGlyph);
        fMaskFilter = mf;               // restore

        // share the caller's storage; tmpGlyph does not own it
        tmpGlyph.fImage = origGlyph.fImage;

        // we need the prefilter bounds to be <= filter bounds
        SkASSERT(tmpGlyph.fWidth <= origGlyph.fWidth);
        SkASSERT(tmpGlyph.fHeight <= origGlyph.fHeight);
        glyph = &tmpGlyph;
    }

    // frame/path-effect/rasterizer all force the draw-from-path route
    if (fRec.fFrameWidth > 0 || fPathEffect != NULL || fRasterizer != NULL) {
        SkPath devPath, fillPath;
        SkMatrix fillToDevMatrix;

        this->internalGetPath(*glyph, &fillPath, &devPath, &fillToDevMatrix);

        if (fRasterizer) {
            SkMask mask;

            glyph->toMask(&mask);
            mask.fFormat = SkMask::kA8_Format;
            // clear first: the rasterizer only renders coverage into it
            sk_bzero(glyph->fImage, mask.computeImageSize());

            if (!fRasterizer->rasterize(fillPath, fillToDevMatrix, NULL,
                                        fMaskFilter, &mask,
                                        SkMask::kJustRenderImage_CreateMode)) {
                return;
            }
        } else {
            SkBitmap bm;
            SkBitmap::Config config;
            SkMatrix matrix;
            SkRegion clip;
            SkPaint paint;
            SkDraw draw;

            // choose A8 (antialiased) or A1 (aliased) to match the request
            if (SkMask::kA8_Format == fRec.fMaskFormat) {
                config = SkBitmap::kA8_Config;
                paint.setAntiAlias(true);
            } else {
                SkASSERT(SkMask::kBW_Format == fRec.fMaskFormat);
                config = SkBitmap::kA1_Config;
                paint.setAntiAlias(false);
            }

            clip.setRect(0, 0, glyph->fWidth, glyph->fHeight);
            // translate the glyph so its top-left lands at (0,0)
            matrix.setTranslate(-SkIntToScalar(glyph->fLeft),
                                -SkIntToScalar(glyph->fTop));
            // wrap the glyph's storage in a bitmap and draw into it directly
            bm.setConfig(config, glyph->fWidth, glyph->fHeight,
                         glyph->rowBytes());
            bm.setPixels(glyph->fImage);
            sk_bzero(glyph->fImage, bm.height() * bm.rowBytes());

            draw.fClip = &clip;
            draw.fMatrix = &matrix;
            draw.fBitmap = &bm;
            draw.fBounder = NULL;
            draw.drawPath(devPath, paint);
        }
    } else {
        // simple case: the font backend renders directly
        this->getGlyphContext(*glyph)->generateImage(*glyph);
    }

    if (fMaskFilter) {
        SkMask srcM, dstM;
        SkMatrix matrix;

        // the src glyph image shouldn't be 3D
        SkASSERT(SkMask::k3D_Format != glyph->fMaskFormat);
        glyph->toMask(&srcM);
        fRec.getMatrixFrom2x2(&matrix);

        if (fMaskFilter->filterMask(&dstM, srcM, matrix, NULL)) {
            // the filtered mask may be larger/smaller than the caller's
            // allocation, so copy the intersection row by row
            int width = SkFastMin32(origGlyph.fWidth, dstM.fBounds.width());
            int height = SkFastMin32(origGlyph.fHeight, dstM.fBounds.height());
            int dstRB = origGlyph.rowBytes();
            int srcRB = dstM.fRowBytes;

            const uint8_t* src = (const uint8_t*)dstM.fImage;
            uint8_t* dst = (uint8_t*)origGlyph.fImage;

            if (SkMask::k3D_Format == dstM.fFormat) {
                // we have to copy 3 times as much
                height *= 3;
            }

            // clean out our glyph, since it may be larger than dstM
            //sk_bzero(dst, height * dstRB);

            while (--height >= 0) {
                memcpy(dst, src, width);
                src += srcRB;
                dst += dstRB;
            }
            SkMask::FreeImage(dstM.fImage);
        }
    }

    // check to see if we should filter the alpha channel
    if (NULL == fMaskFilter &&
        fRec.fMaskFormat != SkMask::kBW_Format &&
        fRec.fMaskFormat != SkMask::kLCD16_Format &&
        fRec.fMaskFormat != SkMask::kLCD32_Format &&
        (fRec.fFlags & (kGammaForBlack_Flag | kGammaForWhite_Flag)) != 0)
    {
        const uint8_t* table = (fRec.fFlags & kGammaForBlack_Flag) ?
                                    gBlackGammaTable : gWhiteGammaTable;
        if (NULL != table) {
            // remap every coverage byte through the gamma table, in place
            uint8_t* dst = (uint8_t*)origGlyph.fImage;
            unsigned rowBytes = origGlyph.rowBytes();

            for (int y = origGlyph.fHeight - 1; y >= 0; --y) {
                for (int x = origGlyph.fWidth - 1; x >= 0; --x) {
                    dst[x] = table[dst[x]];
                }
                dst += rowBytes;
            }
        }
    }
}
// Renders the glyph's image into origGlyph.fImage (caller-allocated),
// either via the font backend (generateImage) or by building a path and
// rasterizing/masking it; an optional mask filter is then applied in place.
void SkScalerContext::getImage(const SkGlyph& origGlyph) {
    const SkGlyph* glyph = &origGlyph;
    SkGlyph tmpGlyph;

    if (fMaskFilter) {  // restore the prefilter bounds
        tmpGlyph.init(origGlyph.fID);

        // need the original bounds, sans our maskfilter
        SkMaskFilter* mf = fMaskFilter;
        fMaskFilter = NULL;             // temp disable
        this->getMetrics(&tmpGlyph);
        fMaskFilter = mf;               // restore

        // share the caller's storage; tmpGlyph does not own it
        tmpGlyph.fImage = origGlyph.fImage;

        // we need the prefilter bounds to be <= filter bounds
        SkASSERT(tmpGlyph.fWidth <= origGlyph.fWidth);
        SkASSERT(tmpGlyph.fHeight <= origGlyph.fHeight);
        glyph = &tmpGlyph;
    }

    if (fGenerateImageFromPath) {
        SkPath devPath, fillPath;
        SkMatrix fillToDevMatrix;
        SkMask mask;

        this->internalGetPath(*glyph, &fillPath, &devPath, &fillToDevMatrix);
        glyph->toMask(&mask);

        if (fRasterizer) {
            mask.fFormat = SkMask::kA8_Format;
            // clear first: the rasterizer only renders coverage into it
            sk_bzero(glyph->fImage, mask.computeImageSize());

            if (!fRasterizer->rasterize(fillPath, fillToDevMatrix, NULL,
                                        fMaskFilter, &mask,
                                        SkMask::kJustRenderImage_CreateMode)) {
                return;
            }
        } else {
            generateMask(mask, devPath);
        }
    } else {
        // simple case: the font backend renders directly
        this->getGlyphContext(*glyph)->generateImage(*glyph);
    }

    if (fMaskFilter) {
        SkMask srcM, dstM;
        SkMatrix matrix;

        // the src glyph image shouldn't be 3D
        SkASSERT(SkMask::k3D_Format != glyph->fMaskFormat);
        glyph->toMask(&srcM);
        fRec.getMatrixFrom2x2(&matrix);

        if (fMaskFilter->filterMask(&dstM, srcM, matrix, NULL)) {
            // the filtered mask may be larger/smaller than the caller's
            // allocation, so copy the intersection row by row
            int width = SkFastMin32(origGlyph.fWidth, dstM.fBounds.width());
            int height = SkFastMin32(origGlyph.fHeight, dstM.fBounds.height());
            int dstRB = origGlyph.rowBytes();
            int srcRB = dstM.fRowBytes;

            const uint8_t* src = (const uint8_t*)dstM.fImage;
            uint8_t* dst = (uint8_t*)origGlyph.fImage;

            if (SkMask::k3D_Format == dstM.fFormat) {
                // we have to copy 3 times as much
                height *= 3;
            }

            // clean out our glyph, since it may be larger than dstM
            //sk_bzero(dst, height * dstRB);

            while (--height >= 0) {
                memcpy(dst, src, width);
                src += srcRB;
                dst += dstRB;
            }
            SkMask::FreeImage(dstM.fImage);
        }
    }
}
// Renders the glyph's image into origGlyph.fImage (caller-allocated),
// either via the font backend (generateImage) or by building a path and
// rasterizing/masking it; an optional mask filter is then applied, with
// ARGB32 sources first reduced to an A8 alpha mask for filtering.
void SkScalerContext::getImage(const SkGlyph& origGlyph) {
    const SkGlyph* glyph = &origGlyph;
    SkGlyph tmpGlyph;

    // in case we need to call generateImage on a mask-format that is different
    // (i.e. larger) than what our caller allocated by looking at origGlyph.
    SkAutoMalloc tmpGlyphImageStorage;

    // If we are going to draw-from-path, then we cannot generate color, since
    // the path only makes a mask. This case should have been caught up in
    // generateMetrics().
    SkASSERT(!fGenerateImageFromPath ||
             SkMask::kARGB32_Format != origGlyph.fMaskFormat);

    if (fMaskFilter) {  // restore the prefilter bounds
        tmpGlyph.initGlyphIdFrom(origGlyph);

        // need the original bounds, sans our maskfilter
        SkMaskFilter* mf = fMaskFilter;
        fMaskFilter = nullptr;          // temp disable
        this->getMetrics(&tmpGlyph);
        fMaskFilter = mf;               // restore

        // we need the prefilter bounds to be <= filter bounds
        SkASSERT(tmpGlyph.fWidth <= origGlyph.fWidth);
        SkASSERT(tmpGlyph.fHeight <= origGlyph.fHeight);

        if (tmpGlyph.fMaskFormat == origGlyph.fMaskFormat) {
            // same format: render straight into the caller's storage
            tmpGlyph.fImage = origGlyph.fImage;
        } else {
            // format differs, so size may differ: use our own scratch buffer
            tmpGlyphImageStorage.reset(tmpGlyph.computeImageSize());
            tmpGlyph.fImage = tmpGlyphImageStorage.get();
        }
        glyph = &tmpGlyph;
    }

    if (fGenerateImageFromPath) {
        SkPath devPath, fillPath;
        SkMatrix fillToDevMatrix;
        SkMask mask;

        this->internalGetPath(*glyph, &fillPath, &devPath, &fillToDevMatrix);
        glyph->toMask(&mask);

        if (fRasterizer) {
            mask.fFormat = SkMask::kA8_Format;
            // clear first: the rasterizer only renders coverage into it
            sk_bzero(glyph->fImage, mask.computeImageSize());

            if (!fRasterizer->rasterize(fillPath, fillToDevMatrix, nullptr,
                                        fMaskFilter, &mask,
                                        SkMask::kJustRenderImage_CreateMode)) {
                return;
            }
            if (fPreBlend.isApplicable()) {
                applyLUTToA8Mask(mask, fPreBlend.fG);
            }
        } else {
            SkASSERT(SkMask::kARGB32_Format != mask.fFormat);
            generateMask(mask, devPath, fPreBlend);
        }
    } else {
        generateImage(*glyph);
    }

    if (fMaskFilter) {
        SkMask srcM, dstM;
        SkMatrix matrix;

        // the src glyph image shouldn't be 3D
        SkASSERT(SkMask::k3D_Format != glyph->fMaskFormat);

        SkAutoSMalloc<32*32> a8storage;
        glyph->toMask(&srcM);
        if (SkMask::kARGB32_Format == srcM.fFormat) {
            // now we need to extract the alpha-channel from the glyph's image
            // and copy it into a temp buffer, and then point srcM at that temp.
            srcM.fFormat = SkMask::kA8_Format;
            srcM.fRowBytes = SkAlign4(srcM.fBounds.width());
            size_t size = srcM.computeImageSize();
            a8storage.reset(size);
            srcM.fImage = (uint8_t*)a8storage.get();
            extract_alpha(srcM, (const SkPMColor*)glyph->fImage,
                          glyph->rowBytes());
        }

        fRec.getMatrixFrom2x2(&matrix);

        if (fMaskFilter->filterMask(&dstM, srcM, matrix, nullptr)) {
            // the filtered mask may be larger/smaller than the caller's
            // allocation, so copy the intersection row by row
            int width = SkFastMin32(origGlyph.fWidth, dstM.fBounds.width());
            int height = SkFastMin32(origGlyph.fHeight, dstM.fBounds.height());
            int dstRB = origGlyph.rowBytes();
            int srcRB = dstM.fRowBytes;

            const uint8_t* src = (const uint8_t*)dstM.fImage;
            uint8_t* dst = (uint8_t*)origGlyph.fImage;

            if (SkMask::k3D_Format == dstM.fFormat) {
                // we have to copy 3 times as much
                height *= 3;
            }

            // clean out our glyph, since it may be larger than dstM
            //sk_bzero(dst, height * dstRB);

            while (--height >= 0) {
                memcpy(dst, src, width);
                src += srcRB;
                dst += dstRB;
            }
            SkMask::FreeImage(dstM.fImage);

            // NOTE(review): this applies the pre-blend LUT to srcM (a local
            // temp) *after* the filtered result was already copied into
            // origGlyph.fImage, so it does not appear to affect the output.
            // Presumably it was meant to run on dstM before the copy (or on
            // srcM before filterMask) -- confirm against upstream history
            // before changing.
            if (fPreBlendForFilter.isApplicable()) {
                applyLUTToA8Mask(srcM, fPreBlendForFilter.fG);
            }
        }
    }
}
/**
 * sw and sh are the width and height of the src. Since the sum buffer
 * matches that, but has an extra row and col at the beginning (with zeros),
 * we can just use sw and sh as our "max" values for pinning coordinates
 * when sampling into sum[][]
 *
 * The inner loop is conceptually simple; we break it into several variants
 * to improve performance. Here's the original version:
    for (int x = 0; x < dw; ++x) {
        int px = SkClampPos(prev_x);
        int nx = SkFastMin32(next_x, sw);

        int ipx = SkClampPos(prev_x + 1);
        int inx = SkClampMax(next_x - 1, sw);

        uint32_t outerSum = sum[px+py] + sum[nx+ny]
                          - sum[nx+py] - sum[px+ny];
        uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                          - sum[inx+ipy] - sum[ipx+iny];
        *dst++ = SkToU8((outerSum * outerScale
                       + innerSum * innerScale) >> 24);

        prev_x += 1;
        next_x += 1;
    }
 * The sections are:
 * left-hand section, where prev_x is clamped to 0
 * center section, where neither prev_x nor next_x is clamped
 * right-hand section, where next_x is clamped to sw
 * On some operating systems, the center section is unrolled for additional
 * speedup.
 */
static void apply_kernel_interp(uint8_t dst[], int rx, int ry,
                const uint32_t sum[], int sw, int sh, U8CPU outerWeight) {
    SkASSERT(rx > 0 && ry > 0);
    SkASSERT(outerWeight <= 255);

    // kernel wider than the source: every sample clamps, take the slow path
    if (2*rx > sw) {
        kernel_interp_clamped(dst, rx, ry, sum, sw, sh, outerWeight);
        return;
    }

    int innerWeight = 255 - outerWeight;

    // round these guys up if they're bigger than 127
    outerWeight += outerWeight >> 7;
    innerWeight += innerWeight >> 7;

    // fixed-point reciprocals of the outer/inner box areas, pre-multiplied
    // by the interpolation weights (product >> 24 gives the weighted average)
    uint32_t outerScale = (outerWeight << 16) / ((2*rx + 1)*(2*ry + 1));
    uint32_t innerScale = (innerWeight << 16) / ((2*rx - 1)*(2*ry - 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    // the three x-sections below must not overlap
    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; ++y) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int ipy = SkClampPos(prev_y + 1) * sumStride;
        int iny = SkClampMax(next_y - 1, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        // left-hand section: prev_x is negative, so it clamps to 0
        for (; x < 2*rx; ++x) {
            SkASSERT(prev_x < 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            int ipx = 0;
            int inx = next_x - 1;

            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                              - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                              - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // center section: no clamping needed, so keep eight running indices
        // and the loop body becomes pure loads/adds
        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;
        int i4 = prev_x + 1 + ipy;
        int i5 = next_x - 1 + iny;
        int i6 = next_x - 1 + ipy;
        int i7 = prev_x + 1 + iny;

#if UNROLL_KERNEL_LOOP
        // unrolled x4 variant of the center section
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);
            outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        // remainder of the center section
        for (; x < dw - 2*rx; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t outerSum = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            uint32_t innerSum = sum[i4++] + sum[i5++] - sum[i6++] - sum[i7++];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // right-hand section: next_x is past the edge, so it clamps to sw
        for (; x < dw; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            int ipx = prev_x + 1;
            int inx = sw;

            uint32_t outerSum = sum[px+py] + sum[nx+ny]
                              - sum[nx+py] - sum[px+ny];
            uint32_t innerSum = sum[ipx+ipy] + sum[inx+iny]
                              - sum[inx+ipy] - sum[ipx+iny];
            *dst++ = SkToU8((outerSum * outerScale
                           + innerSum * innerScale) >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}
/**
 * sw and sh are the width and height of the src. Since the sum buffer
 * matches that, but has an extra row and col at the beginning (with zeros),
 * we can just use sw and sh as our "max" values for pinning coordinates
 * when sampling into sum[][]
 *
 * The inner loop is conceptually simple; we break it into several sections
 * to improve performance. Here's the original version:
    for (int x = 0; x < dw; ++x) {
        int px = SkClampPos(prev_x);
        int nx = SkFastMin32(next_x, sw);

        uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
        *dst++ = SkToU8(tmp * scale >> 24);

        prev_x += 1;
        next_x += 1;
    }
 * The sections are:
 * left-hand section, where prev_x is clamped to 0
 * center section, where neither prev_x nor next_x is clamped
 * right-hand section, where next_x is clamped to sw
 * On some operating systems, the center section is unrolled for additional
 * speedup.
 */
static void apply_kernel(uint8_t dst[], int rx, int ry,
                         const uint32_t sum[], int sw, int sh) {
    // kernel wider than the source: every sample clamps, take the slow path
    if (2*rx > sw) {
        kernel_clamped(dst, rx, ry, sum, sw, sh);
        return;
    }

    // 8.24 fixed-point reciprocal of the kernel area
    uint32_t scale = (1 << 24) / ((2*rx + 1)*(2*ry + 1));

    int sumStride = sw + 1;

    int dw = sw + 2*rx;
    int dh = sh + 2*ry;

    int prev_y = -2*ry;
    int next_y = 1;

    // the three x-sections below must not overlap
    SkASSERT(2*rx <= dw - 2*rx);

    for (int y = 0; y < dh; ++y) {
        int py = SkClampPos(prev_y) * sumStride;
        int ny = SkFastMin32(next_y, sh) * sumStride;

        int prev_x = -2*rx;
        int next_x = 1;
        int x = 0;

        // left-hand section: prev_x clamps to 0
        for (; x < 2*rx; ++x) {
            SkASSERT(prev_x <= 0);
            SkASSERT(next_x <= sw);

            int px = 0;
            int nx = next_x;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // center section: no clamping, so keep four running indices and the
        // loop body becomes pure loads/adds
        int i0 = prev_x + py;
        int i1 = next_x + ny;
        int i2 = next_x + py;
        int i3 = prev_x + ny;

#if UNROLL_KERNEL_LOOP
        // unrolled x4 variant of the center section
        for (; x < dw - 2*rx - 4; x += 4) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);
            tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 4;
            next_x += 4;
        }
#endif

        // remainder of the center section
        for (; x < dw - 2*rx; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x <= sw);

            uint32_t tmp = sum[i0++] + sum[i1++] - sum[i2++] - sum[i3++];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        // right-hand section: next_x clamps to sw
        for (; x < dw; ++x) {
            SkASSERT(prev_x >= 0);
            SkASSERT(next_x > sw);

            int px = prev_x;
            int nx = sw;

            uint32_t tmp = sum[px+py] + sum[nx+ny] - sum[nx+py] - sum[px+ny];
            *dst++ = SkToU8(tmp * scale >> 24);

            prev_x += 1;
            next_x += 1;
        }

        prev_y += 1;
        next_y += 1;
    }
}