// Paints the lighting effect for the row range that starts at 1 and ends at
// data.heightDecreasedByOne. When the interior area is large enough and
// ParallelJobs provides more than one job, the rows are divided into
// near-equal bands (one per job); otherwise the whole range is painted by a
// direct call to platformApplyGenericPaint().
inline void FELighting::platformApplyGeneric(LightingData& data, LightSource::PaintingData& paintingData)
{
    const int requestedJobs = ((data.widthDecreasedByOne - 1) * (data.heightDecreasedByOne - 1)) / s_minimalRectDimension;
    if (requestedJobs > 1) {
        ParallelJobs<PlatformApplyGenericParameters> parallelJobs(&platformApplyGenericWorker, requestedJobs);

        // The number of parameter slots to fill is whatever ParallelJobs reports,
        // not the number that was requested.
        const int jobCount = parallelJobs.numberOfJobs();
        if (jobCount > 1) {
            // Every job receives "rowsPerJob" rows. The jobs with an index below
            // "remainingRows" take one additional row each, which consumes the
            // remainder of the division.
            const int rowsPerJob = (data.heightDecreasedByOne - 1) / jobCount;
            const int remainingRows = (data.heightDecreasedByOne - 1) % jobCount;

            int nextRow = 1;
            for (int index = jobCount - 1; index >= 0; --index) {
                PlatformApplyGenericParameters& params = parallelJobs.parameter(index);
                params.filter = this;
                params.data = data;
                params.paintingData = paintingData;

                params.yStart = nextRow;
                nextRow += rowsPerJob;
                if (index < remainingRows)
                    ++nextRow;
                // Each band ends where the next one starts.
                params.yEnd = nextRow;
            }

            parallelJobs.execute();
            return;
        }
        // Only a single job is available: fall through to the direct call.
    }

    platformApplyGenericPaint(data, paintingData, 1, data.heightDecreasedByOne);
}
// Applies the morphology filter to rows 0 .. paintingData->height. Large
// paint areas are divided into near-equal row bands that are handed to
// ParallelJobs workers; small areas (or a single available job) are processed
// by one direct call to platformApplyGeneric().
void FEMorphology::platformApply(PaintingData* paintingData)
{
    const int requestedJobs = (paintingData->width * paintingData->height) / s_minimalArea;
    if (requestedJobs > 1) {
        ParallelJobs<PlatformApplyParameters> parallelJobs(&WebCore::FEMorphology::platformApplyWorker, requestedJobs);

        const int jobCount = parallelJobs.numberOfJobs();
        if (jobCount > 1) {
            // "rowsPerJob" rows go to every job; jobs whose index is below
            // "remainingRows" get one more so the bands add up to the full height.
            const int rowsPerJob = paintingData->height / jobCount;
            const int remainingRows = paintingData->height % jobCount;

            int nextRow = 0;
            int index = jobCount;
            while (index-- > 0) {
                PlatformApplyParameters& param = parallelJobs.parameter(index);
                param.filter = this;
                param.startY = nextRow;

                nextRow += rowsPerJob;
                if (index < remainingRows)
                    ++nextRow;

                param.endY = nextRow;
                param.paintingData = paintingData;
            }

            parallelJobs.execute();
            return;
        }
        // A single job only: use the direct call below.
    }

    platformApplyGeneric(paintingData, 0, paintingData->height);
}
void FEMorphology::platformApply(PaintingData* paintingData) { #if ENABLE(PARALLEL_JOBS) int optimalThreadNumber = (paintingData->width * paintingData->height) / s_minimalArea; if (optimalThreadNumber > 1) { ParallelJobs<PlatformApplyParameters> parallelJobs(&WebCore::FEMorphology::platformApplyWorker, optimalThreadNumber); int numOfThreads = parallelJobs.numberOfJobs(); if (numOfThreads > 1) { const int deltaY = 1 + paintingData->height / numOfThreads; int currentY = 0; for (int job = numOfThreads - 1; job >= 0; --job) { PlatformApplyParameters& param = parallelJobs.parameter(job); param.filter = this; param.startY = currentY; currentY += deltaY; param.endY = job ? currentY : paintingData->height; param.paintingData = paintingData; } parallelJobs.execute(); return; } // Fallback to single thread model } #endif platformApplyGeneric(paintingData, 0, paintingData->height); }
// Paints the lighting effect for the row range that starts at 1 and ends at
// data.heightDecreasedByOne. With a large enough interior area and more than
// one job available from ParallelJobs, each job gets a band of "rowsPerJob"
// rows, except the job with index 0, which is filled in last and runs up to
// data.heightDecreasedByOne. Otherwise the range is painted by a direct call.
inline void FELighting::platformApplyGeneric(LightingData& data, LightSource::PaintingData& paintingData)
{
    const int requestedJobs = ((data.widthDecreasedByOne - 1) * (data.heightDecreasedByOne - 1)) / s_minimalRectDimension;
    if (requestedJobs > 1) {
        WTF::ParallelJobs<PlatformApplyGenericParameters> parallelJobs(&platformApplyGenericWorker, requestedJobs);

        const int jobCount = parallelJobs.numberOfJobs();
        if (jobCount > 1) {
            const int rowsPerJob = (data.heightDecreasedByOne - 1) / jobCount;
            int rowCursor = 1;

            for (int index = jobCount - 1; index >= 0; --index) {
                PlatformApplyGenericParameters& params = parallelJobs.parameter(index);
                params.filter = this;
                params.data = data;
                params.paintingData = paintingData;
                params.yStart = rowCursor;

                if (!index) {
                    // Final band: absorbs whatever the integer division left over.
                    params.yEnd = data.heightDecreasedByOne;
                } else {
                    rowCursor += rowsPerJob;
                    params.yEnd = rowCursor;
                }
            }

            parallelJobs.execute();
            return;
        }
        // Only a single job is available: fall through to the direct call.
    }

    platformApplyGenericPaint(data, paintingData, 1, data.heightDecreasedByOne);
}
void FETurbulence::apply() { if (hasResult()) return; ByteArray* pixelArray = createUnmultipliedImageResult(); if (!pixelArray) return; if (absolutePaintRect().isEmpty()) return; PaintingData paintingData(m_seed, roundedIntSize(filterPrimitiveSubregion().size())); initPaint(paintingData); #if ENABLE(PARALLEL_JOBS) int optimalThreadNumber = (absolutePaintRect().width() * absolutePaintRect().height()) / s_minimalRectDimension; if (optimalThreadNumber > 1) { // Initialize parallel jobs ParallelJobs<FillRegionParameters> parallelJobs(&WebCore::FETurbulence::fillRegionWorker, optimalThreadNumber); // Fill the parameter array int i = parallelJobs.numberOfJobs(); if (i > 1) { int startY = 0; int stepY = absolutePaintRect().height() / i; for (; i > 0; --i) { FillRegionParameters& params = parallelJobs.parameter(i-1); params.filter = this; params.pixelArray = pixelArray; params.paintingData = &paintingData; params.startY = startY; if (i != 1) { params.endY = startY + stepY; startY = startY + stepY; } else params.endY = absolutePaintRect().height(); } // Execute parallel jobs parallelJobs.execute(); return; } } // Fallback to sequential mode if there is no room for a new thread or the paint area is too small #endif // ENABLE(PARALLEL_JOBS) fillRegion(pixelArray, paintingData, 0, absolutePaintRect().height()); }
void FETurbulence::applySoftware() { Uint8ClampedArray* pixelArray = createUnmultipliedImageResult(); if (!pixelArray) return; if (absolutePaintRect().isEmpty()) { pixelArray->zeroFill(); return; } PaintingData paintingData(m_seed, roundedIntSize(filterPrimitiveSubregion().size())); initPaint(paintingData); int optimalThreadNumber = (absolutePaintRect().width() * absolutePaintRect().height()) / s_minimalRectDimension; if (optimalThreadNumber > 1) { // Initialize parallel jobs ParallelJobs<FillRegionParameters> parallelJobs(&WebCore::FETurbulence::fillRegionWorker, optimalThreadNumber); // Fill the parameter array int i = parallelJobs.numberOfJobs(); if (i > 1) { // Split the job into "stepY"-sized jobs but there a few jobs that need to be slightly larger since // stepY * jobs < total size. These extras are handled by the remainder "jobsWithExtra". const int stepY = absolutePaintRect().height() / i; const int jobsWithExtra = absolutePaintRect().height() % i; int startY = 0; for (; i > 0; --i) { FillRegionParameters& params = parallelJobs.parameter(i-1); params.filter = this; params.pixelArray = pixelArray; params.paintingData = &paintingData; params.startY = startY; startY += i < jobsWithExtra ? stepY + 1 : stepY; params.endY = startY; params.baseFrequencyX = m_baseFrequencyX; params.baseFrequencyY = m_baseFrequencyY; } // Execute parallel jobs parallelJobs.execute(); return; } } // Fallback to single threaded mode if there is no room for a new thread or the paint area is too small. fillRegion(pixelArray, paintingData, 0, absolutePaintRect().height(), m_baseFrequencyX, m_baseFrequencyY); }
void FEConvolveMatrix::platformApplySoftware() { FilterEffect* in = inputEffect(0); Uint8ClampedArray* resultImage; if (m_preserveAlpha) resultImage = createUnmultipliedImageResult(); else resultImage = createPremultipliedImageResult(); if (!resultImage) return; IntRect effectDrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect()); RefPtr<Uint8ClampedArray> srcPixelArray; if (m_preserveAlpha) srcPixelArray = in->asUnmultipliedImage(effectDrawingRect); else srcPixelArray = in->asPremultipliedImage(effectDrawingRect); IntSize paintSize = absolutePaintRect().size(); PaintingData paintingData; paintingData.srcPixelArray = srcPixelArray.get(); paintingData.dstPixelArray = resultImage; paintingData.width = paintSize.width(); paintingData.height = paintSize.height(); paintingData.bias = m_bias * 255; // Drawing fully covered pixels int clipRight = paintSize.width() - m_kernelSize.width(); int clipBottom = paintSize.height() - m_kernelSize.height(); if (clipRight >= 0 && clipBottom >= 0) { int optimalThreadNumber = (absolutePaintRect().width() * absolutePaintRect().height()) / s_minimalRectDimension; if (optimalThreadNumber > 1) { WTF::ParallelJobs<InteriorPixelParameters> parallelJobs(&WebCore::FEConvolveMatrix::setInteriorPixelsWorker, optimalThreadNumber); const int numOfThreads = parallelJobs.numberOfJobs(); // Split the job into "heightPerThread" jobs but there a few jobs that need to be slightly larger since // heightPerThread * jobs < total size. These extras are handled by the remainder "jobsWithExtra". const int heightPerThread = clipBottom / numOfThreads; const int jobsWithExtra = clipBottom % numOfThreads; int startY = 0; for (int job = 0; job < numOfThreads; ++job) { InteriorPixelParameters& param = parallelJobs.parameter(job); param.filter = this; param.paintingData = &paintingData; param.clipRight = clipRight; param.clipBottom = clipBottom; param.yStart = startY; startY += job < jobsWithExtra ? 
heightPerThread + 1 : heightPerThread; param.yEnd = startY; } parallelJobs.execute(); } else { // Fallback to single threaded mode. setInteriorPixels(paintingData, clipRight, clipBottom, 0, clipBottom); } clipRight += m_targetOffset.x() + 1; clipBottom += m_targetOffset.y() + 1; if (m_targetOffset.y() > 0) setOuterPixels(paintingData, 0, 0, paintSize.width(), m_targetOffset.y()); if (clipBottom < paintSize.height()) setOuterPixels(paintingData, 0, clipBottom, paintSize.width(), paintSize.height()); if (m_targetOffset.x() > 0) setOuterPixels(paintingData, 0, m_targetOffset.y(), m_targetOffset.x(), clipBottom); if (clipRight < paintSize.width()) setOuterPixels(paintingData, clipRight, m_targetOffset.y(), paintSize.width(), clipBottom); } else { // Rare situation, not optimizied for speed setOuterPixels(paintingData, 0, 0, paintSize.width(), paintSize.height()); } }
inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize) { int scanline = 4 * paintSize.width(); int extraHeight = 3 * kernelSizeY * 0.5f; int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width()); if (optimalThreadNumber > 1) { WTF::ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber); int jobs = parallelJobs.numberOfJobs(); if (jobs > 1) { int blockHeight = paintSize.height() / jobs; --jobs; for (int job = jobs; job >= 0; --job) { PlatformApplyParameters& params = parallelJobs.parameter(job); params.filter = this; int startY; int endY; if (!job) { startY = 0; endY = blockHeight + extraHeight; params.srcPixelArray = srcPixelArray; params.dstPixelArray = tmpPixelArray; } else { if (job == jobs) { startY = job * blockHeight - extraHeight; endY = paintSize.height(); } else { startY = job * blockHeight - extraHeight; endY = (job + 1) * blockHeight + extraHeight; } int blockSize = (endY - startY) * scanline; params.srcPixelArray = Uint8ClampedArray::createUninitialized(blockSize); params.dstPixelArray = Uint8ClampedArray::createUninitialized(blockSize); memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize); } params.width = paintSize.width(); params.height = endY - startY; params.kernelSizeX = kernelSizeX; params.kernelSizeY = kernelSizeY; } parallelJobs.execute(); // Copy together the parts of the image. 
for (int job = jobs; job >= 1; --job) { PlatformApplyParameters& params = parallelJobs.parameter(job); int sourceOffset; int destinationOffset; int size; if (job == jobs) { sourceOffset = extraHeight * scanline; destinationOffset = job * blockHeight * scanline; size = (paintSize.height() - job * blockHeight) * scanline; } else { sourceOffset = extraHeight * scanline; destinationOffset = job * blockHeight * scanline; size = blockHeight * scanline; } memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size); } return; } // Fallback to single threaded mode. } // The selection here eventually should happen dynamically on some platforms. #if CPU(ARM_NEON) && COMPILER(GCC) platformApplyNeon(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize); #else platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize); #endif }
void FEConvolveMatrix::apply() { if (hasResult()) return; FilterEffect* in = inputEffect(0); in->apply(); if (!in->hasResult()) return; ByteArray* resultImage; if (m_preserveAlpha) resultImage = createUnmultipliedImageResult(); else resultImage = createPremultipliedImageResult(); if (!resultImage) return; IntRect effectDrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect()); RefPtr<ByteArray> srcPixelArray; if (m_preserveAlpha) srcPixelArray = in->asUnmultipliedImage(effectDrawingRect); else srcPixelArray = in->asPremultipliedImage(effectDrawingRect); IntSize paintSize = absolutePaintRect().size(); PaintingData paintingData; paintingData.srcPixelArray = srcPixelArray.get(); paintingData.dstPixelArray = resultImage; paintingData.width = paintSize.width(); paintingData.height = paintSize.height(); paintingData.bias = m_bias * 255; // Drawing fully covered pixels int clipRight = paintSize.width() - m_kernelSize.width(); int clipBottom = paintSize.height() - m_kernelSize.height(); if (clipRight >= 0 && clipBottom >= 0) { #if ENABLE(PARALLEL_JOBS) int optimalThreadNumber = (absolutePaintRect().width() * absolutePaintRect().height()) / s_minimalRectDimension; if (optimalThreadNumber > 1) { ParallelJobs<InteriorPixelParameters> parallelJobs(&WebCore::FEConvolveMatrix::setInteriorPixelsWorker, optimalThreadNumber); const int numOfThreads = parallelJobs.numberOfJobs(); const int heightPerThread = clipBottom / numOfThreads; int startY = 0; for (int job = 0; job < numOfThreads; ++job) { InteriorPixelParameters& param = parallelJobs.parameter(job); param.filter = this; param.paintingData = &paintingData; param.clipRight = clipRight; param.clipBottom = clipBottom; param.yStart = startY; if (job < numOfThreads - 1) { startY += heightPerThread; param.yEnd = startY - 1; } else param.yEnd = clipBottom; } parallelJobs.execute(); } else // Fallback to the default setInteriorPixels call. 
#endif setInteriorPixels(paintingData, clipRight, clipBottom, 0, clipBottom); clipRight += m_targetOffset.x() + 1; clipBottom += m_targetOffset.y() + 1; if (m_targetOffset.y() > 0) setOuterPixels(paintingData, 0, 0, paintSize.width(), m_targetOffset.y()); if (clipBottom < paintSize.height()) setOuterPixels(paintingData, 0, clipBottom, paintSize.width(), paintSize.height()); if (m_targetOffset.x() > 0) setOuterPixels(paintingData, 0, m_targetOffset.y(), m_targetOffset.x(), clipBottom); if (clipRight < paintSize.width()) setOuterPixels(paintingData, clipRight, m_targetOffset.y(), paintSize.width(), clipBottom); } else { // Rare situation, not optimizied for speed setOuterPixels(paintingData, 0, 0, paintSize.width(), paintSize.height()); } }
inline void FEGaussianBlur::platformApply(Uint8ClampedArray* srcPixelArray, Uint8ClampedArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize) { int scanline = 4 * paintSize.width(); int extraHeight = 3 * kernelSizeY * 0.5f; int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width()); if (optimalThreadNumber > 1) { WTF::ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber); int jobs = parallelJobs.numberOfJobs(); if (jobs > 1) { // Split the job into "blockHeight"-sized jobs but there a few jobs that need to be slightly larger since // blockHeight * jobs < total size. These extras are handled by the remainder "jobsWithExtra". const int blockHeight = paintSize.height() / jobs; const int jobsWithExtra = paintSize.height() % jobs; int currentY = 0; for (int job = 0; job < jobs; job++) { PlatformApplyParameters& params = parallelJobs.parameter(job); params.filter = this; int startY = !job ? 0 : currentY - extraHeight; currentY += job < jobsWithExtra ? blockHeight + 1 : blockHeight; int endY = job == jobs - 1 ? currentY : currentY + extraHeight; int blockSize = (endY - startY) * scanline; if (!job) { params.srcPixelArray = srcPixelArray; params.dstPixelArray = tmpPixelArray; } else { params.srcPixelArray = Uint8ClampedArray::createUninitialized(blockSize); params.dstPixelArray = Uint8ClampedArray::createUninitialized(blockSize); memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize); } params.width = paintSize.width(); params.height = endY - startY; params.kernelSizeX = kernelSizeX; params.kernelSizeY = kernelSizeY; } parallelJobs.execute(); // Copy together the parts of the image. currentY = 0; for (int job = 1; job < jobs; job++) { PlatformApplyParameters& params = parallelJobs.parameter(job); int sourceOffset; int destinationOffset; int size; int adjustedBlockHeight = job < jobsWithExtra ? 
blockHeight + 1 : blockHeight; currentY += adjustedBlockHeight; sourceOffset = extraHeight * scanline; destinationOffset = currentY * scanline; size = adjustedBlockHeight * scanline; memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size); } return; } // Fallback to single threaded mode. } // The selection here eventually should happen dynamically on some platforms. platformApplyGeneric(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize); }