// Parallel body: processes one 3D block of the (row, column, page) iteration
// space. For every (row, column) pair in the block it adds to
// dst[row * src2w + column] the value TBBSclMlt returns for the slices of A
// and B that start at offset r.pages().begin() and span the block's page
// range.
//
// Both operands are addressed with stride src1w: A by row index, B by column
// index (presumably B is stored transposed -- confirm against the caller).
//
// NOTE(review): the result is accumulated with +=, so dst is presumably
// zero-initialised before the parallel loop starts, and if the page dimension
// is split across tasks several invocations may update the same dst element --
// confirm that this is safe for the way the range is constructed.
void operator()(const blocked_range3d<int>& r) const
{
    const int rowFirst = r.rows().begin();
    const int rowLast = r.rows().end();
    const int colFirst = r.cols().begin();
    const int colLast = r.cols().end();
    const int pageFirst = r.pages().begin();
    const int pageLast = r.pages().end();

    // Number of elements of each operand slice covered by this block.
    const int sliceLength = pageLast - pageFirst;

    for (int row = rowFirst; row < rowLast; ++row)
    {
        for (int col = colFirst; col < colLast; ++col)
        {
            const double* lhs = &(A[row * src1w + pageFirst]);
            const double* rhs = &(B[col * src1w + pageFirst]);
            dst[row * src2w + col] += TBBSclMlt(lhs, rhs, sliceLength);
        }
    }
}
/// Parallel body: copies the tile data covering one block of the data window
/// into the destination channel buffers.
///
/// The rows and cols of `r` are offsets that are added to m_dataWindow.min.x
/// and m_dataWindow.min.y respectively; the pages of `r` index
/// m_channelNames and m_imageChannelData.
void operator()( const blocked_range3d<size_t>& r ) const
{
	// Derive a context from the parent and hold a Context::Scope on it for
	// the lifetime of this call.
	ContextPtr context = new Context( *m_parentContext, Context::Borrowed );
	Context::Scope scope( context.get() );

	// Inclusive pixel bounds of the region handled by this invocation.
	const V2i windowMin( r.rows().begin()+m_dataWindow.min.x, r.cols().begin()+m_dataWindow.min.y );
	const V2i windowMax( r.rows().end()+m_dataWindow.min.x-1, r.cols().end()+m_dataWindow.min.y-1 );
	const Box2i operationWindow( windowMin, windowMax );

	// Origins of the first and last tiles touched by the region.
	const V2i firstTile = ImagePlug::tileOrigin( operationWindow.min );
	const V2i lastTile = ImagePlug::tileOrigin( operationWindow.max );

	// Number of floats per destination row (the data window is inclusive).
	const size_t imageStride = m_dataWindow.size().x + 1;

	const size_t channelEnd = r.pages().end();
	for( size_t channel = r.pages().begin(); channel < channelEnd; ++channel )
	{
		context->set( ImagePlug::channelNameContextName, m_channelNames[channel] );
		float *destination = m_imageChannelData[channel];

		for( int originY = firstTile.y; originY <= lastTile.y; originY += m_tileSize )
		{
			for( int originX = firstTile.x; originX <= lastTile.x; originX += m_tileSize )
			{
				const V2i origin( originX, originY );
				context->set( ImagePlug::tileOriginContextName, origin );

				// Only the part of the tile inside the operation window is copied.
				Box2i tileBound( origin, V2i( originX + m_tileSize - 1, originY + m_tileSize - 1 ) );
				const Box2i b = boxIntersection( tileBound, operationWindow );
				const size_t rowBytes = sizeof(float) * ( b.size().x + 1 );

				ConstFloatVectorDataPtr tileData = m_channelDataPlug->getValue();
				const float *tilePixels = &(tileData->readable()[0]);

				// Horizontal offsets of the copied span within the tile and
				// within the destination row.
				const int tileColumn = b.min.x - originX;
				const int imageColumn = b.min.x - m_dataWindow.min.x;

				for( int y = b.min.y; y <= b.max.y; ++y )
				{
					// The destination row index decreases as y increases, so
					// the rows are written vertically flipped relative to y.
					const int flippedRow = m_dataWindow.size().y - ( y - m_dataWindow.min.y );
					const float *from = tilePixels + (y - originY) * m_tileSize + tileColumn;
					float *to = destination + flippedRow * imageStride + imageColumn;
					std::memcpy( to, from, rowBytes );
				}
			}
		}
	}
}