// Refreshes the cached aggregates of node `a` (sz, sum, lsum, rsum, maxsum)
// from its own `val` and from the aggregates of its two children.
//
// The recurrences look like the usual "maximum sub-segment sum" bookkeeping:
// lsum/rsum presumably hold the best sum of a run anchored at the left/right
// end of the subtree and maxsum the best run anywhere inside it.
// NOTE(review): the explicit `if( child )` guards below suggest the helper
// accessors return a neutral value for an absent child - confirm against
// Sum/lSum/rSum/maxSum/Size, which are defined elsewhere.
void pull( Treap * a ){
    Treap * const lc = a->l;
    Treap * const rc = a->r;

    // Node count and plain total of the subtree.
    a->sz  = Size( lc ) + Size( rc ) + 1;
    a->sum = Sum( lc ) + Sum( rc ) + a->val;

    // Left-anchored run: the whole left subtree plus this node, optionally
    // extended into the right subtree ...
    a->lsum = Sum( lc ) + a->val + max( 0 , lSum( rc ) );
    // ... or a run that stays entirely inside the left subtree.
    if( lc ){
        a->lsum = max( lSum( lc ) , a->lsum );
    }

    // Right-anchored run: mirror image of the case above.
    a->rsum = Sum( rc ) + a->val + max( 0 , rSum( lc ) );
    if( rc ){
        a->rsum = max( rSum( rc ) , a->rsum );
    }

    // Best run overall: either it passes through this node, or it lies
    // completely inside one of the children.
    a->maxsum = max( 0 , rSum( lc ) ) + a->val + max( 0 , lSum( rc ) );
    a->maxsum = max( a->maxsum , max( maxSum( lc ) , maxSum( rc ) ) );
}
/// Sums `x` along its second-to-last dimension and stores the result in `r`.
///
/// `r` is (re)created when its dimensions differ from those of `x` with the
/// second-to-last dimension removed. Inputs with fewer than two dimensions
/// are ignored and leave `r` untouched. When the summed dimension has at most
/// one entry the data of `x` is copied into `r` unchanged.
///
/// Any exception raised inside is caught and re-reported through GADGET_THROW.
void hoSPIRITOperator<T>::sum_over_src_channel(const ARRAY_TYPE& x, ARRAY_TYPE& r)
{
    try
    {
        boost::shared_ptr< std::vector<size_t> > srcDimsPtr = x.get_dimensions();
        const std::vector<size_t>& srcDims = *srcDimsPtr;
        const size_t numDims = srcDims.size();

        if (numDims >= 2)
        {
            // Index of the dimension that gets summed away.
            const size_t sumAxis = numDims - 2;

            // Shape of the result: the input shape without the summed dimension.
            std::vector<size_t> outDims(srcDims);
            outDims.erase(outDims.begin() + sumAxis);

            // Same rank as the input, but with the summed dimension collapsed
            // to 1; used to view r's buffer during the reduction.
            std::vector<size_t> collapsedDims(srcDims);
            collapsedDims[sumAxis] = 1;

            if (!r.dimensions_equal(&outDims))
            {
                r.create(&outDims);
            }

            if (x.get_size(sumAxis) > 1)
            {
                // Wrap r's memory in an array of the collapsed shape and
                // reduce straight into it.
                hoNDArray<T> rSum(collapsedDims, r.begin());
                GADGET_CATCH_THROW(Gadgetron::sum_over_dimension(x, rSum, sumAxis));
            }
            else
            {
                // Nothing to add up: the result is the input itself.
                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
            }
        }
    }
    catch (...)
    {
        GADGET_THROW("Errors in hoSPIRITOperator<T>::sum_over_src_channel(const ARRAY_TYPE& x, ARRAY_TYPE& r) ... ");
    }
}
/*! \brief Tests the **reduce_sum** kernel.
 *  \details The kernel computes the sum of the elements of each row in an array.
 *           The device results are compared, row by row, against a reference
 *           computed on the host with `ICP::cpuReduceSum`. When `profiling`
 *           is set, CPU and GPU timings are collected and printed as well.
 */
TEST (Reduce, reduce_sum)
{
    try
    {
        const unsigned int rows = 1024;
        const unsigned int cols = 1024;
        const unsigned int bufferInSize = cols * rows * sizeof (cl_float);

        // Setup the OpenCL environment
        clutils::CLEnv clEnv;
        clEnv.addContext (0);
        clEnv.addQueue (0, 0, CL_QUEUE_PROFILING_ENABLE);
        clEnv.addProgram (0, kernel_filename_reduce);

        // Configure kernel execution parameters
        clutils::CLEnvInfo<1> info (0, 0, 0, { 0 }, 0);
        cl_algo::ICP::Reduce<cl_algo::ICP::ReduceConfig::SUM, cl_float> rSum (clEnv, info);
        rSum.init (cols, rows);

        // Initialize data (writes on staging buffer directly)
        std::generate (rSum.hPtrIn, rSum.hPtrIn + bufferInSize / sizeof (cl_float), ICP::rNum_R_0_1);
        // ICP::printBuffer ("Original:", rSum.hPtrIn, cols, rows);

        rSum.write ();  // Copy data to device

        rSum.run ();  // Execute kernels (~ 44 us)

        cl_float *results = (cl_float *) rSum.read ();  // Copy results to host
        // ICP::printBuffer ("Received:", results, 1, rows);

        // Produce reference array of row sums.
        // The buffer is owned by a local guard so that it is released on every
        // way out of this scope, including the early return that a failing
        // ASSERT_LT performs.
        struct HostBuffer
        {
            cl_float *data;
            ~HostBuffer () { delete[] data; }
        } refBuffer = { new cl_float[rows] };
        cl_float *refSum = refBuffer.data;

        ICP::cpuReduceSum (rSum.hPtrIn, refSum, cols, rows);
        // ICP::printBuffer ("Expected:", refSum, 1, rows);

        // Verify the reduced output against the host reference
        float eps = 42000 * std::numeric_limits<float>::epsilon ();  // 0.00500679
        for (unsigned int i = 0; i < rows; ++i)
            ASSERT_LT (std::abs (refSum[i] - results[i]), eps);

        // Profiling ===========================================================
        if (profiling)
        {
            const int nRepeat = 1;  /* Number of times to perform the tests. */

            // CPU
            clutils::CPUTimer<double, std::milli> cTimer;
            clutils::ProfilingInfo<nRepeat> pCPU ("CPU");
            for (int i = 0; i < nRepeat; ++i)
            {
                cTimer.start ();
                ICP::cpuReduceSum (rSum.hPtrIn, refSum, cols, rows);
                pCPU[i] = cTimer.stop ();
            }

            // GPU
            clutils::GPUTimer<std::milli> gTimer (clEnv.devices[0][0]);
            clutils::ProfilingInfo<nRepeat> pGPU ("GPU");
            for (int i = 0; i < nRepeat; ++i)
                pGPU[i] = rSum.run (gTimer);

            // Benchmark
            pGPU.print (pCPU, "Reduce<Sum>");
        }
    }
    catch (const cl::Error &error)
    {
        std::cerr << error.what ()
                  << " (" << clutils::getOpenCLErrorCodeString (error.err ()) << ")"
                  << std::endl;
        exit (EXIT_FAILURE);
    }
}
/* $begin rsum-c */
/*
 * rSum - recursively adds up Count ints starting at Start.
 *
 * Returns 0 when Count is zero or negative; Start is not dereferenced
 * in that case.
 */
int rSum(int *Start, int Count)
{
    /* Head element plus the sum of the remaining Count-1 elements. */
    return (Count > 0) ? Start[0] + rSum(Start + 1, Count - 1) : 0;
}
/*
 * Returns the sum of the `count` ints starting at `start`.
 *
 * A zero or negative `count` yields 0 and `start` is not dereferenced.
 *
 * The sum is computed with a loop rather than by recursion so that the
 * stack depth no longer grows with `count`; large arrays therefore cannot
 * exhaust the call stack. Elements are added from the last one towards the
 * first, which is exactly the order in which the recursive formulation
 * combined them, so intermediate values are unchanged.
 */
int rSum(int* start,int count){
    int total = 0;
    /* i runs count, count-1, ..., 1 and addresses element i-1; written this
       way so that no index below zero is ever formed. */
    for(int i = count; i > 0; --i){
        total = start[i - 1] + total;
    }
    return total;
}
/*
 * Reports whether rSum(n) does not exceed m.
 * Returns 1 when rSum(n) <= m and 0 otherwise.
 */
int rSumFits(int n, int m){
    if (rSum(n) <= m) {
        return 1;
    }
    return 0;
}
void CrossNLMFilter::Apply(const TwoDArray<Color> &img, const TwoDArray<Feature> &featureImg, const TwoDArray<Feature> &featureVarImg, const TwoDArray<Color> &rImg, const TwoDArray<Color> &varImg, vector<TwoDArray<Color> > &fltArray, vector<TwoDArray<float> > &mseArray, vector<TwoDArray<float> > &priArray) const { #pragma omp parallel for num_threads(PbrtOptions.nCores) schedule(static) for(int taskId = 0; taskId < nTasks; taskId++) { int txs, txe, tys, tye; ComputeSubWindow(taskId, nTasks, width, height, &txs, &txe, &tys, &tye); for(int y = tys; y < tye; y++) for(int x = txs; x < txe; x++) { vector<Color> sum(scaleR.size(), Color(0.f)); vector<Color> rSum(scaleR.size(), Color(0.f)); vector<Color> rSumSq(scaleR.size(), Color(0.f)); vector<float> wSum(scaleR.size(), 0.f); vector<vector<float> > wArray(scaleR.size()); for(size_t p = 0; p < wArray.size(); p++) { wArray[p].resize(patchWidth*patchWidth); } Feature feature = featureImg(x, y); Feature featureVar = featureVarImg(x, y); // Filter using pixels within search range for(int dy = -searchRadius; dy <= searchRadius; dy++) for(int dx = -searchRadius; dx <= searchRadius; dx++) { int xx = x + dx; int yy = y + dy; if(xx < 0 || yy < 0 || xx >= width || yy >= height) continue; Color diffSqSum(0.f, 0.f, 0.f); // Calculate block distance for(int by = -patchRadius; by <= patchRadius; by++) for(int bx = -patchRadius; bx <= patchRadius; bx++) { int xbx = x + bx; int yby = y + by; int xxbx = xx + bx; int yyby = yy + by; if( xbx < 0 || xbx >= width || yby < 0 || yby >= height || xxbx < 0 || xxbx >= width || yyby < 0 || yyby >= height) continue; Color diff = rImg(xbx, yby) - rImg(xxbx, yyby); diffSqSum += (diff*diff); } diffSqSum *= invPatchSize; float dist = Avg(diffSqSum); Feature fDiff = feature - featureImg(xx, yy); Feature fVarSum = featureVar + featureVarImg(xx, yy); Feature fDist = (fDiff*fDiff)/fVarSum.Max(c_VarMax); // For each parameter, calculate information necessary for // filtering and SURE estimation for(size_t p 
= 0; p < scaleR.size(); p++) { if(scaleR[p] == 0.f) { continue; } float weight = fmath::exp(dist*scaleR[p] + Sum(fDist*scaleF)); sum[p] += weight * img(xx, yy); rSum[p] += weight * rImg(xx, yy); rSumSq[p] += weight * rImg(xx, yy) * rImg(xx, yy); wSum[p] += weight; if(dy >= -patchRadius && dy <= patchRadius && dx >= -patchRadius && dx <= patchRadius) { wArray[p][(dy+patchRadius)*patchWidth+(dx+patchRadius)] = weight; } } } for(size_t p = 0; p < scaleR.size(); p++) { if(scaleR[p] == 0.f) { fltArray[p](x, y) = img(x, y); mseArray[p](x, y) = Avg(2.f*varImg(x, y)); continue; } float invWSum = 1.f/wSum[p]; Color xl = sum[p] * invWSum; Color rxl = rSum[p] * invWSum; Color rxlSq = rSumSq[p] * invWSum; Color ryl = rImg(x, y); Color dxdy = (-2.f*scaleR[p])*(rxlSq - rxl*rxl)*invPatchSize + Color(invWSum); Color tmp; for(int by = -patchRadius; by <= patchRadius; by++) for(int bx = -patchRadius; bx <= patchRadius; bx++) { int xbpx = x+bx; int ybpy = y+by; int xbmx = x-bx; int ybmy = y-by; if( xbpx < 0 || xbpx >= width || ybpy < 0 || ybpy >= height || xbmx < 0 || xbmx >= width || ybmy < 0 || ybmy >= height) continue; Color rylpb = rImg(xbpx, ybpy); Color rylmb = rImg(xbmx, ybmy); float w = wArray[p][(-by+patchRadius)*patchWidth+(-bx+patchRadius)]; tmp += w*(ryl - rylpb)*(rxl - rylmb); } tmp *= (-2.f*scaleR[p])*invPatchSize*invWSum; dxdy += tmp; Color mse = (rxl-ryl)*(rxl-ryl) + 2.f*varImg(x, y)*dxdy - varImg(x, y); Color pri = (mse + varImg(x, y)); fltArray[p](x, y) = xl; mseArray[p](x, y) = Avg(mse); priArray[p](x, y) = Avg(pri) / (xl.Y()*xl.Y() + 1e-2f); } } } }