// Sums the contents of a buffer on the host, interpreting it as cl_float values.
//
// The buffer is mapped for reading, its elements are accumulated front to back
// in double precision, and the buffer is unmapped again.
//
// iBuffer : buffer whose elements are read as cl_float
// returns : the accumulated sum; 0 when map() reports failure
double oclRbm::sum(oclBuffer& iBuffer)
{
    double lTotal = 0;

    // Nothing can be read if the mapping request is refused.
    if (!iBuffer.map(CL_MAP_READ))
    {
        return lTotal;
    }

    int lRemaining = iBuffer.count<cl_float>();
    cl_float* lCursor = iBuffer.ptr<cl_float>();

    // Walk the mapped range in ascending order so the floating point
    // accumulation order is well defined.
    while (lRemaining > 0)
    {
        lTotal += *lCursor;
        ++lCursor;
        --lRemaining;
    }

    iBuffer.unmap();
    return lTotal;
}
int oclConvolute::aniso2Dorth(oclDevice& iDevice, oclImage2D& bfSrce, oclImage2D& bfDest, oclImage2D& bfLine, oclBuffer& bfFilter) { cl_uint lIw = bfSrce.getImageInfo<size_t>(CL_IMAGE_WIDTH); cl_uint lIh = bfSrce.getImageInfo<size_t>(CL_IMAGE_HEIGHT); size_t lGlobalSize[2]; size_t lLocalSize[2]; clAniso2Dorth.localSize2D(iDevice, lGlobalSize, lLocalSize, lIw, lIh); cl_int lFilterSize = bfFilter.dim(0)/sizeof(cl_float); if (lFilterSize %2 == 0) { Log(ERR, this) << "Failure in call to oclConvolute::iso2D : kernel size must be odd "; } clSetKernelArg(clAniso2Dorth, 0, sizeof(cl_mem), bfSrce); clSetKernelArg(clAniso2Dorth, 1, sizeof(cl_mem), bfDest); clSetKernelArg(clAniso2Dorth, 2, sizeof(cl_mem), bfLine); clSetKernelArg(clAniso2Dorth, 3, sizeof(cl_mem), bfFilter); clSetKernelArg(clAniso2Dorth, 4, sizeof(cl_uint), &lFilterSize); clSetKernelArg(clAniso2Dorth, 5, sizeof(cl_uint), &lIw); clSetKernelArg(clAniso2Dorth, 6, sizeof(cl_uint), &lIh); sStatusCL = clEnqueueNDRangeKernel(iDevice, clAniso2Dorth, 2, NULL, lGlobalSize, lLocalSize, 0, NULL, clAniso2Dorth.getEvent()); ENQUEUE_VALIDATE return true; }
bool oclConvolute::gauss1D(float iSigma, oclBuffer& iBuffer) { if (iBuffer.map(CL_MAP_WRITE)) { int lKernelW = iBuffer.dim(0)/sizeof(cl_float); if (lKernelW < 3 || lKernelW % 2 == 0) { Log(ERR) << "Invalid buffer size of gauss1D size = " << iBuffer.dim(0)/sizeof(cl_float) << "*cl_float. Must be > 2 and odd"; return false; } calcGauss1D(iSigma, iBuffer.ptr<cl_float>(), lKernelW); iBuffer.unmap(); return true; } return false; }
bool oclConvolute::gauss2D(float iSigma, oclBuffer& iBuffer, int iKernelW, int iKernelH) { if (iBuffer.map(CL_MAP_WRITE)) { if (iKernelW % 2 == 0 || iKernelH % 2 == 0) { Log(ERR) << "Invalid buffer size of DoG2D." << iBuffer.dim(0)/sizeof(cl_float) << " Kernel dimensions must be odd"; return false; } int lSize = iBuffer.dim(0)/sizeof(cl_float); if (iKernelW*iKernelH > lSize) { Log(ERR) << "Invalid buffer size for given kernel dimension :" << iBuffer.dim(0)/sizeof(cl_float) << "*cl_float"; return false; } calcGauss2D(iSigma, iBuffer.ptr<cl_float>(), iKernelW, iKernelH); iBuffer.unmap(); return true; } return false; }
bool oclConvolute::DoG2D(float iSigmaA, float iSigmaB, float iSensitivity, oclBuffer& iBuffer, int iKernelW, int iKernelH) { if (iBuffer.map(CL_MAP_WRITE)) { if (iKernelW % 2 == 0 || iKernelH % 2 == 0) { Log(ERR) << "Invalid buffer size of DoG2D." << iBuffer.dim(0)/sizeof(cl_float) << " Kernel dimensions must be odd"; return false; } int lSize = iBuffer.dim(0)/sizeof(cl_float); if (iKernelW*iKernelH > lSize) { Log(ERR) << "Invalid buffer size for given kernel dimension :" << iBuffer.dim(0)/sizeof(cl_float) << "*cl_float"; return false; } cl_float* lGaussA = new cl_float[iKernelW*iKernelH]; calcGauss2D(iSigmaA, lGaussA, iKernelW, iKernelH); cl_float* lGaussB = new cl_float[iKernelW*iKernelH]; calcGauss2D(iSigmaB, lGaussB, iKernelW, iKernelH); cl_float* lBuffer = iBuffer.ptr<cl_float>(); cl_float total = 0; for (int i=0; i<iKernelW*iKernelH; i++) { lBuffer[i] = (lGaussA[i] - iSensitivity*lGaussB[i]); total += lBuffer[i]; } total /= iKernelW*iKernelH; for (int i=0; i<iKernelW*iKernelH; i++) { lBuffer[i] -= total; } iBuffer.unmap(); delete lGaussA; delete lGaussB; return true; } return false; }
bool oclConvolute::DoG1D(float iSigmaA, float iSigmaB, float iSensitivity, oclBuffer& iBuffer) { if (iBuffer.map(CL_MAP_WRITE)) { int lKernelW = iBuffer.dim(0)/sizeof(cl_float); if (lKernelW < 3 || lKernelW % 2 == 0) { Log(ERR) << "Invalid buffer size of DoG1D size = " << iBuffer.dim(0)/sizeof(cl_float) << "*cl_float. Must be > 2 and odd"; return false; } cl_float* lGaussA = new cl_float[lKernelW]; calcGauss1D(iSigmaA, lGaussA, lKernelW); cl_float* lGaussB = new cl_float[lKernelW]; calcGauss1D(iSigmaB, lGaussB, lKernelW); cl_float* lBuffer = iBuffer.ptr<cl_float>(); cl_float total = 0; for (int i=0; i<lKernelW; i++) { lBuffer[i] = (lGaussA[i] - iSensitivity*lGaussB[i]); total += lBuffer[i]; //Log(WARN) << lBuffer[i]; } total /= lKernelW; for (int i=0; i<lKernelW; i++) { lBuffer[i] -= total; } iBuffer.unmap(); delete lGaussA; delete lGaussB; return true; } return false; }