/**
 * Feeds one frame to the MOG2 background model and produces the foreground mask.
 *
 * @param frame        input frame (device matrix).
 * @param fgmask       output mask; (re)created as frameSize_ x CV_8UC1 and cleared to 0
 *                     before the kernel is launched.
 * @param learningRate rate forwarded to the kernel. A value >= 1 forces the model to be
 *                     re-initialized. A negative value, or a frame counter that is not yet
 *                     greater than 1, selects 1 / min(2 * nframes_, history) instead.
 */
void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
{
    using namespace cv::ocl::device::mog;

    const int frameChannels = frame.oclchannels();

    // The model is rebuilt on first use, on an explicit reset request (rate >= 1),
    // or whenever the frame geometry no longer matches the stored model.
    const bool mustInitialize = nframes_ == 0
                                || learningRate >= 1.0f
                                || frame.size() != frameSize_
                                || frameChannels != mean_.oclchannels();
    if (mustInitialize)
    {
        initialize(frame.size(), frame.type());
    }

    fgmask.create(frameSize_, CV_8UC1);
    fgmask.setTo(cv::Scalar::all(0));

    ++nframes_;

    // Keep the caller's rate only when it is non-negative and this is not the first frame.
    const bool keepCallerRate = learningRate >= 0.0f && nframes_ > 1;
    if (!keepCallerRate)
    {
        learningRate = 1.0f / std::min(2 * nframes_, history);
    }
    CV_Assert(learningRate >= 0.0f);

    mog2_ocl(frame, frameChannels, fgmask, bgmodelUsedModes_, weight_, variance_, mean_,
             learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
}
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient) { using namespace ::cv::ocl::canny; CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size()); if( low_thresh > high_thresh ) std::swap( low_thresh, high_thresh); dst.create(dx.size(), CV_8U); dst.setTo(Scalar::all(0)); buf.dx = dx; buf.dy = dy; buf.create(dx.size(), -1); buf.edgeBuf.setTo(Scalar::all(0)); calcMagnitude_gpu(buf.dx, buf.dy, buf.edgeBuf, dx.rows, dx.cols, L2gradient); CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh)); }
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags) { if(dft_size == Size(0, 0)) { dft_size = src.size(); } // check if the given dft size is of optimal dft size CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area())); // the two flags are not compatible CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) ); // similar assertions with cuda module CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); //bool is_1d_input = (src.rows == 1); //int is_row_dft = flags & DFT_ROWS; //int is_scaled_dft = flags & DFT_SCALE; int is_inverse = flags & DFT_INVERSE; bool is_complex_input = src.channels() == 2; bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform CV_Assert(is_complex_input || is_complex_output); FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1); switch(type) { case C2C: dst.create(src.rows, src.cols, CV_32FC2); break; case R2C: dst.create(src.rows, src.cols / 2 + 1, CV_32FC2); break; case C2R: CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows); dst.create(src.rows, dft_size.width, CV_32FC1); break; default: //std::runtime_error("does not support this convertion!"); std::cout << "Does not support this convertion!" << std::endl; throw std::exception(); break; } clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, flags, type)->getPlanHandle(); //get the buffersize size_t buffersize = 0; openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) ); //allocate the intermediate buffer // TODO, bind this with the current FftPlan cl_mem clMedBuffer = NULL; if (buffersize) { cl_int medstatus; clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus); openCLSafeCall( medstatus ); } cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr()); openCLSafeCall( clAmdFftEnqueueTransform( plHandle, is_inverse ? 
CLFFT_BACKWARD : CLFFT_FORWARD, 1, &clq, 0, NULL, NULL, (cl_mem *)&src.data, (cl_mem *)&dst.data, clMedBuffer ) ); openCLSafeCall( clFinish(clq) ); if(clMedBuffer) { openCLFree(clMedBuffer); } //fft_teardown(); }
void cv::ocl::OpticalFlowDual_TVL1_OCL::operator()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy) { CV_Assert( I0.type() == CV_8UC1 || I0.type() == CV_32FC1 ); CV_Assert( I0.size() == I1.size() ); CV_Assert( I0.type() == I1.type() ); CV_Assert( !useInitialFlow || (flowx.size() == I0.size() && flowx.type() == CV_32FC1 && flowy.size() == flowx.size() && flowy.type() == flowx.type()) ); CV_Assert( nscales > 0 ); // allocate memory for the pyramid structure I0s.resize(nscales); I1s.resize(nscales); u1s.resize(nscales); u2s.resize(nscales); //I0s_step == I1s_step I0.convertTo(I0s[0], CV_32F, I0.depth() == CV_8U ? 1.0 : 255.0); I1.convertTo(I1s[0], CV_32F, I1.depth() == CV_8U ? 1.0 : 255.0); if (!useInitialFlow) { flowx.create(I0.size(), CV_32FC1); flowy.create(I0.size(), CV_32FC1); } //u1s_step != u2s_step u1s[0] = flowx; u2s[0] = flowy; I1x_buf.create(I0.size(), CV_32FC1); I1y_buf.create(I0.size(), CV_32FC1); I1w_buf.create(I0.size(), CV_32FC1); I1wx_buf.create(I0.size(), CV_32FC1); I1wy_buf.create(I0.size(), CV_32FC1); grad_buf.create(I0.size(), CV_32FC1); rho_c_buf.create(I0.size(), CV_32FC1); p11_buf.create(I0.size(), CV_32FC1); p12_buf.create(I0.size(), CV_32FC1); p21_buf.create(I0.size(), CV_32FC1); p22_buf.create(I0.size(), CV_32FC1); diff_buf.create(I0.size(), CV_32FC1); // create the scales for (int s = 1; s < nscales; ++s) { ocl::pyrDown(I0s[s - 1], I0s[s]); ocl::pyrDown(I1s[s - 1], I1s[s]); if (I0s[s].cols < 16 || I0s[s].rows < 16) { nscales = s; break; } if (useInitialFlow) { ocl::pyrDown(u1s[s - 1], u1s[s]); ocl::pyrDown(u2s[s - 1], u2s[s]); //ocl::multiply(u1s[s], Scalar::all(0.5), u1s[s]); multiply(0.5, u1s[s], u1s[s]); //ocl::multiply(u2s[s], Scalar::all(0.5), u2s[s]); multiply(0.5, u1s[s], u2s[s]); } } // pyramidal structure for computing the optical flow for (int s = nscales - 1; s >= 0; --s) { // compute the optical flow at the current scale procOneScale(I0s[s], I1s[s], u1s[s], u2s[s]); // if this was the last scale, finish now if 
(s == 0) break; // otherwise, upsample the optical flow // zoom the optical flow for the next finer scale ocl::resize(u1s[s], u1s[s - 1], I0s[s - 1].size()); ocl::resize(u2s[s], u2s[s - 1], I0s[s - 1].size()); // scale the optical flow with the appropriate zoom factor multiply(2, u1s[s - 1], u1s[s - 1]); multiply(2, u2s[s - 1], u2s[s - 1]); } }
void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const oclMat &I1, oclMat &u1, oclMat &u2) { using namespace ocl_tvl1flow; const double scaledEpsilon = epsilon * epsilon * I0.size().area(); CV_DbgAssert( I1.size() == I0.size() ); CV_DbgAssert( I1.type() == I0.type() ); CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); CV_DbgAssert( u2.size() == u1.size() ); if (u1.empty()) { u1.create(I0.size(), CV_32FC1); u1.setTo(Scalar::all(0)); u2.create(I0.size(), CV_32FC1); u2.setTo(Scalar::all(0)); } oclMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); centeredGradient(I1, I1x, I1y); oclMat I1w = I1w_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat I1wx = I1wx_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat I1wy = I1wy_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat grad = grad_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat rho_c = rho_c_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat p11 = p11_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat p12 = p12_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat p21 = p21_buf(Rect(0, 0, I0.cols, I0.rows)); oclMat p22 = p22_buf(Rect(0, 0, I0.cols, I0.rows)); p11.setTo(Scalar::all(0)); p12.setTo(Scalar::all(0)); p21.setTo(Scalar::all(0)); p22.setTo(Scalar::all(0)); oclMat diff = diff_buf(Rect(0, 0, I0.cols, I0.rows)); const float l_t = static_cast<float>(lambda * theta); const float taut = static_cast<float>(tau / theta); for (int warpings = 0; warpings < warps; ++warpings) { warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); double error = numeric_limits<double>::max(); double prev_error = 0; for (int n = 0; error > scaledEpsilon && n < iterations; ++n) { // some tweaks to make sum operation less frequently char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast<float>(theta), calc_error); if(calc_error) { error = ocl::sum(diff)[0]; prev_error = error; } else { error = 
numeric_limits<double>::max(); prev_error -= scaledEpsilon; } estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); } } }
/*
 * Dispatches a colour-space conversion to the matching OpenCL caller.
 *
 * src  - input device matrix; depth must be CV_8U, CV_16U or CV_32F.
 * dst  - output device matrix; created inside each case with the size and type
 *        that the conversion produces.
 * code - CV_* colour conversion code selecting the case below.
 * dcn  - requested number of destination channels. Several cases overwrite it
 *        from the code; the *2BGR/RGB style cases treat a value <= 0 as 3.
 *
 * Throughout, bidx is 0 when the code names a BGR(A) layout and 2 when it names
 * RGB(A); it is forwarded to the callers unchanged. An unrecognised code raises
 * CV_StsBadFlag.
 */
static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
{
    Size sz = src.size();
    int scn = src.channels(), depth = src.depth(), bidx;

    CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);

    switch (code)
    {
    /* Channel reordering and/or adding or dropping the alpha channel. */
    case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
    case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
    {
        CV_Assert(scn == 3 || scn == 4);
        dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
        /* Every code except the two pure alpha add/remove ones swaps channel order. */
        bool reverse = !(code == CV_BGR2BGRA || code == CV_BGRA2BGR);
        dst.create(sz, CV_MAKE_TYPE(depth, dcn));
        RGB_caller(src, dst, reverse);
        break;
    }
    /* 8-bit BGR/RGB(A) to packed 16-bit 565/555; output is CV_8UC2. */
    case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
    case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
    {
        CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
        bidx = code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
            code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2;
        /* 6 green bits for the 565 codes, 5 for the 555 codes. */
        int greenbits = code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
            code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5;
        dst.create(sz, CV_8UC2);
        toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
        break;
    }
    /* Packed 16-bit 565/555 back to 8-bit BGR/RGB(A). */
    case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
    case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
    {
        dcn = code == CV_BGR5652BGRA || code == CV_BGR5552BGRA || code == CV_BGR5652RGBA || code == CV_BGR5552RGBA ? 4 : 3;
        CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
        bidx = code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
            code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2;
        int greenbits = code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
            code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5;
        dst.create(sz, CV_MAKETYPE(depth, dcn));
        fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
        break;
    }
    /* Packed 16-bit to single-channel gray; bidx is passed as -1. */
    case CV_BGR5652GRAY: case CV_BGR5552GRAY:
    {
        CV_Assert(scn == 2 && depth == CV_8U);
        dst.create(sz, CV_8UC1);
        int greenbits = code == CV_BGR5652GRAY ? 6 : 5;
        fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
        break;
    }
    /* Single-channel gray to packed 16-bit; bidx is passed as -1. */
    case CV_GRAY2BGR565: case CV_GRAY2BGR555:
    {
        CV_Assert(scn == 1 && depth == CV_8U);
        dst.create(sz, CV_8UC2);
        int greenbits = code == CV_GRAY2BGR565 ? 6 : 5;
        toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
        break;
    }
    /* Colour to gray; output keeps the source depth with one channel. */
    case CV_RGB2GRAY: case CV_BGR2GRAY: case CV_RGBA2GRAY: case CV_BGRA2GRAY:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
        dst.create(sz, CV_MAKETYPE(depth, 1));
        fromRGB_caller(src, dst, bidx, "RGB2Gray");
        break;
    }
    /* Gray to 3- or 4-channel colour. */
    case CV_GRAY2BGR: case CV_GRAY2BGRA:
    {
        CV_Assert(scn == 1);
        dcn = code == CV_GRAY2BGRA ? 4 : 3;
        dst.create(sz, CV_MAKETYPE(depth, dcn));
        fromGray_caller(src, dst, 0, "Gray2RGB");
        break;
    }
    /* BGR/RGB to YUV; output always has 3 channels. */
    case CV_BGR2YUV: case CV_RGB2YUV:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == CV_BGR2YUV ? 0 : 2;
        dst.create(sz, CV_MAKETYPE(depth, 3));
        fromRGB_caller(src, dst, bidx, "RGB2YUV");
        break;
    }
    /* YUV to BGR/RGB; dcn defaults to 3 when not given. */
    case CV_YUV2BGR: case CV_YUV2RGB:
    {
        if( dcn <= 0 )
            dcn = 3;
        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
        bidx = code == CV_YUV2BGR ? 0 : 2;
        dst.create(sz, CV_MAKETYPE(depth, dcn));
        toRGB_caller(src, dst, bidx, "YUV2RGB");
        break;
    }
    /* NV12 single-channel input: width must be even and height a multiple of 3;
       the output height is two thirds of the input height. */
    case CV_YUV2RGB_NV12: case CV_YUV2BGR_NV12:
    case CV_YUV2RGBA_NV12: case CV_YUV2BGRA_NV12:
    {
        CV_Assert(scn == 1);
        CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
        dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3;
        bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2;

        Size dstSz(sz.width, sz.height * 2 / 3);
        dst.create(dstSz, CV_MAKETYPE(depth, dcn));
        toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
        break;
    }
    /* BGR/RGB to YCrCb; output always has 3 channels. */
    case CV_BGR2YCrCb: case CV_RGB2YCrCb:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == CV_BGR2YCrCb ? 0 : 2;
        dst.create(sz, CV_MAKETYPE(depth, 3));
        fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
        break;
    }
    /* YCrCb to BGR/RGB; dcn defaults to 3 when not given. */
    case CV_YCrCb2BGR: case CV_YCrCb2RGB:
    {
        if( dcn <= 0 )
            dcn = 3;
        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
        bidx = code == CV_YCrCb2BGR ? 0 : 2;
        dst.create(sz, CV_MAKETYPE(depth, dcn));
        toRGB_caller(src, dst, bidx, "YCrCb2RGB");
        break;
    }
    /* BGR/RGB to XYZ. The 3x3 coefficient matrix is uploaded to the device:
       float coefficients for CV_32F input, integer coefficients otherwise. */
    case CV_BGR2XYZ: case CV_RGB2XYZ:
    {
        CV_Assert(scn == 3 || scn == 4);
        bidx = code == CV_BGR2XYZ ? 0 : 2;
        dst.create(sz, CV_MAKE_TYPE(depth, 3));

        Mat c;
        if (depth == CV_32F)
        {
            float coeffs[] =
            {
                0.412453f, 0.357580f, 0.180423f,
                0.212671f, 0.715160f, 0.072169f,
                0.019334f, 0.119193f, 0.950227f
            };
            /* For BGR input, swap the first and third column of each row. */
            if (bidx == 0)
            {
                std::swap(coeffs[0], coeffs[2]);
                std::swap(coeffs[3], coeffs[5]);
                std::swap(coeffs[6], coeffs[8]);
            }
            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
        }
        else
        {
            int coeffs[] =
            {
                1689,    1465,    739,
                871,     2929,    296,
                79,      488,     3892
            };
            /* For BGR input, swap the first and third column of each row. */
            if (bidx == 0)
            {
                std::swap(coeffs[0], coeffs[2]);
                std::swap(coeffs[3], coeffs[5]);
                std::swap(coeffs[6], coeffs[8]);
            }
            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
        }
        oclMat oclCoeffs(c);

        fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
        break;
    }
    /* XYZ to BGR/RGB; dcn defaults to 3 when not given. Same float/integer
       coefficient split as the forward conversion. */
    case CV_XYZ2BGR: case CV_XYZ2RGB:
    {
        if (dcn <= 0)
            dcn = 3;
        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
        bidx = code == CV_XYZ2BGR ? 0 : 2;
        dst.create(sz, CV_MAKE_TYPE(depth, dcn));

        Mat c;
        if (depth == CV_32F)
        {
            float coeffs[] =
            {
                3.240479f, -1.53715f, -0.498535f,
                -0.969256f, 1.875991f, 0.041556f,
                0.055648f, -0.204043f, 1.057311f
            };
            /* For BGR output, swap the first and third row. */
            if (bidx == 0)
            {
                std::swap(coeffs[0], coeffs[6]);
                std::swap(coeffs[1], coeffs[7]);
                std::swap(coeffs[2], coeffs[8]);
            }
            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
        }
        else
        {
            int coeffs[] =
            {
                13273,  -6296,  -2042,
                -3970,   7684,    170,
                  228,   -836,   4331
            };
            /* For BGR output, swap the first and third row. */
            if (bidx == 0)
            {
                std::swap(coeffs[0], coeffs[6]);
                std::swap(coeffs[1], coeffs[7]);
                std::swap(coeffs[2], coeffs[8]);
            }
            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
        }
        oclMat oclCoeffs(c);

        toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
        break;
    }
    /* BGR/RGB to HSV or HLS (8-bit or float only). */
    case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
    case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
    {
        CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
        bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
            code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
        /* Hue range: 360 for float, 180 for the plain 8-bit codes, 256 for *_FULL. */
        int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
            code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
        bool is_hsv = code == CV_BGR2HSV || code == CV_RGB2HSV || code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL;
        dst.create(sz, CV_MAKETYPE(depth, 3));
        std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");

        if (is_hsv && depth == CV_8U)
        {
            /* 8-bit HSV uses integer division tables that are built once and kept
               on the device: one saturation table shared by both hue ranges and
               one hue table per range (180 / 256). */
            static oclMat sdiv_data;
            static oclMat hdiv_data180;
            static oclMat hdiv_data256;
            static int sdiv_table[256];
            static int hdiv_table180[256];
            static int hdiv_table256[256];
            /* NOTE(review): these flags are plain volatile bools, not a
               synchronization primitive - confirm callers never race here. */
            static volatile bool initialized180 = false, initialized256 = false;
            volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;

            if (!initialized)
            {
                int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
                oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;

                sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;

                int v = 255 << hsv_shift;
                /* The saturation table is only built when neither hue range has
                   been initialised yet. */
                if (!initialized180 && !initialized256)
                {
                    for(int i = 1; i < 256; i++ )
                        sdiv_table[i] = saturate_cast<int>(v/(1.*i));
                    sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
                }

                v = hrange << hsv_shift;
                for (int i = 1; i < 256; i++ )
                    hdiv_table[i] = saturate_cast<int>(v/(6.*i));

                hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
                initialized = true;
            }

            toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
            return;
        }

        /* HLS, or float HSV: no tables, only a hue scale build option. */
        toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
        break;
    }
    /* HSV or HLS back to BGR/RGB; dcn defaults to 3 when not given. */
    case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
    case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
    {
        if (dcn <= 0)
            dcn = 3;
        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
        bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
            code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
        /* Hue range: 360 for float, 180 for the plain 8-bit codes, 255 for *_FULL
           (note: 255 here, whereas the forward conversion uses 256). */
        int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
            code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
        bool is_hsv = code == CV_HSV2BGR || code == CV_HSV2RGB ||
            code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL;

        dst.create(sz, CV_MAKETYPE(depth, dcn));

        std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
        fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
        break;
    }
    /* Conversion between RGBA and mRGBA, 8-bit 4-channel only; the kernel is
       selected by name. */
    case CV_RGBA2mRGBA: case CV_mRGBA2RGBA:
    {
        CV_Assert(scn == 4 && depth == CV_8U);
        dst.create(sz, CV_MAKETYPE(depth, 4));
        std::string kernelName = code == CV_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";

        fromRGB_caller(src, dst, 0, kernelName);
        break;
    }
    default:
        CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
    }
}
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, const oclMat &src3, double beta, oclMat &dst, int flags) { CV_Assert(src1.cols == src2.rows && (src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols))); CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported if(!src3.empty()) { src3.copyTo(dst); } else { dst.create(src1.rows, src2.cols, src1.type()); dst.setTo(Scalar::all(0)); } clBlasSetup(); const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasOrder order = clAmdBlasRowMajor; const int M = src1.rows; const int N = src2.cols; const int K = src1.cols; int lda = src1.step; int ldb = src2.step; int ldc = dst.step; int offa = src1.offset; int offb = src2.offset; int offc = dst.offset; cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr(); switch(src1.type()) { case CV_32FC1: lda /= sizeof(float); ldb /= sizeof(float); ldc /= sizeof(float); offa /= sizeof(float); offb /= sizeof(float); offc /= sizeof(float); openCLSafeCall ( clAmdBlasSgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_64FC1: lda /= sizeof(double); ldb /= sizeof(double); ldc /= sizeof(double); offa /= sizeof(double); offb /= sizeof(double); offc /= sizeof(double); openCLSafeCall ( clAmdBlasDgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_32FC2: { lda /= (2*sizeof(float)); ldb /= (2*sizeof(float)); ldc /= (2*sizeof(float)); offa /= (2*sizeof(float)); offb /= (2*sizeof(float)); offc /= (2*sizeof(float)); cl_float2 alpha_2 = {{alpha, 0}}; cl_float2 beta_2 = {{beta, 0}}; openCLSafeCall ( 
clAmdBlasCgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; case CV_64FC2: { lda /= (2*sizeof(double)); ldb /= (2*sizeof(double)); ldc /= (2*sizeof(double)); offa /= (2*sizeof(double)); offb /= (2*sizeof(double)); offc /= (2*sizeof(double)); cl_double2 alpha_2 = {{alpha, 0}}; cl_double2 beta_2 = {{beta, 0}}; openCLSafeCall ( clAmdBlasZgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; } }
///////////////////////////////////k - means ///////////////////////////////////////////////////////// double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels, TermCriteria criteria, int attempts, int flags, oclMat &_centers) { const int SPP_TRIALS = 3; bool isrow = _src.rows == 1 && _src.oclchannels() > 1; int N = !isrow ? _src.rows : _src.cols; int dims = (!isrow ? _src.cols : 1) * _src.oclchannels(); int type = _src.depth(); attempts = std::max(attempts, 1); CV_Assert(type == CV_32F && K > 0 ); CV_Assert( N >= K ); Mat _labels; if( flags & KMEANS_USE_INITIAL_LABELS ) { CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) && _bestLabels.cols * _bestLabels.rows == N && _bestLabels.type() == CV_32S ); _bestLabels.download(_labels); } else { if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) && _bestLabels.cols * _bestLabels.rows == N && _bestLabels.type() == CV_32S && _bestLabels.isContinuous())) _bestLabels.create(N, 1, CV_32S); _labels.create(_bestLabels.size(), _bestLabels.type()); } int* labels = _labels.ptr<int>(); Mat data; _src.download(data); Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); std::vector<int> counters(K); std::vector<Vec2f> _box(dims); Vec2f* box = &_box[0]; double best_compactness = DBL_MAX, compactness = 0; RNG& rng = theRNG(); int a, iter, i, j, k; if( criteria.type & TermCriteria::EPS ) criteria.epsilon = std::max(criteria.epsilon, 0.); else criteria.epsilon = FLT_EPSILON; criteria.epsilon *= criteria.epsilon; if( criteria.type & TermCriteria::COUNT ) criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); else criteria.maxCount = 100; if( K == 1 ) { attempts = 1; criteria.maxCount = 2; } const float* sample = data.ptr<float>(); for( j = 0; j < dims; j++ ) box[j] = Vec2f(sample[j], sample[j]); for( i = 1; i < N; i++ ) { sample = data.ptr<float>(i); for( j = 0; j < dims; j++ ) { float v = sample[j]; box[j][0] = std::min(box[j][0], v); box[j][1] = std::max(box[j][1], v); } } 
for( a = 0; a < attempts; a++ ) { double max_center_shift = DBL_MAX; for( iter = 0;; ) { swap(centers, old_centers); if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) { if( flags & KMEANS_PP_CENTERS ) generateCentersPP(data, centers, K, rng, SPP_TRIALS); else { for( k = 0; k < K; k++ ) generateRandomCenter(_box, centers.ptr<float>(k), rng); } } else { if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) { for( i = 0; i < N; i++ ) CV_Assert( (unsigned)labels[i] < (unsigned)K ); } // compute centers centers = Scalar(0); for( k = 0; k < K; k++ ) counters[k] = 0; for( i = 0; i < N; i++ ) { sample = data.ptr<float>(i); k = labels[i]; float* center = centers.ptr<float>(k); j=0; #if CV_ENABLE_UNROLLED for(; j <= dims - 4; j += 4 ) { float t0 = center[j] + sample[j]; float t1 = center[j+1] + sample[j+1]; center[j] = t0; center[j+1] = t1; t0 = center[j+2] + sample[j+2]; t1 = center[j+3] + sample[j+3]; center[j+2] = t0; center[j+3] = t1; } #endif for( ; j < dims; j++ ) center[j] += sample[j]; counters[k]++; } if( iter > 0 ) max_center_shift = 0; for( k = 0; k < K; k++ ) { if( counters[k] != 0 ) continue; // if some cluster appeared to be empty then: // 1. find the biggest cluster // 2. find the farthest from the center point in the biggest cluster // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. 
int max_k = 0; for( int k1 = 1; k1 < K; k1++ ) { if( counters[max_k] < counters[k1] ) max_k = k1; } double max_dist = 0; int farthest_i = -1; float* new_center = centers.ptr<float>(k); float* old_center = centers.ptr<float>(max_k); float* _old_center = temp.ptr<float>(); // normalized float scale = 1.f/counters[max_k]; for( j = 0; j < dims; j++ ) _old_center[j] = old_center[j]*scale; for( i = 0; i < N; i++ ) { if( labels[i] != max_k ) continue; sample = data.ptr<float>(i); double dist = normL2Sqr_(sample, _old_center, dims); if( max_dist <= dist ) { max_dist = dist; farthest_i = i; } } counters[max_k]--; counters[k]++; labels[farthest_i] = k; sample = data.ptr<float>(farthest_i); for( j = 0; j < dims; j++ ) { old_center[j] -= sample[j]; new_center[j] += sample[j]; } } for( k = 0; k < K; k++ ) { float* center = centers.ptr<float>(k); CV_Assert( counters[k] != 0 ); float scale = 1.f/counters[k]; for( j = 0; j < dims; j++ ) center[j] *= scale; if( iter > 0 ) { double dist = 0; const float* old_center = old_centers.ptr<float>(k); for( j = 0; j < dims; j++ ) { double t = center[j] - old_center[j]; dist += t*t; } max_center_shift = std::max(max_center_shift, dist); } } } if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) break; // assign labels oclMat _dists(1, N, CV_64F); _bestLabels.upload(_labels); _centers.upload(centers); distanceToCenters(_dists, _bestLabels, _src, _centers); Mat dists; _dists.download(dists); _bestLabels.download(_labels); double* dist = dists.ptr<double>(0); compactness = 0; for( i = 0; i < N; i++ ) { compactness += dist[i]; } } if( compactness < best_compactness ) { best_compactness = compactness; } } return best_compactness; }