void convertTo( const oclMat &src, oclMat &dst, int rtype, double alpha, double beta ) { //cout << "cv::ocl::oclMat::convertTo()" << endl; bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon(); if( rtype < 0 ) rtype = src.type(); else rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), src.oclchannels()); int sdepth = src.depth(), ddepth = CV_MAT_DEPTH(rtype); if( sdepth == ddepth && noScale ) { src.copyTo(dst); return; } oclMat temp; const oclMat *psrc = &src; if( sdepth != ddepth && psrc == &dst ) psrc = &(temp = src); dst.create( src.size(), rtype ); convert_run_cus(*psrc, dst, alpha, beta); }
static void split(const oclMat &mat_src, oclMat *mat_dst) { CV_Assert(mat_dst); int depth = mat_src.depth(); int num_channels = mat_src.oclchannels(); Size size = mat_src.size(); if(num_channels == 1) { mat_src.copyTo(mat_dst[0]); return; } int i; for(i = 0; i < num_channels; i++) mat_dst[i].create(size, CV_MAKETYPE(depth, 1)); split_vector_run(mat_src, mat_dst); }
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, const oclMat &src3, double beta, oclMat &dst, int flags) { CV_Assert(src1.cols == src2.rows && (src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols))); CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported if(!src3.empty()) { src3.copyTo(dst); } else { dst.create(src1.rows, src2.cols, src1.type()); dst.setTo(Scalar::all(0)); } clBlasSetup(); const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans; const clAmdBlasOrder order = clAmdBlasRowMajor; const int M = src1.rows; const int N = src2.cols; const int K = src1.cols; int lda = src1.step; int ldb = src2.step; int ldc = dst.step; int offa = src1.offset; int offb = src2.offset; int offc = dst.offset; cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr(); switch(src1.type()) { case CV_32FC1: lda /= sizeof(float); ldb /= sizeof(float); ldc /= sizeof(float); offa /= sizeof(float); offb /= sizeof(float); offc /= sizeof(float); openCLSafeCall ( clAmdBlasSgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_64FC1: lda /= sizeof(double); ldb /= sizeof(double); ldc /= sizeof(double); offa /= sizeof(double); offb /= sizeof(double); offc /= sizeof(double); openCLSafeCall ( clAmdBlasDgemmEx(order, transA, transB, M, N, K, alpha, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); break; case CV_32FC2: { lda /= (2*sizeof(float)); ldb /= (2*sizeof(float)); ldc /= (2*sizeof(float)); offa /= (2*sizeof(float)); offb /= (2*sizeof(float)); offc /= (2*sizeof(float)); cl_float2 alpha_2 = {{alpha, 0}}; cl_float2 beta_2 = {{beta, 0}}; openCLSafeCall ( clAmdBlasCgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; case CV_64FC2: { lda /= (2*sizeof(double)); ldb /= (2*sizeof(double)); ldc /= (2*sizeof(double)); offa /= (2*sizeof(double)); offb /= (2*sizeof(double)); offc /= (2*sizeof(double)); cl_double2 alpha_2 = {{alpha, 0}}; cl_double2 beta_2 = {{beta, 0}}; openCLSafeCall ( clAmdBlasZgemmEx(order, transA, transB, M, N, K, alpha_2, (const cl_mem)src1.data, offa, lda, (const cl_mem)src2.data, offb, ldb, beta_2, (cl_mem)dst.data, offc, ldc, 1, &clq, 0, NULL, NULL) ); } break; } }