/* Backward pass for a convolutional layer (CPU).
 * Applies the activation gradient in place, accumulates bias and filter
 * gradients, and (when state.delta is non-NULL) propagates the error to
 * the previous layer via the transposed convolution (col2im of W^T*delta). */
void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    int i;
    int m = l.n;                /* rows: number of filters */
    int n = l.size*l.size*l.c;  /* cols: filter volume */
    int k = convolutional_out_height(l)* convolutional_out_width(l); /* output locations */

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
    /* bias gradient: sum delta over spatial locations and batch */
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);

    for(i = 0; i < l.batch; ++i){
        float *a = l.delta + i*m*k;
        float *b = l.col_image;
        float *c = l.filter_updates;

        float *im = state.input+i*l.c*l.h*l.w;

        /* lower the input to column form so the filter gradient is one GEMM */
        im2col_cpu(im, l.c, l.h, l.w,
                l.size, l.stride, l.pad, b);
        /* filter_updates += delta * col_image^T */
        gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(state.delta){
            a = l.filters;
            b = l.delta + i*m*k;
            c = l.col_image;

            /* col_image = filters^T * delta, then fold back to image space */
            gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

            col2im_cpu(l.col_image, l.c, l.h, l.w,
                    l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
/* Backward pass for a fully connected layer (CPU).
 * Order: activation gradient -> bias gradient -> weight gradient ->
 * gradient w.r.t. the layer input. */
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    /* weight gradient: weight_updates += delta^T * input
     * (the whole batch is reduced in this single GEMM) */
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    /* error for the previous layer: state.delta += delta * weights */
    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
/* Backward pass for a fully connected layer with optional batch
 * normalization (CPU). Order: activation gradient -> bias gradient ->
 * batch-norm backward -> weight gradient -> input gradient. */
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    if(l.batch_normalize){
        /* gradient of the BN scale, then backprop through the
         * normalization itself using the saved x / x_norm / statistics */
        backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);
        scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);
        mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
        variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
        normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
    }

    /* weight gradient: weight_updates += delta^T * input */
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    /* error for the previous layer: state.delta += delta * weights */
    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
/**
 * @brief Tikhonov regulated CGNR least squares solution of ||Ax - b||_2 + l * ||x||_2
 *
 * @param A      Matrix A
 * @param b      Vector b
 * @param maxit  Maximum number of CG iterations
 * @param conv   Convergence of residuals
 * @param lambda Tikhonov weight
 * @return       Vector x
 */
template<class T> static Matrix<T>
MCGLS (const Matrix<T>& A, const Matrix<T>& b, const size_t& maxit = 100,
        const double& conv = 1.0e-6, const double& lambda = 0.0) {

    size_t ah = size(A, 0);
    size_t bh = size(b, 0);
    size_t bw = size(b, 1);

    assert (bw == 1);  // Column vector x
    assert (ah == bh); // Check inner dimensions of A'*x.

    ticks tic = getticks();

    // CGNR iterates on the normal equations: start from p = r = x = A' * b.
    Matrix<T> p = gemm (A, b, 'C');
    Matrix<T> r = p;
    Matrix<T> x = p;
    Matrix<T> q;
    T ts;
    float rn = 0.0;
    float xn = pow(creal(norm(p)), 2); // ||A'b||^2, used to scale residuals
    std::vector<double> res;

    for (size_t i = 0; i < maxit; i++) {

        rn = pow(creal(norm(r)), 2);
        res.push_back(rn/xn);

        // Stop on convergence or numerical breakdown.
        if (std::isnan(res.at(i)) || res.at(i) <= conv)
            break;

        if (i % 5 == 0 && i > 0)
            printf ("\n");
        // BUGFIX: %zu is the portable conversion for size_t; the original
        // %lu is undefined behaviour where size_t != unsigned long (Win64).
        printf (" %03zu %.7f", i, res.at(i));

        // q = (A'A + lambda*I) p
        q   = gemm (A, p);
        q   = gemm (A, q, 'C');
        q  += lambda * p;

        // Step length, then update solution and residual.
        ts  = (rn / (p.dotc(q)));
        x  += (p * ts);
        r  -= (q * ts);

        // Fletcher-Reeves style update of the search direction.
        p  *= pow(creal(norm(r)), 2)/rn;
        p  += r;

    }

    printf ("\n MCGLS time: %.4f s\n",
            elapsed(getticks(), tic) / Toolbox::Instance()->ClockRate());

    return x;

}
// Fuses a new measurement z(k) into the predicted state and returns the
// corrected (a posteriori) estimate x(k).
const Mat& KalmanFilter::correct(const Mat& measurement)
{
    // temp2 = H*P'(k)
    temp2 = measurementMatrix * errorCovPre;

    // temp3 = temp2*Ht + R  (the innovation covariance S)
    gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);

    // temp4 = inv(temp3)*temp2 = Kt(k)
    // SVD-based solve tolerates a singular / ill-conditioned S.
    solve(temp3, temp2, temp4, DECOMP_SVD);

    // K(k)
    gain = temp4.t();

    // temp5 = z(k) - H*x'(k)  (the innovation / measurement residual)
    temp5 = measurement - measurementMatrix*statePre;

    // x(k) = x'(k) + K(k)*temp5
    statePost = statePre + gain*temp5;

    // P(k) = P'(k) - K(k)*temp2
    errorCovPost = errorCovPre - gain*temp2;

    return statePost;
}
/* Forward pass for a deconvolutional (transposed convolution) layer (CPU).
 * For each batch item: weights^T * input gives the output in column form,
 * which col2im scatter-adds into the (larger) output image; then either
 * batch normalization or a plain bias, followed by the activation. */
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int i;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size = out_h*out_w;

    int m = l.size*l.size*l.n; /* rows: output filter volume */
    int n = l.h*l.w;           /* cols: input spatial locations */
    int k = l.c;               /* inner dim: input channels */

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = state.input + i*l.c*l.h*l.w;
        float *c = state.workspace;

        /* workspace = weights^T * input (column form of the output) */
        gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);

        /* fold columns back to image space; deconvolution uses pad = 0 here */
        col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
    }
    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
// Reconstructs samples from their subspace projection: X = src * W^T + mean.
// src is n x W.cols (one projection per row); the result is n x W.rows,
// i.e. back in the original image space.
Mat Eigenfaces::reconstruct(const Mat& src) {
    Mat W = _eigenvectors;
    Mat mean = _mean;
    // get number of samples and dimension
    int n = src.rows;
    int d = src.cols;
    // make sure the data has the correct shape
    if (W.cols != d) {
        string error_message = format("Wrong shapes for given matrices. Was size(src) = (%d,%d), size(W) = (%d,%d).", src.rows, src.cols, W.rows, W.cols);
        CV_Error(CV_StsBadArg, error_message);
    }
    // make sure mean is correct if not empty
    // (the reconstruction lives in the original, W.rows-dimensional space)
    if (!mean.empty() && (mean.total() != (size_t)W.rows)) {
        // BUGFIX: the expected mean length is W.rows, not W.cols as the
        // original message claimed; also cast size_t -> int for %d.
        string error_message = format("Wrong mean shape for the given eigenvector matrix. Expected %d, but was %d.", W.rows, (int)mean.total());
        CV_Error(CV_StsBadArg, error_message);
    }
    // initalize temporary matrices
    Mat X, Y;
    // copy data & make sure we are using the correct type
    src.convertTo(Y, W.type());
    // calculate the reconstruction: X = Y * W^T
    gemm(Y, W, 1.0, Mat(), 0.0, X, GEMM_2_T);
    // safe to do because of above assertion
    if (!mean.empty()) {
        for (int i = 0; i<n; i++) {
            Mat r_i = X.row(i);
            add(r_i, mean.reshape(1, 1), r_i);
        }
    }
    return X;
}
/* Forward pass for a convolutional layer (CPU).
 * Seeds the output with biases, then for each batch item lowers the input
 * to column form (im2col) and computes the convolution as one GEMM,
 * finally applying the activation in place.
 * NOTE: state.input is advanced locally; `state` is passed by value, so
 * the caller's copy is untouched. */
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    bias_output(l.output, l.biases, l.batch, l.n, out_h*out_w);

    int m = l.n;               /* rows: number of filters */
    int k = l.size*l.size*l.c; /* inner dim: filter volume */
    int n = out_h*out_w;       /* cols: output locations */
    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
        /* output (already holding the bias) += filters * col_image */
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;                      /* next image's output */
        state.input += l.c*l.h*l.w;    /* next image's input */
    }
    activate_array(l.output, m*n*l.batch, l.activation);
}
/* Forward pass for a convolutional layer with an optional XNOR-style
 * binary-filter path and optional batch normalization (CPU). */
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    /*
    if(l.binary){
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
        binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
        swap_binary(&l);
    }
    */

    if(l.binary){
        /* binary path: filters are packed chars multiplied by gemm_bin,
         * then rescaled per filter before bias and activation */
        int m = l.n;               /* filters */
        int k = l.size*l.size*l.c; /* filter volume */
        int n = out_h*out_w;       /* output locations */

        char *a = l.cfilters;
        float *b = state.workspace;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;
            state.input += l.c*l.h*l.w;
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    /* standard float path: one im2col + GEMM per batch item */
    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = state.workspace;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    }
    /* biases are added after (and independently of) batch normalization */
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
// Projects samples into the eigenspace: Y = (src - mean) * W.
// src is n x d (one sample per row), W is d x num_components, and the
// result is n x num_components.
Mat Eigenfaces::project(const Mat& src) {
    Mat W = _eigenvectors;
    Mat mean = _mean;
    // get number of samples and dimension
    int n = src.rows;
    int d = src.cols;
    // make sure the data has the correct shape
    if (W.rows != d) {
        string error_message = format("Wrong shapes for given matrices. Was size(src) = (%d,%d), size(W) = (%d,%d).", src.rows, src.cols, W.rows, W.cols);
        CV_Error(CV_StsBadArg, error_message);
    }
    // make sure mean is correct if not empty
    // (cast avoids the signed/unsigned comparison between size_t and int)
    if (!mean.empty() && (mean.total() != (size_t)d)) {
        // BUGFIX: mean.total() is size_t; passing it to %d through varargs
        // is undefined behaviour on LP64/LLP64 -- cast to int explicitly.
        string error_message = format("Wrong mean shape for the given data matrix. Expected %d, but was %d.", d, (int)mean.total());
        CV_Error(CV_StsBadArg, error_message);
    }
    // create temporary matrices
    Mat X, Y;
    // make sure you operate on correct type
    src.convertTo(X, W.type());
    // safe to do, because of above assertion
    if (!mean.empty()) {
        // center each sample by subtracting the mean row vector
        for (int i = 0; i<n; i++) {
            Mat r_i = X.row(i);
            subtract(r_i, mean.reshape(1, 1), r_i);
        }
    }
    // finally calculate projection as Y = (X-mean)*W
    gemm(X, W, 1.0, Mat(), 0.0, Y);
    return Y;
}
// Dense GEMM dispatcher (dense-tag overload): accumulates
// matC += alpha * matA * matB through CBLAS. The transpose flags are
// derived from each operand's storage orientation relative to matC, so
// row- and column-major operands may be mixed without copying.
void gemm(
    matrix_expression<MatrA> const &matA,
    matrix_expression<MatrB> const &matB,
    matrix_expression<MatrC>& matC,
    typename MatrC::value_type alpha,
    boost::mpl::true_
) {
    SIZE_CHECK(matA().size1() == matC().size1());
    SIZE_CHECK(matB().size2() == matC().size2());
    SIZE_CHECK(matA().size2()== matB().size1());

    // An operand stored with the opposite orientation to matC is handed to
    // CBLAS as "transposed" instead of being physically reordered.
    CBLAS_TRANSPOSE transA = traits::same_orientation(matA,matC)?CblasNoTrans:CblasTrans;
    CBLAS_TRANSPOSE transB = traits::same_orientation(matB,matC)?CblasNoTrans:CblasTrans;
    std::size_t m = matC().size1();
    std::size_t n = matC().size2();
    std::size_t k = matA().size2();
    CBLAS_ORDER stor_ord = (CBLAS_ORDER) storage_order<typename MatrC::orientation >::value;

    // beta is fixed to 1: the product is accumulated into matC.
    gemm(stor_ord, transA, transB, (int)m, (int)n, (int)k, alpha,
        traits::storage(matA()),
        traits::leading_dimension(matA()),
        traits::storage(matB()),
        traits::leading_dimension(matB()),
        typename MatrC::value_type(1),
        traits::storage(matC()),
        traits::leading_dimension(matC())
    );
}
/* Forward pass for a fully connected layer with optional batch
 * normalization (CPU). */
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    /* output = input * weights^T, one GEMM over the whole batch */
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

    if(l.batch_normalize){
        if(state.train){
            /* batch statistics for this forward pass */
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

            /* exponential moving averages (momentum .95) used at inference */
            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

            /* save pre-normalization (x) and normalized (x_norm)
             * activations for the backward pass */
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            /* inference: normalize with the rolling statistics */
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    /* add the bias vector to every sample's output */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
/* Forward pass for a locally connected layer (CPU): like a convolution,
 * except every output location j has its own unshared weights, so each
 * location is computed by its own small GEMM. */
void forward_local_layer(const local_layer l, network_state state)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    /* seed each sample's output with the biases */
    for (i = 0; i < l.batch; ++i) {
        copy_cpu(l.outputs, l.biases, 1, l.output + i * l.outputs, 1);
    }
    for (i = 0; i < l.batch; ++i) {
        float *input = state.input + i * l.w * l.h * l.c;
        im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, l.col_image);
        float *output = l.output + i * l.outputs;
        for (j = 0; j < locations; ++j) {
            float *a = l.weights + j * l.size * l.size * l.c * l.n;
            float *b = l.col_image + j;
            float *c = output + j;

            /* one (filters x filter-volume) GEMV per location;
             * strides of `locations` step through the interleaved
             * column buffer and output layout */
            int m = l.n;
            int n = 1;
            int k = l.size * l.size * l.c;

            gemm(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations);
        }
    }
    activate_array(l.output, l.outputs * l.batch, l.activation);
}
// Mixed-type GEMM overload: promotes the real matrix `a` to a complex
// matrix and delegates to the complex*complex gemm with the same
// transpose flags.
matrix< complex<T> > gemm( const matrix<T> &a, const char transa, const matrix< complex<T> > &b, const char transb ) {
    matrix< complex<T> > promoted(a.nrow, a.ncol);
    // Widening copy: each real entry becomes a complex entry with a zero
    // imaginary part.
    for (int r = 0; r < a.nrow; ++r) {
        for (int c = 0; c < a.ncol; ++c) {
            promoted(r, c) = a(r, c);
        }
    }
    return gemm(promoted, transa, b, transb);
}
// Convenience overload for the general matrix-matrix product
// c = alpha*a*b + beta*c with neither operand transposed; forwards to the
// full gemm overload that takes explicit transpose tags.
void gemm(const value_type& alpha, const matrix_type_a &a, const matrix_type_b &b, const value_type &beta, matrix_type_c &c ) {
    gemm( traits::NO_TRANSPOSE, traits::NO_TRANSPOSE, alpha, a, b, beta, c ) ;
}
// Mixed-type GEMM overload: promotes the real matrix `b` to a complex
// matrix and delegates to the complex*complex gemm with the same
// transpose flags.
matrix< complex<T> > gemm(const matrix< complex<T> > &a, const char transa, const matrix<T> &b, const char transb) {
    matrix< complex<T> > promoted(b.nrow, b.ncol);
    // Widening copy: each real entry becomes a complex entry with a zero
    // imaginary part.
    for (int r = 0; r < b.nrow; ++r) {
        for (int c = 0; c < b.ncol; ++c) {
            promoted(r, c) = b(r, c);
        }
    }
    return gemm(a, transa, promoted, transb);
}
/* Backward pass for a locally connected layer (CPU): per-location weight
 * gradients plus, when requested, the error back-propagated through the
 * per-location weights and col2im. */
void backward_local_layer(local_layer l, network_state state)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = state.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, l.col_image);

        /* per-location weight gradient: rank-1 update delta_j * col_j^T */
        for(j = 0; j < locations; ++j){
            float *a = l.delta + i*l.outputs + j;
            float *b = l.col_image + j;
            float *c = l.filter_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(state.delta){
            /* per-location input gradient: weights_j^T * delta_j, written
             * back into col_image and folded into image space by col2im */
            for(j = 0; j < locations; ++j){
                float *a = l.filters + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = l.col_image + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(l.col_image, l.c, l.h, l.w,
                    l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
// In-place GEMM over whole matrices:
//   result = beta*result + alpha*op(left)*op(right).
// Wraps each operand in a dynamic view and forwards to the view-based
// overload, which does the actual work.
void gemm(Matrix& result, double beta, const Matrix& left, bool transposeLeft, double alpha, const Matrix& right, bool transposeRight) {
    // The view-based kernel requires contiguous leading dimensions.
    assert(result.isLeadingDimensionContiguous());
    assert(left.isLeadingDimensionContiguous());
    assert(right.isLeadingDimensionContiguous());

    gemm(DynamicView(result), beta,
         ConstDynamicView(left), transposeLeft,
         alpha,
         ConstDynamicView(right), transposeRight);
}
/*
 *--------------------------------------------------------------------------------------
 *       Class:  States
 *      Method:  States :: dynamics
 * Description:  Apply motion model to state and return predicted state.
 *               Velocity is rotated into the world frame, body-frame
 *               acceleration is Coriolis- and gravity-corrected, and each
 *               tracked feature's body-frame dynamics are propagated.
 *--------------------------------------------------------------------------------------
 */
States States::dynamics ( const Sensors& s )
{
    States predicted_state;
    Matx33d A;
    cv::Vec3d w;
    Matx33d Rb2w, Rw2b;

    // Body->world rotation from the current attitude; its transpose maps
    // world->body.
    Rb2w = s.quaternion.rotation();
    Rw2b = Rb2w.t();
    w =cv::Vec3d(s.angular_velocity);
    Vec3d gw(0,0,GRAVITY);

    // Skew-symmetric matrix of the body rate: A*v == w x v.
    A = Matx33d( 0, -w[2], w[1],
            w[2], 0, -w[0],
            -w[1], w[0], 0 );

    // Generalized matrix multiplication
    // Xdot = Rb2w * V: body velocity expressed in the world frame.
    gemm( Rb2w, V, 1, Mat(), 0, predicted_state.X );
    // Vdot = -w x V + a: body-frame acceleration with the rotation term.
    gemm( -A, V, 1, s.acceleration, 1, predicted_state.V );
    // Subtract gravity rotated into the body frame.
    gemm( Rw2b, gw, -1, predicted_state.V, 1, predicted_state.V);

    // Propagate each tracked feature's body-frame dynamics.
    Fiter pib=features.begin();
    for( int i=0; pib!=features.end(); ++pib,++i )
    {
        Feature fi;
        cv::Vec3d bp;
        bp = (*pib)->get_body_position();
        fi.set_body_position( cv::Vec3d(
            (-V[1] + bp[0]*V[0])*bp[2] + bp[1]*w[0] - (1 + bp[0]*bp[0])*w[2] + bp[0]*bp[1]*w[1],
            (-V[2] + bp[1]*V[0])*bp[2] - bp[0]*w[0] + (1 + bp[1]*bp[1])*w[1] - bp[0]*bp[1]*w[2],
            (-w[2] * bp[0]+w[1] *bp[1])* bp[2]+V[0] * bp[2]*bp[2]) );
        predicted_state.addFeature(fi);
    }
    // NOTE(review): V and b are members of *this; the bias b is subtracted
    // from V and then zeroed here, mutating the current state during
    // prediction -- confirm this side effect is intended.
    V-=b;
    b=cv::Vec3d(0,0,0);
    return predicted_state;
}   /* -----  end of method States::dynamics  ----- */
/* Backward pass for a deconvolutional layer (CPU). Weight gradients are
 * averaged over the batch (alpha = 1/batch). For each item, delta is
 * lowered to column form, the weight gradient is one GEMM against the
 * input, and the input gradient (if requested) is another GEMM. */
void backward_deconvolutional_layer(layer l, network_state state)
{
    float alpha = 1./l.batch;
    int out_h = deconvolutional_out_height(l);
    int out_w = deconvolutional_out_width(l);
    int size = out_h*out_w;
    int i;

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, state);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    for(i = 0; i < l.batch; ++i){
        /* weight gradient: input * (im2col of delta)^T */
        int m = l.c;               /* input channels */
        int n = l.size*l.size*l.n; /* output filter volume */
        int k = l.h*l.w;           /* input spatial locations */

        /* BUGFIX: the A operand below is an m x k matrix (one input image
         * of l.c*l.h*l.w = m*k floats, lda = k), so the per-item offset
         * must be i*m*k. The original used i*m*n, which strides by the
         * wrong amount and reads past the input for every item after the
         * first. */
        float *a = state.input + i*m*k;
        float *b = state.workspace;
        float *c = l.weight_updates;

        im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
                l.size, l.stride, 0, b);
        gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);

        if(state.delta){
            /* input gradient: weights * columns of delta */
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights;
            float *b = state.workspace;
            float *c = state.delta + i*n*m;

            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
// Out-of-place GEMM: returns alpha * op(left) * op(right) as a newly
// allocated matrix with the same precision as `left`.
Matrix gemm(const Matrix& left, bool transposeLeft, double alpha, const Matrix& right, bool transposeRight) {
    // The output shape follows the (possibly transposed) operands.
    size_t outputRows;
    if (transposeLeft) {
        outputRows = left.size()[1];
    } else {
        outputRows = left.size()[0];
    }

    size_t outputColumns;
    if (transposeRight) {
        outputColumns = right.size()[0];
    } else {
        outputColumns = right.size()[1];
    }

    Matrix product({outputRows, outputColumns}, left.precision());

    // beta = 0 discards the uninitialized contents of the fresh matrix.
    gemm(product, 0.0, left, transposeLeft, alpha, right, transposeRight);

    return product;
}
/* Backward pass for a (possibly grouped) convolutional layer (CPU).
 * For every (batch item, group) pair: accumulate the group's weight
 * gradient with one GEMM on the im2col'd input, and optionally propagate
 * the error back through the transposed convolution into net.delta. */
void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;               /* filters per group */
    int n = l.size*l.size*l.c/l.groups; /* filter volume per group */
    int k = l.out_w*l.out_h;            /* output locations */

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;

            float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w;

            im2col_cpu(im, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            /* weight_updates += delta * col(input)^T */
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

            if(net.delta){
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;

                /* workspace = weights^T * delta, folded back by col2im */
                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

                col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                    l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
            }
        }
    }
}
/* Limbo Math->gemm system shim: validates the transpose flags, problem
 * dimensions and array bounds taken from the call frame `fp`, then invokes
 * the native gemm. A nil array handle (H) is passed through as NULL. */
void
Math_gemm(void *fp)
{
    F_Math_gemm *f = fp;
    int nrowa, ncola, nrowb, ncolb, mn, ld, m, n;
    double *adata = 0, *bdata = 0, *cdata;
    int nota = f->transa=='N';
    int notb = f->transb=='N';

    /* effective shapes of op(A) and op(B) */
    if(nota){
        nrowa = f->m;
        ncola = f->k;
    }else{
        nrowa = f->k;
        ncola = f->m;
    }
    if(notb){
        nrowb = f->k;
        ncolb = f->n;
    }else{
        nrowb = f->n;
        ncolb = f->k;
    }
    /* flags must be one of N/T/C and all dimensions non-negative */
    if( (!nota && f->transa!='C' && f->transa!='T') ||
        (!notb && f->transb!='C' && f->transb!='T') ||
        (f->m < 0 || f->n < 0 || f->k < 0) ){
        error(exMathia);
    }
    if(f->a != H){
        mn = f->a->len;
        adata = (double*)(f->a->data);
        ld = f->lda;
        /* the leading dimension must cover one column, and the final
         * column must still fit inside the array */
        if(ld<nrowa || ld*(ncola-1)>mn)
            error(exBounds);
    }
    if(f->b != H){
        mn = f->b->len;
        ld = f->ldb;
        bdata = (double*)(f->b->data);
        if(ld<nrowb || ld*(ncolb-1)>mn)
            error(exBounds);
    }
    m = f->m;
    n = f->n;
    /* C is mandatory: bounds-check it the same way */
    mn = f->c->len;
    cdata = (double*)(f->c->data);
    ld = f->ldc;
    if(ld<m || ld*(n-1)>mn)
        error(exBounds);
    gemm(f->transa, f->transb, f->m, f->n, f->k, f->alpha,
        adata, f->lda, bdata, f->ldb, f->beta, cdata, f->ldc);
}
/* Sample cross-covariance: SigmaXY = (X - muX)'(Y - muY) / (N - 1),
 * computed as X'Y/(N-1) followed by a rank-1 mean correction, so the
 * centered matrices never need to be materialized. */
void bi::cross(const M1 X, const M2 Y, const V1 muX, const V2 muY,
    M3 SigmaXY) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == muX.size());
  BI_ASSERT(Y.size2() == muY.size());
  BI_ASSERT(X.size1() == Y.size1());
  BI_ASSERT(SigmaXY.size1() == muX.size() && SigmaXY.size2() == muY.size());

  const int N = X.size1();

  /* SigmaXY = X'Y / (N-1) */
  gemm(1.0/(N - 1.0), X, Y, 0.0, SigmaXY, 'T', 'N');
  /* SigmaXY -= N/(N-1) * muX muY'  (removes the mean contribution) */
  ger(-N/(N - 1.0), muX, muY, SigmaXY);
}
/* Forward pass for a fully connected layer (CPU): seed each sample's
 * output row with the bias vector, add input * weights^T on top with a
 * single GEMM over the whole batch, then apply the activation in place. */
void forward_connected_layer(connected_layer l, network_state state)
{
    int row;

    /* Every output row starts as a copy of the biases. */
    for(row = 0; row < l.batch; ++row){
        copy_cpu(l.outputs, l.biases, 1, l.output + row*l.outputs, 1);
    }

    /* output += input * weights^T
     * (batch x inputs) * (outputs x inputs)^T -> batch x outputs */
    gemm(0, 1,
         l.batch, l.outputs, l.inputs,
         1, state.input, l.inputs,
         l.weights, l.inputs,
         1, l.output, l.outputs);

    activate_array(l.output, l.outputs*l.batch, l.activation);
}
// Computes the predicted (a priori) state x'(k) and error covariance P'(k)
// from the previous posterior, optionally applying a control input.
const Mat& KalmanFilter::predict(const Mat& control)
{
    // update the state: x'(k) = A*x(k)
    statePre = transitionMatrix*statePost;

    if( control.data )
        // x'(k) = x'(k) + B*u(k)  (only when a control input is supplied)
        statePre += controlMatrix*control;

    // update error covariance matrices: temp1 = A*P(k)
    temp1 = transitionMatrix*errorCovPost;

    // P'(k) = temp1*At + Q
    gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);

    return statePre;
}
// Runs the AArch32 SGEMM assembly kernel over the scheduled window. Each
// thread claims its own aligned slice of the shared workspace and executes
// an interleaved GEMM over its rows of the output, iterating only across
// the batch dimension.
void NEGEMMAArch32Kernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

    // Leading dimensions in elements (tensor strides are stored in bytes).
    const int lda = _input0->info()->strides_in_bytes().y() / sizeof(float);
    const int ldb = _input1->info()->strides_in_bytes().y() / sizeof(float);
    const int ldc = _output->info()->strides_in_bytes().y() / sizeof(float);

    const auto in1_ptr = reinterpret_cast<const float *>(_input1->buffer());

    // M is clipped to this thread's share of the output rows; N and K span
    // the full problem.
    const int M = std::min(_output->info()->tensor_shape().y(), static_cast<size_t>(window.y().end())) - window.y().start();
    const int N = _output->info()->tensor_shape().x();
    const int K = _input0->info()->tensor_shape().x();

    // Only iterate over batches
    Window win(window);
    win.set(0, Window::Dimension(0, 1, 1));
    win.set(1, Window::Dimension(0, 1, 1));

    Iterator in0(_input0, window);
    Iterator out(_output, window);

    GemmInterleaved<sgemm_8x6, float, float> gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1);

    // Carve a per-thread region out of the shared workspace and align it.
    constexpr size_t alignment = 4096;
    const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
    void *workspace = _workspace->buffer() + offset;
    size_t workspace_size = _workspace->info()->total_size();

    if(support::cpp11::align(alignment, gemm.get_working_size(), workspace, workspace_size) == nullptr)
    {
        ARM_COMPUTE_ERROR("Not enough space to align buffer!");
    }

    execute_window_loop(win, [&](const Coordinates & id)
    {
        gemm.execute(reinterpret_cast<const float *>(in0.ptr()), lda, reinterpret_cast<const float *>(in1_ptr), ldb, reinterpret_cast<float *>(out.ptr()), ldc, _alpha, _beta, workspace);
    },
    in0, out);
}
/// Scaled matrix addition: A = alpha*A + beta*B, implemented via gemm with
/// an identity right-hand factor (alpha*A + beta*I*B).
/// @param A     in/out matrix, overwritten with the result
/// @param alpha scale applied to A
/// @param beta  scale applied to B
/// @param B     matrix to add; must have the same shape as A
/// @return false when the shapes of A and B differ, true on success
bool add(RealMatrix &A, const REAL alpha, const REAL beta, const RealMatrix &B) {
    // Validate shapes before doing any work. (The original also compared
    // the addresses of the reference parameters against NULL; a reference
    // can never legally be null, so that check was dead code and has been
    // removed.)
    if (A.row != B.row || A.col != B.col) {
        return false;
    }
    // Identity factor so that gemm computes alpha*A + beta*I*B.
    // Stack allocation replaces the original new/goto/delete sequence and
    // is exception-safe.
    RealSquare identity(B.col);
    identity.unitise();
    gemm(A, alpha, beta, identity, B);
    return true;
}
//------------------------------------------------------------------------------ // cv::subspace::project //------------------------------------------------------------------------------ Mat cv::subspace::project(InputArray _W, InputArray _mean, InputArray _src) { // get data matrices Mat W = _W.getMat(); Mat mean = _mean.getMat(); Mat src = _src.getMat(); // create temporary matrices Mat X, Y; // copy data & make sure we are using the correct type src.convertTo(X, W.type()); // get number of samples and dimension int n = X.rows; int d = X.cols; // center the data if correct aligned sample mean is given if(mean.total() == d) subtract(X, repeat(mean.reshape(1,1), n, 1), X); // finally calculate projection as Y = (X-mean)*W gemm(X, W, 1.0, Mat(), 0.0, Y); return Y; }
/* Generates <sample> from multivariate normal distribution, where <mean> -
   is an average row vector, <cov> - symmetric covariation matrix */
void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
{
    Mat mean = _mean.getMat(), cov = _cov.getMat();
    int dim = (int)mean.total();

    _samples.create(nsamples, dim, CV_32F);
    Mat samples = _samples.getMat();
    // BUGFIX: a multivariate *normal* draw needs standard normal inputs;
    // randu would fill with uniform values and produce a biased,
    // non-Gaussian sample. Use randn with mean 0 and unit std deviation.
    randn(samples, 0., 1.);

    // Color the white noise with a Cholesky factor of the covariance.
    Mat utmat;
    Cholesky(cov, utmat);

    // gemm computes sample*utmat + op(mean); `sample` is a 1 x dim row, so
    // a column-vector mean (cols == 1) must be transposed via GEMM_3_T,
    // while a row-vector mean is added as-is.
    // BUGFIX: the original ternary was inverted (0 for a column mean,
    // GEMM_3_T for a row mean), which mismatches dimensions either way.
    int flags = mean.cols == 1 ? GEMM_3_T : 0;

    for( int i = 0; i < nsamples; i++ )
    {
        Mat sample = samples.row(i);
        gemm(sample, utmat, 1, mean, 1, sample, flags);
    }
}