/* Backward pass for a convolutional layer (CPU).
 * Applies the activation gradient in place, accumulates bias and filter
 * gradients, and (when state.delta is non-NULL) propagates the error to
 * the previous layer via the transposed convolution (col2im of W^T*delta). */
void backward_convolutional_layer(convolutional_layer l, network_state state)
{
    int i;
    int m = l.n;                /* rows: number of filters */
    int n = l.size*l.size*l.c;  /* cols: filter volume */
    int k = convolutional_out_height(l)* convolutional_out_width(l); /* output locations */

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
    /* bias gradient: sum delta over spatial locations and batch */
    backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);

    for(i = 0; i < l.batch; ++i){
        float *a = l.delta + i*m*k;
        float *b = l.col_image;
        float *c = l.filter_updates;

        float *im = state.input+i*l.c*l.h*l.w;

        /* lower the input to column form so the filter gradient is one GEMM */
        im2col_cpu(im, l.c, l.h, l.w,
                l.size, l.stride, l.pad, b);
        /* filter_updates += delta * col_image^T */
        gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

        if(state.delta){
            a = l.filters;
            b = l.delta + i*m*k;
            c = l.col_image;

            /* col_image = filters^T * delta, then fold back to image space */
            gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

            col2im_cpu(l.col_image, l.c, l.h, l.w,
                    l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
/* Backward pass for a fully connected layer (CPU).
 * Order: activation gradient -> bias gradient -> weight gradient ->
 * gradient w.r.t. the layer input. */
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    /* weight gradient: weight_updates += delta^T * input
     * (the whole batch is reduced in this single GEMM) */
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    /* error for the previous layer: state.delta += delta * weights */
    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
/* Backward pass for a fully connected layer with optional batch
 * normalization (CPU). Order: activation gradient -> bias gradient ->
 * batch-norm backward -> weight gradient -> input gradient. */
void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    if(l.batch_normalize){
        /* gradient of the BN scale, then backprop through the
         * normalization itself using the saved x / x_norm / statistics */
        backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);
        scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);
        mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
        variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
        normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
    }

    /* weight gradient: weight_updates += delta^T * input */
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    /* error for the previous layer: state.delta += delta * weights */
    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
/**
 * @brief Tikhonov regulated CGNR least squares solution of ||Ax - b||_2 + l * ||x||_2
 *
 * @param A      Matrix A
 * @param b      Vector b
 * @param maxit  Maximum number of CG iterations
 * @param conv   Convergence of residuals
 * @param lambda Tikhonov weight
 * @return       Vector x
 */
template<class T> static Matrix<T>
MCGLS (const Matrix<T>& A, const Matrix<T>& b, const size_t& maxit = 100,
        const double& conv = 1.0e-6, const double& lambda = 0.0) {

    size_t ah = size(A, 0);
    size_t bh = size(b, 0);
    size_t bw = size(b, 1);

    assert (bw == 1);  // Column vector x
    assert (ah == bh); // Check inner dimensions of A'*x.

    ticks tic = getticks();

    // CGNR iterates on the normal equations: start from p = r = x = A' * b.
    Matrix<T> p = gemm (A, b, 'C');
    Matrix<T> r = p;
    Matrix<T> x = p;
    Matrix<T> q;
    T ts;
    float rn = 0.0;
    float xn = pow(creal(norm(p)), 2); // ||A'b||^2, used to scale residuals
    std::vector<double> res;

    for (size_t i = 0; i < maxit; i++) {

        rn = pow(creal(norm(r)), 2);
        res.push_back(rn/xn);

        // Stop on convergence or numerical breakdown.
        if (std::isnan(res.at(i)) || res.at(i) <= conv)
            break;

        if (i % 5 == 0 && i > 0)
            printf ("\n");
        // BUGFIX: %zu is the portable conversion for size_t; the original
        // %lu is undefined behaviour where size_t != unsigned long (Win64).
        printf (" %03zu %.7f", i, res.at(i));

        // q = (A'A + lambda*I) p
        q   = gemm (A, p);
        q   = gemm (A, q, 'C');
        q  += lambda * p;

        // Step length, then update solution and residual.
        ts  = (rn / (p.dotc(q)));
        x  += (p * ts);
        r  -= (q * ts);

        // Fletcher-Reeves style update of the search direction.
        p  *= pow(creal(norm(r)), 2)/rn;
        p  += r;

    }

    printf ("\n MCGLS time: %.4f s\n",
            elapsed(getticks(), tic) / Toolbox::Instance()->ClockRate());

    return x;

}
// Fuses a new measurement z(k) into the predicted state and returns the
// corrected (a posteriori) estimate x(k).
const Mat& KalmanFilter::correct(const Mat& measurement)
{
    // temp2 = H*P'(k)
    temp2 = measurementMatrix * errorCovPre;

    // temp3 = temp2*Ht + R  (the innovation covariance S)
    gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);

    // temp4 = inv(temp3)*temp2 = Kt(k)
    // SVD-based solve tolerates a singular / ill-conditioned S.
    solve(temp3, temp2, temp4, DECOMP_SVD);

    // K(k)
    gain = temp4.t();

    // temp5 = z(k) - H*x'(k)  (the innovation / measurement residual)
    temp5 = measurement - measurementMatrix*statePre;

    // x(k) = x'(k) + K(k)*temp5
    statePost = statePre + gain*temp5;

    // P(k) = P'(k) - K(k)*temp2
    errorCovPost = errorCovPre - gain*temp2;

    return statePost;
}
/* Forward pass for a deconvolutional (transposed convolution) layer (CPU).
 * For each batch item: weights^T * input gives the output in column form,
 * which col2im scatter-adds into the (larger) output image; then either
 * batch normalization or a plain bias, followed by the activation. */
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int i;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size = out_h*out_w;

    int m = l.size*l.size*l.n; /* rows: output filter volume */
    int n = l.h*l.w;           /* cols: input spatial locations */
    int k = l.c;               /* inner dim: input channels */

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(i = 0; i < l.batch; ++i){
        float *a = l.weights;
        float *b = state.input + i*l.c*l.h*l.w;
        float *c = state.workspace;

        /* workspace = weights^T * input (column form of the output) */
        gemm(1,0,m,n,k,1,a,m,b,n,0,c,n);

        /* fold columns back to image space; deconvolution uses pad = 0 here */
        col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, l.output+i*l.n*size);
    }
    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
// Reconstructs samples from their subspace projection: X = src * W^T + mean.
// src is n x W.cols (one projection per row); the result is n x W.rows,
// i.e. back in the original image space.
Mat Eigenfaces::reconstruct(const Mat& src) {
    Mat W = _eigenvectors;
    Mat mean = _mean;
    // get number of samples and dimension
    int n = src.rows;
    int d = src.cols;
    // make sure the data has the correct shape
    if (W.cols != d) {
        string error_message = format("Wrong shapes for given matrices. Was size(src) = (%d,%d), size(W) = (%d,%d).", src.rows, src.cols, W.rows, W.cols);
        CV_Error(CV_StsBadArg, error_message);
    }
    // make sure mean is correct if not empty
    // (the reconstruction lives in the original, W.rows-dimensional space)
    if (!mean.empty() && (mean.total() != (size_t)W.rows)) {
        // BUGFIX: the expected mean length is W.rows, not W.cols as the
        // original message claimed; also cast size_t -> int for %d.
        string error_message = format("Wrong mean shape for the given eigenvector matrix. Expected %d, but was %d.", W.rows, (int)mean.total());
        CV_Error(CV_StsBadArg, error_message);
    }
    // initalize temporary matrices
    Mat X, Y;
    // copy data & make sure we are using the correct type
    src.convertTo(Y, W.type());
    // calculate the reconstruction: X = Y * W^T
    gemm(Y, W, 1.0, Mat(), 0.0, X, GEMM_2_T);
    // safe to do because of above assertion
    if (!mean.empty()) {
        for (int i = 0; i<n; i++) {
            Mat r_i = X.row(i);
            add(r_i, mean.reshape(1, 1), r_i);
        }
    }
    return X;
}
/* Forward pass for a convolutional layer (CPU).
 * Seeds the output with biases, then for each batch item lowers the input
 * to column form (im2col) and computes the convolution as one GEMM,
 * finally applying the activation in place.
 * NOTE: state.input is advanced locally; `state` is passed by value, so
 * the caller's copy is untouched. */
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    bias_output(l.output, l.biases, l.batch, l.n, out_h*out_w);

    int m = l.n;               /* rows: number of filters */
    int k = l.size*l.size*l.c; /* inner dim: filter volume */
    int n = out_h*out_w;       /* cols: output locations */
    float *a = l.filters;
    float *b = l.col_image;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
        /* output (already holding the bias) += filters * col_image */
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;                      /* next image's output */
        state.input += l.c*l.h*l.w;    /* next image's input */
    }
    activate_array(l.output, m*n*l.batch, l.activation);
}
/* Forward pass for a convolutional layer with an optional XNOR-style
 * binary-filter path and optional batch normalization (CPU). */
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    /*
    if(l.binary){
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
        binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
        swap_binary(&l);
    }
    */

    if(l.binary){
        /* binary path: filters are packed chars multiplied by gemm_bin,
         * then rescaled per filter before bias and activation */
        int m = l.n;               /* filters */
        int k = l.size*l.size*l.c; /* filter volume */
        int n = out_h*out_w;       /* output locations */

        char *a = l.cfilters;
        float *b = state.workspace;
        float *c = l.output;

        for(i = 0; i < l.batch; ++i){
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
            gemm_bin(m,n,k,1,a,k,b,n,c,n);
            c += n*m;
            state.input += l.c*l.h*l.w;
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    /* standard float path: one im2col + GEMM per batch item */
    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    float *a = l.filters;
    float *b = state.workspace;
    float *c = l.output;

    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    }
    /* biases are added after (and independently of) batch normalization */
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, m*n*l.batch, l.activation);
}
// Projects samples into the eigenspace: Y = (src - mean) * W.
// src is n x d (one sample per row), W is d x num_components, and the
// result is n x num_components.
Mat Eigenfaces::project(const Mat& src) {
    Mat W = _eigenvectors;
    Mat mean = _mean;
    // get number of samples and dimension
    int n = src.rows;
    int d = src.cols;
    // make sure the data has the correct shape
    if (W.rows != d) {
        string error_message = format("Wrong shapes for given matrices. Was size(src) = (%d,%d), size(W) = (%d,%d).", src.rows, src.cols, W.rows, W.cols);
        CV_Error(CV_StsBadArg, error_message);
    }
    // make sure mean is correct if not empty
    // (cast avoids the signed/unsigned comparison between size_t and int)
    if (!mean.empty() && (mean.total() != (size_t)d)) {
        // BUGFIX: mean.total() is size_t; passing it to %d through varargs
        // is undefined behaviour on LP64/LLP64 -- cast to int explicitly.
        string error_message = format("Wrong mean shape for the given data matrix. Expected %d, but was %d.", d, (int)mean.total());
        CV_Error(CV_StsBadArg, error_message);
    }
    // create temporary matrices
    Mat X, Y;
    // make sure you operate on correct type
    src.convertTo(X, W.type());
    // safe to do, because of above assertion
    if (!mean.empty()) {
        // center each sample by subtracting the mean row vector
        for (int i = 0; i<n; i++) {
            Mat r_i = X.row(i);
            subtract(r_i, mean.reshape(1, 1), r_i);
        }
    }
    // finally calculate projection as Y = (X-mean)*W
    gemm(X, W, 1.0, Mat(), 0.0, Y);
    return Y;
}
// Dense GEMM dispatcher (dense-tag overload): accumulates
// matC += alpha * matA * matB through CBLAS. The transpose flags are
// derived from each operand's storage orientation relative to matC, so
// row- and column-major operands may be mixed without copying.
void gemm(
    matrix_expression<MatrA> const &matA,
    matrix_expression<MatrB> const &matB,
    matrix_expression<MatrC>& matC,
    typename MatrC::value_type alpha,
    boost::mpl::true_
) {
    SIZE_CHECK(matA().size1() == matC().size1());
    SIZE_CHECK(matB().size2() == matC().size2());
    SIZE_CHECK(matA().size2()== matB().size1());

    // An operand stored with the opposite orientation to matC is handed to
    // CBLAS as "transposed" instead of being physically reordered.
    CBLAS_TRANSPOSE transA = traits::same_orientation(matA,matC)?CblasNoTrans:CblasTrans;
    CBLAS_TRANSPOSE transB = traits::same_orientation(matB,matC)?CblasNoTrans:CblasTrans;
    std::size_t m = matC().size1();
    std::size_t n = matC().size2();
    std::size_t k = matA().size2();
    CBLAS_ORDER stor_ord = (CBLAS_ORDER) storage_order<typename MatrC::orientation >::value;

    // beta is fixed to 1: the product is accumulated into matC.
    gemm(stor_ord, transA, transB, (int)m, (int)n, (int)k, alpha,
        traits::storage(matA()),
        traits::leading_dimension(matA()),
        traits::storage(matB()),
        traits::leading_dimension(matB()),
        typename MatrC::value_type(1),
        traits::storage(matC()),
        traits::leading_dimension(matC())
    );
}
/* Forward pass for a fully connected layer with optional batch
 * normalization (CPU). */
void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    /* output = input * weights^T, one GEMM over the whole batch */
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

    if(l.batch_normalize){
        if(state.train){
            /* batch statistics for this forward pass */
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

            /* exponential moving averages (momentum .95) used at inference */
            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

            /* save pre-normalization (x) and normalized (x_norm)
             * activations for the backward pass */
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            /* inference: normalize with the rolling statistics */
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    /* add the bias vector to every sample's output */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
/* Forward pass for a locally connected layer (CPU): like a convolution,
 * except every output location j has its own unshared weights, so each
 * location is computed by its own small GEMM. */
void forward_local_layer(const local_layer l, network_state state)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    /* seed each sample's output with the biases */
    for (i = 0; i < l.batch; ++i) {
        copy_cpu(l.outputs, l.biases, 1, l.output + i * l.outputs, 1);
    }
    for (i = 0; i < l.batch; ++i) {
        float *input = state.input + i * l.w * l.h * l.c;
        im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, l.col_image);
        float *output = l.output + i * l.outputs;
        for (j = 0; j < locations; ++j) {
            float *a = l.weights + j * l.size * l.size * l.c * l.n;
            float *b = l.col_image + j;
            float *c = output + j;

            /* one (filters x filter-volume) GEMV per location;
             * strides of `locations` step through the interleaved
             * column buffer and output layout */
            int m = l.n;
            int n = 1;
            int k = l.size * l.size * l.c;

            gemm(0, 0, m, n, k, 1, a, k, b, locations, 1, c, locations);
        }
    }
    activate_array(l.output, l.outputs * l.batch, l.activation);
}
// Mixed-type GEMM overload: promotes the real matrix `a` to a complex
// matrix and delegates to the complex*complex gemm with the same
// transpose flags.
matrix< complex<T> > gemm( const matrix<T> &a, const char transa, const matrix< complex<T> > &b, const char transb ) {
    matrix< complex<T> > promoted(a.nrow, a.ncol);
    // Widening copy: each real entry becomes a complex entry with a zero
    // imaginary part.
    for (int r = 0; r < a.nrow; ++r) {
        for (int c = 0; c < a.ncol; ++c) {
            promoted(r, c) = a(r, c);
        }
    }
    return gemm(promoted, transa, b, transb);
}
// Convenience overload for the general matrix-matrix product
// c = alpha*a*b + beta*c with neither operand transposed; forwards to the
// full gemm overload that takes explicit transpose tags.
void gemm(const value_type& alpha, const matrix_type_a &a, const matrix_type_b &b, const value_type &beta, matrix_type_c &c ) {
    gemm( traits::NO_TRANSPOSE, traits::NO_TRANSPOSE, alpha, a, b, beta, c ) ;
}
// Mixed-type GEMM overload: promotes the real matrix `b` to a complex
// matrix and delegates to the complex*complex gemm with the same
// transpose flags.
matrix< complex<T> > gemm(const matrix< complex<T> > &a, const char transa, const matrix<T> &b, const char transb) {
    matrix< complex<T> > promoted(b.nrow, b.ncol);
    // Widening copy: each real entry becomes a complex entry with a zero
    // imaginary part.
    for (int r = 0; r < b.nrow; ++r) {
        for (int c = 0; c < b.ncol; ++c) {
            promoted(r, c) = b(r, c);
        }
    }
    return gemm(a, transa, promoted, transb);
}
/* Backward pass for a locally connected layer (CPU): per-location weight
 * gradients plus, when requested, the error back-propagated through the
 * per-location weights and col2im. */
void backward_local_layer(local_layer l, network_state state)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    /* bias gradient: accumulate delta over the batch */
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = state.input + i*l.w*l.h*l.c;
        im2col_cpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, l.col_image);

        /* per-location weight gradient: rank-1 update delta_j * col_j^T */
        for(j = 0; j < locations; ++j){
            float *a = l.delta + i*l.outputs + j;
            float *b = l.col_image + j;
            float *c = l.filter_updates + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(state.delta){
            /* per-location input gradient: weights_j^T * delta_j, written
             * back into col_image and folded into image space by col2im */
            for(j = 0; j < locations; ++j){
                float *a = l.filters + j*l.size*l.size*l.c*l.n;
                float *b = l.delta + i*l.outputs + j;
                float *c = l.col_image + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_cpu(l.col_image, l.c, l.h, l.w,
                    l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
        }
    }
}
// In-place GEMM over whole matrices:
//   result = beta*result + alpha*op(left)*op(right).
// Wraps each operand in a dynamic view and forwards to the view-based
// overload, which does the actual work.
void gemm(Matrix& result, double beta, const Matrix& left, bool transposeLeft, double alpha, const Matrix& right, bool transposeRight) {
    // The view-based kernel requires contiguous leading dimensions.
    assert(result.isLeadingDimensionContiguous());
    assert(left.isLeadingDimensionContiguous());
    assert(right.isLeadingDimensionContiguous());

    gemm(DynamicView(result), beta,
         ConstDynamicView(left), transposeLeft,
         alpha,
         ConstDynamicView(right), transposeRight);
}
/*
 *--------------------------------------------------------------------------------------
 *       Class:  States
 *      Method:  States :: dynamics
 * Description:  Apply motion model to state and return predicted state.
 *               Velocity is rotated into the world frame, body-frame
 *               acceleration is Coriolis- and gravity-corrected, and each
 *               tracked feature's body-frame dynamics are propagated.
 *--------------------------------------------------------------------------------------
 */
States States::dynamics ( const Sensors& s )
{
    States predicted_state;
    Matx33d A;
    cv::Vec3d w;
    Matx33d Rb2w, Rw2b;

    // Body->world rotation from the current attitude; its transpose maps
    // world->body.
    Rb2w = s.quaternion.rotation();
    Rw2b = Rb2w.t();
    w =cv::Vec3d(s.angular_velocity);
    Vec3d gw(0,0,GRAVITY);

    // Skew-symmetric matrix of the body rate: A*v == w x v.
    A = Matx33d( 0, -w[2], w[1],
            w[2], 0, -w[0],
            -w[1], w[0], 0 );

    // Generalized matrix multiplication
    // Xdot = Rb2w * V: body velocity expressed in the world frame.
    gemm( Rb2w, V, 1, Mat(), 0, predicted_state.X );
    // Vdot = -w x V + a: body-frame acceleration with the rotation term.
    gemm( -A, V, 1, s.acceleration, 1, predicted_state.V );
    // Subtract gravity rotated into the body frame.
    gemm( Rw2b, gw, -1, predicted_state.V, 1, predicted_state.V);

    // Propagate each tracked feature's body-frame dynamics.
    Fiter pib=features.begin();
    for( int i=0; pib!=features.end(); ++pib,++i )
    {
        Feature fi;
        cv::Vec3d bp;
        bp = (*pib)->get_body_position();
        fi.set_body_position( cv::Vec3d(
            (-V[1] + bp[0]*V[0])*bp[2] + bp[1]*w[0] - (1 + bp[0]*bp[0])*w[2] + bp[0]*bp[1]*w[1],
            (-V[2] + bp[1]*V[0])*bp[2] - bp[0]*w[0] + (1 + bp[1]*bp[1])*w[1] - bp[0]*bp[1]*w[2],
            (-w[2] * bp[0]+w[1] *bp[1])* bp[2]+V[0] * bp[2]*bp[2]) );
        predicted_state.addFeature(fi);
    }
    // NOTE(review): V and b are members of *this; the bias b is subtracted
    // from V and then zeroed here, mutating the current state during
    // prediction -- confirm this side effect is intended.
    V-=b;
    b=cv::Vec3d(0,0,0);
    return predicted_state;
}   /* -----  end of method States::dynamics  ----- */
/* Backward pass for a deconvolutional layer (CPU). Weight gradients are
 * averaged over the batch (alpha = 1/batch). For each item, delta is
 * lowered to column form, the weight gradient is one GEMM against the
 * input, and the input gradient (if requested) is another GEMM. */
void backward_deconvolutional_layer(layer l, network_state state)
{
    float alpha = 1./l.batch;
    int out_h = deconvolutional_out_height(l);
    int out_w = deconvolutional_out_width(l);
    int size = out_h*out_w;
    int i;

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, state);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
    }

    for(i = 0; i < l.batch; ++i){
        /* weight gradient: input * (im2col of delta)^T */
        int m = l.c;               /* input channels */
        int n = l.size*l.size*l.n; /* output filter volume */
        int k = l.h*l.w;           /* input spatial locations */

        /* BUGFIX: the A operand below is an m x k matrix (one input image
         * of l.c*l.h*l.w = m*k floats, lda = k), so the per-item offset
         * must be i*m*k. The original used i*m*n, which strides by the
         * wrong amount and reads past the input for every item after the
         * first. */
        float *a = state.input + i*m*k;
        float *b = state.workspace;
        float *c = l.weight_updates;

        im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w,
                l.size, l.stride, 0, b);
        gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n);

        if(state.delta){
            /* input gradient: weights * columns of delta */
            int m = l.c;
            int n = l.h*l.w;
            int k = l.size*l.size*l.n;

            float *a = l.weights;
            float *b = state.workspace;
            float *c = state.delta + i*n*m;

            gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        }
    }
}
// Out-of-place GEMM: returns alpha * op(left) * op(right) as a newly
// allocated matrix with the same precision as `left`.
Matrix gemm(const Matrix& left, bool transposeLeft, double alpha, const Matrix& right, bool transposeRight) {
    // The output shape follows the (possibly transposed) operands.
    size_t outputRows;
    if (transposeLeft) {
        outputRows = left.size()[1];
    } else {
        outputRows = left.size()[0];
    }

    size_t outputColumns;
    if (transposeRight) {
        outputColumns = right.size()[0];
    } else {
        outputColumns = right.size()[1];
    }

    Matrix product({outputRows, outputColumns}, left.precision());

    // beta = 0 discards the uninitialized contents of the fresh matrix.
    gemm(product, 0.0, left, transposeLeft, alpha, right, transposeRight);

    return product;
}
/* Backward pass for a (possibly grouped) convolutional layer (CPU).
 * For every (batch item, group) pair: accumulate the group's weight
 * gradient with one GEMM on the im2col'd input, and optionally propagate
 * the error back through the transposed convolution into net.delta. */
void backward_convolutional_layer(convolutional_layer l, network net)
{
    int i, j;
    int m = l.n/l.groups;               /* filters per group */
    int n = l.size*l.size*l.c/l.groups; /* filter volume per group */
    int k = l.out_w*l.out_h;            /* output locations */

    /* delta *= f'(output): activation gradient, in place */
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
    }

    for(i = 0; i < l.batch; ++i){
        for(j = 0; j < l.groups; ++j){
            float *a = l.delta + (i*l.groups + j)*m*k;
            float *b = net.workspace;
            float *c = l.weight_updates + j*l.nweights/l.groups;

            float *im = net.input+(i*l.groups + j)*l.c/l.groups*l.h*l.w;

            im2col_cpu(im, l.c/l.groups, l.h, l.w,
                    l.size, l.stride, l.pad, b);
            /* weight_updates += delta * col(input)^T */
            gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);

            if(net.delta){
                a = l.weights + j*l.nweights/l.groups;
                b = l.delta + (i*l.groups + j)*m*k;
                c = net.workspace;

                /* workspace = weights^T * delta, folded back by col2im */
                gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);

                col2im_cpu(net.workspace, l.c/l.groups, l.h, l.w, l.size, l.stride,
                    l.pad, net.delta + (i*l.groups + j)*l.c/l.groups*l.h*l.w);
            }
        }
    }
}
/* Limbo Math->gemm system shim: validates the transpose flags, problem
 * dimensions and array bounds taken from the call frame `fp`, then invokes
 * the native gemm. A nil array handle (H) is passed through as NULL. */
void
Math_gemm(void *fp)
{
    F_Math_gemm *f = fp;
    int nrowa, ncola, nrowb, ncolb, mn, ld, m, n;
    double *adata = 0, *bdata = 0, *cdata;
    int nota = f->transa=='N';
    int notb = f->transb=='N';

    /* effective shapes of op(A) and op(B) */
    if(nota){
        nrowa = f->m;
        ncola = f->k;
    }else{
        nrowa = f->k;
        ncola = f->m;
    }
    if(notb){
        nrowb = f->k;
        ncolb = f->n;
    }else{
        nrowb = f->n;
        ncolb = f->k;
    }
    /* flags must be one of N/T/C and all dimensions non-negative */
    if( (!nota && f->transa!='C' && f->transa!='T') ||
        (!notb && f->transb!='C' && f->transb!='T') ||
        (f->m < 0 || f->n < 0 || f->k < 0) ){
        error(exMathia);
    }
    if(f->a != H){
        mn = f->a->len;
        adata = (double*)(f->a->data);
        ld = f->lda;
        /* the leading dimension must cover one column, and the final
         * column must still fit inside the array */
        if(ld<nrowa || ld*(ncola-1)>mn)
            error(exBounds);
    }
    if(f->b != H){
        mn = f->b->len;
        ld = f->ldb;
        bdata = (double*)(f->b->data);
        if(ld<nrowb || ld*(ncolb-1)>mn)
            error(exBounds);
    }
    m = f->m;
    n = f->n;
    /* C is mandatory: bounds-check it the same way */
    mn = f->c->len;
    cdata = (double*)(f->c->data);
    ld = f->ldc;
    if(ld<m || ld*(n-1)>mn)
        error(exBounds);
    gemm(f->transa, f->transb, f->m, f->n, f->k, f->alpha,
        adata, f->lda, bdata, f->ldb, f->beta, cdata, f->ldc);
}
/* Sample cross-covariance: SigmaXY = (X - muX)'(Y - muY) / (N - 1),
 * computed as X'Y/(N-1) followed by a rank-1 mean correction, so the
 * centered matrices never need to be materialized. */
void bi::cross(const M1 X, const M2 Y, const V1 muX, const V2 muY,
    M3 SigmaXY) {
  /* pre-conditions */
  BI_ASSERT(X.size2() == muX.size());
  BI_ASSERT(Y.size2() == muY.size());
  BI_ASSERT(X.size1() == Y.size1());
  BI_ASSERT(SigmaXY.size1() == muX.size() && SigmaXY.size2() == muY.size());

  const int N = X.size1();

  /* SigmaXY = X'Y / (N-1) */
  gemm(1.0/(N - 1.0), X, Y, 0.0, SigmaXY, 'T', 'N');
  /* SigmaXY -= N/(N-1) * muX muY'  (removes the mean contribution) */
  ger(-N/(N - 1.0), muX, muY, SigmaXY);
}
/* Forward pass for a fully connected layer (CPU): seed each sample's
 * output row with the bias vector, add input * weights^T on top with a
 * single GEMM over the whole batch, then apply the activation in place. */
void forward_connected_layer(connected_layer l, network_state state)
{
    int row;

    /* Every output row starts as a copy of the biases. */
    for(row = 0; row < l.batch; ++row){
        copy_cpu(l.outputs, l.biases, 1, l.output + row*l.outputs, 1);
    }

    /* output += input * weights^T
     * (batch x inputs) * (outputs x inputs)^T -> batch x outputs */
    gemm(0, 1,
         l.batch, l.outputs, l.inputs,
         1, state.input, l.inputs,
         l.weights, l.inputs,
         1, l.output, l.outputs);

    activate_array(l.output, l.outputs*l.batch, l.activation);
}
// Computes the predicted (a priori) state x'(k) and error covariance P'(k)
// from the previous posterior, optionally applying a control input.
const Mat& KalmanFilter::predict(const Mat& control)
{
    // update the state: x'(k) = A*x(k)
    statePre = transitionMatrix*statePost;

    if( control.data )
        // x'(k) = x'(k) + B*u(k)  (only when a control input is supplied)
        statePre += controlMatrix*control;

    // update error covariance matrices: temp1 = A*P(k)
    temp1 = transitionMatrix*errorCovPost;

    // P'(k) = temp1*At + Q
    gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);

    return statePre;
}
// Runs the AArch32 SGEMM assembly kernel over the scheduled window. Each
// thread claims its own aligned slice of the shared workspace and executes
// an interleaved GEMM over its rows of the output, iterating only across
// the batch dimension.
void NEGEMMAArch32Kernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

    // Leading dimensions in elements (tensor strides are stored in bytes).
    const int lda = _input0->info()->strides_in_bytes().y() / sizeof(float);
    const int ldb = _input1->info()->strides_in_bytes().y() / sizeof(float);
    const int ldc = _output->info()->strides_in_bytes().y() / sizeof(float);

    const auto in1_ptr = reinterpret_cast<const float *>(_input1->buffer());

    // M is clipped to this thread's share of the output rows; N and K span
    // the full problem.
    const int M = std::min(_output->info()->tensor_shape().y(), static_cast<size_t>(window.y().end())) - window.y().start();
    const int N = _output->info()->tensor_shape().x();
    const int K = _input0->info()->tensor_shape().x();

    // Only iterate over batches
    Window win(window);
    win.set(0, Window::Dimension(0, 1, 1));
    win.set(1, Window::Dimension(0, 1, 1));

    Iterator in0(_input0, window);
    Iterator out(_output, window);

    GemmInterleaved<sgemm_8x6, float, float> gemm(&info.cpu_info, M, N, K, !_transform_0, !_transform_1);

    // Carve a per-thread region out of the shared workspace and align it.
    constexpr size_t alignment = 4096;
    const size_t offset = (gemm.get_working_size() + alignment - 1) * info.thread_id;
    void *workspace = _workspace->buffer() + offset;
    size_t workspace_size = _workspace->info()->total_size();

    if(support::cpp11::align(alignment, gemm.get_working_size(), workspace, workspace_size) == nullptr)
    {
        ARM_COMPUTE_ERROR("Not enough space to align buffer!");
    }

    execute_window_loop(win, [&](const Coordinates & id)
    {
        gemm.execute(reinterpret_cast<const float *>(in0.ptr()), lda, reinterpret_cast<const float *>(in1_ptr), ldb, reinterpret_cast<float *>(out.ptr()), ldc, _alpha, _beta, workspace);
    },
    in0, out);
}
/// Scaled matrix addition: A = alpha*A + beta*B, implemented via gemm with
/// an identity right-hand factor (alpha*A + beta*I*B).
/// @param A     in/out matrix, overwritten with the result
/// @param alpha scale applied to A
/// @param beta  scale applied to B
/// @param B     matrix to add; must have the same shape as A
/// @return false when the shapes of A and B differ, true on success
bool add(RealMatrix &A, const REAL alpha, const REAL beta, const RealMatrix &B) {
    // Validate shapes before doing any work. (The original also compared
    // the addresses of the reference parameters against NULL; a reference
    // can never legally be null, so that check was dead code and has been
    // removed.)
    if (A.row != B.row || A.col != B.col) {
        return false;
    }
    // Identity factor so that gemm computes alpha*A + beta*I*B.
    // Stack allocation replaces the original new/goto/delete sequence and
    // is exception-safe.
    RealSquare identity(B.col);
    identity.unitise();
    gemm(A, alpha, beta, identity, B);
    return true;
}
//------------------------------------------------------------------------------ // cv::subspace::project //------------------------------------------------------------------------------ Mat cv::subspace::project(InputArray _W, InputArray _mean, InputArray _src) { // get data matrices Mat W = _W.getMat(); Mat mean = _mean.getMat(); Mat src = _src.getMat(); // create temporary matrices Mat X, Y; // copy data & make sure we are using the correct type src.convertTo(X, W.type()); // get number of samples and dimension int n = X.rows; int d = X.cols; // center the data if correct aligned sample mean is given if(mean.total() == d) subtract(X, repeat(mean.reshape(1,1), n, 1), X); // finally calculate projection as Y = (X-mean)*W gemm(X, W, 1.0, Mat(), 0.0, Y); return Y; }
/* Generates <sample> from multivariate normal distribution, where <mean> -
   is an average row vector, <cov> - symmetric covariation matrix */
void randMVNormal( InputArray _mean, InputArray _cov, int nsamples, OutputArray _samples )
{
    Mat mean = _mean.getMat(), cov = _cov.getMat();
    int dim = (int)mean.total();

    _samples.create(nsamples, dim, CV_32F);
    Mat samples = _samples.getMat();
    // BUGFIX: a multivariate *normal* draw needs standard normal inputs;
    // randu would fill with uniform values and produce a biased,
    // non-Gaussian sample. Use randn with mean 0 and unit std deviation.
    randn(samples, 0., 1.);

    // Color the white noise with a Cholesky factor of the covariance.
    Mat utmat;
    Cholesky(cov, utmat);

    // gemm computes sample*utmat + op(mean); `sample` is a 1 x dim row, so
    // a column-vector mean (cols == 1) must be transposed via GEMM_3_T,
    // while a row-vector mean is added as-is.
    // BUGFIX: the original ternary was inverted (0 for a column mean,
    // GEMM_3_T for a row mean), which mismatches dimensions either way.
    int flags = mean.cols == 1 ? GEMM_3_T : 0;

    for( int i = 0; i < nsamples; i++ )
    {
        Mat sample = samples.row(i);
        gemm(sample, utmat, 1, mean, 1, sample, flags);
    }
}