Exemplo n.º 1
0
// Single-precision matrix-vector product, forwarded to cblas_sgemv:
//   y = alpha * op(A) * x + beta * y
// A is passed as a row-major M x N matrix and op(A) is selected by TransA.
// The call uses N as the leading dimension (rows of A are taken to be
// contiguous) and walks both x and y with a stride of 1.
void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const float alpha, const float* A, const float* x,
    const float beta, float* y) {
  const int lda = N;
  const int unit_stride = 1;
  cblas_sgemv(CblasRowMajor, TransA, M, N,
              alpha, A, lda,
              x, unit_stride,
              beta, y, unit_stride);
}
Exemplo n.º 2
0
 // Pass-through wrapper around cblas_sgemv: every argument is forwarded
 // unchanged and in the same order, and nothing is returned.
 void wrapper_cblas_sgemv(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
                          const int M, const int N, const float alpha,
                          const float *A, const int lda,
                          const float *X, const int incX,
                          const float beta, float *Y, const int incY)
 {
     cblas_sgemv(Order, TransA,
                 M, N,
                 alpha, A, lda,
                 X, incX,
                 beta, Y, incY);
 }
Exemplo n.º 3
0
	// Computes output = m_decoder_matrix_float * input with cblas_sgemv.
	// The matrix is passed as row-major with m_number_of_loudspeakers rows and
	// m_number_of_harmonics columns; input and output are read/written with
	// unit stride. Because beta is zero, output is overwritten rather than
	// accumulated into.
	void Decoder::process(const float* input, float* output)
	{
		const float alpha = 1.f;
		const float beta = 0.f;
		cblas_sgemv(CblasRowMajor, CblasNoTrans,
		            m_number_of_loudspeakers, m_number_of_harmonics,
		            alpha, m_decoder_matrix_float, m_number_of_harmonics,
		            input, 1,
		            beta, output, 1);
	}
Exemplo n.º 4
0
int main(int argc, char **argv)
{
	int rows = atoi(argv[1]);
	int cols = atoi(argv[2]);

	float *A, *B, *C;
	A = (float *)malloc(rows * cols * sizeof(float));
	B = (float *)malloc(cols * sizeof(float));
	C = (float *)malloc(rows * sizeof(float));

	// init
	for(int i=0; i<rows; i++) {
		for(int j=0; j<cols; j++) {
			//A[i * cols + j] = j;
			A[i * cols + j] = i;
		}
	}

	for(int i=0; i<cols; i++) {
		B[i] = 1.f;
	}


	float alpha = 1.f;
	float beta = 0.f;

	// timer
	struct timeval start_event, end_event;
	struct timezone tz;

	if (gettimeofday(&start_event, &tz) == -1)                                     
		perror("Error: calling gettimeofday() not successful.\n");

	cblas_sgemv(CblasRowMajor, 
			CblasTrans,
			rows,
			cols,
			alpha,
			A,
			rows,
			B,
			1,
			beta,
			C,
			1);

	if (gettimeofday(&end_event, &tz) == -1)                                     
		perror("Error: calling gettimeofday() not successful.\n");


	double runtime_ms = ((double)(end_event.tv_sec-start_event.tv_sec) * 1000.0 +
			(double)(end_event.tv_usec-start_event.tv_usec)) / 1000.0;

	printf("runtime %f (ms)\n", runtime_ms);


#if DBG
	// print
	for(int i=0; i<rows; i++) {
		printf("%f ", C[i]);
	}
	printf("\n");
#endif

	free(A);
	free(B);
	free(C);

	return 0;
}
Exemplo n.º 5
0
    // Computes output = m_decoder_matrix_float * input with cblas_sgemv.
    // The matrix is passed as row-major with m_number_of_channels rows and
    // m_number_of_harmonics columns; both vectors use unit stride. beta is
    // zero, so the previous contents of output do not contribute.
    void DecoderIrregular::process(const float* input, float* output)
	{
		const float alpha = 1.f;
		const float beta = 0.f;
		cblas_sgemv(CblasRowMajor, CblasNoTrans,
		            m_number_of_channels, m_number_of_harmonics,
		            alpha, m_decoder_matrix_float, m_number_of_harmonics,
		            input, 1,
		            beta, output, 1);
	}
Exemplo n.º 6
0
 // Single-precision gemv entry point: forwards every argument, unchanged and
 // in the same order, to cblas_sgemv.
 inline static void f (const  CBLAS_ORDER order,
              const  CBLAS_TRANSPOSE trans, const int m, const int n,
              const float alpha, const float * a, const int lda,
              const float * x, const int incx, float beta,
              float * y, const int incy)
  {
    cblas_sgemv(order, trans,
                m, n,
                alpha, a, lda,
                x, incx,
                beta, y, incy);
  }
Exemplo n.º 7
0
/*
 * Matrix-vector product: y = alpha * op(m) * x + beta * y.
 *
 * m is addressed column by column: column c starts at m + mStride*c and holds
 * nRow consecutive elements. op(m) is m itself when trans is zero and the
 * transpose of m otherwise.
 *
 *   trans == 0: x has nColumn elements, y has nRow elements.
 *   trans != 0: x has nRow elements,    y has nColumn elements.
 *
 * x and y are read/written with element strides xStride and yStride.
 *
 * When USE_CBLAS is set and every size/stride fits in an int, the work is
 * delegated to cblas_dgemv (USE_DOUBLE) or cblas_sgemv; otherwise a plain
 * loop implementation is used.
 *
 * When beta is exactly zero, y is overwritten and its previous contents are
 * never read. This matches the reference BLAS contract ("y need not be set on
 * input") and keeps the loop fallback consistent with the CBLAS path: NaN or
 * Inf left in an uninitialised y must not propagate through 0 * y.
 */
void THBlas_matVec(int trans, long nRow, long nColumn, real alpha, real *m, long mStride, real *x, long xStride, real beta, real *y, long yStride)
{
  /* With a single column the column stride is never used to step between
     columns; it is reset to nRow (presumably so it is an acceptable leading
     dimension for the BLAS call below). */
  if(nColumn == 1)
    mStride = nRow;
  
#if USE_CBLAS
  /* The CBLAS interface takes int arguments; only use it when nothing would
     be truncated. */
  if( (nRow < INT_MAX) && (nColumn < INT_MAX) && (mStride < INT_MAX)  && (xStride < INT_MAX) && (yStride < INT_MAX) )
  {
#ifdef USE_DOUBLE
    if(trans)
      cblas_dgemv(CblasColMajor, CblasTrans, nRow, nColumn, alpha, m, mStride, x, xStride, beta, y, yStride);
    else
      cblas_dgemv(CblasColMajor, CblasNoTrans, nRow, nColumn, alpha, m, mStride, x, xStride, beta, y, yStride);
#else
    if(trans)
      cblas_sgemv(CblasColMajor, CblasTrans, nRow, nColumn, alpha, m, mStride, x, xStride, beta, y, yStride);
    else
      cblas_sgemv(CblasColMajor, CblasNoTrans, nRow, nColumn, alpha, m, mStride, x, xStride, beta, y, yStride);
#endif
    return;
  }
#endif
  /* Portable fallback. */
  {
    long r, c;

    if(trans)
    {
      /* y[c] = beta*y[c] + alpha * dot(column c of m, x) */
      for(c = 0; c < nColumn; c++)
      {
        real sum = 0;
        real *column_ = m+mStride*c;
        for(r = 0; r < nRow; r++)
          sum += x[r*xStride]*column_[r];

        if(beta == 0)
          y[yStride*c] = alpha*sum;            /* overwrite: do not read y */
        else if(beta == 1)
          y[yStride*c] += alpha*sum;           /* skip the multiply by 1 */
        else
          y[yStride*c] = beta*y[yStride*c] + alpha*sum;
      }
    }
    else
    {
      /* First bring y to beta*y, then accumulate alpha*x[c] * column c. */
      if(beta == 0)
      {
        /* Explicit zero fill instead of scaling, so stale NaN/Inf in y
           cannot survive a multiplication by zero. */
        for(r = 0; r < nRow; r++)
          y[yStride*r] = 0;
      }
      else if(beta != 1)
        THBlas_scale(nRow, beta, y, yStride);
      
      for(c = 0; c < nColumn; c++)
      {
        real *column_ = m+mStride*c;
        real z = alpha*x[c*xStride];
        for(r = 0; r < nRow; r++)
          y[yStride*r] += z*column_[r];
      }
    }
  }
}
Exemplo n.º 8
0
// Benchmark driver comparing three ways of computing Ab += A * b for a
// 1024 x 1024 row-major single-precision matrix.
//
// Arguments:
//   argv[1]  number of threads handed to OpenBLAS and OpenMP
//   argv[2]  number of timed iterations
//   argv[3]  method: 0 = hand-written OpenMP loop,
//                    1 = one cblas_sgemv call per iteration,
//                    2 = OpenMP over row blocks, one cblas_sgemv per block
//
// Prints the mean time per iteration as reported by CycleTimer.
// Returns 0 on success, -1 on missing or invalid arguments.
int main(int argc, char const *argv[]) {
	if (argc < 4) {
		printf("Not enough arguments\n");
		return -1;
	}

	int max_num_thread = atoi(argv[1]);
	int max_iter = atoi(argv[2]);
	int test_method = atoi(argv[3]);

	// A zero thread count would divide by zero when the block size is
	// computed, and a zero iteration count makes the average meaningless.
	if (max_num_thread <= 0 || max_iter <= 0) {
		printf("Thread count and iteration count must be positive\n");
		return -1;
	}

	openblas_set_num_threads(max_num_thread);
	omp_set_num_threads(max_num_thread);

	int m = 1024;
	int n = 1024;

	// rand() / RAND_MAX on ints is integer division and is almost always 0;
	// divide in floating point to get values spread over [0, 1].
	float *A = new float[m * n];
	for (int i = 0; i < m * n; i++) {
		A[i] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
	}
	float *b = new float[n];
	for (int i = 0; i < n; i++) {
		b[i] = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
	}

	// Every method accumulates into Ab (+= / beta == 1), so it must start
	// from zero; the trailing () value-initialises the array.
	float *Ab = new float[m]();

	switch (test_method) {
		case 0: {
			printf("Runing Matrix-Vector Multiplication by OpenMP (%d threads)\n", omp_get_max_threads());
			double begTime = CycleTimer::currentSeconds();
			for (int iter = 0; iter < max_iter; ++iter) {
				#pragma omp parallel for
				for (int i=0; i<m; ++i) {
					// Accumulate in a local and store once per row.
					float acc = Ab[i];
					for (int j=0; j<n; ++j) {
						acc += A[i*n+j] * b[j];
					}
					Ab[i] = acc;
				}
			}
			double endTime = CycleTimer::currentSeconds();
			printf("%f\n", (endTime - begTime) / float(max_iter));
			break;
		}
		case 1: {
			// Print before starting the timer so console I/O is not measured.
			printf("Runing Matrix-Vector Multiplication by OpenBlas (%d threads)\n", omp_get_max_threads());
			double begTime = CycleTimer::currentSeconds();
			for (int iter = 0; iter < max_iter; ++iter) {
				cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, A, n, b, 1, 1.0, Ab, 1);
			}
			double endTime = CycleTimer::currentSeconds();
			printf("%f\n", (endTime - begTime) / float(max_iter));
			break;
		}
		case 2: {
			// Rows per block, rounded up so the blocks cover all m rows.
			int block_size = (m + max_num_thread - 1)/ max_num_thread;
			printf("Runing Matrix-Vector Multiplication by OpenMP (%d threads) with OpenBlas\n", omp_get_max_threads());
			double begTime = CycleTimer::currentSeconds();
			for (int iter = 0; iter < max_iter; ++iter) {
				#pragma omp parallel for
				for (int i = 0; i < max_num_thread; ++i) {
					int first_row = i * block_size;
					int actual_size = std::min(block_size, m - first_row);
					// With more threads than rows the trailing blocks are
					// empty (or start past the end); they must not reach BLAS.
					if (actual_size > 0) {
						cblas_sgemv(CblasRowMajor, CblasNoTrans, actual_size, n, 1.0, A+first_row*n, n, b, 1, 1.0, Ab+first_row, 1);
					}
				}
			}
			double endTime = CycleTimer::currentSeconds();
			printf("%f\n", (endTime - begTime) / float(max_iter));
			break;
		}
		default:
			printf("No matched test method\n");
			break;
	}

	delete [] A;
	delete [] b;
	delete [] Ab;

	return 0;
}
Exemplo n.º 9
0
    // Computes outputs = m_projector_matrix_float * inputs with cblas_sgemv.
    // The matrix is passed as row-major with m_number_of_channels rows and
    // m_number_of_harmonics columns; both vectors use unit stride. beta is
    // zero, so outputs is overwritten rather than accumulated into.
    void Projector::process(const float* inputs, float* outputs)
	{
		const float alpha = 1.f;
		const float beta = 0.f;
		cblas_sgemv(CblasRowMajor, CblasNoTrans,
		            m_number_of_channels, m_number_of_harmonics,
		            alpha, m_projector_matrix_float, m_number_of_harmonics,
		            inputs, 1,
		            beta, outputs, 1);
	}
Exemplo n.º 10
0
// Single-precision gemv overload. The first two parameters are unnamed: only
// their types are used, mapped through cblas_option<...>::value to the
// storage-order and transpose arguments of cblas_sgemv. All remaining
// arguments are forwarded unchanged.
inline void gemv( const Order, const Trans, const int m, const int n,
        const float alpha, const float* a, const int lda, const float* x,
        const int incx, const float beta, float* y, const int incy ) {
    cblas_sgemv(
            cblas_option< Order >::value,
            cblas_option< Trans >::value,
            m, n,
            alpha, a, lda,
            x, incx,
            beta, y, incy );
}