Пример #1
0
Файл: blas.c Проект: ngjw/mlgp
/*
 * Thin wrapper around the Fortran-style BLAS matrix-vector product.
 *
 * Dispatches to dgemv_ when DOUBLE is defined and to sgemv_ otherwise.
 * Scalar arguments are taken by value here and forwarded by address, which is
 * how the underlying routine is called below; A, X and Y are forwarded as-is.
 *
 * Following the BLAS ?gemv convention: transA selects the operation on A,
 * M and N are the dimensions of A, LDA its leading dimension, a and b are the
 * scalars applied to the product and to Y respectively, and incX / incY are
 * the element strides of X and Y.
 *
 * NOTE(review): the dimensions and strides are `unsigned` here; confirm the
 * prototype of dgemv_/sgemv_ in use accepts pointers of that type.
 */
void MLGP_GEMV(char transA, unsigned M, unsigned N, FLOAT a,
               FLOAT* A, unsigned LDA, FLOAT* X, unsigned incX, FLOAT b,
               FLOAT* Y, unsigned incY)
{
  #ifdef DOUBLE
  #define GEMV(...) dgemv_(__VA_ARGS__)
  #else
  #define GEMV(...) sgemv_(__VA_ARGS__)
  #endif
  /* Plain call: a void function must not use `return <expression>;` in C. */
  GEMV(&transA, &M, &N, &a, A, &LDA, X, &incX, &b, Y, &incY);
}
Пример #2
0
/*
 * Removes from p its components along the given basis vectors, scales the
 * remainder to unit Euclidean length, and writes the result back into p.
 *
 *   c                 scratch context; its Pv, Bases and Dp members are
 *                     overwritten by this call.
 *   p                 in/out vector of c->Pv->Count doubles.
 *   numBases          number of basis vectors supplied.
 *   orthonormalBases  numBases vectors of c->Pv->Count doubles each, stored
 *                     back to back.
 *
 * NOTE(review): GEMV and AXPY2 are helpers defined elsewhere; presumably GEMV
 * stores the product Bases * Pv in Dp and AXPY2 adds a scaled vector into its
 * last argument -- confirm against their definitions.
 * NOTE(review): when the remainder has zero norm the scale factor below is
 * 1.0 / 0.0, so p receives non-finite values.
 */
void Orthogonalize(OrthoContext* c, double* p, int numBases, double* orthonormalBases)
{
	/* Stage the caller's data in the context's work buffers. */
	memcpy(c->Pv->Data, p, c->Pv->Count * sizeof(double));
	memcpy(c->Bases->Data, orthonormalBases, numBases * c->Pv->Count * sizeof(double));
	c->Bases->RowCount = numBases;
	c->Dp->Count = numBases;

	int vecLen = c->Pv->Count;

	/* One coefficient per basis vector ends up in Dp. */
	GEMV(1, c->Bases, c->Pv, 0, c->Dp);

	/* Walk the packed basis vectors, subtracting each weighted one from Pv. */
	int row = 0;
	int start = 0;
	while (row < numBases) {
		AXPY2(-1 * c->Dp->Data[row], c->Bases->Data + start, vecLen, c->Pv->Data);
		row++;
		start += vecLen;
	}

	/* Normalize what is left and hand it back to the caller. */
	double norm = cblas_dnrm2(vecLen, c->Pv->Data, 1);
	double invNorm = 1.0 / norm;
	cblas_dscal(vecLen, invNorm, c->Pv->Data, 1);
	memcpy(p, c->Pv->Data, vecLen * sizeof(double));
}
//=========================================================================
int EpetraExt_BlockDiagMatrix::ApplyInverse(const Epetra_MultiVector& X, Epetra_MultiVector& Y) const{
  int info;
  // Sanity Checks
  int NumVectors=X.NumVectors();  
  if(NumVectors!=Y.NumVectors())
    EPETRA_CHK_ERR(-1);
  if(!HasComputed_ && (ApplyMode_==AM_INVERT || ApplyMode_==AM_FACTOR))
    EPETRA_CHK_ERR(-2);
  
  //NTS: MultiVector's MyLength and [] Operators are  "points" level operators
  //not a "block/element" level operators.

  const int *vlist=DataMap_->FirstPointInElementList();
  const int *xlist=Map().FirstPointInElementList();
  const int *blocksize=Map().ElementSizeList();
  
  if(ApplyMode_==AM_MULTIPLY || ApplyMode_==AM_INVERT){
    // Multiply & Invert mode have the same apply
    int NumBlocks=NumMyBlocks();
    for(int i=0;i<NumBlocks;i++){
      int Nb=blocksize[i];
      int vidx0=vlist[i];
      int xidx0=xlist[i];
      for(int j=0;j<NumVectors;j++){	
	if(Nb==1) {
	  // Optimize for size = 1
	  Y[j][xidx0]=Values_[vidx0]*X[j][xidx0];
	}
	else if(Nb==2){
	  // Optimize for size = 2
	  Y[j][xidx0  ]=Values_[vidx0  ]*X[j][xidx0] + Values_[vidx0+2]*X[j][xidx0+1];
	  Y[j][xidx0+1]=Values_[vidx0+1]*X[j][xidx0] + Values_[vidx0+3]*X[j][xidx0+1];
	}
	else{
	  // "Large" Block - Use BLAS
	  //void 	GEMV (const char TRANS, const int M, const int N, const double ALPHA, const double *A, const int LDA, const double *X, const double BETA, double *Y, const int INCX=1, const int INCY=1) const 
	  GEMV('N',Nb,Nb,1.0,&Values_[vidx0],Nb,&X[j][xidx0],0.0,&Y[j][xidx0]);
	}
      }   
    }
  }
  else{
    // Factorization mode has a different apply
    int NumBlocks=NumMyBlocks();
    for(int i=0;i<NumBlocks;i++){
      int Nb=blocksize[i];
      int vidx0=vlist[i];
      int xidx0=xlist[i];      
      for(int j=0;j<NumVectors;j++){
	if(Nb==1) {
	  // Optimize for size = 1 - use the inverse
	  Y[j][xidx0]=Values_[vidx0]*X[j][xidx0];
	}
	else if(Nb==2){
	  // Optimize for size = 2 - use the inverse
	  Y[j][xidx0  ]=Values_[vidx0  ]*X[j][xidx0] + Values_[vidx0+2]*X[j][xidx0+1];
	  Y[j][xidx0+1]=Values_[vidx0+1]*X[j][xidx0] + Values_[vidx0+3]*X[j][xidx0+1];
	}
	else{
	  // "Large" Block - use LAPACK
	  //    void 	GETRS (const char TRANS, const int N, const int NRHS, const double *A, const int LDA, const int *IPIV, double *X, const int LDX, int *INFO) const 
	  for(int k=0;k<Nb;k++) Y[j][xidx0+k]=X[j][xidx0+k];
	  LAPACK.GETRS('N',Nb,1,&Values_[vidx0],Nb,&Pivots_[xidx0],&Y[j][xidx0],Nb,&info);
	  if(info) EPETRA_CHK_ERR(info);
	}
      }
    }    
  }  
  return 0;
}
Пример #4
0
/* Fills buf[0 .. count) with pseudo-random values in [-0.5, 0.5]. */
static void gemv_bench_fill(FLOAT *buf, size_t count)
{
  size_t k;

  for (k = 0; k < count; k++) {
    buf[k] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
  }
}

/*
 * Benchmarks GEMV on one m x n problem (leading dimension m) and prints a
 * single result line to stderr.
 *
 * The caller guarantees m >= 1, n >= 1, loops >= 1 and that a, x and y are
 * large enough for the biggest problem size.
 */
static void gemv_bench_run(char trans, blasint m, blasint n,
                           FLOAT *alpha, FLOAT *a, FLOAT *x, blasint inc_x,
                           FLOAT *beta, FLOAT *y, blasint inc_y, int loops)
{
  struct timeval start, stop;
  double time1, timeg = 0;
  /* x holds n or m entries depending on trans, and y the other one; filling
     max(m, n) entries of both covers either orientation. */
  size_t veclen = (size_t)MAX(m, n);
  int l;

  fprintf(stderr, " %6dx%d : ", (int)m,(int)n);

  /* The whole m x n operand is stored contiguously because lda == m. */
  gemv_bench_fill(a, (size_t)m * (size_t)n * COMPSIZE);

  for (l = 0; l < loops; l++) {

    gemv_bench_fill(x, veclen * COMPSIZE * (size_t)abs((int)inc_x));
    gemv_bench_fill(y, veclen * COMPSIZE * (size_t)abs((int)inc_y));

    gettimeofday( &start, (struct timezone *)0);
    GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
    timeg += time1;
  }

  timeg /= loops;

  fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
}

/*
 * GEMV benchmark driver.
 *
 * Usage: prog [from [to [step]]]   (defaults: 1, 200, 1)
 *
 * Environment:
 *   OPENBLAS_LOOPS    repetitions averaged per problem size (default 1)
 *   OPENBLAS_INCX     stride of x (default 1)
 *   OPENBLAS_INCY     stride of y (default 1)
 *   OPENBLAS_TRANS    first character is passed as the trans argument
 *   OPENBLAS_PARAM_N  if > 0, n is fixed to this value and m is swept
 *   OPENBLAS_PARAM_M  only read when n is not fixed; if > 0, m is fixed to
 *                     this value and n is swept
 *
 * Without either PARAM variable, square problems (n == m) are swept.
 * Returns 0 on completion; exits with status 1 if a buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char trans='N';
  blasint m=0;
  blasint n=0;
  blasint inc_x=1,inc_y=1;
  int has_param_n = 0;
  int has_param_m = 0;
  int loops = 1;
  char *p;

  int from =   1;
  int to   = 200;
  int step =   1;

  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);		argc--; argv++;}
  /* Sizes below 1 would produce negative dimensions and buffer sizes. */
  if (from < 1) from = 1;
  if (argc > 0) { to       = MAX(atol(*argv), from);	argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);		argc--; argv++;}
  /* A non-positive step would make the sweep loops below never terminate. */
  if (step < 1) step = 1;

  int tomax = to;

  if ((p = getenv("OPENBLAS_LOOPS")))  loops = atoi(p);
  /* The timing average divides by loops, so at least one run is required. */
  if (loops < 1) loops = 1;
  if ((p = getenv("OPENBLAS_INCX")))   inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY")))   inc_y = atoi(p);
  if ((p = getenv("OPENBLAS_TRANS")))  trans=*p;
  if ((p = getenv("OPENBLAS_PARAM_N"))) {
    n = atoi(p);
    if (n > 0) has_param_n = 1;
    if (n > tomax) tomax = n;
  }
  if (has_param_n == 0) {
    if ((p = getenv("OPENBLAS_PARAM_M"))) {
      m = atoi(p);
      if (m > 0) has_param_m = 1;
      if (m > tomax) tomax = m;
    }
  }

  /* blasint may be wider than int, so cast it to match the %d conversions. */
  fprintf(stderr, "From : %3d  To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,(int)inc_x,(int)inc_y,loops);

  /* Every buffer is sized for the largest dimension that can occur. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * tomax * tomax * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_y) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  srandom(getpid());
#endif

  fprintf(stderr, "   SIZE       Flops\n");

  if (has_param_m == 0)
  {
    /* Sweep m; n either follows m or stays at OPENBLAS_PARAM_N. */
    for(m = from; m <= to; m += step)
    {
      if ( has_param_n == 0 ) n = m;
      gemv_bench_run(trans, m, n, alpha, a, x, inc_x, beta, y, inc_y, loops);
    }
  }
  else
  {
    /* m is fixed by OPENBLAS_PARAM_M; sweep n. */
    for(n = from; n <= to; n += step)
    {
      gemv_bench_run(trans, m, n, alpha, a, x, inc_x, beta, y, inc_y, loops);
    }
  }

  free(a);
  free(x);
  free(y);

  return 0;
}