コード例 #1
0
ファイル: cholesky.c プロジェクト: NodLabs/OpenBLAS
/*
 * Cholesky (POTRF) self-check and benchmark driver.
 *
 * Usage: prog [from [to [step]]]     (defaults: 1 200 1)
 *
 * For every order m in [from, to] (stride step) and for both triangle
 * selectors ("U" first, then "L") the driver
 *   1. fills a with a random triangular factor: the diagonal is drawn from
 *      [8, 9], the selected triangle from [-0.5, 0.5], the other triangle
 *      is set to zero;
 *   2. forms b from a with SYRK (trans "T"/"C" for "U", "N" for "L");
 *   3. times POTRF on b;
 *   4. prints the largest absolute element-wise difference between the
 *      selected triangle of a and of b, plus the value of getmflops().
 *
 * The process exits with status 1 when POTRF returns a non-zero info or the
 * difference exceeds 1e-3. Returns 0 otherwise.
 */
int main(int argc, char *argv[]){

#ifndef COMPLEX
  char *trans[] = {"T", "N"};
#else
  char *trans[] = {"C", "N"};
#endif
  char *uplo[]  = {"U", "L"};
  FLOAT alpha[] = {1.0, 0.0};
  FLOAT beta [] = {0.0, 0.0};

  FLOAT *a, *b;

  blasint m, i, j, info, uplos;

  int from =   1;
  int to   = 200;
  int step =   1;

  FLOAT maxerr;

  struct timeval start, stop;
  double time1;

  /* Skip the program name; the remaining arguments are from, to, step. */
  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);            argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);            argc--; argv++;}

  fprintf(stderr, "From : %3d  To : %3d Step = %3d\n", from, to, step);

  /* Both buffers are sized once for the largest order and reused. */
  if (( a    = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b    = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  for(m = from; m <= to; m += step){

    fprintf(stderr, "M = %6d : ", (int)m);

    for (uplos = 0; uplos < 2; uplos ++) {

      /* Build the reference triangular factor in a (column-major, ld = m). */
#ifndef COMPLEX
      if (uplos & 1) {
        /* Lower triangular factor. */
        for (j = 0; j < m; j++) {
          for(i = 0; i < j; i++)     a[i + j * m] = 0.;
                                     a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
          for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
        }
      } else {
        /* Upper triangular factor. */
        for (j = 0; j < m; j++) {
          for(i = 0; i < j; i++)     a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
                                     a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
          for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
        }
      }
#else
      if (uplos & 1) {
        /* Lower triangular factor; the diagonal is real. */
        for (j = 0; j < m; j++) {
          for(i = 0; i < j; i++) {
            a[(i + j * m) * 2 + 0] = 0.;
            a[(i + j * m) * 2 + 1] = 0.;
          }

          a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
          a[(j + j * m) * 2 + 1] = 0.;

          for(i = j + 1; i < m; i++) {
            a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
            a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
          }
        }
      } else {
        /* Upper triangular factor; the diagonal is real. */
        for (j = 0; j < m; j++) {
          for(i = 0; i < j; i++) {
            a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
            a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
          }

          a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
          a[(j + j * m) * 2 + 1] = 0.;

          for(i = j + 1; i < m; i++) {
            a[(i + j * m) * 2 + 0] = 0.;
            a[(i + j * m) * 2 + 1] = 0.;
          }
        }
      }
#endif

      /* b := product of the factor with its (conjugate) transpose. */
      SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);

      gettimeofday( &start, (struct timezone *)0);

      POTRF(uplo[uplos], &m, b, &m, &info);

      gettimeofday( &stop, (struct timezone *)0);

      if (info != 0) {
        /* blasint may be wider than int, so cast to match "%d". */
        fprintf(stderr, "Info = %d\n", (int)info);
        exit(1);
      }

      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

      /* Compare only the triangle that POTRF was asked to produce. */
      maxerr = 0.;

      if (!(uplos & 1)) {
        for (j = 0; j < m; j++) {
          for(i = 0; i <= j; i++) {
#ifndef COMPLEX
            if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
#else
            if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
            if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
#endif
          }
        }
      } else {
        for (j = 0; j < m; j++) {
          for(i = j; i < m; i++) {
#ifndef COMPLEX
            if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
#else
            if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
            if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
#endif
          }
        }
      }

      fprintf(stderr,
#ifdef XDOUBLE
              "  %Le  %10.3f MFlops", maxerr,
#else
              "  %e  %10.3f MFlops", maxerr,
#endif
              getmflops(COMPSIZE * COMPSIZE, m, time1));

      if (maxerr > 1.e-3) {
        fprintf(stderr, "Hmm, probably it has bug.\n");
        exit(1);
      }

    }
    fprintf(stderr, "\n");

  }

  /* Release the work matrices on the normal exit path. */
  free(a);
  free(b);

  return 0;
}
コード例 #2
0
ファイル: syrk.c プロジェクト: 4ker/OpenBLAS
/*
 * SYRK benchmark driver.
 *
 * Usage: prog [from [to [step]]]     (defaults: 1 200 1)
 *
 * Environment:
 *   OPENBLAS_UPLO   first character is passed to SYRK as uplo  (default 'U')
 *   OPENBLAS_TRANS  first character is passed to SYRK as trans (default 'N')
 *
 * For every order m in [from, to] (stride step) the driver fills a and c
 * with values drawn from [-0.5, 0.5], times a single SYRK call and prints
 * COMPSIZE^2 * m^3 / time * 1e-6 as the MFlops figure. Returns 0.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *c;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char *p;

  char uplo='U';
  char trans='N';

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1;

  /* Skip the program name; the remaining arguments are from, to, step. */
  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);            argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);            argc--; argv++;}

  fprintf(stderr, "From : %3d  To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  /* Both buffers are sized once for the largest order and reused. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef linux
  /*
   * The matrices below are filled with rand(), so the seed has to go through
   * srand(); seeding the separate random() generator had no effect on them.
   */
  srand((unsigned int)getpid());
#endif

  fprintf(stderr, "   SIZE       Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    /* Fill the leading m x m part of both operands (column-major, ld = m). */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    gettimeofday( &start, (struct timezone *)0);

    SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );

    gettimeofday( &stop, (struct timezone *)0);

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

  }

  /* Release the work matrices on the normal exit path. */
  free(a);
  free(c);

  return 0;
}
コード例 #3
0
/*
 * Dictionary update through the Lagrange dual.
 *
 * All matrices are column-major. From the BLAS/LAPACK calls below:
 *   X     : N x numSamples, leading dimension N (read only here)
 *   S     : K x numSamples, leading dimension K (read only here)
 *   Dorig : NULL, or an N x K matrix (leading dimension N) used to initialise
 *           the dual variables; it is not modified
 *   Dopt  : output, receives the N x K result
 *   l2norm: its square is forwarded to minimize_dual() as c
 *   length: forwarded unchanged to minimize_dual()
 *
 * Steps:
 *   1. SSt = S * S' (upper triangle only, via SYRK) and XSt = X * S'.
 *   2. SXt = XSt' (via transpose(); presumably transpose(src, dst, rows,
 *      cols) - confirm against its definition).
 *   3. Initial dual variables: uniform in [0, 10] when Dorig is NULL (this
 *      reseeds rand() from the clock), otherwise diag(Dorig \ XSt - SSt)
 *      where the least-squares solve is done by GELSY.
 *   4. minimize_dual() produces the optimised dual variables.
 *   5. Solve (SSt + diag(lambda)) * Z = SXt with POTRF/POTRS and store Z'
 *      in Dopt.
 */
void l2ls_learn_basis_dual(DOUBLE *Dopt, DOUBLE *Dorig, DOUBLE *X, DOUBLE *S, DOUBLE l2norm, INT length, INT N, INT K, INT numSamples) {

	DOUBLE *SSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE));

	/* SSt = S * S' (only the upper triangle is written). */
	CHAR uplo = 'U';
	CHAR trans = 'N';
	INT SYRKN = K;
	INT SYRKK = numSamples;
	DOUBLE alpha = 1;
	INT SYRKLDA = K;
	DOUBLE beta = 0;
	INT SYRKLDC = K;

	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, S, &SYRKLDA, &beta, SSt, &SYRKLDC);

	DOUBLE *XSt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE));

	/* XSt = X * S' (N x K). */
	CHAR transa = 'N';
	CHAR transb = 'T';
	INT GEMMM = N;
	INT GEMMN = K;
	INT GEMMK = numSamples;
	alpha = 1;
	INT GEMMLDA = N;
	INT GEMMLDB = K;
	beta = 0;
	INT GEMMLDC = N;

	GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, X, &GEMMLDA, S, &GEMMLDB, &beta, XSt, &GEMMLDC);
	DOUBLE *SXt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE));
	transpose(XSt, SXt, N, K);

	INT iterK;
	DOUBLE *dualLambdaOrig = (DOUBLE *) MALLOC(K * sizeof(DOUBLE));
	if (Dorig == NULL) {
		/* No initial dictionary: start from random dual variables. */
		srand(time(NULL));
		for (iterK = 0; iterK < K; ++iterK) {
			dualLambdaOrig[iterK] = 10 * (DOUBLE) rand() / (DOUBLE) RAND_MAX;
		}
	} else {
		/*
		 * dualLambdaOrig = diag(Dorig \ XSt - SSt).
		 *
		 * GELSY needs the right-hand side in a buffer with leading dimension
		 * max(N, K): the N x K input is overwritten by the K x K solution.
		 */
		INT maxNK = IMAX(N, K);
		DOUBLE *B = (DOUBLE *) MALLOC(maxNK * maxNK * sizeof(DOUBLE));
		for (iterK = 0; iterK < K; ++iterK) {
			/* Each column of XSt holds N entries. */
			datacpy(&B[iterK * maxNK], &XSt[iterK * N], N);
		}

		/* GELSY overwrites its matrix argument; keep the caller's Dorig intact. */
		DOUBLE *DorigCopy = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE));
		datacpy(DorigCopy, Dorig, N * K);

		INT GELSYM = N;
		INT GELSYN = K;
		INT GELSYNRHS = K;
		INT GELSYLDA = N;
		INT GELSYLDB = maxNK;
		INT *jpvt = (INT *) MALLOC(K * sizeof(INT));
		/* jpvt is an input: zero marks every column as free to be pivoted. */
		for (iterK = 0; iterK < K; ++iterK) {
			jpvt[iterK] = 0;
		}
		/*
		 * rcond is an input as well and used to be passed uninitialised.
		 * NOTE(review): 0 is expected to make GELSY treat the matrix as having
		 * full numerical rank; confirm whether a positive tolerance is wanted.
		 */
		DOUBLE rcond = 0;
		INT rank;
		INT lwork = -1;
		DOUBLE work_temp;
		DOUBLE *work;
		INT INFO;

		/* Workspace query. */
		GELSY(&GELSYM, &GELSYN, &GELSYNRHS, DorigCopy, &GELSYLDA, B, &GELSYLDB, jpvt, &rcond, &rank, &work_temp, &lwork, &INFO);

		lwork = (INT) work_temp;
		work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE));

		/* Actual solve: the solution replaces the leading K rows of B. */
		GELSY(&GELSYM, &GELSYN, &GELSYNRHS, DorigCopy, &GELSYLDA, B, &GELSYLDB, jpvt, &rcond, &rank, work, &lwork, &INFO);

		for (iterK = 0; iterK < K; ++iterK) {
			/* B is stored with leading dimension maxNK, SSt with K. */
			dualLambdaOrig[iterK] = B[iterK * maxNK + iterK] - SSt[iterK * K + iterK];
		}

		FREE(work);
		FREE(DorigCopy);
		FREE(B);
		FREE(jpvt);
	}

	DOUBLE *SXtXSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE));

	/* SXtXSt = SXt * SXt' (only the upper triangle is written). */
	uplo = 'U';
	trans = 'N';
	SYRKN = K;
	SYRKK = N;
	alpha = 1;
	SYRKLDA = K;
	beta = 0;
	SYRKLDC = K;

	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, SXt, &SYRKLDA, &beta, SXtXSt, &SYRKLDC);

	DOUBLE c = SQR(l2norm);

	/* trXXt = squared Frobenius norm of X; no workspace is needed for 'F'. */
	CHAR norm = 'F';
	INT LANGEM = N;
	INT LANGEN = numSamples;
	INT LANGELDA = N;

	DOUBLE trXXt = LANGE(&norm, &LANGEM, &LANGEN, X, &LANGELDA, NULL);
	trXXt = SQR(trXXt);

/*
	DOUBLE *dualLambdaOpt = (DOUBLE *) MALLOC(K * sizeof(DOUBLE));
*/
	/* XSt is not read again in this function; its storage is reused here. */
	DOUBLE *dualLambdaOpt = XSt;
	minimize_dual(dualLambdaOpt, dualLambdaOrig, length, SSt, SXt, SXtXSt, trXXt, c, N, K);

	/* SSt := SSt + diag(dualLambdaOpt). */
	for (iterK = 0; iterK < K; ++iterK) {
		SSt[iterK * K + iterK] += dualLambdaOpt[iterK];
	}

	/* Cholesky-factor SSt and solve for SXt in place (K x N). */
	uplo = 'U';
	INT POTRSN = K;
	INT POTRSLDA = K;
	INT INFO;
	POTRF(&uplo, &POTRSN, SSt, &POTRSLDA, &INFO);

	INT POTRSNRHS = N;
	INT POTRSLDB = K;

	POTRS(&uplo, &POTRSN, &POTRSNRHS, SSt, &POTRSLDA, SXt, &POTRSLDB, &INFO);

	transpose(SXt, Dopt, K, N);

	FREE(SSt);
	FREE(XSt);
	FREE(SXt);
	FREE(dualLambdaOrig);
	FREE(SXtXSt);
}
コード例 #4
0
ファイル: potrf.c プロジェクト: NodLabs/OpenBLAS
/*
 * POTRF / POTRS / POTRI benchmark driver.
 *
 * Usage: prog [from [to [step]]]     (defaults: 1 200 1)
 *
 * Environment:
 *   OPENBLAS_UPLO  a value starting with 'L' selects the lower triangle
 *                  (default: upper)
 *   OPENBLAS_TEST  first character selects what is reported (default 'F'):
 *                  'S' reports POTRS, 'I' reports POTRI, anything else
 *                  reports POTRF
 *
 * For every order m in [from, to] (stride step) a random triangular factor
 * is generated in a, b is formed from it with SYRK and factored with POTRF.
 * POTRF always runs; with 'S' or 'I' the printed time and MFlops are those
 * of the extra POTRS / POTRI call on the factored b. The process exits with
 * status 1 when any LAPACK call returns a non-zero info. Returns 0 otherwise.
 */
int main(int argc, char *argv[]){

#ifndef COMPLEX
  char *trans[] = {"T", "N"};
#else
  char *trans[] = {"C", "N"};
#endif
  char *uplo[]  = {"U", "L"};
  FLOAT alpha[] = {1.0, 0.0};
  FLOAT beta [] = {0.0, 0.0};

  FLOAT *a, *b;

  char *p;
  char btest = 'F';

  blasint m, i, j, info, uplos=0;
  double flops;

  int from =   1;
  int to   = 200;
  int step =   1;

  struct timeval start, stop;
  double time1;

  /* Skip the program name; the remaining arguments are from, to, step. */
  argc--;argv++;

  if (argc > 0) { from     = atol(*argv);            argc--; argv++;}
  if (argc > 0) { to       = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step     = atol(*argv);            argc--; argv++;}

  if ((p = getenv("OPENBLAS_UPLO")))
    if (*p == 'L') uplos=1;

  if ((p = getenv("OPENBLAS_TEST"))) btest=*p;

  fprintf(stderr, "From : %3d  To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);

  /* Both buffers are sized once for the largest order and reused. */
  if (( a    = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b    = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  for(m = from; m <= to; m += step){

    /* Build the triangular factor in a (column-major, ld = m). */
#ifndef COMPLEX
    if (uplos & 1) {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++)     a[i + j * m] = 0.;
                                   a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
        for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
      }
    } else {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++)     a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
                                   a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
        for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
      }
    }
#else
    if (uplos & 1) {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) {
          a[(i + j * m) * 2 + 0] = 0.;
          a[(i + j * m) * 2 + 1] = 0.;
        }

        a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
        a[(j + j * m) * 2 + 1] = 0.;

        for(i = j + 1; i < m; i++) {
          a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
          a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
        }
      }
    } else {
      for (j = 0; j < m; j++) {
        for(i = 0; i < j; i++) {
          a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
          a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
        }

        a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
        a[(j + j * m) * 2 + 1] = 0.;

        for(i = j + 1; i < m; i++) {
          a[(i + j * m) * 2 + 0] = 0.;
          a[(i + j * m) * 2 + 1] = 0.;
        }
      }
    }
#endif

    /* b := product of the factor with its (conjugate) transpose. */
    SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);

    gettimeofday( &start, (struct timezone *)0);

    POTRF(uplo[uplos], &m, b, &m, &info);

    gettimeofday( &stop, (struct timezone *)0);

    if (info != 0) {
      /* blasint may be wider than int, so cast to match "%d". */
      fprintf(stderr, "Potrf info = %d\n", (int)info);
      exit(1);
    }

    time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
    flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;

    if ( btest == 'S' )
    {

      /* Refill a with random right-hand sides before the solve. */
      for(j = 0; j < to; j++){
        for(i = 0; i < to * COMPSIZE; i++){
          a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      gettimeofday( &start, (struct timezone *)0);

      POTRS(uplo[uplos], &m, &m, b, &m, a, &m,  &info);

      gettimeofday( &stop, (struct timezone *)0);

      if (info != 0) {
        fprintf(stderr, "Potrs info = %d\n", (int)info);
        exit(1);
      }
      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
      flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;

    }

    if ( btest == 'I' )
    {

      gettimeofday( &start, (struct timezone *)0);

      POTRI(uplo[uplos], &m, b, &m, &info);

      gettimeofday( &stop, (struct timezone *)0);

      if (info != 0) {
        fprintf(stderr, "Potri info = %d\n", (int)info);
        exit(1);
      }

      time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
      flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
    }

    /* Cast m for the same reason as info above. */
    fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",(int)m,flops ,time1,btest);

  }

  /* Release the work matrices on the normal exit path. */
  free(a);
  free(b);

  return 0;
}
コード例 #5
0
/*
 * Replaces the symmetric M x M matrix X (column-major) by a positive
 * semidefinite matrix of rank at most `rank`, built from the eigenpairs in
 * the last `rank` positions of the SYEV output (LAPACK documents the
 * eigenvalues as ascending, so these are the largest ones); negative
 * eigenvalues are dropped.
 *
 * Parameters:
 *   X      in/out, M x M; fully overwritten (both triangles) on return.
 *   norm   optional output (may be NULL): sum of the retained eigenvalues.
 *   rank   number of trailing eigenpairs to keep; values outside [0, M]
 *          are clamped to that range.
 *   M      order of X.
 *   eigv   optional M-element buffer for the eigenvalues; allocated and
 *          freed internally when NULL. On return it holds the thresholded
 *          eigenvalues.
 *   eigvec optional M x M buffer for the eigenvectors; allocated and freed
 *          internally when NULL. On return the retained columns are scaled.
 *   work   SYEV workspace of lwork elements.
 *   lwork  -1: only run the SYEV workspace query with the given arguments
 *              and return;
 *           0: query and allocate the workspace internally;
 *          otherwise: use the caller-provided `work`.
 */
void nuclear_psd_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, DOUBLE *eigv, \
		DOUBLE *eigvec, DOUBLE *work, INT lwork) {

	CHAR jobz = 'V';
	CHAR uplo = 'U';
	INT SYEVN = M;
	INT SYEVLDA = M;
	INT info;

	if (lwork == - 1) {
		/* Pure workspace query on behalf of the caller. */
		SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info);
		return;
	}

	/* Keep the eigenvector window [M - rank, M) inside the array. */
	if (rank > M) {
		rank = M;
	}
	if (rank < 0) {
		rank = 0;
	}

	INT eigvFlag = 0;
	if (eigv == NULL) {
		eigv = (DOUBLE *) MALLOC(M * 1 * sizeof(DOUBLE));
		eigvFlag = 1;
	}

	INT eigvecFlag = 0;
	if (eigvec == NULL) {
		eigvec = (DOUBLE *) MALLOC(M * M * sizeof(DOUBLE));
		eigvecFlag = 1;
	}

	/* SYEV works in place, so decompose a copy of X. */
	datacpy(eigvec, X, M * M);
	INT workFlag = 0;
	if (lwork == 0) {
		DOUBLE workTemp;
		lwork = -1;
		SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, &workTemp, &lwork, &info);
		if (info != 0) {
			PRINTF("Error, INFO = %d. ", info);
			ERROR("LAPACK error.");
		}

		lwork = (INT) workTemp;
		work = (DOUBLE *) MALLOC(lwork * 1 * sizeof(DOUBLE));
		workFlag = 1;
	}

	// TODO: Perhaps replace with SYEVR?
	SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info);
	if (info != 0) {
		PRINTF("Error, INFO = %d. ", info);
		ERROR("LAPACK error.");
	}

	INT iterM;
	DOUBLE normtemp = 0;
	DOUBLE alpha;
	INT SCALN = M;
	INT incx = 1;
	for (iterM = 0; iterM < M; ++iterM) {
		if ((eigv[iterM] < 0) || (iterM < M - rank)){
			eigv[iterM] = 0;
			if (iterM >= M - rank) {
				/*
				 * This column is part of the SYRK product below. Zero it so
				 * that a dropped negative eigenvalue contributes nothing
				 * instead of entering with weight one.
				 */
				alpha = 0;
				SCAL(&SCALN, &alpha, &eigvec[iterM * M], &incx);
			}
		} else {
			normtemp += eigv[iterM];
			/* Scale by sqrt(lambda) so that v * v' carries weight lambda. */
			alpha = SQRT(eigv[iterM]);
			SCAL(&SCALN, &alpha, &eigvec[iterM * M], &incx);
		}
	}
	if (norm != NULL) {
		*norm = normtemp;
	}

	/* X (upper triangle) = V * V' over the last `rank` scaled eigenvectors. */
	uplo = 'U';
	CHAR trans = 'N';
	INT SYRKN = M;
	INT SYRKK = rank;
	alpha = 1;
	INT SYRKLDA = M;
	DOUBLE beta = 0;
	INT SYRKLDC = M;
	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, &eigvec[(M - rank) * M], &SYRKLDA, &beta, X, &SYRKLDC);

/* 	NOTE: alternative 1, somewhat slower than version above.
	INT iterM;
	DOUBLE normtemp = 0;
	memset((void *) X, 0, M * M * sizeof(DOUBLE));
	uplo = 'U';
	INT SYRN = M;
	DOUBLE alpha;
	INT SYRLDA = M;
	INT incx = 1;
	for (iterM = 0; iterM < M; ++iterM) {
		eigv[iterM] = eigv[iterM] - tau;
		if (eigv[iterM] < 0) {
			eigv[iterM] = 0;
		} else {
			normtemp += eigv[iterM];
			alpha = eigv[iterM];
			SYR(&uplo, &SYRN, &alpha, &eigvec[iterM * M], &incx, X, &SYRLDA);
		}
	}
	*norm = normtemp;
 */

	/* Mirror the upper triangle into the lower one. */
	INT iterN;
	for (iterM = 0; iterM < M; ++iterM) {
		for (iterN = iterM + 1; iterN < M; ++iterN) {
			X[iterM * M + iterN] = X[iterN * M + iterM];
		}
	}

	if (eigvFlag == 1) {
		FREE(eigv);
	}

	if (eigvecFlag == 1) {
		FREE(eigvec);
	}

	if (workFlag == 1) {
		FREE(work);
	}
}