int main(int argc, char *argv[]){ #ifndef COMPLEX char *trans[] = {"T", "N"}; #else char *trans[] = {"C", "N"}; #endif char *uplo[] = {"U", "L"}; FLOAT alpha[] = {1.0, 0.0}; FLOAT beta [] = {0.0, 0.0}; FLOAT *a, *b; blasint m, i, j, info, uplos; int from = 1; int to = 200; int step = 1; FLOAT maxerr; struct timeval start, stop; double time1; argc--;argv++; if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;} fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } for(m = from; m <= to; m += step){ fprintf(stderr, "M = %6d : ", (int)m); for (uplos = 0; uplos < 2; uplos ++) { #ifndef COMPLEX if (uplos & 1) { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) a[i + j * m] = 0.; a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.; for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5; } } else { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5; a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.; for(i = j + 1; i < m; i++) a[i + j * m] = 0.; } } #else if (uplos & 1) { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) { a[(i + j * m) * 2 + 0] = 0.; a[(i + j * m) * 2 + 1] = 0.; } a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; a[(j + j * m) * 2 + 1] = 0.; for(i = j + 1; i < m; i++) { a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5; a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5; } } } else { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) { a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5; a[(i + j * m) * 2 + 1] = ((double) 
rand() / (double) RAND_MAX) - 0.5; } a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; a[(j + j * m) * 2 + 1] = 0.; for(i = j + 1; i < m; i++) { a[(i + j * m) * 2 + 0] = 0.; a[(i + j * m) * 2 + 1] = 0.; } } } #endif SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); gettimeofday( &start, (struct timezone *)0); POTRF(uplo[uplos], &m, b, &m, &info); gettimeofday( &stop, (struct timezone *)0); if (info != 0) { fprintf(stderr, "Info = %d\n", info); exit(1); } time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; maxerr = 0.; if (!(uplos & 1)) { for (j = 0; j < m; j++) { for(i = 0; i <= j; i++) { #ifndef COMPLEX if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]); #else if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]); if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]); #endif } } } else { for (j = 0; j < m; j++) { for(i = j; i < m; i++) { #ifndef COMPLEX if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]); #else if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]); if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]); #endif } } } fprintf(stderr, #ifdef XDOUBLE " %Le %10.3f MFlops", maxerr, #else " %e %10.3f MFlops", maxerr, #endif getmflops(COMPSIZE * COMPSIZE, m, time1)); if (maxerr > 1.e-3) { fprintf(stderr, "Hmm, probably it has bug.\n"); exit(1); } } fprintf(stderr, "\n"); } return 0; }
int main(int argc, char *argv[]){ FLOAT *a, *c; FLOAT alpha[] = {1.0, 1.0}; FLOAT beta [] = {1.0, 1.0}; char *p; char uplo='U'; char trans='N'; if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; blasint m, i, j; int from = 1; int to = 200; int step = 1; struct timeval start, stop; double time1; argc--;argv++; if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;} fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } #ifdef linux srandom(getpid()); #endif fprintf(stderr, " SIZE Flops\n"); for(m = from; m <= to; m += step) { fprintf(stderr, " %6d : ", (int)m); for(j = 0; j < m; j++){ for(i = 0; i < m * COMPSIZE; i++){ a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; } } gettimeofday( &start, (struct timezone *)0); SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); gettimeofday( &stop, (struct timezone *)0); time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; gettimeofday( &start, (struct timezone *)0); fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); } return 0; }
void l2ls_learn_basis_dual(DOUBLE *Dopt, DOUBLE *Dorig, DOUBLE *X, DOUBLE *S, DOUBLE l2norm, INT length, INT N, INT K, INT numSamples) { DOUBLE *SSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE)); CHAR uplo = 'U'; CHAR trans = 'N'; INT SYRKN = K; INT SYRKK = numSamples; DOUBLE alpha = 1; INT SYRKLDA = K; DOUBLE beta = 0; INT SYRKLDC = K; SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, S, &SYRKLDA, &beta, SSt, &SYRKLDC); DOUBLE *XSt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE)); CHAR transa = 'N'; CHAR transb = 'T'; INT GEMMM = N; INT GEMMN = K; INT GEMMK = numSamples; alpha = 1; INT GEMMLDA = N; INT GEMMLDB = K; beta = 0; INT GEMMLDC = N; GEMM(&transa, &transb, &GEMMM, &GEMMN, &GEMMK, &alpha, X, &GEMMLDA, S, &GEMMLDB, &beta, XSt, &GEMMLDC); DOUBLE *SXt = (DOUBLE *) MALLOC(N * K * sizeof(DOUBLE)); transpose(XSt, SXt, N, K); INT iterK; DOUBLE *dualLambdaOrig = (DOUBLE *) MALLOC(K * sizeof(DOUBLE)); if (Dorig == NULL) { srand(time(NULL)); for (iterK = 0; iterK < K; ++iterK) { dualLambdaOrig[iterK] = 10 * (DOUBLE) rand() / (DOUBLE) RAND_MAX; } } else { INT maxNK = IMAX(N, K); DOUBLE *B = (DOUBLE *) MALLOC(maxNK * maxNK * sizeof(DOUBLE)); for (iterK = 0; iterK < K; ++iterK) { datacpy(&B[iterK * maxNK], &XSt[iterK * N], K); } INT GELSYM = N; INT GELSYN = K; INT GELSYNRHS = K; INT GELSYLDA = N; INT GELSYLDB = maxNK; INT *jpvt = (INT *) MALLOC(K * sizeof(INT)); DOUBLE rcond; INT rank; INT lwork = -1; DOUBLE work_temp; DOUBLE *work; INT INFO; GELSY(&GELSYM, &GELSYN, &GELSYNRHS, Dorig, &GELSYLDA, B, &GELSYLDB, jpvt, &rcond, &rank, &work_temp, &lwork, &INFO); lwork = (INT) work_temp; work = (DOUBLE*) MALLOC(lwork * sizeof(DOUBLE)); GELSY(&GELSYM, &GELSYN, &GELSYNRHS, Dorig, &GELSYLDA, XSt, &GELSYLDB, jpvt, &rcond, &rank, work, &lwork, &INFO); for (iterK = 0; iterK < K; ++iterK) { dualLambdaOrig[K] = B[iterK * K + iterK] - SSt[iterK * K + iterK]; } FREE(work); FREE(B); FREE(jpvt); } DOUBLE *SXtXSt = (DOUBLE *) MALLOC(K * K * sizeof(DOUBLE)); uplo = 'U'; trans = 'N'; SYRKN = K; SYRKK = 
N; alpha = 1; SYRKLDA = K; beta = 0; SYRKLDC = K; SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, SXt, &SYRKLDA, &beta, SXtXSt, &SYRKLDC); DOUBLE c = SQR(l2norm); CHAR norm = 'F'; INT LANGEM = N; INT LANGEN = numSamples; INT LANGELDA = N; DOUBLE trXXt = LANGE(&norm, &LANGEM, &LANGEN, X, &LANGELDA, NULL); trXXt = SQR(trXXt); /* DOUBLE *dualLambdaOpt = (DOUBLE *) MALLOC(K * sizeof(DOUBLE)); */ DOUBLE *dualLambdaOpt = XSt; minimize_dual(dualLambdaOpt, dualLambdaOrig, length, SSt, SXt, SXtXSt, trXXt, c, N, K); for (iterK = 0; iterK < K; ++iterK) { SSt[iterK * K + iterK] += dualLambdaOpt[iterK]; } uplo = 'U'; INT POTRSN = K; INT POTRSLDA = K; INT INFO; POTRF(&uplo, &POTRSN, SSt, &POTRSLDA, &INFO); INT POTRSNRHS = N; INT POTRSLDB = K; POTRS(&uplo, &POTRSN, &POTRSNRHS, SSt, &POTRSLDA, SXt, &POTRSLDB, &INFO); transpose(SXt, Dopt, K, N); FREE(SSt); FREE(XSt); FREE(SXt); FREE(dualLambdaOrig); FREE(SXtXSt); }
int main(int argc, char *argv[]){ #ifndef COMPLEX char *trans[] = {"T", "N"}; #else char *trans[] = {"C", "N"}; #endif char *uplo[] = {"U", "L"}; FLOAT alpha[] = {1.0, 0.0}; FLOAT beta [] = {0.0, 0.0}; FLOAT *a, *b; char *p; char btest = 'F'; blasint m, i, j, info, uplos=0; double flops; int from = 1; int to = 200; int step = 1; struct timeval start, stop; double time1; argc--;argv++; if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} if (argc > 0) { step = atol(*argv); argc--; argv++;} if ((p = getenv("OPENBLAS_UPLO"))) if (*p == 'L') uplos=1; if ((p = getenv("OPENBLAS_TEST"))) btest=*p; fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]); if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ fprintf(stderr,"Out of Memory!!\n");exit(1); } for(m = from; m <= to; m += step){ #ifndef COMPLEX if (uplos & 1) { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) a[i + j * m] = 0.; a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.; for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5; } } else { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5; a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.; for(i = j + 1; i < m; i++) a[i + j * m] = 0.; } } #else if (uplos & 1) { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) { a[(i + j * m) * 2 + 0] = 0.; a[(i + j * m) * 2 + 1] = 0.; } a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; a[(j + j * m) * 2 + 1] = 0.; for(i = j + 1; i < m; i++) { a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5; a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5; } } } else { for (j = 0; j < m; j++) { for(i = 0; i < j; i++) { a[(i + j * m) * 2 + 0] = 
((double) rand() / (double) RAND_MAX) - 0.5; a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5; } a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; a[(j + j * m) * 2 + 1] = 0.; for(i = j + 1; i < m; i++) { a[(i + j * m) * 2 + 0] = 0.; a[(i + j * m) * 2 + 1] = 0.; } } } #endif SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); gettimeofday( &start, (struct timezone *)0); POTRF(uplo[uplos], &m, b, &m, &info); gettimeofday( &stop, (struct timezone *)0); if (info != 0) { fprintf(stderr, "Potrf info = %d\n", info); exit(1); } time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6; if ( btest == 'S' ) { for(j = 0; j < to; j++){ for(i = 0; i < to * COMPSIZE; i++){ a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; } } gettimeofday( &start, (struct timezone *)0); POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); gettimeofday( &stop, (struct timezone *)0); if (info != 0) { fprintf(stderr, "Potrs info = %d\n", info); exit(1); } time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6; } if ( btest == 'I' ) { gettimeofday( &start, (struct timezone *)0); POTRI(uplo[uplos], &m, b, &m, &info); gettimeofday( &stop, (struct timezone *)0); if (info != 0) { fprintf(stderr, "Potri info = %d\n", info); exit(1); } time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6; } fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest); } return 0; }
/*
 * Replace the symmetric M x M matrix X (column-major) by the matrix rebuilt
 * from its `rank` largest eigenvalues, with negative eigenvalues clipped to
 * zero.
 *
 * Parameters:
 *   X       M x M input, overwritten with the result (both triangles filled).
 *   norm    if non-NULL, receives the sum of the retained eigenvalues.
 *   rank    number of trailing eigenpairs to keep; clamped to [0, M].
 *   M       matrix order.
 *   eigv    optional buffer of M entries; NULL makes the routine allocate
 *           and free its own. On return a caller buffer holds the
 *           thresholded eigenvalues.
 *   eigvec  optional M x M buffer; NULL makes the routine allocate and free
 *           its own. On return a caller buffer holds the eigenvectors, with
 *           the retained columns scaled by the square root of their
 *           eigenvalue.
 *   work    SYEV workspace, used when lwork > 0.
 *   lwork   -1: workspace query only -- SYEV is called once and the routine
 *               returns without touching X;
 *            0: the routine queries and allocates the workspace itself;
 *           otherwise: length of the caller-provided `work`.
 *
 * A non-zero SYEV info is reported through PRINTF/ERROR.
 */
void nuclear_psd_hard_thresholding(DOUBLE *X, DOUBLE *norm, INT rank, INT M, DOUBLE *eigv,
					DOUBLE *eigvec, DOUBLE *work, INT lwork) {

	CHAR jobz = 'V';
	CHAR uplo = 'U';
	INT SYEVN = M;
	INT SYEVLDA = M;
	INT info;

	if (lwork == - 1) {
		/* Pure workspace query on behalf of the caller. */
		SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info);
		return;
	}

	/* Keep the column offset and the SYRK inner dimension below in range. */
	if (rank > M) {
		rank = M;
	}
	if (rank < 0) {
		rank = 0;
	}

	INT eigvFlag = 0;
	if (eigv == NULL) {
		eigv = (DOUBLE *) MALLOC(M * 1 * sizeof(DOUBLE));
		eigvFlag = 1;
	}

	INT eigvecFlag = 0;
	if (eigvec == NULL) {
		eigvec = (DOUBLE *) MALLOC(M * M * sizeof(DOUBLE));
		eigvecFlag = 1;
	}

	/* SYEV works in place, so decompose a copy of X. */
	datacpy(eigvec, X, M * M);

	INT workFlag = 0;
	if (lwork == 0) {
		DOUBLE workTemp;
		lwork = -1;
		SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, &workTemp, &lwork, &info);
		if (info != 0) {
			PRINTF("Error, INFO = %d. ", info);
			ERROR("LAPACK error.");
		}

		lwork = (INT) workTemp;
		work = (DOUBLE *) MALLOC(lwork * 1 * sizeof(DOUBLE));
		workFlag = 1;
	}

	// TODO: Perhaps replace with SYEVR?
	SYEV(&jobz, &uplo, &SYEVN, eigvec, &SYEVLDA, eigv, work, &lwork, &info);
	if (info != 0) {
		PRINTF("Error, INFO = %d. ", info);
		ERROR("LAPACK error.");
	}

	/*
	 * Threshold the spectrum. Columns M - rank .. M - 1 of eigvec feed the
	 * SYRK below, so each of them must end up as sqrt(lambda) * v; a column
	 * whose eigenvalue is clipped to zero therefore has to be zeroed as well,
	 * otherwise it would contribute an unscaled v * v' term to X.
	 */
	INT iterM;
	DOUBLE normtemp = 0;
	DOUBLE alpha;
	INT SCALN = M;
	INT incx = 1;
	for (iterM = 0; iterM < M; ++iterM) {
		if ((eigv[iterM] < 0) || (iterM < M - rank)) {
			eigv[iterM] = 0;
			if (iterM >= M - rank) {
				alpha = 0;
				SCAL(&SCALN, &alpha, &eigvec[iterM * M], &incx);
			}
		} else {
			normtemp += eigv[iterM];
			alpha = SQRT(eigv[iterM]);
			SCAL(&SCALN, &alpha, &eigvec[iterM * M], &incx);
		}
	}
	if (norm != NULL) {
		*norm = normtemp;
	}

	/* X (upper triangle) = E * E', E being the last `rank` scaled columns. */
	uplo = 'U';
	CHAR trans = 'N';
	INT SYRKN = M;
	INT SYRKK = rank;
	alpha = 1;
	INT SYRKLDA = M;
	DOUBLE beta = 0;
	INT SYRKLDC = M;
	SYRK(&uplo, &trans, &SYRKN, &SYRKK, &alpha, &eigvec[(M - rank) * M], &SYRKLDA, &beta, X, &SYRKLDC);

	/*
	 * NOTE: an alternative that accumulates one rank-one SYR update per
	 * retained eigenpair was somewhat slower than the SYRK version above.
	 */

	/* Mirror the computed upper triangle into the lower one. */
	INT iterN;
	for (iterM = 0; iterM < M; ++iterM) {
		for (iterN = iterM + 1; iterN < M; ++iterN) {
			X[iterM * M + iterN] = X[iterN * M + iterM];
		}
	}

	if (eigvFlag == 1) {
		FREE(eigv);
	}

	if (eigvecFlag == 1) {
		FREE(eigvec);
	}

	if (workFlag == 1) {
		FREE(work);
	}
}