int P_zunmqr( const char *side, const char *trans, int M, int N, int K, void *A, int LDA, void *T, void *B, int LDB ) { PLASMA_enum s, t; int info; if (*side == 'L') { s = PlasmaLeft; } else { s = PlasmaRight; } if (*trans == 'C') { t = PlasmaConjTrans; } else { t = PlasmaNoTrans; } #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, T, B, LDB); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, descT, B, LDB); PLASMA_Desc_Destroy(&descT); #endif return(info); }
int P_zgeqrf( int M, int N, void *A, void *T ) { int info; #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_zgeqrf(M, N, A, M, T); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); info = PLASMA_zgeqrf(M, N, A, M, descT); PLASMA_Desc_Destroy(&descT); #endif return(info); }
int main () { int cores = 2; int M = 10; int N = 15; int LDA = 10; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_zgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_zgelqf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_zgelqs(M, N, NRHS, A2, LDA, T, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in ZGELQS example ! \n"); else printf("-- Run of ZGELQS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
/*
 * By-reference wrapper around PLASMA_Alloc_Workspace_zgeqrf.
 *
 * M, N - pointers to the two integer dimensions; dereferenced and passed
 *        by value.
 * T    - forwarded unchanged as the output workspace pointer.
 * INFO - receives the status returned by PLASMA_Alloc_Workspace_zgeqrf.
 */
void PLASMA_ALLOC_WORKSPACE_ZGEQRF(int *M, int *N, PLASMA_Complex64_t **T, int *INFO)
{
    const int rows = *M;
    const int cols = *N;
    const int status = PLASMA_Alloc_Workspace_zgeqrf(rows, cols, T);

    *INFO = status;
}
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int K = min(M, N); int info; int info_ortho, info_factorization; int i,j; int LDAxN = LDA*N; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Q = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_desc *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!Q)){ printf("Out of Memory \n "); return EXIT_SUCCESS; } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_zgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_zgeqrf(M, N, A2, LDA, T); /* Building the economy-size Q */ memset((void*)Q, 0, LDA*N*sizeof(PLASMA_Complex64_t)); for (i = 0; i < K; i++) Q[LDA*i+i] = 1.0; PLASMA_zungqr(M, N, K, A2, LDA, T, Q, LDA); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q); info_factorization = check_factorization(M, N, A1, A2, LDA, Q); printf("--- info %d %d %d \n",info_factorization,info_ortho,info); if ((info_ortho != 0)|(info_factorization != 0)|(info != 0)) printf("-- Error in ZGEQRF example ! \n"); else printf("-- Run of ZGEQRF example successful ! \n"); free(A1); free(A2); free(Q); free(T); PLASMA_Finalize(); return EXIT_SUCCESS; }