예제 #1
0
파일: P.c 프로젝트: nashp/HiPLARb
int P_zunmqr(
const char *side,
const char *trans,
int M,
int N,
int K,
void *A,
int LDA,
void *T,
void *B,
int LDB
) {
	PLASMA_enum s, t;
	int info;

	if (*side == 'L') {
		s = PlasmaLeft;
	} else {
		s = PlasmaRight;
	}

	if (*trans == 'C') {
		t = PlasmaConjTrans;
	} else {
		t = PlasmaNoTrans;
	}

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, T, B, LDB);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

	info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, descT, B, LDB);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
예제 #2
0
파일: P.c 프로젝트: nashp/HiPLARb
int P_zgeqrf(
int M,
int N,
void *A,
void *T
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zgeqrf(M, N, A, M, T);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

	info = PLASMA_zgeqrf(M, N, A, M, descT);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);

}
예제 #3
0
int main ()
{

    int cores = 2;
    int M     = 10;
    int N     = 15;
    int LDA   = 10;
    int NRHS  = 5;
    int LDB   = 15;

    int info;
    int info_solution;
    int i,j;
    int LDAxN = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *T;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Allocate T */
    PLASMA_Alloc_Workspace_zgeqrf(M, N, &T);

    /* Initialize A1 and A2 */
    LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1);
    for (i = 0; i < M; i++)
        for (j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i] ;

    /* Initialize B1 and B2 */
    LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1);
    for (i = 0; i < M; i++)
        for (j = 0; j < NRHS; j++)
             B2[LDB*j+i] = B1[LDB*j+i] ;

    /* Factorization QR of the matrix A2 */
    info = PLASMA_zgelqf(M, N, A2, LDA, T);

    /* Solve the problem */
    info = PLASMA_zgelqs(M, N, NRHS, A2, LDA, T, B2, LDB);

    /* Check the solution */
    info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB);

    if ((info_solution != 0)|(info != 0))
       printf("-- Error in ZGELQS example ! \n");
    else
       printf("-- Run of ZGELQS example successful ! \n");

    free(A1); free(A2); free(B1); free(B2); free(T);

    PLASMA_Finalize();

    exit(0);
}
예제 #4
0
void PLASMA_ALLOC_WORKSPACE_ZGEQRF(int *M, int *N, PLASMA_Complex64_t **T, int *INFO)
{   *INFO = PLASMA_Alloc_Workspace_zgeqrf(*M, *N, T); }
예제 #5
0
int main ()
{

    int cores = 2;
    int M     = 15;
    int N     = 10;
    int LDA   = 15;
    int K = min(M, N);
    int info;
    int info_ortho, info_factorization;
    int i,j;
    int LDAxN = LDA*N;

    PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
    PLASMA_Complex64_t *Q  = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t));
    PLASMA_desc *T;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!Q)){
        printf("Out of Memory \n ");
        return EXIT_SUCCESS;
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Allocate T */
    PLASMA_Alloc_Workspace_zgeqrf(M, N, &T);

    /* Initialize A1 and A2 */
    LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1);
    for (i = 0; i < M; i++)
        for (j = 0; j < N; j++)
            A2[LDA*j+i] = A1[LDA*j+i] ;

    /* Factorization QR of the matrix A2 */
    info = PLASMA_zgeqrf(M, N, A2, LDA, T);

    /* Building the economy-size Q */
    memset((void*)Q, 0, LDA*N*sizeof(PLASMA_Complex64_t));
    for (i = 0; i < K; i++)
        Q[LDA*i+i] = 1.0;

    PLASMA_zungqr(M, N, K, A2, LDA, T, Q, LDA);

    /* Check the orthogonality, factorization and the solution */
    info_ortho = check_orthogonality(M, N, LDA, Q);
    info_factorization = check_factorization(M, N, A1, A2, LDA, Q);
    printf("--- info %d %d %d \n",info_factorization,info_ortho,info);
    if ((info_ortho != 0)|(info_factorization != 0)|(info != 0))
       printf("-- Error in ZGEQRF example ! \n");
    else
       printf("-- Run of ZGEQRF example successful ! \n");

    free(A1); free(A2); free(Q); free(T);

    PLASMA_Finalize();

    return EXIT_SUCCESS;
}