Ejemplo n.º 1
0
Archivo: P.c Proyecto: nashp/HiPLARb
/*
 * Return the number of elements needed to hold the T factor of a tiled QR
 * factorization of an M x N matrix.
 *
 * The result is MT * NT * IB * NB, where NB is PLASMA's tile size, IB its
 * inner block size, and MT / NT are the numbers of tile rows / columns
 * needed to cover M and N (rounded up).
 *
 * A throwaway 1x1 workspace is allocated and released around the parameter
 * queries; per the original note this is done so that the autotuned or
 * user-set tile size is available. The real T storage is allocated elsewhere
 * (on the R side, according to that note).
 */
int factorQR_T_size(int M, int N) {
	int tileSize, innerBlock;
	int tileRows, tileCols;
#if CHECK_VERSION_BEQ(2,4,5)
	double *scratch;
#else
	PLASMA_desc *scratch;
#endif

	/* Temporary workspace: only needed so the tile parameters can be read. */
	PLASMA_Alloc_Workspace_dgeqrf(1, 1, &scratch);
	PLASMA_Get(PLASMA_TILE_SIZE, &tileSize);
	PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &innerBlock);
#if CHECK_VERSION_BEQ(2,4,5)
	free(scratch);
#else
	PLASMA_Dealloc_Handle_Tile(&scratch);
#endif

	/* Number of tiles along each dimension; a partial tile counts as one. */
	tileRows = M / tileSize + (M % tileSize != 0);
	tileCols = N / tileSize + (N % tileSize != 0);

	return tileRows * tileCols * innerBlock * tileSize;
}
Ejemplo n.º 2
0
/*
 * JNI entry point: allocate a PLASMA dgeqrf workspace for a
 * numRows x numColumns problem and hand its native address back to Java.
 *
 * The address is stored, as a jlong, in element 0 of workspacePointer.
 * It is written regardless of the allocation status, so it is 0 if the
 * allocator left the pointer untouched.
 *
 * Returns the status code of PLASMA_Alloc_Workspace_dgeqrf.
 */
JNIEXPORT jint JNICALL Java_edu_utk_cs_icl_plasma_PlasmaQr_nativeDoubleAllocateWorkspace
  (JNIEnv *env, jclass caller, jint numRows, jint numColumns, jlongArray workspacePointer){
    double *tBuffer = 0;
    const int status = PLASMA_Alloc_Workspace_dgeqrf(numRows, numColumns, &tBuffer);

    // Publish the native pointer through the first slot of the Java array.
    jlong address = (jlong) tBuffer;
    env->SetLongArrayRegion(workspacePointer, (jsize) 0, (jsize) 1, &address);

    return status;
}
Ejemplo n.º 3
0
Archivo: P.c Proyecto: nashp/HiPLARb
int P_dormqr(
const char *side,
const char *trans,
int M,
int N,
int K,
double *A,
int LDA,
double *T,
double *B,
int LDB
) {
	PLASMA_enum s, t;
	int info;

	if (*side == 'L') {
		s = PlasmaLeft;
	} else {
		s = PlasmaRight;
	}

	if (*trans == 'T') {
		t = PlasmaTrans;
	} else {
		t = PlasmaNoTrans;
	}

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_dormqr(s, t, M, N, K, A, LDA, T, B, LDB);
#else
    PLASMA_desc *descT;
    int NB, IB;
    int MT, NT;

    /* Get autotuned or set tile size; T matrix allocated with R */
    PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
    PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
    PLASMA_Dealloc_Handle_Tile(&descT);

	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

// possibly allocate space for descT in R and keep it in qr object instead
    info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
         IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

	info = PLASMA_dormqr(s, t, M, N, K, A, LDA, descT, B, LDB);

    PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
Ejemplo n.º 4
0
Archivo: P.c Proyecto: nashp/HiPLARb
/*
 * Wrapper around PLASMA_dgeqrf (real double precision QR factorization).
 *
 * M, N : dimensions of A.
 * A    : matrix to factorize; M is also passed as its leading dimension.
 * T    : caller-supplied storage for the T factor. Depending on
 *        CHECK_VERSION_BEQ(2,4,5) it is passed directly, or wrapped in a
 *        PlasmaRealDouble tile descriptor first.
 *
 * Returns the PLASMA status code (0 is treated as success). If the tile
 * descriptor cannot be created, that status is returned and PLASMA_dgeqrf
 * is not called.
 */
int P_dgeqrf(
int M,
int N,
double *A,
double *T
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_dgeqrf(M, N, A, M, T);
#else
	PLASMA_desc *descT = NULL;
	int NB, IB;
	int MT, NT;

	/* Get autotuned or set tile size; T matrix allocated with R.
	 * The 1x1 workspace is only a temporary and is released immediately. */
	PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
	PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
	PLASMA_Dealloc_Handle_Tile(&descT);
	descT = NULL;

	/* Number of tiles along each dimension, rounded up. */
	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

	// possibly allocate space for descT in R and keep it in qr object instead
	info = PLASMA_Desc_Create(&descT, T, PlasmaRealDouble,
	     IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
	if (info != 0) {
		/* Never run the kernel on a descriptor that failed to build. */
		if (descT != NULL)
			PLASMA_Desc_Destroy(&descT);
		return(info);
	}

	info = PLASMA_dgeqrf(M, N, A, M, descT);

	PLASMA_Desc_Destroy(&descT);
#endif

	return(info);

}
Ejemplo n.º 5
0
Archivo: P.c Proyecto: nashp/HiPLARb
/*
 * Wrapper around PLASMA_zungqr (complex double precision).
 *
 * M, N, K, A, LDA, Q, LDQ are forwarded unchanged to PLASMA_zungqr.
 * T : caller-supplied storage for the T factor. Depending on
 *     CHECK_VERSION_BEQ(2,4,5) it is passed directly, or wrapped in a
 *     PlasmaComplexDouble tile descriptor first.
 *
 * Returns the PLASMA status code (0 is treated as success). If the tile
 * descriptor cannot be created, that status is returned and PLASMA_zungqr
 * is not called.
 */
int P_zungqr(
int M,
int N,
int K,
void *A,
int LDA,
void *T,
void *Q,
int LDQ
) {
	int info;

#if CHECK_VERSION_BEQ(2,4,5)
	info = PLASMA_zungqr(M, N, K, A, LDA, T, Q, LDQ);
#else
	PLASMA_desc *descT = NULL;
	int NB, IB;
	int MT, NT;

	/* Get autotuned or set tile size; T matrix allocated with R.
	 * The 1x1 workspace is only a temporary and is released immediately.
	 * NOTE(review): the real (dgeqrf) workspace routine is used here even
	 * though this is the complex path; confirm that is intended. */
	PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT);
	PLASMA_Get(PLASMA_TILE_SIZE, &NB);
	PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB);
	PLASMA_Dealloc_Handle_Tile(&descT);
	descT = NULL;

	/* Number of tiles along each dimension, rounded up. */
	MT = (M%NB==0) ? (M/NB) : (M/NB+1);
	NT = (N%NB==0) ? (N/NB) : (N/NB+1);

	// possibly allocate space for descT in R and keep it in qr object instead
	info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble,
	     IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
	if (info != 0) {
		/* Never run the kernel on a descriptor that failed to build. */
		if (descT != NULL)
			PLASMA_Desc_Destroy(&descT);
		return(info);
	}

	info = PLASMA_zungqr(M, N, K, A, LDA, descT, Q, LDQ);

	PLASMA_Desc_Destroy(&descT);
#endif

	return(info);
}
Ejemplo n.º 6
0
/*
 * Pointer-argument variant of PLASMA_Alloc_Workspace_dgeqrf: the dimensions
 * are read through M and N, the workspace pointer is stored through T, and
 * the status code is written to *INFO instead of being returned.
 * (Presumably a Fortran-callable binding - confirm against the build.)
 */
void PLASMA_ALLOC_WORKSPACE_DGEQRF(int *M, int *N, double **T, int *INFO)
{
    const int rows = *M;
    const int cols = *N;

    *INFO = PLASMA_Alloc_Workspace_dgeqrf(rows, cols, T);
}
Ejemplo n.º 7
0
/*
 * DGEQRS example: factorize a random 15 x 10 matrix with PLASMA_dgeqrf,
 * solve against 5 right-hand sides with PLASMA_dgeqrs, and verify the
 * result with check_solution.
 *
 * Every stage's status is honoured: a failed workspace allocation or
 * factorization skips the later stages and is reported as an error.
 * The process exit status is 0 on success and 1 on any failure.
 */
int main ()
{

    int cores = 2;
    int M     = 15;
    int N     = 10;
    int LDA   = 15;
    int NRHS  = 5;
    int LDB   = 15;

    int info;
    int info_solution = 0;
    int failed;
    int i,j;
    int LDAxN = LDA*N;
    int LDBxNRHS = LDB*NRHS;

    double *A1 = malloc(LDAxN * sizeof *A1);
    double *A2 = malloc(LDAxN * sizeof *A2);
    double *B1 = malloc(LDBxNRHS * sizeof *B1);
    double *B2 = malloc(LDBxNRHS * sizeof *B2);
    /* NULL so that free(T) below is safe even if allocation never succeeded. */
    double *T = NULL;

    /* Check if unable to allocate memory */
    if ((!A1)||(!A2)||(!B1)||(!B2)){
        printf("Out of Memory \n ");
        exit(1);
    }

    /* Plasma Initialization */
    PLASMA_Init(cores);
    printf("-- PLASMA is initialized to run on %d cores. \n",cores);

    /* Allocate T */
    info = PLASMA_Alloc_Workspace_dgeqrf(M, N, &T);

    if (info == 0) {
        /* Initialize A1 and A2 (A2 is the working copy that gets factorized) */
        LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1);
        for (i = 0; i < M; i++)
            for (j = 0; j < N; j++)
                A2[LDA*j+i] = A1[LDA*j+i] ;

        /* Initialize B1 and B2 (B2 is overwritten by the solve) */
        LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1);
        for (i = 0; i < M; i++)
            for (j = 0; j < NRHS; j++)
                 B2[LDB*j+i] = B1[LDB*j+i] ;

        /* Factorization QR of the matrix A2 */
        info = PLASMA_dgeqrf(M, N, A2, LDA, T);
    }

    /* Solve the problem, only if the factorization succeeded */
    if (info == 0)
        info = PLASMA_dgeqrs(M, N, NRHS, A2, LDA, T, B2, LDB);

    /* Check the solution, only if there is one to check */
    if (info == 0)
        info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB);

    failed = (info_solution != 0) || (info != 0);
    if (failed)
       printf("-- Error in DGEQRS example ! \n");
    else
       printf("-- Run of DGEQRS example successful ! \n");

    free(A1); free(A2); free(B1); free(B2); free(T);

    PLASMA_Finalize();

    exit(failed ? 1 : 0);
}
Ejemplo n.º 8
0
/*
 * Benchmark driver for PLASMA_dgeqrf on square matrices.
 *
 * Reads from stdin: number of repeats, blocksize, problem-size parameters
 * (first, increment, count) and number of threads. For each problem size it
 * times PLASMA_dgeqrf n_repeats times on a fresh random matrix, keeps the
 * best time, and appends "size GFlops" rows to
 * OUTPUT_PATH/OUTPUT_FILE_output.m.
 *
 * Returns 0 on success and 1 on invalid input, an over-long output path,
 * a file that cannot be opened, or a failed workspace allocation.
 */
int main( int argc, char *argv[] )
{
    int i, j;
    int size;
    int n_threads;
    int n_repeats;
    int n_trials;
    int nb_alg;
    int increment;
    int begin;
    int path_len;
    int rc = 0;

    FLA_Datatype datatype = FLA_DOUBLE;

    FLA_Obj A;

    double b_norm_value = 0.0;
    double dtime;
    double *dtimes;
    double *flops;
    double *T;

    char output_file_m[100];

    FILE *fpp;

    fprintf( stdout, "%c Enter number of repeats: ", '%' );
    /* At least one repeat is required: dtimes[0] seeds the minimum below. */
    if ( scanf( "%d", &n_repeats ) != 1 || n_repeats < 1 )
    {
        fprintf( stderr, "Invalid number of repeats\n" );
        return 1;
    }
    fprintf( stdout, "%c %d\n", '%', n_repeats );

    fprintf( stdout, "%c Enter blocksize: ", '%' );
    if ( scanf( "%d", &nb_alg ) != 1 )
    {
        fprintf( stderr, "Invalid blocksize\n" );
        return 1;
    }
    fprintf( stdout, "%c %d\n", '%', nb_alg );

    fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' );
    if ( scanf( "%d%d%d", &begin, &increment, &n_trials ) != 3 || n_trials < 0 )
    {
        fprintf( stderr, "Invalid problem size parameters\n" );
        return 1;
    }
    fprintf( stdout, "%c %d %d %d\n", '%', begin, increment, n_trials );

    fprintf( stdout, "%c Enter number of threads: ", '%' );
    if ( scanf( "%d", &n_threads ) != 1 )
    {
        fprintf( stderr, "Invalid number of threads\n" );
        return 1;
    }
    fprintf( stdout, "%c %d\n\n", '%', n_threads );

    /* Bounded formatting: refuse a truncated path rather than overflowing. */
    path_len = snprintf( output_file_m, sizeof output_file_m,
                         "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE );
    if ( path_len < 0 || (size_t) path_len >= sizeof output_file_m )
    {
        fprintf( stderr, "Output file path is too long\n" );
        return 1;
    }

    fpp = fopen( output_file_m, "a" );
    if ( fpp == NULL )
    {
        fprintf( stderr, "Cannot open output file %s\n", output_file_m );
        return 1;
    }

    fprintf( fpp, "%%\n" );
    fprintf( fpp, "%% | Matrix Size |    PLASMA   |\n" );
    fprintf( fpp, "%% |    n x n    |    GFlops   |\n" );
    fprintf( fpp, "%% -----------------------------\n" );

    FLA_Init();
    PLASMA_Init( n_threads );

    /* Use the user-supplied blocksize instead of PLASMA's autotuned one. */
    PLASMA_Disable( PLASMA_AUTOTUNING );
    PLASMA_Set( PLASMA_TILE_SIZE, nb_alg );
    PLASMA_Set( PLASMA_INNER_BLOCK_SIZE, nb_alg / 4 );

    dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) );
    flops  = ( double * ) FLA_malloc( n_trials  * sizeof( double ) );

    fprintf( fpp, "%s = [\n", OUTPUT_FILE );

    for ( i = 0; i < n_trials; i++ )
    {
        size = begin + i * increment;

        FLA_Obj_create( datatype, size, size, 0, 0, &A );

        for ( j = 0; j < n_repeats; j++ )
        {
            FLA_Random_matrix( A );

            if ( PLASMA_Alloc_Workspace_dgeqrf( size, size, &T ) != 0 )
            {
                fprintf( stderr, "Workspace allocation failed for size %d\n", size );
                FLA_Obj_free( &A );
                rc = 1;
                goto cleanup;
            }

            /* Only the factorization itself is timed. */
            dtime = FLA_Clock();

            PLASMA_dgeqrf( size, size, FLA_Obj_buffer_at_view( A ), size, T );

            dtime = FLA_Clock() - dtime;
            dtimes[j] = dtime;

            free( T );
        }

        /* Best (smallest) time over all repeats. */
        dtime = dtimes[0];
        for ( j = 1; j < n_repeats; j++ )
            dtime = min( dtime, dtimes[j] );
        flops[i] = 4.0 / 3.0 * size * size * size / dtime / 1e9;

        fprintf( fpp, "   %d   %6.3f\n", size, flops[i] );

        printf( "Time: %e  |  GFlops: %6.3f\n",
                dtime, flops[i] );
        printf( "Matrix size: %d x %d  |  nb_alg: %d\n",
                size, size, nb_alg );
        printf( "Norm of difference: %le\n\n", b_norm_value );

        FLA_Obj_free( &A );
    }

cleanup:
    /* Close the matrix literal even on failure so the .m file stays well-formed. */
    fprintf( fpp, "];\n" );

    fflush( fpp );
    fclose( fpp );

    FLA_free( dtimes );
    FLA_free( flops );

    PLASMA_Finalize();
    FLA_Finalize();

    return rc;
}