int P_dgeqrf( int M, int N, double *A, double *T ) { int info; #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_dgeqrf(M, N, A, M, T); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaRealDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); /* printf("MB=%d NB=%d BSIZ=%d LM=%d LN=%d M=%d N=%d MT=%d NT=%d\n", descT->mb, descT->nb, descT->bsiz, descT->lm, descT->ln, descT->m, descT->n, descT->mt, descT->nt); */ info = PLASMA_dgeqrf(M, N, A, M, descT); PLASMA_Desc_Destroy(&descT); #endif return(info); }
/*
 * By-reference wrapper around PLASMA_dgeqrf: every scalar argument is taken
 * through a pointer and the status is written to *INFO instead of being
 * returned.
 * NOTE(review): presumably the Fortran-callable entry point -- confirm.
 *
 * M, N, LDA  pointers to the dimensions / leading dimension to forward.
 * A          matrix buffer, forwarded unchanged.
 * T          pointer to the workspace pointer; *T is what gets forwarded.
 * INFO       receives the value returned by PLASMA_dgeqrf.
 */
void PLASMA_DGEQRF(int *M, int *N, double *A, int *LDA, double **T, int *INFO)
{
    const int rows      = *M;
    const int cols      = *N;
    const int lead_dim  = *LDA;
    double   *workspace = *T;

    const int status = PLASMA_dgeqrf(rows, cols, A, lead_dim, workspace);

    *INFO = status;
}
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; double *A1 = (double *)malloc(LDA*N*sizeof(double)); double *A2 = (double *)malloc(LDA*N*sizeof(double)); double *B1 = (double *)malloc(LDB*NRHS*sizeof(double)); double *B2 = (double *)malloc(LDB*NRHS*sizeof(double)); double *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_dgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_dgeqrf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_dgeqrs(M, N, NRHS, A2, LDA, T, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in DGEQRS example ! \n"); else printf("-- Run of DGEQRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
int main( int argc, char *argv[] ) { int i, j, size, n_threads, n_repeats, n_trials, nb_alg, increment, begin; FLA_Datatype datatype = FLA_DOUBLE; FLA_Obj A; double b_norm_value = 0.0, dtime, *dtimes, *flops, *T; char output_file_m[100]; FILE *fpp; fprintf( stdout, "%c Enter number of repeats: ", '%' ); scanf( "%d", &n_repeats ); fprintf( stdout, "%c %d\n", '%', n_repeats ); fprintf( stdout, "%c Enter blocksize: ", '%' ); scanf( "%d", &nb_alg ); fprintf( stdout, "%c %d\n", '%', nb_alg ); fprintf( stdout, "%c Enter problem size parameters: first, inc, num: ", '%' ); scanf( "%d%d%d", &begin, &increment, &n_trials ); fprintf( stdout, "%c %d %d %d\n", '%', begin, increment, n_trials ); fprintf( stdout, "%c Enter number of threads: ", '%' ); scanf( "%d", &n_threads ); fprintf( stdout, "%c %d\n\n", '%', n_threads ); sprintf( output_file_m, "%s/%s_output.m", OUTPUT_PATH, OUTPUT_FILE ); fpp = fopen( output_file_m, "a" ); fprintf( fpp, "%%\n" ); fprintf( fpp, "%% | Matrix Size | PLASMA |\n" ); fprintf( fpp, "%% | n x n | GFlops |\n" ); fprintf( fpp, "%% -----------------------------\n" ); FLA_Init(); PLASMA_Init( n_threads ); PLASMA_Disable( PLASMA_AUTOTUNING ); PLASMA_Set( PLASMA_TILE_SIZE, nb_alg ); PLASMA_Set( PLASMA_INNER_BLOCK_SIZE, nb_alg / 4 ); dtimes = ( double * ) FLA_malloc( n_repeats * sizeof( double ) ); flops = ( double * ) FLA_malloc( n_trials * sizeof( double ) ); fprintf( fpp, "%s = [\n", OUTPUT_FILE ); for ( i = 0; i < n_trials; i++ ) { size = begin + i * increment; FLA_Obj_create( datatype, size, size, 0, 0, &A ); for ( j = 0; j < n_repeats; j++ ) { FLA_Random_matrix( A ); PLASMA_Alloc_Workspace_dgeqrf( size, size, &T ); dtime = FLA_Clock(); PLASMA_dgeqrf( size, size, FLA_Obj_buffer_at_view( A ), size, T ); dtime = FLA_Clock() - dtime; dtimes[j] = dtime; free( T ); } dtime = dtimes[0]; for ( j = 1; j < n_repeats; j++ ) dtime = min( dtime, dtimes[j] ); flops[i] = 4.0 / 3.0 * size * size * size / dtime / 1e9; fprintf( fpp, " %d %6.3f\n", size, flops[i] ); 
printf( "Time: %e | GFlops: %6.3f\n", dtime, flops[i] ); printf( "Matrix size: %d x %d | nb_alg: %d\n", size, size, nb_alg ); printf( "Norm of difference: %le\n\n", b_norm_value ); FLA_Obj_free( &A ); } fprintf( fpp, "];\n" ); fflush( fpp ); fclose( fpp ); FLA_free( dtimes ); FLA_free( flops ); PLASMA_Finalize(); FLA_Finalize(); return 0; }
/*
 * JNI entry point that forwards to PLASMA_dgeqrf.
 *
 * env               JNI environment, used to resolve the buffer address.
 * caller            calling Java class (unused).
 * numRows           forwarded as the first (row count) argument.
 * numColumns        forwarded as the second (column count) argument.
 * buffer            Java buffer object holding the matrix; its address is
 *                   obtained with GetDirectBufferAddress.
 * leadingDimension  forwarded as the leading dimension.
 * workspacePointer  native pointer to the PLASMA workspace, carried through
 *                   Java as a jlong.
 *
 * Returns the status of PLASMA_dgeqrf, or -3 when no native address can be
 * obtained for `buffer`.
 */
JNIEXPORT jint JNICALL Java_edu_utk_cs_icl_plasma_PlasmaQr_nativeDgeqrf
  (JNIEnv *env, jclass caller, jint numRows, jint numColumns, jobject buffer,
   jint leadingDimension, jlong workspacePointer)
{
    double* plasmaWorkspace = (double*) workspacePointer;
    double* matrix = (double*) env->GetDirectBufferAddress(buffer);

    /* GetDirectBufferAddress yields a null pointer when the object is not a
     * direct buffer (or direct access is unsupported).  Report that as an
     * illegal third argument -- the matrix is the third argument of
     * PLASMA_dgeqrf -- instead of passing a null matrix to the library. */
    if (!matrix)
        return (jint) -3;

    return (jint) PLASMA_dgeqrf(numRows, numColumns, matrix, leadingDimension, plasmaWorkspace);
}