extern "C" magma_int_t magma_ssytrd_sb2st(magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz, float *A, magma_int_t lda, float *D, float *E, float *V, magma_int_t ldv, float *TAU, magma_int_t compT, float *T, magma_int_t ldt) { /* -- MAGMA (version 1.4.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver August 2013 Purpose ======= Arguments ========= THREADS (input) INTEGER Specifies the number of pthreads used. THREADS > 0 UPLO (input) CHARACTER*1 = 'U': Upper triangles of A is stored; = 'L': Lower triangles of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NB (input) INTEGER The order of the band matrix A. N >= NB >= 0. VBLKSIZ (input) INTEGER The size of the block of householder vectors applied at once. A (input/workspace) REAL array, dimension (LDA, N) On entry the band matrix stored in the following way: LDA (input) INTEGER The leading dimension of the array A. LDA >= 2*NB. D (output) DOUBLE array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). E (output) DOUBLE array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. V (output) REAL array, dimension (BLKCNT, LDV, VBLKSIZ) On exit it contains the blocks of householder reflectors BLKCNT is the number of block and it is returned by the funtion MAGMA_BULGE_GET_BLKCNT. LDV (input) INTEGER The leading dimension of V. LDV > NB + VBLKSIZ + 1 TAU (output) REAL dimension(BLKCNT, VBLKSIZ) ??? COMPT (input) INTEGER if COMPT = 0 T is not computed if COMPT = 1 T is computed T (output) REAL dimension(LDT *) if COMPT = 1 on exit contains the matrices T needed for Q2 if COMPT = 0 T is not referenced LDT (input) INTEGER The leading dimension of T. LDT > Vblksiz INFO (output) INTEGER ???????????????????????????????????????????????????????????????????????????????????? 
= 0: successful exit ===================================================================== */ #ifdef ENABLE_TIMER real_Double_t timeblg=0.0; #endif //char uplo_[2] = {uplo, 0}; magma_int_t mklth = threads; magma_int_t INgrsiz=1; magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz); magma_int_t nbtiles = magma_ceildiv(n, nb); memset(T, 0, blkcnt*ldt*Vblksiz*sizeof(float)); memset(TAU, 0, blkcnt*Vblksiz*sizeof(float)); memset(V, 0, blkcnt*ldv*Vblksiz*sizeof(float)); magma_int_t* prog; magma_malloc_cpu((void**) &prog, (2*nbtiles+threads+10)*sizeof(magma_int_t)); memset(prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t)); magma_sbulge_id_data* arg; magma_malloc_cpu((void**) &arg, threads*sizeof(magma_sbulge_id_data)); pthread_t* thread_id; magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t)); pthread_attr_t thread_attr; magma_setlapack_numthreads(1); magma_sbulge_data data_bulge(threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT, A, lda, V, ldv, TAU, T, ldt, prog); // Set one thread per core pthread_attr_init(&thread_attr); pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM); pthread_setconcurrency(threads); //timing #ifdef ENABLE_TIMER timeblg = magma_wtime(); #endif // Launch threads for (magma_int_t thread = 1; thread < threads; thread++) { arg[thread] = magma_sbulge_id_data(thread, &data_bulge); pthread_create(&thread_id[thread], &thread_attr, magma_ssytrd_sb2st_parallel_section, &arg[thread]); } arg[0] = magma_sbulge_id_data(0, &data_bulge); magma_ssytrd_sb2st_parallel_section(&arg[0]); // Wait for completion for (magma_int_t thread = 1; thread < threads; thread++) { void *exitcodep; pthread_join(thread_id[thread], &exitcodep); } // timing #ifdef ENABLE_TIMER timeblg = magma_wtime()-timeblg; printf(" time BULGE+T = %f \n" ,timeblg); #endif magma_free_cpu(thread_id); magma_free_cpu(arg); magma_free_cpu(prog); magma_setlapack_numthreads(mklth); /*================================================ * store resulting diag and lower diag D and E * 
note that D and E are always real *================================================*/ /* Make diagonal and superdiagonal elements real, * storing them in D and E */ /* In real case, the off diagonal element are * not necessary real. we have to make off-diagonal * elements real and copy them to E. * When using HouseHolder elimination, * the SLARFG give us a real as output so, all the * diagonal/off-diagonal element except the last one are already * real and thus we need only to take the abs of the last * one. * */ #if defined(PRECISION_z) || defined(PRECISION_c) if(uplo==MagmaLower){ for (magma_int_t i=0; i < n-1 ; i++) { D[i] = MAGMA_S_REAL(A[i*lda ]); E[i] = MAGMA_S_REAL(A[i*lda+1]); } D[n-1] = MAGMA_S_REAL(A[(n-1)*lda]); } else { /* MagmaUpper not tested yet */ for (magma_int_t i=0; i<n-1; i++) { D[i] = MAGMA_S_REAL(A[i*lda+nb]); E[i] = MAGMA_S_REAL(A[i*lda+nb-1]); } D[n-1] = MAGMA_S_REAL(A[(n-1)*lda+nb]); } /* end MagmaUpper */ #else if( uplo == MagmaLower ){ for (magma_int_t i=0; i < n-1; i++) { D[i] = A[i*lda]; // diag E[i] = A[i*lda+1]; //lower diag } D[n-1] = A[(n-1)*lda]; } else { for (magma_int_t i=0; i < n-1; i++) { D[i] = A[i*lda+nb]; // diag E[i] = A[i*lda+nb-1]; //lower diag } D[n-1] = A[(n-1)*lda+nb]; } #endif return MAGMA_SUCCESS; }
/**
    Purpose
    -------
    Runs magma_zhetrd_hb2st_parallel_section on the number of threads
    reported by magma_get_parallel_numthreads() (the calling thread plus
    pthreads for the rest) over the band matrix A, then copies the real part
    of the diagonal and of the first off-diagonal of A into d and e.
    LAPACK is switched to one thread while the parallel section runs and the
    previous setting is restored afterwards.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangles of A is stored;
      -     = MagmaLower:  Lower triangles of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.
            If N = 0 the routine returns immediately without touching
            any array.

    @param[in]
    nb      INTEGER
            The order of the band matrix A.  N >= NB >= 0.

    @param[in]
    Vblksiz INTEGER
            The size of the block of householder vectors applied at once.

    @param[in,out]
    A       (workspace) COMPLEX_16 array, dimension (LDA, N)
            On entry the band matrix, stored column by column with leading
            dimension LDA.  As read back by this routine, column i holds
      -     UPLO = MagmaLower: the diagonal entry at A[i*LDA] and the
            sub-diagonal entry at A[i*LDA + 1];
      -     UPLO = MagmaUpper: the diagonal entry at A[i*LDA + NB] and the
            off-diagonal entry at A[i*LDA + NB - 1].

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= 2*NB.

    @param[out]
    d       DOUBLE array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    @param[out]
    e       DOUBLE array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower.

    @param[out]
    V       COMPLEX_16 array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors.
            BLKCNT is the number of blocks and it is returned by the
            function MAGMA_BULGE_GET_BLKCNT.
            Zero-filled here before the parallel section runs.

    @param[in]
    ldv     INTEGER
            The leading dimension of V.
            LDV > NB + VBLKSIZ + 1

    @param[out]
    TAU     COMPLEX_16 dimension(BLKCNT, VBLKSIZ)
            Zero-filled here, then handed to the parallel section.
            NOTE(review): presumably the Householder scalar factors that
            go with V -- confirm against the parallel section.

    @param[in]
    compT   INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    @param[out]
    T       COMPLEX_16 dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2.
            The first BLKCNT*LDT*VBLKSIZ entries are zero-filled here
            regardless of COMPT, so T must always point to valid storage
            of at least that size.

    @param[in]
    ldt     INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    @return MAGMA_SUCCESS on successful exit; otherwise the error code
            returned by magma_malloc_cpu when a workspace allocation fails
            (no thread has been started, no workspace is leaked and the
            LAPACK thread count is unchanged in that case).

    @ingroup magma_zheev_2stage
    ********************************************************************/
extern "C" magma_int_t magma_zhetrd_hb2st(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
    magmaDoubleComplex *A, magma_int_t lda, double *d, double *e,
    magmaDoubleComplex *V, magma_int_t ldv, magmaDoubleComplex *TAU,
    magma_int_t compT, magmaDoubleComplex *T, magma_int_t ldt)
{
    // Quick return: with n == 0 the final "d[n-1] = ..." store below would
    // write in front of d and read in front of A.
    if (n == 0) {
        return MAGMA_SUCCESS;
    }

#ifdef ENABLE_TIMER
    real_Double_t timeblg = 0.0;
#endif

    magma_int_t threads = magma_get_parallel_numthreads();
    magma_int_t mklth   = magma_get_lapack_numthreads();

    magma_int_t INgrsiz = 1;
    magma_int_t blkcnt  = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    magma_int_t nbtiles = magma_ceildiv(n, nb);

    // Widen to size_t before multiplying so the byte counts are not computed
    // (and possibly overflowed) in magma_int_t arithmetic.
    const size_t nblk = (size_t) blkcnt;
    const size_t vblk = (size_t) Vblksiz;

    memset(T,   0, nblk * (size_t) ldt * vblk * sizeof(magmaDoubleComplex));
    memset(TAU, 0, nblk * vblk * sizeof(magmaDoubleComplex));
    memset(V,   0, nblk * (size_t) ldv * vblk * sizeof(magmaDoubleComplex));

    // Host workspaces: progress table, per-thread arguments, thread handles.
    // Each allocation is checked; on failure everything acquired so far is
    // released before returning the allocator's error code.
    const size_t prog_bytes = (size_t) (2*nbtiles + threads + 10) * sizeof(magma_int_t);

    magma_int_t *prog = NULL;
    magma_int_t info = magma_malloc_cpu((void**) &prog, prog_bytes);
    if (info != MAGMA_SUCCESS) {
        return info;
    }
    memset(prog, 0, prog_bytes);

    magma_zbulge_id_data *arg = NULL;
    info = magma_malloc_cpu((void**) &arg, (size_t) threads * sizeof(magma_zbulge_id_data));
    if (info != MAGMA_SUCCESS) {
        magma_free_cpu(prog);
        return info;
    }

    pthread_t *thread_id = NULL;
    info = magma_malloc_cpu((void**) &thread_id, (size_t) threads * sizeof(pthread_t));
    if (info != MAGMA_SUCCESS) {
        magma_free_cpu(arg);
        magma_free_cpu(prog);
        return info;
    }

    pthread_attr_t thread_attr;

    // Switch LAPACK to a single thread only once every workspace exists, so
    // the early error returns above never leave the global setting changed.
    magma_set_lapack_numthreads(1);

    magma_zbulge_data data_bulge(threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT,
                                 A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency(threads);

    // timing
#ifdef ENABLE_TIMER
    timeblg = magma_wtime();
#endif

    // Launch threads 1..threads-1; the calling thread acts as thread 0.
    // NOTE(review): pthread_create failures are not handled here.
    for (magma_int_t thread = 1; thread < threads; thread++) {
        arg[thread] = magma_zbulge_id_data(thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr,
                       magma_zhetrd_hb2st_parallel_section, &arg[thread]);
    }

    // The attribute object is only needed while creating threads.
    pthread_attr_destroy(&thread_attr);

    arg[0] = magma_zbulge_id_data(0, &data_bulge);
    magma_zhetrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < threads; thread++) {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // timing
#ifdef ENABLE_TIMER
    timeblg = magma_wtime() - timeblg;
    printf(" time BULGE+T = %f\n", timeblg);
#endif

    magma_free_cpu(thread_id);
    magma_free_cpu(arg);
    magma_free_cpu(prog);

    magma_set_lapack_numthreads(mklth);

    /*================================================
     *  store resulting diag and lower diag d and e
     *  note that d and e are always real
     *================================================*/

    /* Copy the diagonal and the first off-diagonal of A into d and e.
     * For the complex precisions only the real part is kept; in real
     * precision the entries are copied unchanged.
     * Row offsets inside a column of the band storage depend on uplo.
     * NOTE(review): the original source marks the MagmaUpper path as
     * "not tested yet". */
    const magma_int_t diag = (uplo == MagmaLower) ? 0 : nb;
    const magma_int_t offd = (uplo == MagmaLower) ? 1 : nb - 1;

#if defined(PRECISION_z) || defined(PRECISION_c)
    for (magma_int_t i = 0; i < n-1; i++) {
        d[i] = MAGMA_Z_REAL( A[i*lda + diag] );
        e[i] = MAGMA_Z_REAL( A[i*lda + offd] );
    }
    d[n-1] = MAGMA_Z_REAL( A[(n-1)*lda + diag] );
#else
    for (magma_int_t i = 0; i < n-1; i++) {
        d[i] = A[i*lda + diag];   // diag
        e[i] = A[i*lda + offd];   // off-diag
    }
    d[n-1] = A[(n-1)*lda + diag];
#endif

    return MAGMA_SUCCESS;
}