Exemple #1
0
/***************************************************************************//**
    Purpose
    -------
    Reduces Hermitian band matrix to real symmetric tridiagonal.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangles of A is stored;
      -     = MagmaLower:  Lower triangles of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  n >= 0.

    @param[in]
    nb      INTEGER
            The order of the band matrix A.  n >= nb >= 0.

    @param[in]
    Vblksiz INTEGER
            The size of the block of householder vectors applied at once.

    @param[in]
    A       (workspace) COMPLEX array, dimension (lda, n)
            On entry the band matrix stored in the following way:

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  lda >= 2*nb.

    @param[out]
    d       DOUBLE array, dimension (n)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    @param[out]
    e       DOUBLE array, dimension (n-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = MagmaUpper, E(i) = A(i+1,i) if UPLO = MagmaLower.

    @param[out]
    V       COMPLEX array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors
            BLKCNT is the number of block and it is returned by the funtion MAGMA_BULGE_GET_BLKCNT.

    @param[in]
    ldv     INTEGER
            The leading dimension of V.
            LDV > nb + VBLKSIZ + 1

    @param[out]
    TAU     COMPLEX dimension(BLKCNT, VBLKSIZ)
            ???

    @param[in]
    wantz   INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    @param[out]
    T       COMPLEX dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2
            if COMPT = 0 T is not referenced

    @param[in]
    ldt     INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    @ingroup magma_hetrd_hb2st
*******************************************************************************/
extern "C" magma_int_t
magma_chetrd_hb2st(
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
    magmaFloatComplex *A, magma_int_t lda, float *d, float *e,
    magmaFloatComplex *V, magma_int_t ldv, magmaFloatComplex *TAU,
    magma_int_t wantz, magmaFloatComplex *T, magma_int_t ldt)
{
    #ifdef ENABLE_TIMER
    real_Double_t timeblg=0.0;
    #endif

    magma_int_t parallel_threads = magma_get_parallel_numthreads();
    magma_int_t mklth   = magma_get_lapack_numthreads();
    magma_int_t ompth   = magma_get_omp_numthreads();

    //magma_set_omp_numthreads(1);
    //magma_set_lapack_numthreads(1);

    magma_int_t blkcnt, sizTAU2, sizT2, sizV2;
    magma_cbulge_getstg2size(n, nb, wantz, 
                          Vblksiz, ldv, ldt, &blkcnt, 
                          &sizTAU2, &sizT2, &sizV2);
    memset(T,   0, sizT2*sizeof(magmaFloatComplex));
    memset(TAU, 0, sizTAU2*sizeof(magmaFloatComplex));
    memset(V,   0, sizV2*sizeof(magmaFloatComplex));

    magma_int_t INgrsiz=1;
    magma_int_t nbtiles = magma_ceildiv(n, nb);
    volatile magma_int_t* prog;
    magma_malloc_cpu((void**) &prog, (2*nbtiles+parallel_threads+10)*sizeof(magma_int_t));
    memset((void *) prog, 0, (2*nbtiles+parallel_threads+10)*sizeof(magma_int_t));

    magma_cbulge_id_data* arg;
    magma_malloc_cpu((void**) &arg, parallel_threads*sizeof(magma_cbulge_id_data));

    pthread_t* thread_id;
    magma_malloc_cpu((void**) &thread_id, parallel_threads*sizeof(pthread_t));
    pthread_attr_t thread_attr;

    magma_cbulge_data data_bulge;
    magma_cbulge_data_init(&data_bulge, parallel_threads, n, nb, nbtiles, INgrsiz, Vblksiz, wantz,
                                 A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency( (unsigned)parallel_threads );

    //timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime();
    #endif

    // Launch threads
    for (magma_int_t thread = 1; thread < parallel_threads; thread++) {
        magma_cbulge_id_data_init(&(arg[thread]), thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr, magma_chetrd_hb2st_parallel_section, &arg[thread]);
    }
    magma_cbulge_id_data_init(&(arg[0]), 0, &data_bulge);
    magma_chetrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < parallel_threads; thread++) {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime()-timeblg;
    printf("  time BULGE+T = %f\n", timeblg);
    #endif

    magma_free_cpu(thread_id);
    magma_free_cpu(arg);
    magma_free_cpu((void *) prog);
    magma_cbulge_data_destroy(&data_bulge);

    magma_set_omp_numthreads(ompth);
    magma_set_lapack_numthreads(mklth);
    /*================================================
     *  store resulting diag and lower diag d and e
     *  note that d and e are always real
     *================================================*/

    /* Make diagonal and superdiagonal elements real,
     * storing them in d and e
     */
    /* In complex case, the off diagonal element are
     * not necessary real. we have to make off-diagonal
     * elements real and copy them to e.
     * When using HouseHolder elimination,
     * the CLARFG give us a real as output so, all the
     * diagonal/off-diagonal element except the last one are already
     * real and thus we need only to take the abs of the last
     * one.
     *  */

#ifdef COMPLEX
    if (uplo == MagmaLower) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda  ] );
            e[i] = MAGMA_C_REAL( A[i*lda+1] );
        }
        d[n-1] = MAGMA_C_REAL(A[(n-1)*lda]);
    } else { /* MagmaUpper not tested yet */
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = MAGMA_C_REAL( A[i*lda+nb]   );
            e[i] = MAGMA_C_REAL( A[i*lda+nb-1] );
        }
        d[n-1] = MAGMA_C_REAL(A[(n-1)*lda+nb]);
    } /* end MagmaUpper */
#else
    if ( uplo == MagmaLower ) {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda];   // diag
            e[i] = A[i*lda+1]; // lower diag
        }
        d[n-1] = A[(n-1)*lda];
    } else {
        for (magma_int_t i=0; i < n-1; i++) {
            d[i] = A[i*lda+nb];   // diag
            e[i] = A[i*lda+nb-1]; // lower diag
        }
        d[n-1] = A[(n-1)*lda+nb];
    }
#endif
    return MAGMA_SUCCESS;
}
Exemple #2
0
extern "C" magma_int_t magma_chetrd_hb2st(magma_int_t threads, char uplo, magma_int_t n, magma_int_t nb, magma_int_t Vblksiz,
                                          magmaFloatComplex *A, magma_int_t lda, float *D, float *E,
                                          magmaFloatComplex *V, magma_int_t ldv, magmaFloatComplex *TAU, magma_int_t compT, magmaFloatComplex *T, magma_int_t ldt)
{
/*  -- MAGMA (version 1.4.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       August 2013

    Purpose
    =======


    Arguments
    =========
    THREADS (input) INTEGER
            Specifies the number of pthreads used.
            THREADS > 0

    UPLO    (input) CHARACTER*1
            = 'U':  Upper triangles of A is stored;
            = 'L':  Lower triangles of A is stored.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    NB      (input) INTEGER
            The order of the band matrix A.  N >= NB >= 0.

    VBLKSIZ (input) INTEGER
            The size of the block of householder vectors applied at once.

    A       (input/workspace) COMPLEX array, dimension (LDA, N)
            On entry the band matrix stored in the following way:

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= 2*NB.

    D       (output) DOUBLE array, dimension (N)
            The diagonal elements of the tridiagonal matrix T:
            D(i) = A(i,i).

    E       (output) DOUBLE array, dimension (N-1)
            The off-diagonal elements of the tridiagonal matrix T:
            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.

    V       (output) COMPLEX array, dimension (BLKCNT, LDV, VBLKSIZ)
            On exit it contains the blocks of householder reflectors
            BLKCNT is the number of block and it is returned by the funtion MAGMA_BULGE_GET_BLKCNT.

    LDV     (input) INTEGER
            The leading dimension of V.
            LDV > NB + VBLKSIZ + 1

    TAU     (output) COMPLEX dimension(BLKCNT, VBLKSIZ)
            ???

    COMPT   (input) INTEGER
            if COMPT = 0 T is not computed
            if COMPT = 1 T is computed

    T       (output) COMPLEX dimension(LDT *)
            if COMPT = 1 on exit contains the matrices T needed for Q2
            if COMPT = 0 T is not referenced

    LDT     (input) INTEGER
            The leading dimension of T.
            LDT > Vblksiz

    INFO    (output) INTEGER ????????????????????????????????????????????????????????????????????????????????????
            = 0:  successful exit


    =====================================================================  */

    #ifdef ENABLE_TIMER
    real_Double_t timeblg=0.0;
    #endif

    //char uplo_[2] = {uplo, 0};
    magma_int_t mklth = threads;
    magma_int_t INgrsiz=1;
    magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    magma_int_t nbtiles = magma_ceildiv(n, nb);

    memset(T,   0, blkcnt*ldt*Vblksiz*sizeof(magmaFloatComplex));
    memset(TAU, 0, blkcnt*Vblksiz*sizeof(magmaFloatComplex));
    memset(V,   0, blkcnt*ldv*Vblksiz*sizeof(magmaFloatComplex));

    magma_int_t* prog;
    magma_malloc_cpu((void**) &prog, (2*nbtiles+threads+10)*sizeof(magma_int_t));
    memset(prog, 0, (2*nbtiles+threads+10)*sizeof(magma_int_t));

    magma_cbulge_id_data* arg;
    magma_malloc_cpu((void**) &arg, threads*sizeof(magma_cbulge_id_data));

    pthread_t* thread_id;
    magma_malloc_cpu((void**) &thread_id, threads*sizeof(pthread_t));
    pthread_attr_t thread_attr;

    magma_setlapack_numthreads(1);
    magma_cbulge_data data_bulge(threads, n, nb, nbtiles, INgrsiz, Vblksiz, compT,
                                 A, lda, V, ldv, TAU, T, ldt, prog);

    // Set one thread per core
    pthread_attr_init(&thread_attr);
    pthread_attr_setscope(&thread_attr, PTHREAD_SCOPE_SYSTEM);
    pthread_setconcurrency(threads);

    //timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime();
    #endif

    // Launch threads
    for (magma_int_t thread = 1; thread < threads; thread++)
    {
        arg[thread] = magma_cbulge_id_data(thread, &data_bulge);
        pthread_create(&thread_id[thread], &thread_attr, magma_chetrd_hb2st_parallel_section, &arg[thread]);
    }
    arg[0] = magma_cbulge_id_data(0, &data_bulge);
    magma_chetrd_hb2st_parallel_section(&arg[0]);

    // Wait for completion
    for (magma_int_t thread = 1; thread < threads; thread++)
    {
        void *exitcodep;
        pthread_join(thread_id[thread], &exitcodep);
    }

    // timing
    #ifdef ENABLE_TIMER
    timeblg = magma_wtime()-timeblg;
    printf("  time BULGE+T = %f \n" ,timeblg);
    #endif

    magma_free_cpu(thread_id);
    magma_free_cpu(arg);
    magma_free_cpu(prog);

    magma_setlapack_numthreads(mklth);
    /*================================================
     *  store resulting diag and lower diag D and E
     *  note that D and E are always real
     *================================================*/

    /* Make diagonal and superdiagonal elements real,
     * storing them in D and E
     */
    /* In complex case, the off diagonal element are
     * not necessary real. we have to make off-diagonal
     * elements real and copy them to E.
     * When using HouseHolder elimination,
     * the CLARFG give us a real as output so, all the
     * diagonal/off-diagonal element except the last one are already
     * real and thus we need only to take the abs of the last
     * one.
     *  */

#if defined(PRECISION_z) || defined(PRECISION_c)
    if(uplo==MagmaLower){
        for (magma_int_t i=0; i < n-1 ; i++)
        {
            D[i] = MAGMA_C_REAL(A[i*lda  ]);
            E[i] = MAGMA_C_REAL(A[i*lda+1]);
        }
        D[n-1] = MAGMA_C_REAL(A[(n-1)*lda]);
    } else { /* MagmaUpper not tested yet */
        for (magma_int_t i=0; i<n-1; i++)
        {
            D[i]  =  MAGMA_C_REAL(A[i*lda+nb]);
            E[i] = MAGMA_C_REAL(A[i*lda+nb-1]);
        }
        D[n-1] = MAGMA_C_REAL(A[(n-1)*lda+nb]);
    } /* end MagmaUpper */
#else
    if( uplo == MagmaLower ){
        for (magma_int_t i=0; i < n-1; i++) {
            D[i] = A[i*lda];   // diag
            E[i] = A[i*lda+1]; //lower diag
        }
        D[n-1] = A[(n-1)*lda];
    } else {
        for (magma_int_t i=0; i < n-1; i++) {
            D[i] = A[i*lda+nb];   // diag
            E[i] = A[i*lda+nb-1]; //lower diag
        }
        D[n-1] = A[(n-1)*lda+nb];
    }
#endif
    return MAGMA_SUCCESS;

}