C++ (Cpp) magma_get_zgebrd_nb Examples

Programming Language: C++ (Cpp)

Method/Function: magma_get_zgebrd_nb

Examples at hotexamples.com: 3

C++ (Cpp) magma_get_zgebrd_nb - 3 examples found. These are the top rated real world C++ (Cpp) examples of magma_get_zgebrd_nb extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

/**
    Purpose
    -------
    ZGEBRD reduces a general complex M-by-N matrix A to upper or lower
    bidiagonal form B by an orthogonal transformation: Q**H * A * P = B.

    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows in the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns in the matrix A.  N >= 0.

    @param[in,out]
    A       COMPLEX_16 array, dimension (LDA,N)
            On entry, the M-by-N general matrix to be reduced.
            On exit,
            if m >= n, the diagonal and the first superdiagonal are
              overwritten with the upper bidiagonal matrix B; the
              elements below the diagonal, with the array TAUQ, represent
              the orthogonal matrix Q as a product of elementary
              reflectors, and the elements above the first superdiagonal,
              with the array TAUP, represent the orthogonal matrix P as
              a product of elementary reflectors;
    \n
            if m < n, the diagonal and the first subdiagonal are
              overwritten with the lower bidiagonal matrix B; the
              elements below the first subdiagonal, with the array TAUQ,
              represent the orthogonal matrix Q as a product of
              elementary reflectors, and the elements above the diagonal,
              with the array TAUP, represent the orthogonal matrix P as
              a product of elementary reflectors.
            See Further Details.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    @param[out]
    d       double precision array, dimension (min(M,N))
            The diagonal elements of the bidiagonal matrix B:
            D(i) = A(i,i).

    @param[out]
    e       double precision array, dimension (min(M,N)-1)
            The off-diagonal elements of the bidiagonal matrix B:
            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.

    @param[out]
    tauq    COMPLEX_16 array dimension (min(M,N))
            The scalar factors of the elementary reflectors which
            represent the orthogonal matrix Q. See Further Details.

    @param[out]
    taup    COMPLEX_16 array, dimension (min(M,N))
            The scalar factors of the elementary reflectors which
            represent the orthogonal matrix P. See Further Details.

    @param[out]
    work    (workspace) COMPLEX_16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The length of the array WORK.  LWORK >= (M+N)*NB, where NB
            is the optimal blocksize.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.

    Further Details
    ---------------
    The matrices Q and P are represented as products of elementary
    reflectors:

    If m >= n,

       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)

    Each H(i) and G(i) has the form:

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'

    where tauq and taup are complex scalars, and v and u are complex vectors;
    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
    tauq is stored in TAUQ(i) and taup in TAUP(i).

    If m < n,

       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)

    Each H(i) and G(i) has the form:

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'

    where tauq and taup are complex scalars, and v and u are complex vectors;
    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
    tauq is stored in TAUQ(i) and taup in TAUP(i).

    The contents of A on exit are illustrated by the following examples:

    @verbatim
    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):

      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
      (  v1  v2  v3  v4  v5 )
    @endverbatim

    where d and e denote diagonal and off-diagonal elements of B, vi
    denotes an element of the vector defining H(i), and ui an element of
    the vector defining G(i).

    @ingroup magma_zgesvd_comp
    ********************************************************************/
extern "C" magma_int_t
magma_zgebrd(
    magma_int_t m, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda, double *d, double *e,
    magmaDoubleComplex *tauq, magmaDoubleComplex *taup,
    magmaDoubleComplex *work, magma_int_t lwork,
    magma_int_t *info)
{
#define  A(i, j) (A  + (j)*lda  + (i))
#define dA(i, j) (dA + (j)*ldda + (i))

    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex *dA, *dwork;

    magma_int_t ncol, nrow, jmax, nb, ldda;

    magma_int_t i, j, nx;
    magma_int_t iinfo;

    magma_int_t minmn;
    magma_int_t ldwrkx, ldwrky, lwkopt;
    magma_int_t lquery;

    nb   = magma_get_zgebrd_nb( m, n );
    ldda = m;

    lwkopt = (m + n) * nb;
    work[0] = magma_zmake_lwork( lwkopt );
    lquery = (lwork == -1);
    
    /* Check arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,m)) {
        *info = -4;
    } else if (lwork < lwkopt && (! lquery) ) {
        *info = -10;
    }
    if (*info < 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Quick return if possible */
    minmn = min(m,n);
    if (minmn == 0) {
        work[0] = c_one;
        return *info;
    }

    magma_queue_t queue = NULL;
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queue );

    magmaDoubleComplex *work2;
    magma_int_t lwork2 = max(m,n);
    if (MAGMA_SUCCESS != magma_zmalloc_cpu( &work2, lwork2 )) {
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    if (MAGMA_SUCCESS != magma_zmalloc( &dA, n*ldda + (m + n)*nb )) {
        magma_free_cpu( work2 );
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    dwork = dA + n*ldda;

    ldwrkx = m;
    ldwrky = n;

    /* Set the block/unblock crossover point NX. */
    nx = 128;

    /* Copy the matrix to the GPU */
    if (minmn - nx >= 1) {
        magma_zsetmatrix( m, n, A, lda, dA, ldda, queue );
    }
    
    for (i=0; i < (minmn - nx); i += nb) {
        /*  Reduce rows and columns i:i+nb-1 to bidiagonal form and return
            the matrices X and Y which are needed to update the unreduced
            part of the matrix */
        nrow = m - i;
        ncol = n - i;

        /* Get the current panel (no need for the 1st iteration) */
        if ( i > 0 ) {
            magma_zgetmatrix( nrow, nb,
                              dA(i, i), ldda,
                              A( i, i), lda, queue );
            magma_zgetmatrix( nb, ncol - nb,
                              dA(i, i+nb), ldda,
                              A( i, i+nb), lda, queue );
        }

        magma_zlabrd_gpu(nrow, ncol, nb,
                         A(i, i),          lda,    dA(i, i),          ldda,
                         d+i, e+i, tauq+i, taup+i,
                         work,             ldwrkx, dwork,             ldwrkx,  // x, dx
                         work+(ldwrkx*nb), ldwrky, dwork+(ldwrkx*nb), ldwrky,
                         work2, lwork2, queue ); // y, dy

        /*  Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
            of the form  A := A - V*Y' - X*U' */
        nrow = m - i - nb;
        ncol = n - i - nb;

        // Send Y back to the GPU
        magma_zsetmatrix( nrow, nb,
                          work  + nb, ldwrkx,
                          dwork + nb, ldwrkx, queue );
        magma_zsetmatrix( ncol, nb,
                          work  + (ldwrkx+1)*nb, ldwrky,
                          dwork + (ldwrkx+1)*nb, ldwrky, queue );

        magma_zgemm( MagmaNoTrans, MagmaConjTrans,
                     nrow, ncol, nb,
                     c_neg_one, dA(i+nb, i   ),      ldda,
                                dwork+(ldwrkx+1)*nb, ldwrky,
                     c_one,     dA(i+nb, i+nb),      ldda, queue );

        magma_zgemm( MagmaNoTrans, MagmaNoTrans,
                     nrow, ncol, nb,
                     c_neg_one, dwork+nb,         ldwrkx,
                                dA( i,    i+nb ), ldda,
                     c_one,     dA( i+nb, i+nb ), ldda, queue );

        /* Copy diagonal and off-diagonal elements of B back into A */
        if (m >= n) {
            jmax = i + nb;
            for (j = i; j < jmax; ++j) {
                *A(j, j  ) = MAGMA_Z_MAKE( d[j], 0. );
                *A(j, j+1) = MAGMA_Z_MAKE( e[j], 0. );
            }
        } else {
            jmax = i + nb;
            for (j = i; j < jmax; ++j) {
                *A(j,   j ) = MAGMA_Z_MAKE( d[j], 0. );
                *A(j+1, j ) = MAGMA_Z_MAKE( e[j], 0. );
            }
        }
    }

    /* Use unblocked code to reduce the remainder of the matrix */
    nrow = m - i;
    ncol = n - i;

    if ( 0 < minmn - nx ) {
        magma_zgetmatrix( nrow, ncol,
                          dA(i, i), ldda,
                          A( i, i), lda, queue );
    }
    
    lapackf77_zgebrd( &nrow, &ncol,
                      A(i, i), &lda, d+i, e+i,
                      tauq+i, taup+i, work, &lwork, &iinfo);
    work[0] = magma_zmake_lwork( lwkopt );

    magma_free_cpu( work2 );
    magma_free( dA );
    
    magma_queue_destroy( queue );

    return *info;
} /* magma_zgebrd */

Example #2

Show file

File: get_nb.cpp Project: soulsheng/magma

magma_int_t magma_get_zgesvd_nb( magma_int_t m )
{
    return magma_get_zgebrd_nb( m );
}

Example #3

Show file

File: testing_zunmbr.cpp Project: xulunfan/magma

/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zunmbr
*/
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double Cnorm, error, dwork[1];
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magma_int_t ione = 1;
    magma_int_t m, n, k, mi, ni, mm, nn, nq, size, info;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t nb, ldc, lda, lwork, lwork_max;
    magmaDoubleComplex *C, *R, *A, *work, *tau, *tauq, *taup;
    double *d, *e;
    magma_int_t status = 0;
    
    magma_opts opts;
    opts.parse_opts( argc, argv );
    
    // need slightly looser bound (60*eps instead of 30*eps) for some tests
    opts.tolerance = max( 60., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    // test all combinations of input parameters
    magma_vect_t  vect [] = { MagmaQ,          MagmaP       };
    magma_side_t  side [] = { MagmaLeft,       MagmaRight   };
    magma_trans_t trans[] = { Magma_ConjTrans, MagmaNoTrans };

    printf("%%   M     N     K   vect side   trans   CPU Gflop/s (sec)   GPU Gflop/s (sec)   ||R||_F / ||QC||_F\n");
    printf("%%==============================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
      for( int ivect = 0; ivect < 2; ++ivect ) {
      for( int iside = 0; iside < 2; ++iside ) {
      for( int itran = 0; itran < 2; ++itran ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            m = opts.msize[itest];
            n = opts.nsize[itest];
            k = opts.ksize[itest];
            nb  = magma_get_zgebrd_nb( m, n );
            ldc = m;
            // A is nq x k (vect=Q) or k x nq (vect=P)
            // where nq=m (left) or nq=n (right)
            nq  = (side[iside] == MagmaLeft ? m  : n );
            mm  = (vect[ivect] == MagmaQ    ? nq : k );
            nn  = (vect[ivect] == MagmaQ    ? k  : nq);
            lda = mm;
            
            // MBR calls either MQR or MLQ in various ways
            if ( vect[ivect] == MagmaQ ) {
                if ( nq >= k ) {
                    gflops = FLOPS_ZUNMQR( m, n, k, side[iside] ) / 1e9;
                }
                else {
                    if ( side[iside] == MagmaLeft ) {
                        mi = m - 1;
                        ni = n;
                    }
                    else {
                        mi = m;
                        ni = n - 1;
                    }
                    gflops = FLOPS_ZUNMQR( mi, ni, nq-1, side[iside] ) / 1e9;
                }
            }
            else {
                if ( nq > k ) {
                    gflops = FLOPS_ZUNMLQ( m, n, k, side[iside] ) / 1e9;
                }
                else {
                    if ( side[iside] == MagmaLeft ) {
                        mi = m - 1;
                        ni = n;
                    }
                    else {
                        mi = m;
                        ni = n - 1;
                    }
                    gflops = FLOPS_ZUNMLQ( mi, ni, nq-1, side[iside] ) / 1e9;
                }
            }
            
            // workspace for gebrd is (mm + nn)*nb
            // workspace for unmbr is m*nb or n*nb, depending on side
            lwork_max = max( (mm + nn)*nb, max( m*nb, n*nb ));
            // this rounds it up slightly if needed to agree with lwork query below
            lwork_max = int( real( magma_zmake_lwork( lwork_max )));
            
            TESTING_MALLOC_CPU( C,    magmaDoubleComplex, ldc*n );
            TESTING_MALLOC_CPU( R,    magmaDoubleComplex, ldc*n );
            TESTING_MALLOC_CPU( A,    magmaDoubleComplex, lda*nn );
            TESTING_MALLOC_CPU( work, magmaDoubleComplex, lwork_max );
            TESTING_MALLOC_CPU( d,    double,             min(mm,nn) );
            TESTING_MALLOC_CPU( e,    double,             min(mm,nn) );
            TESTING_MALLOC_CPU( tauq, magmaDoubleComplex, min(mm,nn) );
            TESTING_MALLOC_CPU( taup, magmaDoubleComplex, min(mm,nn) );
            
            // C is full, m x n
            size = ldc*n;
            lapackf77_zlarnv( &ione, ISEED, &size, C );
            lapackf77_zlacpy( "Full", &m, &n, C, &ldc, R, &ldc );
            
            size = lda*nn;
            lapackf77_zlarnv( &ione, ISEED, &size, A );
            
            // compute BRD factorization to get Householder vectors in A, tauq, taup
            //lapackf77_zgebrd( &mm, &nn, A, &lda, d, e, tauq, taup, work, &lwork_max, &info );
            magma_zgebrd( mm, nn, A, lda, d, e, tauq, taup, work, lwork_max, &info );
            if (info != 0) {
                printf("magma_zgebrd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            if ( vect[ivect] == MagmaQ ) {
                tau = tauq;
            } else {
                tau = taup;
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            lapackf77_zunmbr( lapack_vect_const( vect[ivect] ),
                              lapack_side_const( side[iside] ),
                              lapack_trans_const( trans[itran] ),
                              &m, &n, &k,
                              A, &lda, tau, C, &ldc, work, &lwork_max, &info );
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gflops / cpu_time;
            if (info != 0) {
                printf("lapackf77_zunmbr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            // query for workspace size
            lwork = -1;
            magma_zunmbr( vect[ivect], side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, work, lwork, &info );
            if (info != 0) {
                printf("magma_zunmbr (lwork query) returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            lwork = (magma_int_t) MAGMA_Z_REAL( work[0] );
            if ( lwork < 0 || lwork > lwork_max ) {
                printf("Warning: optimal lwork %d > allocated lwork_max %d\n", (int) lwork, (int) lwork_max );
                lwork = lwork_max;
            }
            
            gpu_time = magma_wtime();
            magma_zunmbr( vect[ivect], side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, work, lwork, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0) {
                printf("magma_zunmbr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* =====================================================================
               compute relative error |QC_magma - QC_lapack| / |QC_lapack|
               =================================================================== */
            size = ldc*n;
            blasf77_zaxpy( &size, &c_neg_one, C, &ione, R, &ione );
            Cnorm = lapackf77_zlange( "Fro", &m, &n, C, &ldc, dwork );
            error = lapackf77_zlange( "Fro", &m, &n, R, &ldc, dwork ) / (magma_dsqrt(m*n) * Cnorm);
            
            printf( "%5d %5d %5d   %c   %4c   %5c   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                    (int) m, (int) n, (int) k,
                    lapacke_vect_const( vect[ivect] ),
                    lapacke_side_const( side[iside] ),
                    lapacke_trans_const( trans[itran] ),
                    cpu_perf, cpu_time, gpu_perf, gpu_time,
                    error, (error < tol ? "ok" : "failed") );
            status += ! (error < tol);
            
            TESTING_FREE_CPU( C );
            TESTING_FREE_CPU( R );
            TESTING_FREE_CPU( A );
            TESTING_FREE_CPU( work );
            TESTING_FREE_CPU( d );
            TESTING_FREE_CPU( e );
            TESTING_FREE_CPU( taup );
            TESTING_FREE_CPU( tauq );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }}}  // end ivect, iside, itran
      printf( "\n" );
    }
    
    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}