示例#1
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zprint
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magmaDoubleComplex *hA, *dA;
    //magma_int_t ione     = 1;
    //magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t M, N, lda, ldda;  //size
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );

    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M     = opts.msize[itest];
            N     = opts.nsize[itest];
            lda   = M;
            ldda  = ((M + 31)/32)*32;
            //size  = lda*N;

            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( hA, magmaDoubleComplex, lda *N );
            TESTING_MALLOC_DEV( dA, magmaDoubleComplex, ldda*N );
        
            //lapackf77_zlarnv( &ione, ISEED, &size, hA );
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < M; ++i ) {
                    hA[i + j*lda] = MAGMA_Z_MAKE( i + j*0.01, 0. );
                }
            }
            magma_zsetmatrix( M, N, hA, lda, dA, ldda );
            
            printf( "A=" );
            magma_zprint( M, N, hA, lda );
            
            printf( "dA=" );
            magma_zprint_gpu( M, N, dA, ldda );
            
            TESTING_FREE_CPU( hA );
            TESTING_FREE_DEV( dA );
        }
    }

    TESTING_FINALIZE();
    return status;
}
int main( int argc, char** argv) 
{
    #define hA(i,j) (hA + (i) + (j)*lda)
    
    TESTING_CUDA_INIT();

    cuDoubleComplex c_zero = MAGMA_Z_ZERO;
    cuDoubleComplex c_one  = MAGMA_Z_ONE;
    
    cuDoubleComplex *hA, *hR, *dA;
    //real_Double_t   gpu_time, gpu_perf;

    //int ione     = 1;
    //int ISEED[4] = {0, 0, 0, 1};
    
    int nsize[] = { 32, 64, 96, 256, 100, 200, 512 };
    int ntest = sizeof(nsize) / sizeof(int);
    int n   = nsize[ntest-1];
    int lda = ((n + 31)/32)*32;
    int ntile, nb;
    
    TESTING_MALLOC   ( hA, cuDoubleComplex, lda*n );
    TESTING_MALLOC   ( hR, cuDoubleComplex, lda*n );
    TESTING_DEVALLOC ( dA, cuDoubleComplex, lda*n );
    
    for( int t = 0; t < ntest; ++t ) {
        n = nsize[t];
        lda = ((n + 31)/32)*32;
        
        // initialize matrices; entries are (i.j) for A
        double nf = 100.;
        for( int j = 0; j < n; ++j ) {
            // upper
            for( int i = 0; i < j; ++i ) {
                *hA(i,j) = MAGMA_Z_MAKE( (i + j/nf)/nf, 0. );
            }
            // lower
            for( int i = j; i < n; ++i ) {
                *hA(i,j) = MAGMA_Z_MAKE( i + j/nf, 0. );
            }
        }
        printf( "A%d = ", n );
        magma_zprint( n, n, hA, lda );
        
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize( MagmaLower, n, dA, lda );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d = ", n );
        magma_zprint( n, n, hR, lda );
        
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize( MagmaUpper, n, dA, lda );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "U%d = ", n );
        magma_zprint( n, n, hR, lda );
        
        // -----
        //lapackf77_zlaset( "u", &n, &n, &c_zero, &c_one, hA, &lda );
        
        nb = 64;
        ntile = n / nb;
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        nb = 32;
        ntile = n / nb;
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        ntile = (n - nb < 0 ? 0 : (n - nb) / (2*nb) + 1);
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, 2*nb, nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d_2m = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        nb = 25;
        ntile = n / nb;
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        nb = 25;
        ntile = (n - nb < 0 ? 0 : (n - nb) / (3*nb) + 1);
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, 3*nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d_3n = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        nb = 100;
        ntile = n / nb;
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magmablas_zsymmetrize( MagmaLower, n%nb, &dA[ ntile*nb*(1+lda) ], lda );  // last partial block
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_zprint( n, n, hR, lda );
        
        // -----
        nb = 64;
        ntile = n / nb;
        magma_zsetmatrix( n, n, hA, lda, dA, lda );
        magmablas_zsymmetrize_tiles( MagmaUpper, nb, dA, lda, ntile, nb, nb );
        magma_zgetmatrix( n, n, dA, lda, hR, lda );
        printf( "U%d_%d = ", n, nb );
        magma_zprint( n, n, hR, lda );
    }
    
    TESTING_FREE( hA );
    TESTING_FREE( hR );
    TESTING_DEVFREE( dA );
    
    /* Shutdown */
    TESTING_CUDA_FINALIZE();
    return 0;
}
示例#3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zlaset
   Code is very similar to testing_zlacpy.cpp
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t    gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double           error, work[1];
    magmaDoubleComplex  c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *h_A, *h_R;
    magmaDoubleComplex_ptr d_A;
    magmaDoubleComplex offdiag, diag;
    magma_int_t M, N, size, lda, ldda;
    magma_int_t ione     = 1;
    magma_int_t status = 0;
    
    magma_opts opts;
    opts.parse_opts( argc, argv );

    magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };

    printf("%% uplo    M     N    offdiag    diag    CPU GByte/s (ms)    GPU GByte/s (ms)   check\n");
    printf("%%===================================================================================\n");
    for( int iuplo = 0; iuplo < 3; ++iuplo ) {
      for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
          for( int ival = 0; ival < 4; ++ival ) {
            // test combinations of zero & non-zero:
            // ival  offdiag  diag
            // 0     0        0
            // 1     0        3.14
            // 2     1.23     0
            // 3     1.23     3.14
            offdiag = MAGMA_Z_MAKE( 1.2345, 6.7890 ) * (ival / 2);
            diag    = MAGMA_Z_MAKE( 3.1415, 2.7183 ) * (ival % 2);
            
            M = opts.msize[itest];
            N = opts.nsize[itest];
            //M += 2;  // space for insets
            //N += 2;
            lda    = M;
            ldda   = magma_roundup( M, opts.align );
            size   = lda*N;
            if ( uplo[iuplo] == MagmaLower ) {
                // save lower trapezoid (with diagonal)
                if ( M > N ) {
                    gbytes = sizeof(magmaDoubleComplex) * (1.*M*N - 0.5*N*(N-1)) / 1e9;
                } else {
                    gbytes = sizeof(magmaDoubleComplex) * 0.5*M*(M+1) / 1e9;
                }
            }
            else if ( uplo[iuplo] == MagmaUpper ) {
                // save upper trapezoid (with diagonal)
                if ( N > M ) {
                    gbytes = sizeof(magmaDoubleComplex) * (1.*M*N - 0.5*M*(M-1)) / 1e9;
                } else {
                    gbytes = sizeof(magmaDoubleComplex) * 0.5*N*(N+1) / 1e9;
                }
            }
            else {
                // save entire matrix
                gbytes = sizeof(magmaDoubleComplex) * 1.*M*N / 1e9;
            }
    
            TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, size   );
            TESTING_MALLOC_CPU( h_R, magmaDoubleComplex, size   );
            
            TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*N );
            
            /* Initialize the matrix */
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < M; ++i ) {
                    h_A[i + j*lda] = MAGMA_Z_MAKE( i + j/10000., j );
                }
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_zsetmatrix( M, N, h_A, lda, d_A, ldda );
            
            magmablasSetKernelStream( opts.queue );
            gpu_time = magma_sync_wtime( opts.queue );
            //magmablas_zlaset( uplo[iuplo], M-2, N-2, offdiag, diag, d_A+1+ldda, ldda );  // inset by 1 row & col
            magmablas_zlaset( uplo[iuplo], M, N, offdiag, diag, d_A, ldda );
            gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
            gpu_perf = gbytes / gpu_time;
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            //magma_int_t M2 = M-2;  // inset by 1 row & col
            //magma_int_t N2 = N-2;
            //lapackf77_zlaset( lapack_uplo_const( uplo[iuplo] ), &M2, &N2, &offdiag, &diag, h_A+1+lda, &lda );
            lapackf77_zlaset( lapack_uplo_const( uplo[iuplo] ), &M, &N, &offdiag, &diag, h_A, &lda );
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gbytes / cpu_time;
            
            if ( opts.verbose ) {
                printf( "A= " );  magma_zprint(     M, N, h_A, lda );
                printf( "dA=" );  magma_zprint_gpu( M, N, d_A, ldda );
            }
            
            /* =====================================================================
               Check the result
               =================================================================== */
            magma_zgetmatrix( M, N, d_A, ldda, h_R, lda );
            
            blasf77_zaxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
            error = lapackf77_zlange("f", &M, &N, h_R, &lda, work);

            bool okay = (error == 0);
            status += ! okay;
            printf("%5s %5d %5d  %9.4f  %6.4f   %7.2f (%7.2f)   %7.2f (%7.2f)   %s\n",
                   lapack_uplo_const( uplo[iuplo] ), (int) M, (int) N,
                   real(offdiag), real(diag),
                   cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
                   (okay ? "ok" : "failed") );
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_R );
            
            TESTING_FREE_DEV( d_A );
            fflush( stdout );
          }
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }
      printf( "\n" );
    }

    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}