Beispiel #1
0
void magma_cprint_gpu(
    magma_int_t m, magma_int_t n,
    const magmaFloatComplex *dA, magma_int_t ldda )
{
    magma_int_t info = 0;
    if ( m < 0 )
        info = -1;
    else if ( n < 0 )
        info = -2;
    else if ( ldda < max(1,m) )
        info = -4;
    
    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return;  //info;
    }
    
    magma_int_t lda = m;
    magmaFloatComplex* A;
    magma_cmalloc_cpu( &A, lda*n );
    magma_cgetmatrix( m, n, dA, ldda, A, lda );
    
    magma_cprint( m, n, A, lda );
    
    magma_free_cpu( A );
}
Beispiel #2
0
void magma_cprint_gpu( magma_int_t m, magma_int_t n, const magmaFloatComplex *dA, magma_int_t ldda )
{
    if ( magma_is_devptr( dA ) == 0 ) {
        fprintf( stderr, "ERROR: cprint_gpu called with host pointer.\n" );
        exit(1);
    }
    
    magma_int_t lda = m;
    magmaFloatComplex* A;
    magma_cmalloc_cpu( &A, lda*n );
    magma_cgetmatrix( m, n, dA, ldda, A, lda );
    magma_cprint( m, n, A, lda );
    magma_free_cpu( A );
}
Beispiel #3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cprint
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magmaFloatComplex *hA, *dA;
    //magma_int_t ione     = 1;
    //magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t M, N, lda, ldda;  //size
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );

    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M     = opts.msize[itest];
            N     = opts.nsize[itest];
            lda   = M;
            ldda  = ((M + 31)/32)*32;
            //size  = lda*N;

            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( hA, magmaFloatComplex, lda *N );
            TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*N );
        
            //lapackf77_clarnv( &ione, ISEED, &size, hA );
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < M; ++i ) {
                    hA[i + j*lda] = MAGMA_C_MAKE( i + j*0.01, 0. );
                }
            }
            magma_csetmatrix( M, N, hA, lda, dA, ldda );
            
            printf( "A=" );
            magma_cprint( M, N, hA, lda );
            
            printf( "dA=" );
            magma_cprint_gpu( M, N, dA, ldda );
            
            TESTING_FREE_CPU( hA );
            TESTING_FREE_DEV( dA );
        }
    }

    TESTING_FINALIZE();
    return status;
}
int main( int argc, char** argv) 
{
    #define hA(i,j) (hA + (i) + (j)*lda)
    
    TESTING_CUDA_INIT();

    cuFloatComplex c_zero = MAGMA_C_ZERO;
    cuFloatComplex c_one  = MAGMA_C_ONE;
    
    cuFloatComplex *hA, *hR, *dA;
    //real_Double_t   gpu_time, gpu_perf;

    //int ione     = 1;
    //int ISEED[4] = {0, 0, 0, 1};
    
    int nsize[] = { 32, 64, 96, 256, 100, 200, 512 };
    int ntest = sizeof(nsize) / sizeof(int);
    int n   = nsize[ntest-1];
    int lda = ((n + 31)/32)*32;
    int ntile, nb;
    
    TESTING_MALLOC   ( hA, cuFloatComplex, lda*n );
    TESTING_MALLOC   ( hR, cuFloatComplex, lda*n );
    TESTING_DEVALLOC ( dA, cuFloatComplex, lda*n );
    
    for( int t = 0; t < ntest; ++t ) {
        n = nsize[t];
        lda = ((n + 31)/32)*32;
        
        // initialize matrices; entries are (i.j) for A
        float nf = 100.;
        for( int j = 0; j < n; ++j ) {
            // upper
            for( int i = 0; i < j; ++i ) {
                *hA(i,j) = MAGMA_C_MAKE( (i + j/nf)/nf, 0. );
            }
            // lower
            for( int i = j; i < n; ++i ) {
                *hA(i,j) = MAGMA_C_MAKE( i + j/nf, 0. );
            }
        }
        printf( "A%d = ", n );
        magma_cprint( n, n, hA, lda );
        
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize( MagmaLower, n, dA, lda );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d = ", n );
        magma_cprint( n, n, hR, lda );
        
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize( MagmaUpper, n, dA, lda );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "U%d = ", n );
        magma_cprint( n, n, hR, lda );
        
        // -----
        //lapackf77_claset( "u", &n, &n, &c_zero, &c_one, hA, &lda );
        
        nb = 64;
        ntile = n / nb;
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        nb = 32;
        ntile = n / nb;
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        ntile = (n - nb < 0 ? 0 : (n - nb) / (2*nb) + 1);
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, 2*nb, nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d_2m = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        nb = 25;
        ntile = n / nb;
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        nb = 25;
        ntile = (n - nb < 0 ? 0 : (n - nb) / (3*nb) + 1);
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, 3*nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d_3n = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        nb = 100;
        ntile = n / nb;
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaLower, nb, dA, lda, ntile, nb, nb );
        magmablas_csymmetrize( MagmaLower, n%nb, &dA[ ntile*nb*(1+lda) ], lda );  // last partial block
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "L%d_%d = ", n, nb );
        magma_cprint( n, n, hR, lda );
        
        // -----
        nb = 64;
        ntile = n / nb;
        magma_csetmatrix( n, n, hA, lda, dA, lda );
        magmablas_csymmetrize_tiles( MagmaUpper, nb, dA, lda, ntile, nb, nb );
        magma_cgetmatrix( n, n, dA, lda, hR, lda );
        printf( "U%d_%d = ", n, nb );
        magma_cprint( n, n, hR, lda );
    }
    
    TESTING_FREE( hA );
    TESTING_FREE( hR );
    TESTING_DEVFREE( dA );
    
    /* Shutdown */
    TESTING_CUDA_FINALIZE();
    return 0;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing claswp
*/
int main( int argc, char** argv)
{
    /* Initialize */
    magma_queue_t  queue;
    magma_device_t device[ MagmaMaxGPUs ];
    int num = 0;
    magma_err_t err;
    magma_init();
    err = magma_get_devices( device, MagmaMaxGPUs, &num );
    if ( err != 0 || num < 1 ) {
        fprintf( stderr, "magma_get_devices failed: %d\n", err );
        exit(-1);
    }
    err = magma_queue_create( device[0], &queue );
    if ( err != 0 ) {
        fprintf( stderr, "magma_queue_create failed: %d\n", err );
        exit(-1);
    }

    magmaFloatComplex *h_A1, *h_A2, *h_A3, *h_AT;
    magmaFloatComplex_ptr d_A1;

    real_Double_t gpu_time, cpu_time1, cpu_time2;

    /* Matrix size */
    int M=0, N=0, n2, lda, ldat;
    int size[7] = {1000,2000,3000,4000,5000,6000,7000};
    int i, j;
    int ione     = 1;
    int ISEED[4] = {0,0,0,1};
    int *ipiv;

    int k1, k2, r, c, incx;

    if (argc != 1){
        for(i = 1; i<argc; i++){
            if (strcmp("-N", argv[i])==0)
                N = atoi(argv[++i]);
            if (strcmp("-M", argv[i])==0)
                M = atoi(argv[++i]);
        }
        if (M>0 && N>0)
            printf("  testing_claswp -M %d -N %d\n\n", M, N);
        else
            {
                printf("\nUsage: \n");
                printf("  testing_claswp -M %d -N %d\n\n", 1024, 1024);
                exit(1);
            }
    }
    else {
        printf("\nUsage: \n");
        printf("  testing_claswp -M %d -N %d\n\n", 1024, 1024);
        M = N = size[6];
    }

    lda = M;
    n2 = M*N;

    /* Allocate host memory for the matrix */
    TESTING_MALLOC_CPU( h_A1, magmaFloatComplex, n2 );
    TESTING_MALLOC_CPU( h_A2, magmaFloatComplex, n2 );
    TESTING_MALLOC_CPU( h_A3, magmaFloatComplex, n2 );
    TESTING_MALLOC_CPU( h_AT, magmaFloatComplex, n2 );
    
    TESTING_MALLOC_DEV( d_A1, magmaFloatComplex, n2 );

    ipiv = (int*)malloc(M * sizeof(int));
    if (ipiv == 0) {
        fprintf (stderr, "!!!! host memory allocation error (ipiv)\n");
    }
  
    printf("\n\n");
    printf("  M     N    CPU_BLAS (sec)  CPU_LAPACK (sec) GPU (sec)                      \n");
    printf("=============================================================================\n");
    for(i=0; i<7; i++) {
        if(argc == 1){
            M = N = size[i];
        }
        lda = M;
        ldat = N;
        n2 = M*N;
        
        /* Initialize the matrix */
        lapackf77_clarnv( &ione, ISEED, &n2, h_A1 );
        lapackf77_clacpy( MagmaUpperLowerStr, &M, &N, h_A1, &lda, h_A2, &lda );
        for(r=0;r<M;r++){
            for(c=0;c<N;c++){
                h_AT[c+r*ldat] = h_A1[r+c*lda];
            }
        }

        magma_csetmatrix( N, M, h_AT, 0, ldat, d_A1, 0, ldat, queue);

        for(j=0; j<M; j++) {
          ipiv[j] = (int)((rand()*1.*M) / (RAND_MAX * 1.)) + 1;
        }

        /*
         *  BLAS swap
         */
        /* Column Major */
        cpu_time1 = magma_wtime();
        for ( j=0; j<M; j++) {
            if ( j != (ipiv[j]-1)) {
                blasf77_cswap( &N, h_A1+j, &lda, h_A1+(ipiv[j]-1), &lda);
            }
        }
        cpu_time1 = magma_wtime() - cpu_time1;

        /*
         *  LAPACK laswp
         */
        cpu_time2 = magma_wtime();
        k1 = 1;
        k2 = M;
        incx = 1;
        lapackf77_claswp(&N, h_A2, &lda, &k1, &k2, ipiv, &incx);
        cpu_time2 = magma_wtime() - cpu_time2;
        
        /*
         *  GPU swap
         */
        /* Col swap on transpose matrix*/
        gpu_time = magma_wtime();
        magma_cpermute_long2(N, d_A1, 0, ldat, ipiv, M, 0, queue);
        gpu_time = magma_wtime() - gpu_time;
        
        /* Check Result */
        magma_cgetmatrix( N, M, d_A1, 0, ldat, h_AT, 0, ldat, queue);
        for(r=0;r<N;r++){
            for(c=0;c<M;c++){
                h_A3[c+r*lda] = h_AT[r+c*ldat];
            }
        }
        
        int check_bl, check_bg, check_lg;

        check_bl = diffMatrix( h_A1, h_A2, M, N, lda );
        check_bg = diffMatrix( h_A1, h_A3, M, N, lda );
        check_lg = diffMatrix( h_A2, h_A3, M, N, lda );
        
        printf("%5d %5d  %6.2f      %6.2f        %6.2f    %s    %s    %s\n",
                M, N, cpu_time1, cpu_time2, gpu_time,
               (check_bl == 0) ? "SUCCESS" : "FAILED",
               (check_bg == 0) ? "SUCCESS" : "FAILED",
               (check_lg == 0) ? "SUCCESS" : "FAILED");

        if(check_lg !=0){
            printf("lapack swap results:\n");
            magma_cprint(M, N, h_A1, lda);
            printf("gpu swap transpose matrix result:\n");
            magma_cprint(M, N, h_A3, lda);
        }

        if (argc != 1)
          break;
    }
    
    /* clean up */
    TESTING_FREE_CPU( ipiv );
    TESTING_FREE_CPU( h_A1 );
    TESTING_FREE_CPU( h_A2 );
    TESTING_FREE_CPU( h_A3 );
    TESTING_FREE_CPU( h_AT );
    TESTING_FREE_DEV( d_A1 );

    magma_queue_destroy( queue );
    magma_finalize();
}
Beispiel #6
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing claset
   Code is very similar to testing_clacpy.cpp
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t    gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
    float           error, work[1];
    magmaFloatComplex  c_neg_one = MAGMA_C_NEG_ONE;
    magmaFloatComplex *h_A, *h_R;
    magmaFloatComplex_ptr d_A;
    magmaFloatComplex offdiag, diag;
    magma_int_t M, N, size, lda, ldda;
    magma_int_t ione     = 1;
    magma_int_t status = 0;
    
    magma_opts opts;
    opts.parse_opts( argc, argv );

    magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull };

    printf("%% uplo    M     N    offdiag    diag    CPU GByte/s (ms)    GPU GByte/s (ms)   check\n");
    printf("%%===================================================================================\n");
    for( int iuplo = 0; iuplo < 3; ++iuplo ) {
      for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
          for( int ival = 0; ival < 4; ++ival ) {
            // test combinations of zero & non-zero:
            // ival  offdiag  diag
            // 0     0        0
            // 1     0        3.14
            // 2     1.23     0
            // 3     1.23     3.14
            offdiag = MAGMA_C_MAKE( 1.2345, 6.7890 ) * (ival / 2);
            diag    = MAGMA_C_MAKE( 3.1415, 2.7183 ) * (ival % 2);
            
            M = opts.msize[itest];
            N = opts.nsize[itest];
            //M += 2;  // space for insets
            //N += 2;
            lda    = M;
            ldda   = magma_roundup( M, opts.align );
            size   = lda*N;
            if ( uplo[iuplo] == MagmaLower ) {
                // save lower trapezoid (with diagonal)
                if ( M > N ) {
                    gbytes = sizeof(magmaFloatComplex) * (1.*M*N - 0.5*N*(N-1)) / 1e9;
                } else {
                    gbytes = sizeof(magmaFloatComplex) * 0.5*M*(M+1) / 1e9;
                }
            }
            else if ( uplo[iuplo] == MagmaUpper ) {
                // save upper trapezoid (with diagonal)
                if ( N > M ) {
                    gbytes = sizeof(magmaFloatComplex) * (1.*M*N - 0.5*M*(M-1)) / 1e9;
                } else {
                    gbytes = sizeof(magmaFloatComplex) * 0.5*N*(N+1) / 1e9;
                }
            }
            else {
                // save entire matrix
                gbytes = sizeof(magmaFloatComplex) * 1.*M*N / 1e9;
            }
    
            TESTING_MALLOC_CPU( h_A, magmaFloatComplex, size   );
            TESTING_MALLOC_CPU( h_R, magmaFloatComplex, size   );
            
            TESTING_MALLOC_DEV( d_A, magmaFloatComplex, ldda*N );
            
            /* Initialize the matrix */
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < M; ++i ) {
                    h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j );
                }
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_csetmatrix( M, N, h_A, lda, d_A, ldda );
            
            magmablasSetKernelStream( opts.queue );
            gpu_time = magma_sync_wtime( opts.queue );
            //magmablas_claset( uplo[iuplo], M-2, N-2, offdiag, diag, d_A+1+ldda, ldda );  // inset by 1 row & col
            magmablas_claset( uplo[iuplo], M, N, offdiag, diag, d_A, ldda );
            gpu_time = magma_sync_wtime( opts.queue ) - gpu_time;
            gpu_perf = gbytes / gpu_time;
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            //magma_int_t M2 = M-2;  // inset by 1 row & col
            //magma_int_t N2 = N-2;
            //lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M2, &N2, &offdiag, &diag, h_A+1+lda, &lda );
            lapackf77_claset( lapack_uplo_const( uplo[iuplo] ), &M, &N, &offdiag, &diag, h_A, &lda );
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gbytes / cpu_time;
            
            if ( opts.verbose ) {
                printf( "A= " );  magma_cprint(     M, N, h_A, lda );
                printf( "dA=" );  magma_cprint_gpu( M, N, d_A, ldda );
            }
            
            /* =====================================================================
               Check the result
               =================================================================== */
            magma_cgetmatrix( M, N, d_A, ldda, h_R, lda );
            
            blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
            error = lapackf77_clange("f", &M, &N, h_R, &lda, work);

            bool okay = (error == 0);
            status += ! okay;
            printf("%5s %5d %5d  %9.4f  %6.4f   %7.2f (%7.2f)   %7.2f (%7.2f)   %s\n",
                   lapack_uplo_const( uplo[iuplo] ), (int) M, (int) N,
                   real(offdiag), real(diag),
                   cpu_perf, cpu_time*1000., gpu_perf, gpu_time*1000.,
                   (okay ? "ok" : "failed") );
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_R );
            
            TESTING_FREE_DEV( d_A );
            fflush( stdout );
          }
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }
      printf( "\n" );
    }

    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}