/**
    Checks an m-by-n single-complex matrix stored on the device by copying it
    to a temporary host buffer and handing that copy to magma_cnan_inf.

    @param[in]  uplo     Which part of the matrix to examine; must be
                         MagmaLower, MagmaUpper or MagmaFull.
    @param[in]  m        Number of rows, m >= 0.
    @param[in]  n        Number of columns, n >= 0.
    @param[in]  dA       Device pointer to the matrix (verified with magma_is_devptr).
    @param[in]  ldda     Leading dimension of dA, ldda >= max(1,m).
    @param[out] cnt_nan  Forwarded unchanged to magma_cnan_inf.
    @param[out] cnt_inf  Forwarded unchanged to magma_cnan_inf.

    @return  The value returned by magma_cnan_inf on the host copy;
             -i if the i-th argument is invalid (also reported through magma_xerbla);
             or the non-zero status of magma_cmalloc_cpu if the host buffer
             could not be allocated (cnt_nan / cnt_inf are left untouched then).
*/
magma_int_t magma_cnan_inf_gpu(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    const magmaFloatComplex *dA, magma_int_t ldda,
    magma_int_t *cnt_nan,
    magma_int_t *cnt_inf )
{
    magma_int_t info = 0;
    if ( uplo != MagmaLower && uplo != MagmaUpper && uplo != MagmaFull )
        info = -1;
    else if ( m < 0 )
        info = -2;
    else if ( n < 0 )
        info = -3;
    else if ( magma_is_devptr( dA ) == 0 )
        info = -4;
    else if ( ldda < max(1,m) )
        info = -5;

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return info;
    }

    // Keep the host leading dimension at least 1 so an empty (m == 0) matrix
    // does not pass a zero leading dimension to the copy routine.
    // NOTE(review): the underlying copy presumably rejects lda == 0 -- confirm.
    magma_int_t lda = max(1,m);

    // Do not touch the buffer unless the allocation actually succeeded.
    magmaFloatComplex *A;
    magma_int_t err = magma_cmalloc_cpu( &A, lda*n );
    if ( err != 0 ) {
        return err;
    }

    magma_cgetmatrix( m, n, dA, ldda, A, lda );

    magma_int_t cnt = magma_cnan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf );

    magma_free_cpu( A );
    return cnt;
}
/**
    Checks an m-by-n single-complex matrix stored on the device by copying it
    to a temporary host buffer and handing that copy to magma_cnan_inf.
    The copy is issued on a queue that is created on the current device for
    the duration of the transfer and destroyed right after it.

    @param[in]  uplo     Which part of the matrix to examine; must be
                         MagmaLower, MagmaUpper or MagmaFull.
    @param[in]  m        Number of rows, m >= 0.
    @param[in]  n        Number of columns, n >= 0.
    @param[in]  dA       Device pointer to the matrix.
    @param[in]  ldda     Leading dimension of dA, ldda >= max(1,m).
    @param[out] cnt_nan  Forwarded unchanged to magma_cnan_inf.
    @param[out] cnt_inf  Forwarded unchanged to magma_cnan_inf.

    @return  The value returned by magma_cnan_inf on the host copy;
             -i if the i-th argument is invalid (also reported through magma_xerbla);
             or the non-zero status of magma_cmalloc_cpu if the host buffer
             could not be allocated (cnt_nan / cnt_inf are left untouched then).
*/
magma_int_t magma_cnan_inf_gpu(
    magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    magmaFloatComplex_const_ptr dA, magma_int_t ldda,
    magma_int_t *cnt_nan,
    magma_int_t *cnt_inf )
{
    magma_int_t info = 0;
    if ( uplo != MagmaLower && uplo != MagmaUpper && uplo != MagmaFull )
        info = -1;
    else if ( m < 0 )
        info = -2;
    else if ( n < 0 )
        info = -3;
    else if ( ldda < max(1,m) )
        info = -5;   // argument 4 (dA) is not validated here, hence no -4

    if (info != 0) {
        magma_xerbla( __func__, -(info) );
        return info;
    }

    // Keep the host leading dimension at least 1 so an empty (m == 0) matrix
    // does not pass a zero leading dimension to the copy routine.
    // NOTE(review): the underlying copy presumably rejects lda == 0 -- confirm.
    magma_int_t lda = max(1,m);

    // Allocate before creating the queue so that an allocation failure can
    // return immediately without leaving a queue behind.
    magmaFloatComplex *A;
    magma_int_t err = magma_cmalloc_cpu( &A, lda*n );
    if ( err != 0 ) {
        return err;
    }

    magma_queue_t queue;
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_create( cdev, &queue );

    magma_cgetmatrix( m, n, dA, ldda, A, lda, queue );

    magma_queue_destroy( queue );

    magma_int_t cnt = magma_cnan_inf( uplo, m, n, A, lda, cnt_nan, cnt_inf );

    magma_free_cpu( A );
    return cnt;
}
/* //////////////////////////////////////////////////////////////////////////// -- Testing znan_inf */ int main( int argc, char** argv) { TESTING_INIT(); #define hA(i,j) (hA + (i) + (j)*lda) magmaFloatComplex *hA, *dA; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t M, N, lda, ldda, size; magma_int_t *ii, *jj; magma_int_t i, j, cnt, tmp; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); magma_uplo_t uplo[] = { MagmaLower, MagmaUpper, MagmaFull }; printf("uplo M N CPU nan + inf GPU nan + inf actual nan + inf \n"); printf("===============================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iuplo = 0; iuplo < 3; ++iuplo ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; lda = M; ldda = ((M + 31)/32)*32; size = lda*N; /* Allocate memory for the matrix */ TESTING_MALLOC_CPU( hA, magmaFloatComplex, lda *N ); TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*N ); /* Initialize the matrix */ lapackf77_clarnv( &ione, ISEED, &size, hA ); // up to half of matrix is NAN, and // up to half of matrix is INF. 
magma_int_t cnt_nan = (magma_int_t)( (rand() / ((float)RAND_MAX)) * 0.5 * M*N ); magma_int_t cnt_inf = (magma_int_t)( (rand() / ((float)RAND_MAX)) * 0.5 * M*N ); magma_int_t total = cnt_nan + cnt_inf; assert( cnt_nan >= 0 ); assert( cnt_inf >= 0 ); assert( total <= M*N ); // fill in indices TESTING_MALLOC_CPU( ii, magma_int_t, size ); TESTING_MALLOC_CPU( jj, magma_int_t, size ); for( cnt=0; cnt < size; ++cnt ) { ii[cnt] = cnt % M; jj[cnt] = cnt / M; } // shuffle indices for( cnt=0; cnt < total; ++cnt ) { i = int( rand() / ((float)RAND_MAX) * size ); tmp=ii[cnt]; ii[cnt]=ii[i]; ii[i]=tmp; tmp=jj[cnt]; jj[cnt]=jj[i]; jj[i]=tmp; } // fill in NAN and INF // for uplo, count NAN and INF in triangular portion of A int c_nan=0; int c_inf=0; for( cnt=0; cnt < cnt_nan; ++cnt ) { i = ii[cnt]; j = jj[cnt]; *hA(i,j) = MAGMA_C_NAN; if ( uplo[iuplo] == MagmaLower && i >= j ) { c_nan++; } if ( uplo[iuplo] == MagmaUpper && i <= j ) { c_nan++; } } for( cnt=cnt_nan; cnt < cnt_nan + cnt_inf; ++cnt ) { i = ii[cnt]; j = jj[cnt]; *hA(i,j) = MAGMA_C_INF; if ( uplo[iuplo] == MagmaLower && i >= j ) { c_inf++; } if ( uplo[iuplo] == MagmaUpper && i <= j ) { c_inf++; } } if ( uplo[iuplo] == MagmaLower || uplo[iuplo] == MagmaUpper ) { cnt_nan = c_nan; cnt_inf = c_inf; total = cnt_nan + cnt_inf; } //printf( "nan %g + %gi\n", MAGMA_C_REAL( MAGMA_C_NAN ), MAGMA_C_REAL( MAGMA_C_NAN ) ); //printf( "inf %g + %gi\n", MAGMA_C_REAL( MAGMA_C_INF ), MAGMA_C_REAL( MAGMA_C_INF ) ); //magma_cprint( M, N, hA, lda ); magma_csetmatrix( M, N, hA, lda, dA, ldda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_int_t c_cpu_nan=-1, c_cpu_inf=-1; magma_int_t c_gpu_nan=-1, c_gpu_inf=-1; magma_int_t c_cpu = magma_cnan_inf ( uplo[iuplo], M, N, hA, lda, &c_cpu_nan, &c_cpu_inf ); magma_int_t c_gpu = magma_cnan_inf_gpu( uplo[iuplo], M, N, dA, ldda, &c_gpu_nan, &c_gpu_inf ); magma_int_t 
c_cpu2 = magma_cnan_inf ( uplo[iuplo], M, N, hA, lda, NULL, NULL ); magma_int_t c_gpu2 = magma_cnan_inf_gpu( uplo[iuplo], M, N, dA, ldda, NULL, NULL ); /* ===================================================================== Check the result =================================================================== */ bool ok = ( c_cpu == c_gpu ) && ( c_cpu == c_cpu2 ) && ( c_gpu == c_gpu2 ) && ( c_cpu == c_cpu_nan + c_cpu_inf ) && ( c_gpu == c_gpu_nan + c_gpu_inf ) && ( c_cpu_nan == cnt_nan ) && ( c_cpu_inf == cnt_inf ) && ( c_gpu_nan == cnt_nan ) && ( c_gpu_inf == cnt_inf ); printf( "%4c %5d %5d %10d + %-10d %10d + %-10d %10d + %-10d %s\n", lapacke_uplo_const( uplo[iuplo] ), (int) M, (int) N, (int) c_cpu_nan, (int) c_cpu_inf, (int) c_gpu_nan, (int) c_gpu_inf, (int) cnt_nan, (int) cnt_inf, (ok ? "ok" : "failed")); status += ! ok; TESTING_FREE_CPU( hA ); TESTING_FREE_DEV( dA ); TESTING_FREE_CPU( ii ); TESTING_FREE_CPU( jj ); } } printf( "\n" ); } TESTING_FINALIZE(); return status; }