/**
    @return true if either real(x) or imag(x) is INF.

    When COMPLEX is not defined, x is tested directly as a real value.
*/
inline bool magma_d_isinf( double x )
{
#ifdef COMPLEX
    const bool real_part_inf = isinf( MAGMA_D_REAL( x ));
    const bool imag_part_inf = isinf( MAGMA_D_IMAG( x ));
    return real_part_inf || imag_part_inf;
#else
    const bool value_inf = isinf( x );
    return value_inf;
#endif
}
/**
    Prints an m-by-n matrix to stdout in a bracketed, Matlab-like layout.

    Entries are addressed through the A(i,j) macro (defined outside this
    block; presumably column-major with leading dimension lda -- confirm
    against the macro definition).  Entries equal to zero are printed with
    a short placeholder instead of a formatted number.

    If magma_is_devptr( A ) returns 1, an error is written to stderr and the
    process exits with status 1.

    @param[in] m    number of rows to print
    @param[in] n    number of columns to print
    @param[in] A    matrix data to print
    @param[in] lda  leading dimension (not referenced directly here; assumed
                    to be consumed by the A(i,j) macro)
*/
void magma_dprint( magma_int_t m, magma_int_t n, const double *A, magma_int_t lda )
{
    // refuse pointers that magma_is_devptr flags
    if ( magma_is_devptr( A ) == 1 ) {
        fprintf( stderr, "ERROR: dprint called with device pointer.\n" );
        exit(1);
    }

    double c_zero = MAGMA_D_ZERO;

    // a single-row matrix is printed on one line, everything else row by row
    if ( m != 1 ) {
        printf( "[\n" );
    }
    else {
        printf( "[ " );
    }

    for( int row = 0; row < m; ++row ) {
        for( int col = 0; col < n; ++col ) {
            const double *entry = A(row,col);

            if ( MAGMA_D_EQUAL( *entry, c_zero )) {
                printf( " 0. " );
                continue;
            }

            #if defined(PRECISION_z) || defined(PRECISION_c)
            printf( " %8.4f+%8.4fi", MAGMA_D_REAL( *entry ), MAGMA_D_IMAG( *entry ));
            #else
            printf( " %8.4f", MAGMA_D_REAL( *entry ));
            #endif
        }

        // row terminator: newline for multi-row output, blank otherwise
        if ( m <= 1 ) {
            printf( " " );
        }
        else {
            printf( "\n" );
        }
    }

    printf( "];\n" );
}
/**
    Iteratively solves A * x = b with an unrolled TFQMR-type iteration
    (solver id Magma_TFQMR).  Each pass of the main loop performs two
    half-steps that update x and the residual r, followed by the
    recurrences that prepare the next pass.

    @param[in]     A           system matrix
    @param[in]     b           right-hand side; dofs = A.num_rows * b.num_cols
    @param[in,out] x           on entry the initial guess, on exit the
                               approximate solution (x->dval is updated in place)
    @param[in,out] solver_par  reads rtol, atol, maxiter and verbose; writes
                               solver, numiter, spmv_count, init_res, iter_res,
                               final_res, runtime, info and, if verbose > 0,
                               res_vec[] and timing[]
    @param[in]     queue       queue passed to every MAGMA call

    @return MAGMA_SUCCESS, MAGMA_SLOW_CONVERGENCE or MAGMA_DIVERGENCE
            depending on the final residuals.  If one of the CHECK-wrapped
            calls fails, the function leaves through the cleanup label
            (presumably with the failing code stored in info by CHECK --
            confirm against the macro definition).
*/
extern "C" magma_int_t
magma_dtfqmr_unrolled(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_TFQMR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
    // solver variables
    double nom0, r0, res, nomb;
    double rho = c_one, rho_l = c_one, eta = c_zero, c = c_zero,
           theta = c_zero, tau = c_zero, alpha = c_one, beta = c_zero,
           sigma = c_zero;

    magma_int_t dofs = A.num_rows* b.num_cols;

    // GPU workspace
    // NOTE: no declaration with an initializer may follow the first CHECK /
    // goto cleanup in this scope, otherwise the jump to cleanup is ill-formed.
    magma_d_matrix r={Magma_CSR}, r_tld={Magma_CSR}, d={Magma_CSR},
                   w={Magma_CSR}, v={Magma_CSR}, u_mp1={Magma_CSR},
                   Au={Magma_CSR}, Ad={Magma_CSR}, Au_new={Magma_CSR};
    CHECK( magma_dvinit( &r,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &u_mp1, Magma_DEV, A.num_rows, b.num_cols, c_one,  queue ));
    CHECK( magma_dvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &v,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &d,     Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &w,     Magma_DEV, A.num_rows, b.num_cols, c_one,  queue ));
    CHECK( magma_dvinit( &Ad,    Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &Au_new,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &Au,    Magma_DEV, A.num_rows, b.num_cols, c_one,  queue ));

    // solver setup
    CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, r_tld.dval, 1, queue );
    magma_dcopy( dofs, r.dval, 1, w.dval, 1, queue );
    magma_dcopy( dofs, r.dval, 1, u_mp1.dval, 1, queue );
    CHECK( magma_d_spmv( c_one, A, u_mp1, c_zero, v, queue ));   // v = A u
    magma_dcopy( dofs, v.dval, 1, Au.dval, 1, queue );
    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }
    // early-exit threshold: relative tolerance, bounded below by ATOLERANCE
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    // r_tld is a copy of r here, so one dot product yields both rho and tau
    rho = magma_ddot( dofs, r.dval, 1, r_tld.dval, 1, queue );
    tau = magma_dsqrt( rho );
    rho_l = rho;

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // do this every iteration as unrolled
        alpha = rho / magma_ddot( dofs, v.dval, 1, r_tld.dval, 1, queue );
        sigma = theta * theta / alpha * eta;

        magma_daxpy( dofs, -alpha, v.dval, 1, u_mp1.dval, 1, queue );    // u_mp1 = u_mp1 - alpha*v;
        magma_daxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue );       // w = w - alpha*Au;
        magma_dscal( dofs, sigma, d.dval, 1, queue );
        magma_daxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue );     // d = u_mp1 + sigma*d;
        // In this first half-step Ad is not updated by recurrence; it is
        // recomputed from d by the explicit SpMV below.

        theta = magma_dsqrt( magma_ddot(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau;
        c = c_one / magma_dsqrt( c_one + theta*theta );
        tau = tau * theta *c;
        eta = c * c * alpha;
        sigma = theta * theta / alpha * eta;    // sigma for the second half-step

        CHECK( magma_d_spmv( c_one, A, d, c_zero, Ad, queue ));   // Ad = A d
        solver_par->spmv_count++;
        magma_daxpy( dofs, eta, d.dval, 1, x->dval, 1, queue );     // x = x + eta * d
        magma_daxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue );    // r = r - eta * Ad

        // here starts the second part of the loop #################################
        magma_daxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue );       // w = w - alpha*Au;
        magma_dscal( dofs, sigma, d.dval, 1, queue );
        magma_daxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue );     // d = u_mp1 + sigma*d;
        magma_dscal( dofs, sigma, Ad.dval, 1, queue );
        magma_daxpy( dofs, c_one, Au.dval, 1, Ad.dval, 1, queue );       // Ad = Au + sigma*Ad;

        theta = magma_dsqrt( magma_ddot(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau;
        c = c_one / magma_dsqrt( c_one + theta*theta );
        tau = tau * theta *c;
        eta = c * c * alpha;

        magma_daxpy( dofs, eta, d.dval, 1, x->dval, 1, queue );     // x = x + eta * d
        magma_daxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue );    // r = r - eta * Ad

        res = magma_dnrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }

        // do this every loop as unrolled
        rho_l = rho;
        rho = magma_ddot( dofs, w.dval, 1, r_tld.dval, 1, queue );
        beta = rho / rho_l;
        magma_dscal( dofs, beta, u_mp1.dval, 1, queue );
        magma_daxpy( dofs, c_one, w.dval, 1, u_mp1.dval, 1, queue );     // u_mp1 = w + beta*u_mp1;

        CHECK( magma_d_spmv( c_one, A, u_mp1, c_zero, Au_new, queue ));   // Au_new = A u_mp1
        solver_par->spmv_count++;
        // do this every loop as unrolled
        magma_dscal( dofs, beta*beta, v.dval, 1, queue );
        magma_daxpy( dofs, beta, Au.dval, 1, v.dval, 1, queue );
        magma_daxpy( dofs, c_one, Au_new.dval, 1, v.dval, 1, queue );    // v = Au_new + beta*(Au+beta*v);
        magma_dcopy( dofs, Au_new.dval, 1, Au.dval, 1, queue );
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    // recompute the true residual of the returned x; iter_res keeps the
    // recurrence-based value from the last iteration
    CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        // converged exactly on the last permitted iteration
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&r_tld, queue );
    magma_dmfree(&d, queue );
    magma_dmfree(&w, queue );
    magma_dmfree(&v, queue );
    magma_dmfree(&u_mp1, queue );
    magma_dmfree(&Au, queue );
    magma_dmfree(&Au_new, queue );
    magma_dmfree(&Ad, queue );

    solver_par->info = info;
    return info;
}   /* magma_dtfqmr_unrolled */
magma_int_t magma_dprint_csr_mtx( magma_int_t n_row, magma_int_t n_col, magma_int_t nnz, double **val, magma_index_t **row, magma_index_t **col, magma_order_t MajorType, magma_queue_t queue ) { magma_int_t info = 0; if ( MajorType == MagmaColMajor ) { // to obtain ColMajor output we transpose the matrix // and flip the row and col pointer in the output double *new_val=NULL; magma_index_t *new_row; magma_index_t *new_col; magma_int_t new_n_row; magma_int_t new_n_col; magma_int_t new_nnz; CHECK( d_transpose_csr( n_row, n_col, nnz, *val, *row, *col, &new_n_row, &new_n_col, &new_nnz, &new_val, &new_row, &new_col, queue) ); #define REAL #ifdef COMPLEX // real case printf( "%%%%MatrixMarket matrix coordinate real general\n" ); printf( "%d %d %d\n", int(new_n_col), int(new_n_row), int(new_nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < n_col; i++) { magma_index_t rowtemp1 = (new_row)[i]; magma_index_t rowtemp2 = (new_row)[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { printf( "%d %d %.6e %.6e\n", ((new_col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((new_val)[rowtemp1+j]), MAGMA_D_IMAG((new_val)[rowtemp1+j]) ); } rowindex++; } #else // real case printf( "%%%%MatrixMarket matrix coordinate real general\n" ); printf( "%d %d %d\n", int(new_n_col), int(new_n_row), int(new_nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < n_col; i++) { magma_index_t rowtemp1 = (new_row)[i]; magma_index_t rowtemp2 = (new_row)[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { printf( "%d %d %.6e\n", ((new_col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((new_val)[rowtemp1+j]) ); } rowindex++; } #endif } else { #define REAL #ifdef COMPLEX // real case printf( "%%%%MatrixMarket matrix coordinate real general\n" ); printf( "%d %d %d\n", int(n_col), int(n_row), int(nnz)); // TODO what's the difference between i (or i+1) and rowindex? 
magma_index_t i=0, j=0, rowindex=1; for(i=0; i < n_col; i++) { magma_index_t rowtemp1 = (*row)[i]; magma_index_t rowtemp2 = (*row)[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { printf( "%d %d %.6e %.6e\n", rowindex, ((*col)[rowtemp1+j]+1), MAGMA_D_REAL((*val)[rowtemp1+j]), MAGMA_D_IMAG((*val)[rowtemp1+j]) ); } rowindex++; } #else // real case printf( "%%%%MatrixMarket matrix coordinate real general\n" ); printf( "%d %d %d\n", int(n_col), int(n_row), int(nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < n_col; i++) { magma_index_t rowtemp1 = (*row)[i]; magma_index_t rowtemp2 = (*row)[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { printf( "%d %d %.6e\n", rowindex, ((*col)[rowtemp1+j]+1), MAGMA_D_REAL((*val)[rowtemp1+j]) ); } rowindex++; } #endif } cleanup: return info; }
magma_int_t magma_dwrite_csr_mtx( magma_d_matrix A, magma_order_t MajorType, const char *filename, magma_queue_t queue ) { magma_int_t info = 0; FILE *fp; magma_d_matrix B = {Magma_CSR}; if ( MajorType == MagmaColMajor ) { // to obtain ColMajor output we transpose the matrix // and flip the row and col pointer in the output CHECK( magma_d_cucsrtranspose( A, &B, queue )); // TODO avoid duplicating this code below. printf("%% Writing sparse matrix to file (%s):", filename); fflush(stdout); fp = fopen(filename, "w"); if ( fp == NULL ){ printf("\n%% error writing matrix: file exists or missing write permission\n"); info = -1; goto cleanup; } #define REAL #ifdef COMPLEX // real case fprintf( fp, "%%%%MatrixMarket matrix coordinate real general\n" ); fprintf( fp, "%d %d %d\n", int(B.num_cols), int(B.num_rows), int(B.nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < B.num_cols; i++) { magma_index_t rowtemp1 = B.row[i]; magma_index_t rowtemp2 = B.row[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { fprintf( fp, "%d %d %.16g %.16g\n", ((B.col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((B.val)[rowtemp1+j]), MAGMA_D_IMAG((B.val)[rowtemp1+j]) ); } rowindex++; } #else // real case fprintf( fp, "%%%%MatrixMarket matrix coordinate real general\n" ); fprintf( fp, "%d %d %d\n", int(B.num_cols), int(B.num_rows), int(B.nnz)); // TODO what's the difference between i (or i+1) and rowindex? 
magma_index_t i=0, j=0, rowindex=1; for(i=0; i < B.num_cols; i++) { magma_index_t rowtemp1 = B.row[i]; magma_index_t rowtemp2 = B.row[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { fprintf( fp, "%d %d %.16g\n", ((B.col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((B.val)[rowtemp1+j]) ); } rowindex++; } #endif if (fclose(fp) != 0) printf("\n%% error: writing matrix failed\n"); else printf(" done\n"); } else { printf("%% Writing sparse matrix to file (%s):", filename); fflush(stdout); fp = fopen (filename, "w"); if ( fp == NULL ){ printf("\n%% error writing matrix: file exists or missing write permission\n"); info = -1; goto cleanup; } #define REAL #ifdef COMPLEX // real case fprintf( fp, "%%%%MatrixMarket matrix coordinate real general\n" ); fprintf( fp, "%d %d %d\n", int(A.num_cols), int(A.num_rows), int(A.nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < A.num_cols; i++) { magma_index_t rowtemp1 = A.row[i]; magma_index_t rowtemp2 = A.row[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { fprintf( fp, "%d %d %.16g %.16g\n", ((A.col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((A.val)[rowtemp1+j]), MAGMA_D_IMAG((A.val)[rowtemp1+j]) ); } rowindex++; } #else // real case fprintf( fp, "%%%%MatrixMarket matrix coordinate real general\n" ); fprintf( fp, "%d %d %d\n", int(A.num_cols), int(A.num_rows), int(A.nnz)); // TODO what's the difference between i (or i+1) and rowindex? magma_index_t i=0, j=0, rowindex=1; for(i=0; i < B.num_cols; i++) { magma_index_t rowtemp1 = A.row[i]; magma_index_t rowtemp2 = A.row[i+1]; for(j=0; j < rowtemp2 - rowtemp1; j++) { fprintf( fp, "%d %d %.16g\n", ((A.col)[rowtemp1+j]+1), rowindex, MAGMA_D_REAL((A.val)[rowtemp1+j])); } rowindex++; } #endif if (fclose(fp) != 0) printf("\n%% error: writing matrix failed\n"); else printf(" done\n"); } cleanup: return info; }
/**
    Selects the element of rank k (0-based) from val[0..length-1], ordered
    by magnitude as computed by MAGMA_D_ABS, using recursive partitioning
    around the last element of the current range.

    With r == 0 entries whose magnitude does not exceed the pivot's are
    gathered in front of it (rank counted from the smallest magnitude);
    with r != 0 entries whose magnitude is not below the pivot's are
    gathered in front of it (rank counted from the largest magnitude).
    The array val is permuted in place.

    Every non-pivot entry of the current range is tested with
    magma_d_isnan_inf; on a hit a message is printed and MAGMA_ERR_NAN is
    returned.  The pivot val[length-1] itself is not tested in that pass.

    @param[in,out] val      array to search; reordered on exit
    @param[in]     length   number of entries in val
    @param[in]     k        requested rank (0-based)
    @param[in]     r        0: rank from the smallest, otherwise from the largest
    @param[out]    element  receives the selected entry
    @param[in]     queue    queue forwarded to the recursive calls

    @return 0 on success, MAGMA_ERR_NAN if a tested entry is NaN/Inf.
*/
magma_int_t
magma_dorderstatistics(
    double *val, magma_int_t length, magma_int_t k, magma_int_t r,
    double *element, magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t i, st;
    double tmp;     // scratch presumably used by the SWAP macro
    const bool from_smallest = ( r == 0 );

    // partition: st counts the entries placed in front of the pivot
    for ( i = 0, st = 0; i < length - 1; i++ ) {
        if ( magma_d_isnan_inf( val[i]) ) {
            printf("error: array contains %f + %fi.\n", MAGMA_D_REAL(val[i]), MAGMA_D_IMAG(val[i]) );
            info = MAGMA_ERR_NAN;
            goto cleanup;
        }

        const double entry_abs = MAGMA_D_ABS(val[i]);
        const double pivot_abs = MAGMA_D_ABS(val[length-1]);
        // entries strictly on the far side of the pivot stay where they are
        const bool stays_behind = from_smallest ? ( entry_abs > pivot_abs )
                                                : ( entry_abs < pivot_abs );
        if ( ! stays_behind ) {
            SWAP(i, st);
            st++;
        }
    }

    // move the pivot to its final position
    SWAP(length-1, st);

    if ( st == k ) {
        *element = val[st];
    }
    else if ( k < st ) {
        // requested rank lies in the front part
        CHECK( magma_dorderstatistics( val, st, k, r, element, queue ));
    }
    else {
        // requested rank lies at or behind the pivot position
        CHECK( magma_dorderstatistics( val+st, length-st, k-st, r, element, queue ));
    }

cleanup:
    return info;
}