magma_int_t magma_dvspread( magma_d_matrix *x, const char * filename, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix A={Magma_CSR}, B={Magma_CSR}; magma_int_t entry=0; // char *vfilename[] = {"/mnt/sparse_matrices/mtx/rail_79841_B.mtx"}; CHECK( magma_d_csr_mtx( &A, filename, queue )); CHECK( magma_dmconvert( A, &B, Magma_CSR, Magma_DENSE, queue )); CHECK( magma_dvinit( x, Magma_CPU, A.num_cols, A.num_rows, MAGMA_D_ZERO, queue )); x->major = MagmaRowMajor; for(magma_int_t i=0; i<A.num_cols; i++) { for(magma_int_t j=0; j<A.num_rows; j++) { x->val[i*A.num_rows+j] = B.val[ i+j*A.num_cols ]; entry++; } } x->num_rows = A.num_rows; x->num_cols = A.num_cols; cleanup: magma_dmfree( &A, queue ); magma_dmfree( &B, queue ); return info; }
magma_int_t magma_dmLdiagadd( magma_d_matrix *L, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix LL={Magma_CSR}; if( L->row[1]==1 ){ // lower triangular with unit diagonal //printf("L lower triangular.\n"); LL.diagorder_type = Magma_UNITY; CHECK( magma_dmconvert( *L, &LL, Magma_CSR, Magma_CSRL, queue )); } else if( L->row[1]==0 ){ // strictly lower triangular //printf("L strictly lower triangular.\n"); CHECK( magma_dmtransfer( *L, &LL, Magma_CPU, Magma_CPU, queue )); magma_free_cpu( LL.col ); magma_free_cpu( LL.val ); LL.nnz = L->nnz+L->num_rows; CHECK( magma_dmalloc_cpu( &LL.val, LL.nnz )); CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz )); magma_int_t z=0; for( magma_int_t i=0; i<L->num_rows; i++){ LL.row[i] = z; for( magma_int_t j=L->row[i]; j<L->row[i+1]; j++){ LL.val[z] = L->val[j]; LL.col[z] = L->col[j]; z++; } // add unit diagonal LL.val[z] = MAGMA_D_MAKE(1.0, 0.0); LL.col[z] = i; z++; } LL.row[LL.num_rows] = z; LL.nnz = z; } else{ printf("error: L neither lower nor strictly lower triangular!\n"); } magma_dmfree( L, queue ); CHECK( magma_dmtransfer(LL, L, Magma_CPU, Magma_CPU, queue )); cleanup: if( info != 0 ){ magma_dmfree( L, queue ); } magma_dmfree( &LL, queue ); return info; }
extern "C" magma_int_t magma_dmshrink( magma_d_matrix A, magma_d_matrix *B, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, hB={Magma_CSR}, hBCSR={Magma_CSR}; if( A.num_rows<=A.num_cols){ if( A.memory_location == Magma_CPU && A.storage_type == Magma_CSR ){ CHECK( magma_dmconvert( A, B, Magma_CSR, Magma_CSR, queue )); for(magma_int_t i=0; i<A.nnz; i++){ if( B->col[i] >= A.num_rows ){ B->val[i] = MAGMA_D_ZERO; } } CHECK( magma_dmcsrcompressor( B, queue ) ); B->num_cols = B->num_rows; } else { CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); CHECK( magma_dmconvert( hA, &hACSR, A.storage_type, Magma_CSR, queue )); CHECK( magma_dmshrink( hACSR, &hBCSR, queue )); CHECK( magma_dmconvert( hBCSR, &hB, Magma_CSR, A.storage_type, queue )); CHECK( magma_dmtransfer( hB, B, Magma_CPU, A.memory_location, queue )); } } else { printf("%% error: A has too many rows: m > n.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } cleanup: magma_dmfree( &hA, queue ); magma_dmfree( &hB, queue ); magma_dmfree( &hACSR, queue ); magma_dmfree( &hBCSR, queue ); return info; }
extern "C" magma_int_t magma_dmdiagadd( magma_d_matrix *A, double add, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix hA={Magma_CSR}, CSRA={Magma_CSR}; if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) { for( magma_int_t z=0; z<A->nnz; z++ ) { if ( A->col[z]== A->rowidx[z] ) { // add some identity matrix A->val[z] = A->val[z] + add; } } } else { magma_storage_t A_storage = A->storage_type; magma_location_t A_location = A->memory_location; CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue )); CHECK( magma_dmconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue )); CHECK( magma_dmdiagadd( &CSRA, add, queue )); magma_dmfree( &hA, queue ); magma_dmfree( A, queue ); CHECK( magma_dmconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue )); CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue )); } cleanup: magma_dmfree( &hA, queue ); magma_dmfree( &CSRA, queue ); return info; }
magma_int_t magma_dvget_dev( magma_d_matrix v, magma_int_t *m, magma_int_t *n, magmaDouble_ptr *val, magma_queue_t queue ) { magma_int_t info =0; magma_d_matrix v_DEV={Magma_CSR}; if ( v.memory_location == Magma_DEV ) { *m = v.num_rows; *n = v.num_cols; *val = v.dval; } else { CHECK( magma_dmtransfer( v, &v_DEV, v.memory_location, Magma_DEV, queue )); CHECK( magma_dvget_dev( v_DEV, m, n, val, queue )); } cleanup: magma_dmfree( &v_DEV, queue ); return info; }
magma_int_t magma_dprint_matrix( magma_d_matrix A, magma_queue_t queue ) { magma_int_t info = 0; //************************************************************** #define REAL #ifdef COMPLEX #define magma_dprintval( tmp ) { \ if ( MAGMA_D_EQUAL( tmp, c_zero )) { \ printf( " 0. " ); \ } \ else { \ printf( " %8.4f+%8.4fi", \ MAGMA_D_REAL( tmp ), MAGMA_D_IMAG( tmp )); \ } \ } #else #define magma_dprintval( tmp ) { \ if ( MAGMA_D_EQUAL( tmp, c_zero )) { \ printf( " 0. " ); \ } \ else { \ printf( " %8.4f", MAGMA_D_REAL( tmp )); \ } \ } #endif //************************************************************** magma_index_t i, j, k; double c_zero = MAGMA_D_ZERO; magma_d_matrix C={Magma_CSR}; if ( A.memory_location == Magma_CPU ) { printf("visualizing matrix of size %d x %d with %d nonzeros:\n", int(A.num_rows), int(A.num_cols), int(A.nnz)); if ( A.storage_type == Magma_DENSE ) { for( i=0; i < (A.num_rows); i++ ) { for( j=0; j < A.num_cols; j++ ) { magma_dprintval( A.val[i*(A.num_cols)+j] ); } printf( "\n" ); } } else if( A.num_cols < 8 || A.num_rows < 8 ) { CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_DENSE, queue )); CHECK( magma_dprint_matrix( C, queue )); } else if ( A.storage_type == Magma_CSR ) { // visualize only small matrices like dense if ( A.num_rows < 11 && A.num_cols < 11 ) { CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_DENSE, queue )); CHECK( magma_dprint_matrix( C, queue )); magma_dmfree( &C, queue ); } // otherwise visualize only coners else { // 4 beginning and 4 last elements of first four rows for( i=0; i < 4; i++ ) { // upper left corner for( j=0; j < 4; j++ ) { double tmp = MAGMA_D_ZERO; magma_index_t rbound = min( A.row[i]+4, A.row[i+1]); magma_index_t lbound = max( A.row[i], A.row[i]); for( k=lbound; k < rbound; k++ ) { if ( A.col[k] == j ) { tmp = A.val[k]; } } magma_dprintval( tmp ); } if ( i == 0 ) { printf( " . . . " ); } else { printf( " " ); } // upper right corner for( j=A.num_cols-4; j < A.num_cols; j++ ) { double tmp = MAGMA_D_ZERO; magma_index_t rbound = min( A.row[i+1], A.row[i+1]); magma_index_t lbound = max( A.row[i+1]-4, A.row[i]); for( k=lbound; k < rbound; k++ ) { if ( A.col[k] == j ) { tmp = A.val[k]; } } magma_dprintval( tmp ); } printf( "\n"); } printf( " . . . .\n" " . . . .\n" " . . . .\n" " . . . .\n" ); for( i=A.num_rows-4; i < A.num_rows; i++ ) { // lower left corner for( j=0; j < 4; j++ ) { double tmp = MAGMA_D_ZERO; magma_index_t rbound = min( A.row[i]+4, A.row[i+1]); magma_index_t lbound = max( A.row[i], A.row[i]); for( k=lbound; k < rbound; k++ ) { if ( A.col[k] == j ) { tmp = A.val[k]; } } magma_dprintval( tmp ); } printf( " "); // lower right corner for( j=A.num_cols-4; j < A.num_cols; j++ ) { double tmp = MAGMA_D_ZERO; magma_index_t rbound = min( A.row[i+1], A.row[i+1]); magma_index_t lbound = max( A.row[i+1]-4, A.row[i]); for( k=lbound; k < rbound; k++ ) { if ( A.col[k] == j ) { tmp = A.val[k]; } } magma_dprintval( tmp ); } printf( "\n"); } } } else { CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_CSR, queue )); CHECK( magma_dprint_matrix( C, queue )); } } else { //magma_d_matrix C={Magma_CSR}; CHECK( magma_dmtransfer( A, &C, A.memory_location, Magma_CPU, queue )); CHECK( magma_dprint_matrix( C, queue )); } cleanup: magma_dmfree( &C, queue ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix A={Magma_CSR}, A2={Magma_CSR}, A3={Magma_CSR}, A4={Magma_CSR}, A5={Magma_CSR}; int i=1; TESTING_CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_dm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_d_csr_mtx( &A, argv[i], queue )); } printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); // filename for temporary matrix storage const char *filename = "testmatrix.mtx"; // write to file TESTING_CHECK( magma_dwrite_csrtomtx( A, filename, queue )); // read from file TESTING_CHECK( magma_d_csr_mtx( &A2, filename, queue )); // delete temporary matrix unlink( filename ); //visualize printf("A2:\n"); TESTING_CHECK( magma_dprint_matrix( A2, queue )); //visualize TESTING_CHECK( magma_dmconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue )); printf("A4:\n"); TESTING_CHECK( magma_dprint_matrix( A4, queue )); TESTING_CHECK( magma_dmconvert(A4, &A5, Magma_CSR, Magma_ELL, queue )); printf("A5:\n"); TESTING_CHECK( magma_dprint_matrix( A5, queue )); // pass it to another application and back magma_int_t m, n; magma_index_t *row, *col; double *val=NULL; TESTING_CHECK( magma_dcsrget( A2, &m, &n, &row, &col, &val, queue )); TESTING_CHECK( magma_dcsrset( m, n, row, col, val, &A3, queue )); TESTING_CHECK( magma_dmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester IO: ok\n"); else printf("%% tester IO: failed\n"); TESTING_CHECK( magma_dmdiff( A, A3, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix interface: ok\n"); else printf("%% tester matrix interface: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&A4, queue ); magma_dmfree(&A5, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }
extern "C" magma_int_t magma_didr( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_IDR; solver_par->numiter = 0; solver_par->spmv_count = 0; solver_par->init_res = 0.0; solver_par->final_res = 0.0; solver_par->iter_res = 0.0; solver_par->runtime = 0.0; // constants const double c_zero = MAGMA_D_ZERO; const double c_one = MAGMA_D_ONE; const double c_n_one = MAGMA_D_NEG_ONE; // internal user parameters const magma_int_t smoothing = 1; // 0 = disable, 1 = enable const double angle = 0.7; // [0-1] // local variables magma_int_t iseed[4] = {0, 0, 0, 1}; magma_int_t dof; magma_int_t s; magma_int_t distr; magma_int_t k, i, sk; magma_int_t innerflag; double residual; double nrm; double nrmb; double nrmr; double nrmt; double rho; double om; double tt; double tr; double gamma; double alpha; double mkk; double fk; // matrices and vectors magma_d_matrix dxs = {Magma_CSR}; magma_d_matrix dr = {Magma_CSR}, drs = {Magma_CSR}; magma_d_matrix dP = {Magma_CSR}, dP1 = {Magma_CSR}; magma_d_matrix dG = {Magma_CSR}; magma_d_matrix dU = {Magma_CSR}; magma_d_matrix dM = {Magma_CSR}; magma_d_matrix df = {Magma_CSR}; magma_d_matrix dt = {Magma_CSR}; magma_d_matrix dc = {Magma_CSR}; magma_d_matrix dv = {Magma_CSR}; magma_d_matrix dbeta = {Magma_CSR}, hbeta = {Magma_CSR}; // chronometry real_Double_t tempo1, tempo2; // initial s space // TODO: add option for 's' (shadow space number) // Hack: uses '--restart' option as the shadow space number. // This is not a good idea because the default value of restart option is used to detect // if the user provided a custom restart. This means that if the default restart value // is changed then the code will think it was the user (unless the default value is // also updated in the 'if' statement below. s = 1; if ( solver_par->restart != 50 ) { if ( solver_par->restart > A.num_cols ) { s = A.num_cols; } else { s = solver_par->restart; } } solver_par->restart = s; // set max iterations solver_par->maxiter = min( 2 * A.num_cols, solver_par->maxiter ); // check if matrix A is square if ( A.num_rows != A.num_cols ) { //printf("Matrix A is not square.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } // |b| nrmb = magma_dnrm2( b.num_rows, b.dval, 1, queue ); if ( nrmb == 0.0 ) { magma_dscal( x->num_rows, MAGMA_D_ZERO, x->dval, 1, queue ); info = MAGMA_SUCCESS; goto cleanup; } // r = b - A x CHECK( magma_dvinit( &dr, Magma_DEV, b.num_rows, 1, c_zero, queue )); CHECK( magma_dresidualvec( A, b, *x, &dr, &nrmr, queue )); // |r| solver_par->init_res = nrmr; solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nrmr; } // check if initial is guess good enough if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { info = MAGMA_SUCCESS; goto cleanup; } // P = randn(n, s) // P = ortho(P) //--------------------------------------- // P = 0.0 CHECK( magma_dvinit( &dP, Magma_CPU, A.num_cols, s, c_zero, queue )); // P = randn(n, s) distr = 3; // 1 = unif (0,1), 2 = unif (-1,1), 3 = normal (0,1) dof = dP.num_rows * dP.num_cols; lapackf77_dlarnv( &distr, iseed, &dof, dP.val ); // transfer P to device CHECK( magma_dmtransfer( dP, &dP1, Magma_CPU, Magma_DEV, queue )); magma_dmfree( &dP, queue ); // P = ortho(P1) if ( dP1.num_cols > 1 ) { // P = magma_dqr(P1), QR factorization CHECK( magma_dqr( dP1.num_rows, dP1.num_cols, dP1, dP1.ld, &dP, NULL, queue )); } else { // P = P1 / |P1| nrm = magma_dnrm2( dof, dP1.dval, 1, queue ); nrm = 1.0 / nrm; magma_dscal( dof, nrm, dP1.dval, 1, queue ); CHECK( magma_dmtransfer( dP1, &dP, Magma_DEV, Magma_DEV, queue )); } magma_dmfree( &dP1, queue ); //--------------------------------------- // allocate memory for the scalar products CHECK( magma_dvinit( &hbeta, Magma_CPU, s, 1, c_zero, queue )); CHECK( magma_dvinit( &dbeta, Magma_DEV, s, 1, c_zero, queue )); // smoothing enabled if ( smoothing > 0 ) { // set smoothing solution vector CHECK( magma_dmtransfer( *x, &dxs, Magma_DEV, Magma_DEV, queue )); // set smoothing residual vector CHECK( magma_dmtransfer( dr, &drs, Magma_DEV, Magma_DEV, queue )); } // G(n,s) = 0 CHECK( magma_dvinit( &dG, Magma_DEV, A.num_cols, s, c_zero, queue )); // U(n,s) = 0 CHECK( magma_dvinit( &dU, Magma_DEV, A.num_cols, s, c_zero, queue )); // M(s,s) = I CHECK( magma_dvinit( &dM, Magma_DEV, s, s, c_zero, queue )); magmablas_dlaset( MagmaFull, s, s, c_zero, c_one, dM.dval, s, queue ); // f = 0 CHECK( magma_dvinit( &df, Magma_DEV, dP.num_cols, 1, c_zero, queue )); // t = 0 CHECK( magma_dvinit( &dt, Magma_DEV, dr.num_rows, 1, c_zero, queue )); // c = 0 CHECK( magma_dvinit( &dc, Magma_DEV, dM.num_cols, 1, c_zero, queue )); // v = 0 CHECK( magma_dvinit( &dv, Magma_DEV, dr.num_rows, 1, c_zero, queue )); //--------------START TIME--------------- // chronometry tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->timing[0] = 0.0; } om = MAGMA_D_ONE; innerflag = 0; // start iteration do { solver_par->numiter++; // new RHS for small systems // f = P' r magmablas_dgemv( MagmaConjTrans, dP.num_rows, dP.num_cols, c_one, dP.dval, dP.ld, dr.dval, 1, c_zero, df.dval, 1, queue ); // shadow space loop for ( k = 0; k < s; ++k ) { sk = s - k; // f(k:s) = M(k:s,k:s) c(k:s) magma_dcopyvector( sk, &df.dval[k], 1, &dc.dval[k], 1, queue ); magma_dtrsv( MagmaLower, MagmaNoTrans, MagmaNonUnit, sk, &dM.dval[k*dM.ld+k], dM.ld, &dc.dval[k], 1, queue ); // v = r - G(:,k:s) c(k:s) magma_dcopyvector( dr.num_rows, dr.dval, 1, dv.dval, 1, queue ); magmablas_dgemv( MagmaNoTrans, dG.num_rows, sk, c_n_one, &dG.dval[k*dG.ld], dG.ld, &dc.dval[k], 1, c_one, dv.dval, 1, queue ); // U(:,k) = om * v + U(:,k:s) c(k:s) magmablas_dgemv( MagmaNoTrans, dU.num_rows, sk, c_one, &dU.dval[k*dU.ld], dU.ld, &dc.dval[k], 1, om, dv.dval, 1, queue ); magma_dcopyvector( dU.num_rows, dv.dval, 1, &dU.dval[k*dU.ld], 1, queue ); // G(:,k) = A U(:,k) CHECK( magma_d_spmv( c_one, A, dv, c_zero, dv, queue )); solver_par->spmv_count++; magma_dcopyvector( dG.num_rows, dv.dval, 1, &dG.dval[k*dG.ld], 1, queue ); // bi-orthogonalize the new basis vectors for ( i = 0; i < k; ++i ) { // alpha = P(:,i)' G(:,k) alpha = magma_ddot( dP.num_rows, &dP.dval[i*dP.ld], 1, &dG.dval[k*dG.ld], 1, queue ); // alpha = alpha / M(i,i) magma_dgetvector( 1, &dM.dval[i*dM.ld+i], 1, &mkk, 1, queue ); alpha = alpha / mkk; // G(:,k) = G(:,k) - alpha * G(:,i) magma_daxpy( dG.num_rows, -alpha, &dG.dval[i*dG.ld], 1, &dG.dval[k*dG.ld], 1, queue ); // U(:,k) = U(:,k) - alpha * U(:,i) magma_daxpy( dU.num_rows, -alpha, &dU.dval[i*dU.ld], 1, &dU.dval[k*dU.ld], 1, queue ); } // new column of M = P'G, first k-1 entries are zero // M(k:s,k) = P(:,k:s)' G(:,k) magmablas_dgemv( MagmaConjTrans, dP.num_rows, sk, c_one, &dP.dval[k*dP.ld], dP.ld, &dG.dval[k*dG.ld], 1, c_zero, &dM.dval[k*dM.ld+k], 1, queue ); // check M(k,k) == 0 magma_dgetvector( 1, &dM.dval[k*dM.ld+k], 1, &mkk, 1, queue ); if ( MAGMA_D_EQUAL(mkk, MAGMA_D_ZERO) ) { innerflag = 1; info = MAGMA_DIVERGENCE; break; } // beta = f(k) / M(k,k) magma_dgetvector( 1, &df.dval[k], 1, &fk, 1, queue ); hbeta.val[k] = fk / mkk; // check for nan if ( magma_d_isnan( hbeta.val[k] ) || magma_d_isinf( hbeta.val[k] )) { innerflag = 1; info = MAGMA_DIVERGENCE; break; } // r = r - beta * G(:,k) magma_daxpy( dr.num_rows, -hbeta.val[k], &dG.dval[k*dG.ld], 1, dr.dval, 1, queue ); // smoothing disabled if ( smoothing <= 0 ) { // |r| nrmr = magma_dnrm2( dr.num_rows, dr.dval, 1, queue ); // smoothing enabled } else { // x = x + beta * U(:,k) magma_daxpy( x->num_rows, hbeta.val[k], &dU.dval[k*dU.ld], 1, x->dval, 1, queue ); // smoothing operation //--------------------------------------- // t = rs - r magma_dcopyvector( drs.num_rows, drs.dval, 1, dt.dval, 1, queue ); magma_daxpy( dt.num_rows, c_n_one, dr.dval, 1, dt.dval, 1, queue ); // t't // t'rs tt = magma_ddot( dt.num_rows, dt.dval, 1, dt.dval, 1, queue ); tr = magma_ddot( dt.num_rows, dt.dval, 1, drs.dval, 1, queue ); // gamma = (t' * rs) / (t' * t) gamma = tr / tt; // rs = rs - gamma * (rs - r) magma_daxpy( drs.num_rows, -gamma, dt.dval, 1, drs.dval, 1, queue ); // xs = xs - gamma * (xs - x) magma_dcopyvector( dxs.num_rows, dxs.dval, 1, dt.dval, 1, queue ); magma_daxpy( dt.num_rows, c_n_one, x->dval, 1, dt.dval, 1, queue ); magma_daxpy( dxs.num_rows, -gamma, dt.dval, 1, dxs.dval, 1, queue ); // |rs| nrmr = magma_dnrm2( drs.num_rows, drs.dval, 1, queue ); //--------------------------------------- } // store current timing and residual if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter) % solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)nrmr; solver_par->timing[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)tempo2 - tempo1; } } // check convergence if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { s = k + 1; // for the x-update outside the loop innerflag = 2; info = MAGMA_SUCCESS; break; } // non-last s iteration if ( (k + 1) < s ) { // f(k+1:s) = f(k+1:s) - beta * M(k+1:s,k) magma_daxpy( sk-1, -hbeta.val[k], &dM.dval[k*dM.ld+(k+1)], 1, &df.dval[k+1], 1, queue ); } } // smoothing disabled if ( smoothing <= 0 && innerflag != 1 ) { // update solution approximation x // x = x + U(:,1:s) * beta(1:s) magma_dsetvector( s, hbeta.val, 1, dbeta.dval, 1, queue ); magmablas_dgemv( MagmaNoTrans, dU.num_rows, s, c_one, dU.dval, dU.ld, dbeta.dval, 1, c_one, x->dval, 1, queue ); } // check convergence or iteration limit or invalid result of inner loop if ( innerflag > 0 ) { break; } // t = A v // t = A r CHECK( magma_d_spmv( c_one, A, dr, c_zero, dt, queue )); solver_par->spmv_count++; // computation of a new omega //--------------------------------------- // |t| nrmt = magma_dnrm2( dt.num_rows, dt.dval, 1, queue ); // t'r tr = magma_ddot( dt.num_rows, dt.dval, 1, dr.dval, 1, queue ); // rho = abs(t' * r) / (|t| * |r|)) rho = MAGMA_D_ABS( MAGMA_D_REAL(tr) / (nrmt * nrmr) ); // om = (t' * r) / (|t| * |t|) om = tr / (nrmt * nrmt); if ( rho < angle ) { om = (om * angle) / rho; } //--------------------------------------- if ( MAGMA_D_EQUAL(om, MAGMA_D_ZERO) ) { info = MAGMA_DIVERGENCE; break; } // update approximation vector // x = x + om * v // x = x + om * r magma_daxpy( x->num_rows, om, dr.dval, 1, x->dval, 1, queue ); // update residual vector // r = r - om * t magma_daxpy( dr.num_rows, -om, dt.dval, 1, dr.dval, 1, queue ); // smoothing disabled if ( smoothing <= 0 ) { // residual norm nrmr = magma_dnrm2( b.num_rows, dr.dval, 1, queue ); // smoothing enabled } else { // smoothing operation //--------------------------------------- // t = rs - r magma_dcopyvector( drs.num_rows, drs.dval, 1, dt.dval, 1, queue ); magma_daxpy( dt.num_rows, c_n_one, dr.dval, 1, dt.dval, 1, queue ); // t't // t'rs tt = magma_ddot( dt.num_rows, dt.dval, 1, dt.dval, 1, queue ); tr = magma_ddot( dt.num_rows, dt.dval, 1, drs.dval, 1, queue ); // gamma = (t' * rs) / (|t| * |t|) gamma = tr / tt; // rs = rs - gamma * (rs - r) magma_daxpy( drs.num_rows, -gamma, dt.dval, 1, drs.dval, 1, queue ); // xs = xs - gamma * (xs - x) magma_dcopyvector( dxs.num_rows, dxs.dval, 1, dt.dval, 1, queue ); magma_daxpy( dt.num_rows, c_n_one, x->dval, 1, dt.dval, 1, queue ); magma_daxpy( dxs.num_rows, -gamma, dt.dval, 1, dxs.dval, 1, queue ); // |rs| nrmr = magma_dnrm2( b.num_rows, drs.dval, 1, queue ); //--------------------------------------- } // store current timing and residual if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter) % solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)nrmr; solver_par->timing[(solver_par->numiter) / solver_par->verbose] = (real_Double_t)tempo2 - tempo1; } } // check convergence if ( nrmr <= solver_par->atol || nrmr/nrmb <= solver_par->rtol ) { info = MAGMA_SUCCESS; break; } } while ( solver_par->numiter + 1 <= solver_par->maxiter ); // smoothing enabled if ( smoothing > 0 ) { // x = xs magma_dcopyvector( x->num_rows, dxs.dval, 1, x->dval, 1, queue ); // r = rs magma_dcopyvector( dr.num_rows, drs.dval, 1, dr.dval, 1, queue ); } // get last iteration timing tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t)tempo2 - tempo1; //--------------STOP TIME---------------- // get final stats solver_par->iter_res = nrmr; CHECK( magma_dresidualvec( A, b, *x, &dr, &residual, queue )); solver_par->final_res = residual; // set solver conclusion if ( info != MAGMA_SUCCESS && info != MAGMA_DIVERGENCE ) { if ( solver_par->init_res > solver_par->final_res ) { info = MAGMA_SLOW_CONVERGENCE; } } cleanup: // free resources // smoothing enabled if ( smoothing > 0 ) { magma_dmfree( &dxs, queue ); magma_dmfree( &drs, queue ); } magma_dmfree( &dr, queue ); magma_dmfree( &dP, queue ); magma_dmfree( &dP1, queue ); magma_dmfree( &dG, queue ); magma_dmfree( &dU, queue ); magma_dmfree( &dM, queue ); magma_dmfree( &df, queue ); magma_dmfree( &dt, queue ); magma_dmfree( &dc, queue ); magma_dmfree( &dv, queue ); magma_dmfree( &dbeta, queue ); magma_dmfree( &hbeta, queue ); solver_par->info = info; return info; /* magma_didr */ }
extern "C" magma_int_t magma_dpbicgstab( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_d_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = 0; // set queue for old dense routines magma_queue_t orig_queue=NULL; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_PBICGSTAB; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // some useful variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE, c_mone = MAGMA_D_NEG_ONE; magma_int_t dofs = A.num_rows*b.num_cols; // workspace magma_d_matrix r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}, ms={Magma_CSR}, mt={Magma_CSR}, y={Magma_CSR}, z={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &rr,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &ms,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &mt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables double alpha, beta, omega, rho_old, rho_new; double nom, betanom, nom0, r0, den, res; // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); magma_dcopy( dofs, r.dval, 1, rr.dval, 1 ); // rr = r betanom = nom0; nom = nom0*nom0; rho_new = omega = alpha = MAGMA_D_MAKE( 1.0, 0. ); solver_par->init_res = nom0; CHECK( magma_d_spmv( c_one, A, r, c_zero, v, queue )); // z = A r den = MAGMA_D_REAL( magma_ddot(dofs, v.dval, 1, r.dval, 1) ); // den = z' * r if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } solver_par->numiter = 0; // start iteration do { solver_par->numiter++; rho_old = rho_new; // rho_old=rho rho_new = magma_ddot( dofs, rr.dval, 1, r.dval, 1 ); // rho=<rr,r> beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega magma_dscal( dofs, beta, p.dval, 1 ); // p = beta*p magma_daxpy( dofs, c_mone * omega * beta, v.dval, 1 , p.dval, 1 ); // p = p-omega*beta*v magma_daxpy( dofs, c_one, r.dval, 1, p.dval, 1 ); // p = p+r // preconditioner CHECK( magma_d_applyprecond_left( A, p, &mt, precond_par, queue )); CHECK( magma_d_applyprecond_right( A, mt, &y, precond_par, queue )); CHECK( magma_d_spmv( c_one, A, y, c_zero, v, queue )); // v = Ap alpha = rho_new / magma_ddot( dofs, rr.dval, 1, v.dval, 1 ); magma_dcopy( dofs, r.dval, 1 , s.dval, 1 ); // s=r magma_daxpy( dofs, c_mone * alpha, v.dval, 1 , s.dval, 1 ); // s=s-alpha*v // preconditioner CHECK( magma_d_applyprecond_left( A, s, &ms, precond_par, queue )); CHECK( magma_d_applyprecond_right( A, ms, &z, precond_par, queue )); CHECK( magma_d_spmv( c_one, A, z, c_zero, t, queue )); // t=As // preconditioner CHECK( magma_d_applyprecond_left( A, s, &ms, precond_par, queue )); CHECK( magma_d_applyprecond_left( A, t, &mt, precond_par, queue )); // omega = <ms,mt>/<mt,mt> omega = magma_ddot( dofs, mt.dval, 1, ms.dval, 1 ) / magma_ddot( dofs, mt.dval, 1, mt.dval, 1 ); magma_daxpy( dofs, alpha, y.dval, 1 , x->dval, 1 ); // x=x+alpha*p magma_daxpy( dofs, omega, z.dval, 1 , x->dval, 1 ); // x=x+omega*s magma_dcopy( dofs, s.dval, 1 , r.dval, 1 ); // r=s magma_daxpy( dofs, c_mone * omega, t.dval, 1 , r.dval, 1 ); // r=r-omega*t res = betanom = magma_dnrm2( dofs, r.dval, 1 ); nom = betanom*betanom; if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nom0 < solver_par->epsilon ) { break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->final_res = residual; solver_par->iter_res = res; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->epsilon*solver_par->init_res ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&rr, queue ); magma_dmfree(&p, queue ); magma_dmfree(&v, queue ); magma_dmfree(&s, queue ); magma_dmfree(&t, queue ); magma_dmfree(&ms, queue ); magma_dmfree(&mt, queue ); magma_dmfree(&y, queue ); magma_dmfree(&z, queue ); magmablasSetKernelStream( orig_queue ); solver_par->info = info; return info; } /* magma_dbicgstab */
extern "C" magma_int_t magma_dcg( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = 0; // set queue for old dense routines magma_queue_t orig_queue=NULL; magmablasGetKernelStream( &orig_queue ); // prepare solver feedback solver_par->solver = Magma_CG; solver_par->numiter = 0; solver_par->info = MAGMA_SUCCESS; // local variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // GPU workspace magma_d_matrix r={Magma_CSR}, p={Magma_CSR}, q={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables double alpha, beta; double nom, nom0, r0, betanom, betanomsq, den; // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); magma_dcopy( dofs, r.dval, 1, p.dval, 1 ); // p = r betanom = nom0; nom = nom0 * nom0; // nom = r' * r CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue )); // q = A p den = MAGMA_D_REAL( magma_ddot(dofs, p.dval, 1, q.dval, 1) );// den = p dot q solver_par->init_res = nom0; if ( (r0 = nom * solver_par->epsilon) < ATOLERANCE ) r0 = ATOLERANCE; if ( nom < r0 ) { solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; goto cleanup; } // check positive definite if (den <= 0.0) { printf("Operator A is not postive definite. (Ar,r) = %f\n", den); magmablasSetKernelStream( orig_queue ); info = MAGMA_NONSPD; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } solver_par->numiter = 0; // start iteration do { solver_par->numiter++; alpha = MAGMA_D_MAKE(nom/den, 0.); magma_daxpy(dofs, alpha, p.dval, 1, x->dval, 1); // x = x + alpha p magma_daxpy(dofs, -alpha, q.dval, 1, r.dval, 1); // r = r - alpha q betanom = magma_dnrm2(dofs, r.dval, 1); // betanom = || r || betanomsq = betanom * betanom; // betanoms = r' * r if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < r0 ) { break; } beta = MAGMA_D_MAKE(betanomsq/nom, 0.); // beta = betanoms/nom magma_dscal(dofs, beta, p.dval, 1); // p = beta*p magma_daxpy(dofs, c_one, r.dval, 1, p.dval, 1); // p = p + r CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue )); // q = A p den = MAGMA_D_REAL(magma_ddot(dofs, p.dval, 1, q.dval, 1)); // den = p dot q nom = betanomsq; } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { solver_par->info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->epsilon*solver_par->init_res ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&p, queue ); magma_dmfree(&q, queue ); magmablasSetKernelStream( orig_queue ); solver_par->info = info; return info; } /* magma_dcg */
extern "C" magma_int_t magma_diterref( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_d_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // some useful variables double c_zero = MAGMA_D_ZERO; double c_one = MAGMA_D_ONE; double c_neg_one = MAGMA_D_NEG_ONE; // prepare solver feedback solver_par->solver = Magma_ITERREF; solver_par->numiter = 0; solver_par->spmv_count = 0; magma_int_t dofs = A.num_rows*b.num_cols; // solver variables double nom, nom0; // workspace magma_d_matrix r={Magma_CSR}, z={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); double residual; CHECK( magma_dresidual( A, b, *x, &residual, queue )); solver_par->init_res = residual; // solver setup magma_dscal( dofs, c_zero, x->dval, 1, queue ); // x = 0 //CHECK( magma_dresidualvec( A, b, *x, &r, nom, queue)); magma_dcopy( dofs, b.dval, 1, r.dval, 1, queue ); // r = b nom0 = magma_dnrm2( dofs, r.dval, 1, queue ); // nom0 = || r || nom = nom0 * nom0; solver_par->init_res = nom0; if( nom0 < solver_par->atol || nom0/solver_par->init_res < solver_par->rtol ){ solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } // start iteration for( solver_par->numiter= 1; solver_par->numiter<solver_par->maxiter; solver_par->numiter++ ) { magma_dscal( dofs, MAGMA_D_MAKE(1./nom, 0.), r.dval, 1, queue ); // scale it CHECK( magma_d_precond( A, r, &z, precond_par, queue )); // inner solver: A * z = r magma_dscal( dofs, MAGMA_D_MAKE(nom, 0.), z.dval, 1, queue ); // scale it magma_daxpy( dofs, c_one, z.dval, 1, x->dval, 1, queue ); // x = x + z CHECK( magma_d_spmv( c_neg_one, A, *x, c_zero, r, queue )); // r = - A x solver_par->spmv_count++; magma_daxpy( dofs, c_one, b.dval, 1, r.dval, 1, queue ); // r = r + b nom = magma_dnrm2( dofs, r.dval, 1, queue ); // nom = || r || if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if( nom < solver_par->atol || nom/solver_par->init_res < solver_par->rtol ){ break; } } tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->final_res = residual; solver_par->iter_res = nom; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->atol || solver_par->iter_res/solver_par->init_res < solver_par->rtol ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) nom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&z, queue ); solver_par->info = info; return info; } /* magma_diterref */
magma_int_t magma_dilures( magma_d_matrix A, magma_d_matrix L, magma_d_matrix U, magma_d_matrix *LU, real_Double_t *res, real_Double_t *nonlinres, magma_queue_t queue ) { magma_int_t info = 0; double tmp; real_Double_t tmp2; magma_int_t i,j,k; double one = MAGMA_D_MAKE( 1.0, 0.0 ); magma_d_matrix LL={Magma_CSR}, L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR}; if( L.row[1]==1 ){ // lower triangular with unit diagonal //printf("L lower triangular.\n"); LL.diagorder_type = Magma_UNITY; CHECK( magma_dmconvert( L, &LL, Magma_CSR, Magma_CSRL, queue )); } else if( L.row[1]==0 ){ // strictly lower triangular //printf("L strictly lower triangular.\n"); CHECK( magma_dmtransfer( L, &LL, Magma_CPU, Magma_CPU, queue )); magma_free_cpu( LL.col ); magma_free_cpu( LL.val ); LL.nnz = L.nnz+L.num_rows; CHECK( magma_dmalloc_cpu( &LL.val, LL.nnz )); CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz )); magma_int_t z=0; for( magma_int_t i=0; i<L.num_rows; i++){ LL.row[i] = z; for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++){ LL.val[z] = L.val[j]; LL.col[z] = L.col[j]; z++; } // add unit diagonal LL.val[z] = MAGMA_D_MAKE(1.0, 0.0); LL.col[z] = i; z++; } LL.row[LL.num_rows] = z; } else{ printf("error: L neither lower nor strictly lower triangular!\n"); } CHECK( magma_dmtransfer( LL, &L_d, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree( &LL, queue ); CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue )); CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue )); magma_dmfree( &L_d, queue ); magma_dmfree( &U_d, queue ); magma_dmfree( &LU_d, queue ); // compute Frobenius norm of A-LU for(i=0; i<A.num_rows; i++){ for(j=A.row[i]; j<A.row[i+1]; j++){ magma_index_t lcol = A.col[j]; for(k=LU->row[i]; k<LU->row[i+1]; k++){ if( LU->col[k] == lcol ){ tmp = MAGMA_D_MAKE( MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] ) , 0.0 ); LU->val[k] = tmp; tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(tmp) ); (*nonlinres) = (*nonlinres) + tmp2*tmp2; } } } } for(i=0; i<LU->num_rows; i++){ for(j=LU->row[i]; j<LU->row[i+1]; j++){ tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(LU->val[j]) ); (*res) = (*res) + tmp2* tmp2; } } (*res) = sqrt((*res)); (*nonlinres) = sqrt((*nonlinres)); cleanup: if( info !=0 ){ magma_dmfree( LU, queue ); } magma_dmfree( &LL, queue ); magma_dmfree( &L_d, queue ); magma_dmfree( &U_d, queue ); magma_dmfree( &LU_d, queue ); return info; }
magma_int_t magma_dnonlinres( magma_d_matrix A, magma_d_matrix L, magma_d_matrix U, magma_d_matrix *LU, real_Double_t *res, magma_queue_t queue ) { magma_int_t info = 0; real_Double_t tmp2; magma_int_t i,j,k; double one = MAGMA_D_MAKE( 1.0, 0.0 ); magma_d_matrix L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR}, A_t={Magma_CSR}; CHECK( magma_dmtransfer( L, &L_d, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( A, &A_t, Magma_CPU, Magma_CPU, queue )); CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue )); CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue )); magma_dmfree( &L_d, queue ); magma_dmfree( &U_d, queue ); magma_dmfree( &LU_d, queue ); // compute Frobenius norm of A-LU for(i=0; i<A.num_rows; i++){ for(j=A.row[i]; j<A.row[i+1]; j++){ magma_index_t lcol = A.col[j]; double newval = MAGMA_D_MAKE(0.0, 0.0); for(k=LU->row[i]; k<LU->row[i+1]; k++){ if( LU->col[k] == lcol ){ newval = MAGMA_D_MAKE( MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] ) , 0.0 ); } } A_t.val[j] = newval; } } for(i=0; i<A.num_rows; i++){ for(j=A.row[i]; j<A.row[i+1]; j++){ tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(A_t.val[j]) ); (*res) = (*res) + tmp2* tmp2; } } magma_dmfree( LU, queue ); magma_dmfree( &A_t, queue ); (*res) = sqrt((*res)); cleanup: if( info !=0 ){ magma_dmfree( LU, queue ); } magma_dmfree( &A_t, queue ); magma_dmfree( &L_d, queue ); magma_dmfree( &U_d, queue ); magma_dmfree( &LU_d, queue ); return info; }
magma_int_t magma_dcustomilusetup( magma_d_matrix A, magma_d_matrix b, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; magma_d_matrix hA={Magma_CSR}; char preconditionermatrix[255]; // first L snprintf( preconditionermatrix, sizeof(preconditionermatrix), "precondL.mtx" ); CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) ); CHECK( magma_dmtransfer( hA, &precond->L, Magma_CPU, Magma_DEV , queue )); // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); magma_dmfree( &hA, queue ); // now U snprintf( preconditionermatrix, sizeof(preconditionermatrix), "precondU.mtx" ); CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) ); CHECK( magma_dmtransfer( hA, &precond->U, Magma_CPU, Magma_DEV , queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, precond->L.nnz, descrL, precond->L.val, precond->L.row, precond->L.col, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, precond->U.nnz, descrU, precond->U.val, precond->U.row, precond->U.col, precond->cuinfoU )); cleanup: cusparseDestroy( cusparseHandle ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseHandle=NULL; descrL=NULL; descrU=NULL; magma_dmfree( &hA, queue ); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; int i=1; real_Double_t start, end; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); // scale matrix CHECK( magma_dmscale( &A, zopts.scaling, queue )); // remove nonzeros in matrix start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor( &A, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 ); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 ); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix compressor: ok\n"); else printf("%% tester matrix compressor: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); i++; } cleanup: magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing zdot */ int main( int argc, char** argv ) { magma_int_t info = 0; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); const double one = MAGMA_D_MAKE(1.0, 0.0); const double zero = MAGMA_D_MAKE(0.0, 0.0); double alpha; TESTING_INIT(); magma_d_matrix a={Magma_CSR}, b={Magma_CSR}, x={Magma_CSR}, y={Magma_CSR}, skp={Magma_CSR}; printf("%%=======================================================================================================================================================================\n"); printf("\n"); printf(" | runtime | GFLOPS\n"); printf("%% n num_vecs | CUDOT CUGEMV MAGMAGEMV MDOT MDGM MDGM_SHFL | CUDOT CUGEMV MAGMAGEMV MDOT MDGM MDGM_SHFL\n"); printf("%%------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n"); printf("\n"); for( magma_int_t num_vecs=1; num_vecs <= 32; num_vecs += 1 ) { for( magma_int_t n=500000; n < 500001; n += 10000 ) { int iters = 10; double computations = (2.* n * iters * num_vecs); #define ENABLE_TIMER #ifdef ENABLE_TIMER real_Double_t mdot1, mdot2, mdgm1, mdgm2, magmagemv1, magmagemv2, cugemv1, cugemv2, cudot1, cudot2; real_Double_t mdot_time, mdgm_time, mdgmshf_time, magmagemv_time, cugemv_time, cudot_time; #endif CHECK( magma_dvinit( &a, Magma_DEV, n, num_vecs, one, queue )); CHECK( magma_dvinit( &b, Magma_DEV, n, 1, one, queue )); CHECK( magma_dvinit( &x, Magma_DEV, n, 8, one, queue )); CHECK( magma_dvinit( &y, Magma_DEV, n, 8, one, queue )); CHECK( magma_dvinit( &skp, Magma_DEV, 1, num_vecs, zero, queue )); // warm up CHECK( magma_dgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue )); // CUDOT #ifdef ENABLE_TIMER cudot1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { for( int l=0; l<num_vecs; l++){ alpha = magma_ddot( n, a.dval+l*a.num_rows, 1, b.dval, 1, queue ); //cudaDeviceSynchronize(); } //cudaDeviceSynchronize(); } #ifdef ENABLE_TIMER cudot2 = magma_sync_wtime( queue ); cudot_time=cudot2-cudot1; #endif // CUGeMV #ifdef ENABLE_TIMER cugemv1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { magma_dgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue ); } #ifdef ENABLE_TIMER cugemv2 = magma_sync_wtime( queue ); cugemv_time=cugemv2-cugemv1; #endif // MAGMAGeMV #ifdef ENABLE_TIMER magmagemv1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { magmablas_dgemv( MagmaTrans, n, num_vecs, one, a.dval, n, b.dval, 1, zero, skp.dval, 1, queue ); } #ifdef ENABLE_TIMER magmagemv2 = magma_sync_wtime( queue ); magmagemv_time=magmagemv2-magmagemv1; #endif // MDOT #ifdef ENABLE_TIMER mdot1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { for( int c = 0; c<num_vecs/2; c++ ){ CHECK( magma_dmdotc( n, 2, a.dval, b.dval, x.dval, y.dval, skp.dval, queue )); } for( int c = 0; c<num_vecs%2; c++ ){ CHECK( magma_dmdotc( n, 1, a.dval, b.dval, x.dval, y.dval, skp.dval, queue )); } //h++; } #ifdef ENABLE_TIMER mdot2 = magma_sync_wtime( queue ); mdot_time=mdot2-mdot1; #endif // MDGM #ifdef ENABLE_TIMER mdgm1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { CHECK( magma_dgemvmdot( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue )); //h++; } #ifdef ENABLE_TIMER mdgm2 = magma_sync_wtime( queue ); mdgm_time=mdgm2-mdgm1; #endif // MDGM_shfl #ifdef ENABLE_TIMER mdgm1 = magma_sync_wtime( queue ); #endif for( int h=0; h < iters; h++) { CHECK( magma_dgemvmdot_shfl( n, num_vecs, a.dval, b.dval, x.dval, y.dval, skp.dval, queue )); } #ifdef ENABLE_TIMER mdgm2 = magma_sync_wtime( queue ); mdgmshf_time=mdgm2-mdgm1; #endif //magma_dprint_gpu(num_vecs,1,skp.dval,num_vecs); //Chronometry #ifdef ENABLE_TIMER printf("%d %d %e %e %e %e %e %e || %e %e %e %e %e %e\n", int(n), int(num_vecs), cudot_time/iters, (cugemv_time)/iters, (magmagemv_time)/iters, (mdot_time)/iters, (mdgm_time)/iters, (mdgmshf_time)/iters, computations/(cudot_time*1e9), computations/(cugemv_time*1e9), computations/(magmagemv_time*1e9), computations/(mdot_time*1e9), computations/(mdgm_time*1e9), computations/(mdgmshf_time*1e9) ); #endif magma_dmfree(&a, queue ); magma_dmfree(&b, queue ); magma_dmfree(&x, queue ); magma_dmfree(&y, queue ); magma_dmfree(&skp, queue ); } //printf("%%================================================================================================================================================\n"); //printf("\n"); //printf("\n"); } // use alpha to silence compiler warnings if ( isnan( real( alpha ))) { info = -1; } cleanup: magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
extern "C" magma_int_t magma_dlsqr( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_d_preconditioner *precond_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_LSQR; solver_par->numiter = 0; solver_par->spmv_count = 0; magma_int_t m = A.num_rows * b.num_cols; magma_int_t n = A.num_cols * b.num_cols; // local variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE; // solver variables double s, nom0, r0, res=0, nomb, phibar, beta, alpha, c, rho, rhot, phi, thet, normr, normar, norma, sumnormd2, normd; // need to transpose the matrix magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR}; // GPU workspace magma_d_matrix r={Magma_CSR}, v={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR}, d={Magma_CSR}, vt={Magma_CSR}, q={Magma_CSR}, w={Magma_CSR}, u={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &v, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &z, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &vt,Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &q, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &w, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // transpose the matrix magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; nomb = magma_dnrm2( m, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } magma_dcopy( m, b.dval, 1, u.dval, 1, queue ); beta = magma_dnrm2( m, u.dval, 1, queue ); magma_dscal( m, MAGMA_D_MAKE(1./beta, 0.0 ), u.dval, 1, queue ); normr = beta; c = 1.0; s = 0.0; phibar = beta; CHECK( magma_d_spmv( c_one, AT, u, c_zero, v, queue )); if( precond_par->solver == Magma_NONE ){ ; } else { CHECK( magma_d_applyprecond_right( MagmaTrans, A, v, &zt, precond_par, queue )); CHECK( magma_d_applyprecond_left( MagmaTrans, A, zt, &v, precond_par, queue )); } alpha = magma_dnrm2( n, v.dval, 1, queue ); magma_dscal( n, MAGMA_D_MAKE(1./alpha, 0.0 ), v.dval, 1, queue ); normar = alpha * beta; norma = 0; sumnormd2 = 0; //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; // start iteration do { solver_par->numiter++; if( precond_par->solver == Magma_NONE || A.num_rows != A.num_cols ) { magma_dcopy( n, v.dval, 1 , z.dval, 1, queue ); } else { CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, v, &zt, precond_par, queue )); CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, zt, &z, precond_par, queue )); } //CHECK( magma_d_spmv( c_one, A, z, MAGMA_D_MAKE(-alpha,0.0), u, queue )); CHECK( magma_d_spmv( c_one, A, z, c_zero, zt, queue )); magma_dscal( m, MAGMA_D_MAKE(-alpha, 0.0 ), u.dval, 1, queue ); magma_daxpy( m, c_one, zt.dval, 1, u.dval, 1, queue ); solver_par->spmv_count++; beta = magma_dnrm2( m, u.dval, 1, queue ); magma_dscal( m, MAGMA_D_MAKE(1./beta, 0.0 ), u.dval, 1, queue ); // norma = norm([norma alpha beta]); norma = sqrt(norma*norma + alpha*alpha + beta*beta ); //lsvec( solver_par->numiter-1 ) = normar / norma; thet = -s * alpha; rhot = c * alpha; rho = sqrt( rhot * rhot + beta * beta ); c = rhot / rho; s = - beta / rho; phi = c * phibar; phibar = s * phibar; // d = (z - thet * d) / rho; magma_dscal( n, MAGMA_D_MAKE(-thet, 0.0 ), d.dval, 1, queue ); magma_daxpy( n, c_one, z.dval, 1, d.dval, 1, queue ); magma_dscal( n, MAGMA_D_MAKE(1./rho, 0.0 ), d.dval, 1, queue ); normd = magma_dnrm2( n, d.dval, 1, queue ); sumnormd2 = sumnormd2 + normd*normd; // convergence check res = normr; if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } // check for convergence in A*x=b if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ info = MAGMA_SUCCESS; break; } // check for convergence in min{|b-A*x|} if ( A.num_rows != A.num_cols && ( normar/(norma*normr) <= solver_par->rtol || normar <= solver_par->atol ) ){ printf("%% warning: quit from minimization convergence check.\n"); info = MAGMA_SUCCESS; break; } magma_daxpy( n, MAGMA_D_MAKE( phi, 0.0 ), d.dval, 1, x->dval, 1, queue ); normr = fabs(s) * normr; CHECK( magma_d_spmv( c_one, AT, u, c_zero, vt, queue )); solver_par->spmv_count++; if( precond_par->solver == Magma_NONE ){ ; } else { CHECK( magma_d_applyprecond_right( MagmaTrans, A, vt, &zt, precond_par, queue )); CHECK( magma_d_applyprecond_left( MagmaTrans, A, zt, &vt, precond_par, queue )); } magma_dscal( n, MAGMA_D_MAKE(-beta, 0.0 ), v.dval, 1, queue ); magma_daxpy( n, c_one, vt.dval, 1, v.dval, 1, queue ); alpha = magma_dnrm2( n, v.dval, 1, queue ); magma_dscal( n, MAGMA_D_MAKE(1./alpha, 0.0 ), v.dval, 1, queue ); normar = alpha * fabs(s*phi); } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&v, queue ); magma_dmfree(&z, queue ); magma_dmfree(&zt, queue ); magma_dmfree(&d, queue ); magma_dmfree(&vt, queue ); magma_dmfree(&q, queue ); magma_dmfree(&u, queue ); magma_dmfree(&w, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&Ah1, queue ); magma_dmfree(&Ah2, queue ); solver_par->info = info; return info; } /* magma_dqmr */
magma_int_t magma_d_csr_mtx( magma_d_matrix *A, const char *filename, magma_queue_t queue ) { char buffer[ 1024 ]; magma_int_t info = 0; int csr_compressor = 0; // checks for zeros in original file magma_d_matrix B={Magma_CSR}; magma_index_t *coo_col = NULL; magma_index_t *coo_row = NULL; double *coo_val = NULL; double *new_val = NULL; magma_index_t* new_row = NULL; magma_index_t* new_col = NULL; magma_int_t symmetric = 0; std::vector< std::pair< magma_index_t, double > > rowval; FILE *fid = NULL; MM_typecode matcode; fid = fopen(filename, "r"); if (fid == NULL) { printf("%% Unable to open file %s\n", filename); info = MAGMA_ERR_NOT_FOUND; goto cleanup; } printf("%% Reading sparse matrix from file (%s):", filename); fflush(stdout); if (mm_read_banner(fid, &matcode) != 0) { printf("\n%% Could not process Matrix Market banner: %s.\n", matcode); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if (!mm_is_valid(matcode)) { printf("\n%% Invalid Matrix Market file.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if ( ! ( ( mm_is_real(matcode) || mm_is_integer(matcode) || mm_is_pattern(matcode) || mm_is_real(matcode) ) && mm_is_coordinate(matcode) && mm_is_sparse(matcode) ) ) { mm_snprintf_typecode( buffer, sizeof(buffer), matcode ); printf("\n%% Sorry, MAGMA-sparse does not support Market Market type: [%s]\n", buffer ); printf("%% Only real-valued or pattern coordinate matrices are supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } magma_index_t num_rows, num_cols, num_nonzeros; if (mm_read_mtx_crd_size(fid, &num_rows, &num_cols, &num_nonzeros) != 0) { info = MAGMA_ERR_UNKNOWN; goto cleanup; } A->storage_type = Magma_CSR; A->memory_location = Magma_CPU; A->num_rows = num_rows; A->num_cols = num_cols; A->nnz = num_nonzeros; A->fill_mode = MagmaFull; CHECK( magma_index_malloc_cpu( &coo_col, A->nnz ) ); CHECK( magma_index_malloc_cpu( &coo_row, A->nnz ) ); CHECK( magma_dmalloc_cpu( &coo_val, A->nnz ) ); if (mm_is_real(matcode) || mm_is_integer(matcode)) { for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; double VAL; // always read in a double and convert later if necessary fscanf(fid, " %d %d %lf \n", &ROW, &COL, &VAL); if ( VAL == 0 ) csr_compressor = 1; coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( VAL, 0.); } } else if (mm_is_pattern(matcode) ) { for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; fscanf(fid, " %d %d \n", &ROW, &COL ); coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( 1.0, 0.); } } else if (mm_is_real(matcode) ){ for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; double VAL, VALC; // always read in a double and convert later if necessary fscanf(fid, " %d %d %lf %lf\n", &ROW, &COL, &VAL, &VALC); coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( VAL, VALC); } // printf(" ...successfully read real matrix... "); } else { printf("\n%% Unrecognized data type\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } fclose(fid); fid = NULL; printf(" done. Converting to CSR:"); fflush(stdout); A->sym = Magma_GENERAL; if( mm_is_symmetric(matcode) ) { symmetric = 1; } if ( mm_is_symmetric(matcode) || mm_is_symmetric(matcode) ) { // duplicate off diagonal entries printf("\n%% Detected symmetric case."); A->sym = Magma_SYMMETRIC; magma_index_t off_diagonals = 0; for(magma_int_t i = 0; i < A->nnz; ++i) { if (coo_row[i] != coo_col[i]) ++off_diagonals; } magma_index_t true_nonzeros = 2*off_diagonals + (A->nnz - off_diagonals); //printf("%% total number of nonzeros: %d\n%%", int(A->nnz)); CHECK( magma_index_malloc_cpu( &new_row, true_nonzeros )); CHECK( magma_index_malloc_cpu( &new_col, true_nonzeros )); CHECK( magma_dmalloc_cpu( &new_val, true_nonzeros )); magma_index_t ptr = 0; for(magma_int_t i = 0; i < A->nnz; ++i) { if (coo_row[i] != coo_col[i]) { new_row[ptr] = coo_row[i]; new_col[ptr] = coo_col[i]; new_val[ptr] = coo_val[i]; ptr++; new_col[ptr] = coo_row[i]; new_row[ptr] = coo_col[i]; new_val[ptr] = (symmetric == 0) ? coo_val[i] : conj(coo_val[i]); ptr++; } else { new_row[ptr] = coo_row[i]; new_col[ptr] = coo_col[i]; new_val[ptr] = coo_val[i]; ptr++; } } magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); coo_row = new_row; coo_col = new_col; coo_val = new_val; A->nnz = true_nonzeros; //printf("total number of nonzeros: %d\n", A->nnz); } // end symmetric case CHECK( magma_dmalloc_cpu( &A->val, A->nnz )); CHECK( magma_index_malloc_cpu( &A->col, A->nnz )); CHECK( magma_index_malloc_cpu( &A->row, A->num_rows+1 )); // original code from Nathan Bell and Michael Garland for (magma_index_t i = 0; i < num_rows; i++) (A->row)[i] = 0; for (magma_index_t i = 0; i < A->nnz; i++) (A->row)[coo_row[i]]++; // cumulative sum the nnz per row to get row[] magma_int_t cumsum; cumsum = 0; for(magma_int_t i = 0; i < num_rows; i++) { magma_index_t temp = (A->row)[i]; (A->row)[i] = cumsum; cumsum += temp; } (A->row)[num_rows] = A->nnz; // write Aj,Ax into Bj,Bx for(magma_int_t i = 0; i < A->nnz; i++) { magma_index_t row_ = coo_row[i]; magma_index_t dest = (A->row)[row_]; (A->col)[dest] = coo_col[i]; (A->val)[dest] = coo_val[i]; (A->row)[row_]++; } magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); coo_row = NULL; coo_col = NULL; coo_val = NULL; int last; last = 0; for(int i = 0; i <= num_rows; i++) { int temp = (A->row)[i]; (A->row)[i] = last; last = temp; } (A->row)[A->num_rows] = A->nnz; // sort column indices within each row // copy into vector of pairs (column index, value), sort by column index, then copy back for (magma_index_t k=0; k < A->num_rows; ++k) { int kk = (A->row)[k]; int len = (A->row)[k+1] - (A->row)[k]; rowval.resize( len ); for( int i=0; i < len; ++i ) { rowval[i] = std::make_pair( (A->col)[kk+i], (A->val)[kk+i] ); } std::sort( rowval.begin(), rowval.end(), compare_first ); for( int i=0; i < len; ++i ) { (A->col)[kk+i] = rowval[i].first; (A->val)[kk+i] = rowval[i].second; } } if ( csr_compressor > 0) { // run the CSR compressor to remove zeros //printf("removing zeros: "); CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue )); CHECK( magma_d_csr_compressor( &(A->val), &(A->row), &(A->col), &B.val, &B.row, &B.col, &B.num_rows, queue )); B.nnz = B.row[num_rows]; //printf(" remaining nonzeros:%d ", B.nnz); magma_free_cpu( A->val ); magma_free_cpu( A->row ); magma_free_cpu( A->col ); CHECK( magma_dmtransfer( B, A, Magma_CPU, Magma_CPU, queue )); //printf("done.\n"); } A->true_nnz = A->nnz; printf(" done.\n"); cleanup: if ( fid != NULL ) { fclose( fid ); fid = NULL; } magma_dmfree( &B, queue ); magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); return info; }
magma_int_t magma_dicres( magma_d_matrix A, magma_d_matrix C, magma_d_matrix CT, magma_d_matrix *LU, real_Double_t *res, real_Double_t *nonlinres, magma_queue_t queue ) { magma_int_t info = 0; double tmp; real_Double_t tmp2; magma_int_t i,j,k; double one = MAGMA_D_MAKE( 1.0, 0.0 ); magma_d_matrix L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR}; *res = 0.0; *nonlinres = 0.0; CHECK( magma_dmtransfer( C, &L_d, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( CT, &U_d, Magma_CPU, Magma_DEV, queue )); CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue )); CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue )); magma_dmfree( &LU_d, queue ); // compute Frobenius norm of A-LU for(i=0; i<A.num_rows; i++){ for(j=A.row[i]; j<A.row[i+1]; j++){ magma_index_t lcol = A.col[j]; for(k=LU->row[i]; k<LU->row[i+1]; k++){ if( LU->col[k] == lcol ){ tmp = MAGMA_D_MAKE( MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] ) , 0.0 ); LU->val[k] = tmp; tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(tmp) ); (*nonlinres) = (*nonlinres) + tmp2*tmp2; } } } } for(i=0; i<LU->num_rows; i++){ for(j=LU->row[i]; j<LU->row[i+1]; j++){ tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(LU->val[j]) ); (*res) = (*res) + tmp2* tmp2; } } (*res) = sqrt((*res)); (*nonlinres) = sqrt((*nonlinres)); cleanup: if( info !=0 ){ magma_dmfree( LU, queue ); } magma_dmfree( &L_d, queue ); magma_dmfree( &U_d, queue ); magma_dmfree( &LU_d, queue ); return info; }
magma_int_t magma_dinitguess( magma_d_matrix A, magma_d_matrix *L, magma_d_matrix *U, magma_queue_t queue ) { magma_int_t info = 0; double one = MAGMA_D_MAKE( 1.0, 0.0 ); magma_d_matrix hAL={Magma_CSR}, hAU={Magma_CSR}, dAL={Magma_CSR}, dAU={Magma_CSR}, dALU={Magma_CSR}, hALU={Magma_CSR}, hD={Magma_CSR}, dD={Magma_CSR}, dL={Magma_CSR}, hL={Magma_CSR}; magma_int_t i,j; magma_int_t offdiags = 0; magma_index_t *diag_offset; double *diag_vals=NULL; // need only lower triangular hAL.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( A, &hAL, Magma_CSR, Magma_CSRL, queue )); //magma_dmconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO ); // need only upper triangular //magma_dmconvert( A, &hAU, Magma_CSR, Magma_CSRU ); CHECK( magma_d_cucsrtranspose( hAL, &hAU, queue )); //magma_dmconvert( hAU, &hAUCOO, Magma_CSR, Magma_CSRCOO ); CHECK( magma_dmtransfer( hAL, &dAL, Magma_CPU, Magma_DEV, queue )); CHECK( magma_d_spmm( one, dAL, dAU, &dALU, queue )); CHECK( magma_dmtransfer( dALU, &hALU, Magma_DEV, Magma_CPU, queue )); magma_dmfree( &dAU, queue); magma_dmfree( &dALU, queue); CHECK( magma_dmalloc_cpu( &diag_vals, offdiags+1 )); CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 )); diag_offset[0] = 0; diag_vals[0] = MAGMA_D_MAKE( 1.0, 0.0 ); CHECK( magma_dmgenerator( hALU.num_rows, offdiags, diag_offset, diag_vals, &hD, queue )); magma_dmfree( &hALU, queue ); for(i=0; i<hALU.num_rows; i++){ for(j=hALU.row[i]; j<hALU.row[i+1]; j++){ if( hALU.col[j] == i ){ //printf("%d %d %d == %d -> %f -->", i, j, hALU.col[j], i, hALU.val[j]); hD.val[i] = MAGMA_D_MAKE( 1.0 / sqrt(fabs(MAGMA_D_REAL(hALU.val[j]))) , 0.0 ); //printf("insert %f at %d\n", hD.val[i], i); } } } CHECK( magma_dmtransfer( hD, &dD, Magma_CPU, Magma_DEV, queue )); magma_dmfree( &hD, queue); CHECK( magma_d_spmm( one, dD, dAL, &dL, queue )); magma_dmfree( &dAL, queue ); magma_dmfree( &dD, queue ); /* // check for diagonal = 1 magma_d_matrix dLt={Magma_CSR}, dLL={Magma_CSR}, LL={Magma_CSR}; CHECK( magma_d_cucsrtranspose( dL, &dLt )); CHECK( magma_dcuspmm( dL, dLt, &dLL )); CHECK( magma_dmtransfer( dLL, &LL, Magma_DEV, Magma_CPU )); //for(i=0; i < hALU.num_rows; i++) { for(i=0; i < 100; i++) { for(j=hALU.row[i]; j < hALU.row[i+1]; j++) { if( hALU.col[j] == i ){ printf("%d %d -> %f -->", i, i, LL.val[j]); } } } */ CHECK( magma_dmtransfer( dL, &hL, Magma_DEV, Magma_CPU, queue )); CHECK( magma_dmconvert( hL, L, Magma_CSR, Magma_CSRCOO, queue )); cleanup: if( info !=0 ){ magma_dmfree( L, queue ); magma_dmfree( U, queue ); } magma_dmfree( &dAU, queue); magma_dmfree( &dALU, queue); magma_dmfree( &dL, queue ); magma_dmfree( &hL, queue ); magma_dmfree( &dAL, queue ); magma_dmfree( &dD, queue ); magma_dmfree( &hD, queue); magma_dmfree( &hALU, queue ); return info; }
magma_int_t magma_dsymbilu( magma_d_matrix *A, magma_int_t levels, magma_d_matrix *L, magma_d_matrix *U, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix A_copy={Magma_CSR}, B={Magma_CSR}; magma_d_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR}; if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){ CHECK( magma_dmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue )); CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue )); // possibility to scale to unit diagonal //magma_dmscale( &B, Magma_UNITDIAG ); CHECK( magma_dmconvert( B, L, Magma_CSR, Magma_CSR , queue)); CHECK( magma_dmconvert( B, U, Magma_CSR, Magma_CSR, queue )); magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_free_cpu( L->col ); magma_free_cpu( U->col ); CHECK( magma_index_malloc_cpu( &L->col, num_lnnz )); CHECK( magma_index_malloc_cpu( &U->col, num_unnz )); magma_dsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col, L->row, L->col, U->row, U->col ); L->nnz = num_lnnz; U->nnz = num_unnz; magma_free_cpu( L->val ); magma_free_cpu( U->val ); CHECK( magma_dmalloc_cpu( &L->val, L->nnz )); CHECK( magma_dmalloc_cpu( &U->val, U->nnz )); for( magma_int_t i=0; i<L->nnz; i++ ) L->val[i] = MAGMA_D_MAKE( 0.0, 0.0 ); for( magma_int_t i=0; i<U->nnz; i++ ) U->val[i] = MAGMA_D_MAKE( 0.0, 0.0 ); // take the original values (scaled) as initial guess for L for(magma_int_t i=0; i<L->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){ if( L->col[k] == lcol ){ L->val[k] = B.val[j]; } } } } // take the original values (scaled) as initial guess for U for(magma_int_t i=0; i<U->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){ if( U->col[k] == lcol ){ U->val[k] = B.val[j]; } } } } magma_dmfree( &B, queue ); // fill A with the new structure; magma_free_cpu( A->col ); magma_free_cpu( A->val ); CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz )); CHECK( magma_dmalloc_cpu( &A->val, L->nnz+U->nnz )); A->nnz = L->nnz+U->nnz; magma_int_t z = 0; for(magma_int_t i=0; i<A->num_rows; i++){ A->row[i] = z; for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){ A->col[z] = L->col[j]; A->val[z] = L->val[j]; z++; } for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){ A->col[z] = U->col[j]; A->val[z] = U->val[j]; z++; } } A->row[A->num_rows] = z; // reset the values of A to the original entries for(magma_int_t i=0; i<A->num_rows; i++){ for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){ magma_index_t lcol = A_copy.col[j]; for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){ if( A->col[k] == lcol ){ A->val[k] = A_copy.val[j]; } } } } } else { magma_storage_t A_storage = A->storage_type; magma_location_t A_location = A->memory_location; CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue )); CHECK( magma_dmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue )); CHECK( magma_dsymbilu( &CSRCOOA, levels, L, U, queue )); magma_dmfree( &hA, queue ); magma_dmfree( A, queue ); CHECK( magma_dmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue )); CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue )); } cleanup: if( info != 0 ){ magma_dmfree( L, queue ); magma_dmfree( U, queue ); } magma_dmfree( &A_copy, queue ); magma_dmfree( &B, queue ); magma_dmfree( &hA, queue ); magma_dmfree( &CSRCOOA, queue ); return info; }
extern "C" magma_int_t magma_dcumilusetup_transpose( magma_d_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR}; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrLT=NULL; cusparseMatDescr_t descrUT=NULL; // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); // transpose the matrix magma_dmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); magma_dmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT )); CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows, precond->LT.nnz, descrLT, precond->LT.dval, precond->LT.drow, precond->LT.dcol, precond->cuinfoLT )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT )); CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows, precond->UT.nnz, descrUT, precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT )); cleanup: cusparseDestroyMatDescr( descrLT ); cusparseDestroyMatDescr( descrUT ); cusparseDestroy( cusparseHandle ); magma_dmfree(&Ah1, queue ); magma_dmfree(&Ah2, queue ); return info; }
extern "C" magma_int_t magma_dqmr_merge( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_QMRMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // local variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE; // solver variables double nom0, r0, res=0, nomb; double rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one, thet = c_one, thet1 = c_one, epsilon = c_one, beta = c_one, delta = c_one, pde = c_one, rde = c_one, gamm = c_one, gamm1 = c_one, psi = c_one; magma_int_t dofs = A.num_rows* b.num_cols; // need to transpose the matrix magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR}; // GPU workspace magma_d_matrix r={Magma_CSR}, r_tld={Magma_CSR}, v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR}, d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; magma_dcopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, y.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, v.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, wt.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, z.dval, 1, queue ); // transpose the matrix magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ); magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransposeconjugate( Ah2, &Ah1, queue ); magma_dmfree(&Ah2, queue ); Ah2.blocksize = A.blocksize; Ah2.alignment = A.alignment; magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ); magma_dmfree(&Ah1, queue ); magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ); magma_dmfree(&Ah2, queue ); nomb = magma_dnrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue )); rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue )); // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_dqmr_1( r.num_rows, r.num_cols, rho, psi, y.dval, z.dval, v.dval, w.dval, queue ); //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){ info = MAGMA_DIVERGENCE; break; } // delta = z' * y; delta = magma_ddot( dofs, z.dval, 1, y.dval, 1, queue ); if( magma_d_isnan_inf( delta ) ){ info = MAGMA_DIVERGENCE; break; } // no precond: yt = y, zt = z //magma_dcopy( dofs, y.dval, 1, yt.dval, 1 ); //magma_dcopy( dofs, z.dval, 1, zt.dval, 1 ); if( solver_par->numiter == 1 ){ // p = y; // q = z; magma_dcopy( dofs, y.dval, 1, p.dval, 1, queue ); magma_dcopy( dofs, z.dval, 1, q.dval, 1, queue ); } else{ pde = psi * delta / epsilon; rde = rho * MAGMA_D_CONJ(delta/epsilon); // p = y - pde * p // q = z - rde * q magma_dqmr_2( r.num_rows, r.num_cols, pde, rde, y.dval, z.dval, p.dval, q.dval, queue ); } if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){ info = MAGMA_DIVERGENCE; break; } CHECK( magma_d_spmv( c_one, A, p, c_zero, pt, queue )); solver_par->spmv_count++; // epsilon = q' * pt; epsilon = magma_ddot( dofs, q.dval, 1, pt.dval, 1, queue ); beta = epsilon / delta; if( magma_d_isnan_inf( epsilon ) || magma_d_isnan_inf( beta ) ){ info = MAGMA_DIVERGENCE; break; } // v = pt - beta * v // y = v magma_dqmr_3( r.num_rows, r.num_cols, beta, pt.dval, v.dval, y.dval, queue ); rho1 = rho; // rho = norm(y); rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue )); // wt = A' * q - beta' * w; CHECK( magma_d_spmv( c_one, AT, q, c_zero, wt, queue )); solver_par->spmv_count++; magma_daxpy( dofs, - MAGMA_D_CONJ( beta ), w.dval, 1, wt.dval, 1, queue ); // no precond: z = wt magma_dcopy( dofs, wt.dval, 1, z.dval, 1, queue ); thet1 = thet; thet = rho / (gamm * MAGMA_D_MAKE( MAGMA_D_ABS(beta), 0.0 )); gamm1 = gamm; gamm = c_one / magma_dsqrt(c_one + thet*thet); eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1); if( magma_d_isnan_inf( thet ) || magma_d_isnan_inf( gamm ) || magma_d_isnan_inf( eta ) ){ info = MAGMA_DIVERGENCE; break; } if( solver_par->numiter == 1 ){ // d = eta * p + pds * d; // s = eta * pt + pds * d; // x = x + d; // r = r - s; magma_dqmr_4( r.num_rows, r.num_cols, eta, p.dval, pt.dval, d.dval, s.dval, x->dval, r.dval, queue ); } else{ pds = (thet1 * gamm) * (thet1 * gamm); // d = eta * p + pds * d; // s = eta * pt + pds * d; // x = x + d; // r = r - s; magma_dqmr_5( r.num_rows, r.num_cols, eta, pds, p.dval, pt.dval, d.dval, s.dval, x->dval, r.dval, queue ); } // psi = norm(z); psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ) ); res = magma_dnrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } // v = y / rho // y = y / rho // w = wt / psi // z = z / psi magma_dqmr_1( r.num_rows, r.num_cols, rho, psi, y.dval, z.dval, v.dval, w.dval, queue ); if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == c_zero ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&r_tld, queue ); magma_dmfree(&v, queue ); magma_dmfree(&w, queue ); magma_dmfree(&wt, queue ); magma_dmfree(&d, queue ); magma_dmfree(&s, queue ); magma_dmfree(&z, queue ); magma_dmfree(&q, queue ); magma_dmfree(&p, queue ); magma_dmfree(&pt, queue ); magma_dmfree(&y, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&Ah1, queue ); magma_dmfree(&Ah2, queue ); solver_par->info = info; return info; } /* magma_dqmr_merge */
extern "C" magma_int_t magma_dcumiccsetup( magma_d_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; #if CUDA_VERSION >= 7000 csric02Info_t info_M=NULL; void *pBuffer = NULL; #endif magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR}; CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); U.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue )); // in case using fill-in if( precond->levels > 0 ){ magma_d_matrix hAL={Magma_CSR}, hAUt={Magma_CSR}; CHECK( magma_dsymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue )); magma_dmfree(&hAL, queue); magma_dmfree(&hAUt, queue); } CHECK( magma_dmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue )); magma_dmfree( &hACSR, queue ); CHECK( magma_dmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) )); // use kernel to manually check for zeros n the diagonal CHECK( magma_ddiagcheck( precond->M, queue ) ); #if CUDA_VERSION >= 7000 // this version has the bug fixed where a zero on the diagonal causes a crash CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) ); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); int buffersize; int structural_zero; int numerical_zero; CHECK_CUSPARSE( cusparseDcsric02_bufferSize( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, &buffersize ) ); CHECK( magma_malloc((void**)&pBuffer, buffersize) ); CHECK_CUSPARSE( cusparseDcsric02_analysis( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer )); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) ); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) ); CHECK_CUSPARSE( cusparseDcsric02( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) ); #else // this version contains the bug but is needed for backward compability CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); CHECK_CUSPARSE( cusparseDcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); #endif CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_dmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_dmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); } /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_dmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; magma_d_matrix hD, hR, hAt CHECK( magma_dcsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); CHECK( magma_d_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_dcsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); magma_dmfree(&hA, queue ); magma_dmfree(&hAt, queue ); */ cleanup: #if CUDA_VERSION >= 7000 magma_free( pBuffer ); cusparseDestroyCsric02Info( info_M ); #endif cusparseDestroySolveAnalysisInfo( precond->cuinfo ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_dmfree(&U, queue ); magma_dmfree(&hA, queue ); return info; }
extern "C" magma_int_t magma_dtfqmr_unrolled( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_TFQMR; solver_par->numiter = 0; solver_par->spmv_count = 0; solver_par->spmv_count = 0; // local variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE; // solver variables double nom0, r0, res, nomb; double rho = c_one, rho_l = c_one, eta = c_zero , c = c_zero , theta = c_zero , tau = c_zero, alpha = c_one, beta = c_zero, sigma = c_zero; magma_int_t dofs = A.num_rows* b.num_cols; // GPU workspace magma_d_matrix r={Magma_CSR}, r_tld={Magma_CSR}, d={Magma_CSR}, w={Magma_CSR}, v={Magma_CSR}, u_mp1={Magma_CSR}, u_m={Magma_CSR}, Au={Magma_CSR}, Ad={Magma_CSR}, Au_new={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &u_mp1,Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_dvinit( &r_tld,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &u_m, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); CHECK( magma_dvinit( &Ad, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &Au_new, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &Au, Magma_DEV, A.num_rows, b.num_cols, c_one, queue )); // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); solver_par->init_res = nom0; magma_dcopy( dofs, r.dval, 1, r_tld.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, w.dval, 1, queue ); magma_dcopy( dofs, r.dval, 1, u_mp1.dval, 1, queue ); CHECK( magma_d_spmv( c_one, A, u_mp1, c_zero, v, queue )); // v = A u magma_dcopy( dofs, v.dval, 1, Au.dval, 1, queue ); nomb = magma_dnrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t)nom0; solver_par->timing[0] = 0.0; } if ( nom0 < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } tau = magma_dsqrt( magma_ddot( dofs, r.dval, 1, r_tld.dval, 1, queue ) ); rho = magma_ddot( dofs, r.dval, 1, r_tld.dval, 1, queue ); rho_l = rho; //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // do this every iteration as unrolled alpha = rho / magma_ddot( dofs, v.dval, 1, r_tld.dval, 1, queue ); sigma = theta * theta / alpha * eta; magma_daxpy( dofs, -alpha, v.dval, 1, u_mp1.dval, 1, queue ); // u_mp1 = u_mp_1 - alpha*v; magma_daxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue ); // w = w - alpha*Au; magma_dscal( dofs, sigma, d.dval, 1, queue ); magma_daxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue ); // d = u_mp1 + sigma*d; //magma_dscal( dofs, sigma, Ad.dval, 1, queue ); //magma_daxpy( dofs, c_one, Au.dval, 1, Ad.dval, 1, queue ); // Ad = Au + sigma*Ad; theta = magma_dsqrt( magma_ddot(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau; c = c_one / magma_dsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; sigma = theta * theta / alpha * eta; printf("sigma: %f+%fi\n", MAGMA_D_REAL(sigma), MAGMA_D_IMAG(sigma) ); CHECK( magma_d_spmv( c_one, A, d, c_zero, Ad, queue )); // Au_new = A u_mp1 solver_par->spmv_count++; magma_daxpy( dofs, eta, d.dval, 1, x->dval, 1, queue ); // x = x + eta * d magma_daxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue ); // r = r - eta * Ad // here starts the second part of the loop ################################# magma_daxpy( dofs, -alpha, Au.dval, 1, w.dval, 1, queue ); // w = w - alpha*Au; magma_dscal( dofs, sigma, d.dval, 1, queue ); magma_daxpy( dofs, c_one, u_mp1.dval, 1, d.dval, 1, queue ); // d = u_mp1 + sigma*d; magma_dscal( dofs, sigma, Ad.dval, 1, queue ); magma_daxpy( dofs, c_one, Au.dval, 1, Ad.dval, 1, queue ); // Ad = Au + sigma*Ad; theta = magma_dsqrt( magma_ddot(dofs, w.dval, 1, w.dval, 1, queue ) ) / tau; c = c_one / magma_dsqrt( c_one + theta*theta ); tau = tau * theta *c; eta = c * c * alpha; magma_daxpy( dofs, eta, d.dval, 1, x->dval, 1, queue ); // x = x + eta * d magma_daxpy( dofs, -eta, Ad.dval, 1, r.dval, 1, queue ); // r = r - eta * Ad res = magma_dnrm2( dofs, r.dval, 1, queue ); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } // do this every loop as unrolled rho_l = rho; rho = magma_ddot( dofs, w.dval, 1, r_tld.dval, 1, queue ); beta = rho / rho_l; magma_dscal( dofs, beta, u_mp1.dval, 1, queue ); magma_daxpy( dofs, c_one, w.dval, 1, u_mp1.dval, 1, queue ); // u_mp1 = w + beta*u_mp1; CHECK( magma_d_spmv( c_one, A, u_mp1, c_zero, Au_new, queue )); // Au_new = A u_mp1 solver_par->spmv_count++; // do this every loop as unrolled magma_dscal( dofs, beta*beta, v.dval, 1, queue ); magma_daxpy( dofs, beta, Au.dval, 1, v.dval, 1, queue ); magma_daxpy( dofs, c_one, Au_new.dval, 1, v.dval, 1, queue ); // v = Au_new + beta*(Au+beta*v); magma_dcopy( dofs, Au_new.dval, 1, Au.dval, 1, queue ); } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose == 0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&r_tld, queue ); magma_dmfree(&d, queue ); magma_dmfree(&w, queue ); magma_dmfree(&v, queue ); magma_dmfree(&u_m, queue ); magma_dmfree(&u_mp1, queue ); magma_dmfree(&d, queue ); magma_dmfree(&Au, queue ); magma_dmfree(&Au_new, queue ); magma_dmfree(&Ad, queue ); solver_par->info = info; return info; } /* magma_dfqmr_unrolled */
extern "C" magma_int_t magma_dmtransfer( magma_d_matrix A, magma_d_matrix *B, magma_location_t src, magma_location_t dst, magma_queue_t queue ) { magma_int_t info = 0; B->val = NULL; B->diag = NULL; B->row = NULL; B->rowidx = NULL; B->col = NULL; B->blockinfo = NULL; B->dval = NULL; B->ddiag = NULL; B->drow = NULL; B->drowidx = NULL; B->dcol = NULL; B->diag = NULL; B->ddiag = NULL; B->list = NULL; B->dlist = NULL; // first case: copy matrix from host to device if ( src == Magma_CPU && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dsetvector( A.num_rows * rowlength, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * rowlength, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.num_rows, A.row, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.numblocks + 1, A.row, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols )); // data transfer magma_dsetvector( A.num_rows * A.num_cols, A.val, 1, B->dval, 1, queue ); } } // second case: copy matrix from host to host else if ( src == Magma_CPU && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer for( magma_int_t i=0; i<A.num_rows*rowlength; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows; i++ ) { B->row[i] = A.row[i]; } } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; B->numblocks = A.numblocks; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.numblocks+1; i++ ) { B->row[i] = A.row[i]; } } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer //magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); for( magma_int_t i=0; i<size_b*size_b*A.numblocks; i++ ) { B->dval[i] = A.val[i]; } //magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); for( magma_int_t i=0; i<r_blocks+1; i++ ) { B->drow[i] = A.row[i]; } //magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); for( magma_int_t i=0; i<A.numblocks; i++ ) { B->dcol[i] = A.col[i]; } } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.num_cols; i++ ) { B->val[i] = A.val[i]; } } } // third case: copy matrix from device to host else if ( src == Magma_DEV && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer magma_dgetvector( A.num_rows * rowlength, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * rowlength, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.num_rows, A.drow, 1, B->row, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.numblocks + 1, A.drow, 1, B->row, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer magma_dgetvector( size_b * size_b * A.numblocks, A.dval, 1, B->val, 1, queue ); magma_index_getvector( r_blocks + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.numblocks, A.dcol, 1, B->col, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer magma_dgetvector( A.num_rows * A.num_cols, A.dval, 1, B->val, 1, queue ); } } // fourth case: copy matrix from device to device else if ( src == Magma_DEV && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dcopyvector( A.num_rows * rowlength, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * rowlength, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.num_rows, A.drow, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.numblocks + 1, A.drow, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dcopyvector( size_b * size_b * A.numblocks, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( r_blocks + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.numblocks, A.dcol, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols )); // data transfer magma_dcopyvector( A.num_rows * A.num_cols, A.dval, 1, B->dval, 1, queue ); } } cleanup: if( info != 0 ){ magma_dmfree( B, queue ); } return info; }
extern "C" magma_int_t magma_dcumilugeneratesolverinfo( magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; magma_d_matrix hA={Magma_CSR}, hL={Magma_CSR}, hU={Magma_CSR}; if (precond->L.memory_location != Magma_DEV ){ CHECK( magma_dmtransfer( precond->M, &hA, precond->M.memory_location, Magma_CPU, queue )); hL.diagorder_type = Magma_UNITY; CHECK( magma_dmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue )); hU.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue )); CHECK( magma_dmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hA, queue ); magma_dmfree(&hL, queue ); magma_dmfree(&hU, queue ); } // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows, precond->L.nnz, descrL, precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows, precond->U.nnz, descrU, precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue )); } cleanup: cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroy( cusparseHandle ); return info; }
extern "C" magma_int_t magma_dbicgstab_merge( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_BICGSTAB; solver_par->numiter = 0; solver_par->spmv_count = 0; // some useful variables double c_zero = MAGMA_D_ZERO; double c_one = MAGMA_D_ONE; magma_int_t dofs = A.num_rows * b.num_cols; // workspace magma_d_matrix r={Magma_CSR}, rr={Magma_CSR}, p={Magma_CSR}, v={Magma_CSR}, s={Magma_CSR}, t={Magma_CSR}, d1={Magma_CSR}, d2={Magma_CSR}; CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &rr,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &t, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d1, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d2, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); // solver variables double alpha, beta, omega, rho_old, rho_new; double nom, betanom, nom0, r0, res, nomb; res=0; //double den; // solver setup CHECK( magma_dresidualvec( A, b, *x, &r, &nom0, queue)); magma_dcopy( dofs, r.dval, 1, rr.dval, 1, queue ); // rr = r betanom = nom0; nom = nom0*nom0; rho_new = magma_ddot( dofs, r.dval, 1, r.dval, 1, queue ); // rho=<rr,r> rho_old = omega = alpha = MAGMA_D_MAKE( 1.0, 0. ); solver_par->init_res = nom0; CHECK( magma_d_spmv( c_one, A, r, c_zero, v, queue )); // z = A r //den = MAGMA_D_REAL( magma_ddot( dofs, v.dval, 1, r.dval, 1), queue ); // den = z' * r nomb = magma_dnrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){ r0 = ATOLERANCE; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = nom0; solver_par->timing[0] = 0.0; } if ( nom < r0 ) { info = MAGMA_SUCCESS; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; rho_old = rho_new; // rho_old=rho rho_new = magma_ddot( dofs, rr.dval, 1, r.dval, 1, queue ); // rho=<rr,r> beta = rho_new/rho_old * alpha/omega; // beta=rho/rho_old *alpha/omega if( magma_d_isnan_inf( beta ) ){ info = MAGMA_DIVERGENCE; break; } // p = r + beta * ( p - omega * v ) magma_dbicgstab_1( r.num_rows, r.num_cols, beta, omega, r.dval, v.dval, p.dval, queue ); CHECK( magma_d_spmv( c_one, A, p, c_zero, v, queue )); // v = Ap solver_par->spmv_count++; //alpha = rho_new / tmpval; alpha = rho_new /magma_ddot( dofs, rr.dval, 1, v.dval, 1, queue ); if( magma_d_isnan_inf( alpha ) ){ info = MAGMA_DIVERGENCE; break; } // s = r - alpha v magma_dbicgstab_2( r.num_rows, r.num_cols, alpha, r.dval, v.dval, s.dval, queue ); CHECK( magma_d_spmv( c_one, A, s, c_zero, t, queue )); // t=As solver_par->spmv_count++; omega = magma_ddot( dofs, t.dval, 1, s.dval, 1, queue ) // omega = <s,t>/<t,t> / magma_ddot( dofs, t.dval, 1, t.dval, 1, queue ); // x = x + alpha * p + omega * s // r = s - omega * t magma_dbicgstab_3( r.num_rows, r.num_cols, alpha, omega, p.dval, s.dval, t.dval, x->dval, r.dval, queue ); res = betanom = magma_dnrm2( dofs, r.dval, 1, queue ); nom = betanom*betanom; if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) res; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){ break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = res; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->rtol*solver_par->init_res || solver_par->iter_res < solver_par->atol ) { info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&rr, queue ); magma_dmfree(&p, queue ); magma_dmfree(&v, queue ); magma_dmfree(&s, queue ); magma_dmfree(&t, queue ); magma_dmfree(&d1, queue ); magma_dmfree(&d2, queue ); solver_par->info = info; return info; } /* magma_dbicgstab_merge */
extern "C" magma_int_t magma_d_spmv( double alpha, magma_d_matrix A, magma_d_matrix x, double beta, magma_d_matrix y, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix x2={Magma_CSR}; cusparseHandle_t cusparseHandle = 0; cusparseMatDescr_t descr = 0; // make sure RHS is a dense matrix if ( x.storage_type != Magma_DENSE ) { printf("error: only dense vectors are supported for SpMV.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if ( A.memory_location != x.memory_location || x.memory_location != y.memory_location ) { printf("error: linear algebra objects are not located in same memory!\n"); printf("memory locations are: %d %d %d\n", A.memory_location, x.memory_location, y.memory_location ); info = MAGMA_ERR_INVALID_PTR; goto cleanup; } // DEV case if ( A.memory_location == Magma_DEV ) { if ( A.num_cols == x.num_rows && x.num_cols == 1 ) { if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); cusparseDcsrmv( cusparseHandle,CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, &beta, y.dval ); } else if ( A.storage_type == Magma_ELL ) { //printf("using ELLPACKT kernel for SpMV: "); CHECK( magma_dgeelltmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLPACKT ) { //printf("using ELL kernel for SpMV: "); CHECK( magma_dgeellmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_ELLRT ) { //printf("using ELLRT kernel for SpMV: "); CHECK( magma_dgeellrtmv( MagmaNoTrans, A.num_rows, A.num_cols, A.max_nnz_row, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, A.alignment, A.blocksize, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_SELLP ) { //printf("using SELLP kernel for SpMV: "); CHECK( magma_dgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_dgemv( MagmaNoTrans, A.num_rows, A.num_cols, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1, queue ); //printf("done.\n"); } else if ( A.storage_type == Magma_SPMVFUNCTION ) { //printf("using DENSE kernel for SpMV: "); CHECK( magma_dcustomspmv( alpha, x, beta, y, queue )); //printf("done.\n"); } else if ( A.storage_type == Magma_BCSR ) { //printf("using CUSPARSE BCSR kernel for SpMV: "); // CUSPARSE context // cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; int mb = magma_ceildiv( A.num_rows, A.blocksize ); int nb = magma_ceildiv( A.num_cols, A.blocksize ); CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); cusparseDbsrmv( cusparseHandle, dirA, CUSPARSE_OPERATION_NON_TRANSPOSE, mb, nb, A.numblocks, &alpha, descr, A.dval, A.drow, A.dcol, A.blocksize, x.dval, &beta, y.dval ); } else { printf("error: format not supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } } else if ( A.num_cols < x.num_rows || x.num_cols > 1 ) { magma_int_t num_vecs = x.num_rows / A.num_cols * x.num_cols; if ( A.storage_type == Magma_CSR ) { CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descr )); CHECK_CUSPARSE( cusparseSetMatType( descr, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descr, CUSPARSE_INDEX_BASE_ZERO )); if ( x.major == MagmaColMajor) { cusparseDcsrmm(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); } else if ( x.major == MagmaRowMajor) { /*cusparseDcsrmm2(cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, A.num_rows, num_vecs, A.num_cols, A.nnz, &alpha, descr, A.dval, A.drow, A.dcol, x.dval, A.num_cols, &beta, y.dval, A.num_cols); */ } } else if ( A.storage_type == Magma_SELLP ) { if ( x.major == MagmaRowMajor) { CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x.dval, beta, y.dval, queue )); } else if ( x.major == MagmaColMajor) { // transpose first to row major CHECK( magma_dvtranspose( x, &x2, queue )); CHECK( magma_dmgesellpmv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, A.blocksize, A.numblocks, A.alignment, alpha, A.dval, A.dcol, A.drow, x2.dval, beta, y.dval, queue )); } } /*if ( A.storage_type == Magma_DENSE ) { //printf("using DENSE kernel for SpMV: "); magmablas_dmgemv( MagmaNoTrans, A.num_rows, A.num_cols, num_vecs, alpha, A.dval, A.num_rows, x.dval, 1, beta, y.dval, 1 ); //printf("done.\n"); }*/ else { printf("error: format not supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } } } // CPU case missing! else { printf("error: CPU not yet supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; } cleanup: cusparseDestroyMatDescr( descr ); cusparseDestroy( cusparseHandle ); cusparseHandle = 0; descr = 0; magma_dmfree(&x2, queue ); return info; }
extern "C" magma_int_t magma_dcg_merge( magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x, magma_d_solver_par *solver_par, magma_queue_t queue ) { magma_int_t info = MAGMA_NOTCONVERGED; // prepare solver feedback solver_par->solver = Magma_CGMERGE; solver_par->numiter = 0; solver_par->spmv_count = 0; // solver variables double alpha, beta, gamma, rho, tmp1, *skp_h={0}; double nom, nom0, betanom, den, nomb; // some useful variables double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE; magma_int_t dofs = A.num_rows*b.num_cols; magma_d_matrix r={Magma_CSR}, d={Magma_CSR}, z={Magma_CSR}, B={Magma_CSR}, C={Magma_CSR}; double *d1=NULL, *d2=NULL, *skp=NULL; // GPU workspace CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue )); CHECK( magma_dmalloc( &d1, dofs*(1) )); CHECK( magma_dmalloc( &d2, dofs*(1) )); // array for the parameters CHECK( magma_dmalloc( &skp, 6 )); // skp = [alpha|beta|gamma|rho|tmp1|tmp2] // solver setup magma_dscal( dofs, c_zero, x->dval, 1, queue ); // x = 0 //CHECK( magma_dresidualvec( A, b, *x, &r, nom0, queue)); magma_dcopy( dofs, b.dval, 1, r.dval, 1, queue ); // r = b magma_dcopy( dofs, r.dval, 1, d.dval, 1, queue ); // d = r nom0 = betanom = magma_dnrm2( dofs, r.dval, 1, queue ); nom = nom0 * nom0; // nom = r' * r CHECK( magma_d_spmv( c_one, A, d, c_zero, z, queue )); // z = A d den = MAGMA_D_ABS( magma_ddot( dofs, d.dval, 1, z.dval, 1, queue ) ); // den = d'* z solver_par->init_res = nom0; nomb = magma_dnrm2( dofs, b.dval, 1, queue ); if ( nomb == 0.0 ){ nomb=1.0; } // array on host for the parameters CHECK( magma_dmalloc_cpu( &skp_h, 6 )); alpha = rho = gamma = tmp1 = c_one; beta = magma_ddot( dofs, r.dval, 1, r.dval, 1, queue ); skp_h[0]=alpha; skp_h[1]=beta; skp_h[2]=gamma; skp_h[3]=rho; skp_h[4]=tmp1; skp_h[5]=MAGMA_D_MAKE(nom, 0.0); magma_dsetvector( 6, skp_h, 1, skp, 1, queue ); if( nom0 < solver_par->atol || nom0/nomb < solver_par->rtol ){ info = MAGMA_SUCCESS; goto cleanup; } solver_par->final_res = solver_par->init_res; solver_par->iter_res = solver_par->init_res; if ( solver_par->verbose > 0 ) { solver_par->res_vec[0] = (real_Double_t) nom0; solver_par->timing[0] = 0.0; } // check positive definite if (den <= 0.0) { info = MAGMA_NONSPD; goto cleanup; } //Chronometry real_Double_t tempo1, tempo2; tempo1 = magma_sync_wtime( queue ); solver_par->numiter = 0; solver_par->spmv_count = 0; // start iteration do { solver_par->numiter++; // computes SpMV and dot product CHECK( magma_dcgmerge_spmv1( A, d1, d2, d.dval, z.dval, skp, queue )); solver_par->spmv_count++; // updates x, r, computes scalars and updates d CHECK( magma_dcgmerge_xrbeta( dofs, d1, d2, x->dval, r.dval, d.dval, z.dval, skp, queue )); // check stopping criterion (asynchronous copy) magma_dgetvector( 1 , skp+1, 1, skp_h+1, 1, queue ); betanom = sqrt(MAGMA_D_ABS(skp_h[1])); if ( solver_par->verbose > 0 ) { tempo2 = magma_sync_wtime( queue ); if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } if ( betanom < solver_par->atol || betanom/nomb < solver_par->rtol ) { break; } } while ( solver_par->numiter+1 <= solver_par->maxiter ); tempo2 = magma_sync_wtime( queue ); solver_par->runtime = (real_Double_t) tempo2-tempo1; double residual; CHECK( magma_dresidualvec( A, b, *x, &r, &residual, queue)); solver_par->iter_res = betanom; solver_par->final_res = residual; if ( solver_par->numiter < solver_par->maxiter ) { info = MAGMA_SUCCESS; } else if ( solver_par->init_res > solver_par->final_res ) { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } info = MAGMA_SLOW_CONVERGENCE; if( solver_par->iter_res < solver_par->atol || solver_par->iter_res/solver_par->init_res < solver_par->rtol ){ info = MAGMA_SUCCESS; } } else { if ( solver_par->verbose > 0 ) { if ( (solver_par->numiter)%solver_par->verbose==0 ) { solver_par->res_vec[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) betanom; solver_par->timing[(solver_par->numiter)/solver_par->verbose] = (real_Double_t) tempo2-tempo1; } } solver_par->info = MAGMA_DIVERGENCE; } cleanup: magma_dmfree(&r, queue ); magma_dmfree(&z, queue ); magma_dmfree(&d, queue ); magma_dmfree(&B, queue ); magma_dmfree(&C, queue ); magma_free( d1 ); magma_free( d2 ); magma_free( skp ); magma_free_cpu( skp_h ); solver_par->info = info; return info; } /* magma_dcg_merge */