/**
    Purpose
    -------
    Exposes a sparse matrix to the caller as raw host CSR arrays.

    If A already is a CSR matrix in host memory, the returned pointers alias
    A's own arrays and A keeps ownership of them.

    Otherwise A is copied to the host and converted to CSR. The arrays of that
    temporary CSR copy are handed over to the caller, who becomes their owner
    and is responsible for releasing them. (Previously the temporary was freed
    before returning, so the caller received dangling pointers.)

    Arguments
    ---------
    @param[in]  A      input matrix (any memory location / storage format)
    @param[out] m      number of rows
    @param[out] n      number of columns
    @param[out] row    CSR row pointer array
    @param[out] col    CSR column index array
    @param[out] val    CSR value array
    @param[in]  queue  queue to execute in

    @return 0 on success, otherwise the error code of the failing
            transfer or conversion (outputs are then left untouched).
*/
magma_int_t
magma_zcsrget(
    magma_z_matrix A,
    magma_int_t *m,
    magma_int_t *n,
    magma_index_t **row,
    magma_index_t **col,
    magmaDoubleComplex **val,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix A_CPU={Magma_CSR}, A_CSR={Magma_CSR};

    if ( A.memory_location == Magma_CPU && A.storage_type == Magma_CSR ) {
        // nothing to convert: expose A's own storage
        *m = A.num_rows;
        *n = A.num_cols;
        *val = A.val;
        *col = A.col;
        *row = A.row;
    } else {
        CHECK( magma_zmtransfer( A, &A_CPU, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_zmconvert( A_CPU, &A_CSR, A_CPU.storage_type, Magma_CSR, queue ));

        *m = A_CSR.num_rows;
        *n = A_CSR.num_cols;
        *val = A_CSR.val;
        *col = A_CSR.col;
        *row = A_CSR.row;

        // ownership moves to the caller: detach the arrays so that the
        // cleanup below does not release what we just returned
        A_CSR.val = NULL;
        A_CSR.col = NULL;
        A_CSR.row = NULL;
    }

cleanup:
    magma_zmfree( &A_CSR, queue );
    magma_zmfree( &A_CPU, queue );
    return info;
}
/**
    Purpose
    -------
    Compresses a matrix in place by running it through
    magma_z_csr_compressor.

    A host CSR matrix is processed directly: a copy B is taken, A's arrays are
    released, and the compressor writes new arrays for A from B. Any other
    location / format is copied to the host, converted to CSR, compressed
    recursively, and converted / transferred back to the original format and
    location.

    Arguments
    ---------
    @param[in,out] A      matrix to compress
    @param[in]     queue  queue to execute in

    @return 0 on success, otherwise the error code of the failing step.
*/
extern "C" magma_int_t
magma_zmcsrcompressor(
    magma_z_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix B={Magma_CSR};
    magma_z_matrix hA={Magma_CSR}, CSRA={Magma_CSR};

    if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ) {
        CHECK( magma_zmconvert( *A, &B, Magma_CSR, Magma_CSR, queue ));

        // Release the old arrays and clear the pointers, so that a failure in
        // the compressor cannot leave A referring to freed memory.
        magma_free_cpu( A->row );
        A->row = NULL;
        magma_free_cpu( A->col );
        A->col = NULL;
        magma_free_cpu( A->val );
        A->val = NULL;

        CHECK( magma_z_csr_compressor( &B.val, &B.row, &B.col,
                                       &A->val, &A->row, &A->col,
                                       &A->num_rows, queue ));
        // last row pointer entry is the new number of stored entries
        A->nnz = A->row[A->num_rows];
    }
    else {
        // remember where A has to go back to
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;

        CHECK( magma_zmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_zmconvert( hA, &CSRA, hA.storage_type, Magma_CSR, queue ));

        CHECK( magma_zmcsrcompressor( &CSRA, queue ));

        magma_zmfree( &hA, queue );
        magma_zmfree( A, queue );
        CHECK( magma_zmconvert( CSRA, &hA, Magma_CSR, A_storage, queue ));
        CHECK( magma_zmtransfer( hA, A, Magma_CPU, A_location, queue ));
        // hA and CSRA are released in cleanup
    }

cleanup:
    magma_zmfree( &hA, queue );
    magma_zmfree( &CSRA, queue );
    magma_zmfree( &B, queue );
    return info;
}
/**
    Purpose
    -------
    Builds B from A by dropping every entry whose column index is not smaller
    than the number of rows of A, and sets B's column count to its row count.
    Only matrices with num_rows <= num_cols are accepted.

    Host CSR input is handled directly (copy, zero the out-of-range entries,
    compress). Everything else is moved to the host, converted to CSR,
    shrunk recursively, and converted / transferred back to A's format and
    location.

    Arguments
    ---------
    @param[in]  A      input matrix
    @param[out] B      shrunk matrix
    @param[in]  queue  queue to execute in

    @return 0 on success, MAGMA_ERR_NOT_SUPPORTED if A has more rows than
            columns, otherwise the error code of the failing step.
*/
extern "C" magma_int_t
magma_zmshrink(
    magma_z_matrix A,
    magma_z_matrix *B,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix hA={Magma_CSR}, hACSR={Magma_CSR},
                   hB={Magma_CSR}, hBCSR={Magma_CSR};

    // guard: tall matrices are rejected up front
    if ( A.num_rows > A.num_cols ) {
        printf("%% error: A has too many rows: m > n.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    if ( A.storage_type == Magma_CSR && A.memory_location == Magma_CPU ) {
        CHECK( magma_zmconvert( A, B, Magma_CSR, Magma_CSR, queue ));

        // wipe the entries that fall outside the leading square part
        for ( magma_int_t entry = 0; entry < A.nnz; ++entry ) {
            if ( B->col[entry] < A.num_rows ) {
                continue;
            }
            B->val[entry] = MAGMA_Z_ZERO;
        }

        // let the compressor process the matrix with the zeroed entries
        CHECK( magma_zmcsrcompressor( B, queue ) );
        B->num_cols = B->num_rows;
    }
    else {
        // round trip through host CSR
        CHECK( magma_zmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_zmconvert( hA, &hACSR, A.storage_type, Magma_CSR, queue ));
        CHECK( magma_zmshrink( hACSR, &hBCSR, queue ));
        CHECK( magma_zmconvert( hBCSR, &hB, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_zmtransfer( hB, B, Magma_CPU, A.memory_location, queue ));
    }

cleanup:
    magma_zmfree( &hA, queue );
    magma_zmfree( &hB, queue );
    magma_zmfree( &hACSR, queue );
    magma_zmfree( &hBCSR, queue );
    return info;
}
/**
    Purpose
    -------
    Replaces the host CSR matrix L by a version that carries a unit diagonal.

    The kind of input is detected from L->row[1]:
      - row[1] == 1: L is treated as lower triangular including the diagonal
        and is converted to CSRL with diagorder_type = Magma_UNITY;
      - row[1] == 0: L is treated as strictly lower triangular and a 1.0 is
        appended at the end of every row;
      - anything else is rejected.

    Arguments
    ---------
    @param[in,out] L      host CSR matrix, replaced by the result
    @param[in]     queue  queue to execute in

    @return 0 on success; MAGMA_ERR_NOT_SUPPORTED if L is neither of the two
            recognised shapes (previously this case only printed a message,
            replaced L by an empty matrix and reported success); otherwise
            the error code of the failing step. On any error L is freed.
*/
magma_int_t
magma_zmLdiagadd(
    magma_z_matrix *L,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix LL={Magma_CSR};

    if( L->row[1]==1 ){
        // lower triangular with diagonal: let the converter force it to unity
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( *L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L->row[1]==0 ){
        // strictly lower triangular: rebuild col/val with one extra entry per row
        CHECK( magma_zmtransfer( *L, &LL, Magma_CPU, Magma_CPU, queue ));

        // clear the pointers after freeing so that a failed allocation below
        // does not leave LL with stale pointers for the cleanup path
        magma_free_cpu( LL.col );
        LL.col = NULL;
        magma_free_cpu( LL.val );
        LL.val = NULL;

        LL.nnz = L->nnz+L->num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));

        magma_int_t z=0;
        for( magma_int_t i=0; i<L->num_rows; i++){
            LL.row[i] = z;
            for( magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                LL.val[z] = L->val[j];
                LL.col[z] = L->col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
        LL.nnz = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    magma_zmfree( L, queue );
    CHECK( magma_zmtransfer( LL, L, Magma_CPU, Magma_CPU, queue ));

cleanup:
    if( info != 0 ){
        magma_zmfree( L, queue );
    }
    magma_zmfree( &LL, queue );
    return info;
}
/**
    Purpose
    -------
    Block-asynchronous iteration with overlapping blocks.

    The system matrix is brought to host CSR, split precond_par->levels times
    with magma_zcsrsplit (block size 256, start offset i*overlap), and the
    resulting D / R pairs are kept on the device. magma_zbajac_csr_overlap is
    then called until solver_par->maxiter sweeps have been performed.

    Supported values for precond_par->levels are 1, 2, 4, 8, 16, 32, 64, 128.

    Arguments
    ---------
    @param[in]     A            system matrix
    @param[in]     b            right-hand side
    @param[in,out] x            iterate / solution approximation
    @param[in,out] solver_par   solver parameters and feedback
    @param[in]     precond_par  supplies maxiter (local iterations) and
                                levels (number of overlapping splittings)
    @param[in]     queue        queue to execute in

    @return MAGMA_SUCCESS if the final residual is below the initial one,
            MAGMA_DIVERGENCE otherwise, MAGMA_ERR_NOT_SUPPORTED for an
            unsupported number of splittings, or the error code of a failing
            step. The same value is stored in solver_par->info.
*/
extern "C" magma_int_t
magma_zbaiter_overlap(
    magma_z_matrix A,
    magma_z_matrix b,
    magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // capacity of the per-splitting device matrix arrays
    enum { MAX_BLOCKS = 256 };

    // prepare solver feedback
    solver_par->solver = Magma_BAITERO;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO;

    real_Double_t tempo1, tempo2, runtime=0;
    double residual;
    magma_int_t localiter = precond_par->maxiter;

    magma_z_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, A_d={Magma_CSR}, r={Magma_CSR},
                   D={Magma_CSR}, R={Magma_CSR}, empty={Magma_CSR};

    // setup
    magma_int_t matrices = precond_par->levels;
    magma_int_t num_blocks = 0;     // number of D_d / R_d entries in use
    struct magma_z_matrix D_d[ MAX_BLOCKS ];
    struct magma_z_matrix R_d[ MAX_BLOCKS ];
    magma_int_t overlap;
    magma_int_t blocksize = 256;
    magma_int_t iterinc;

    // Every entry starts out as an empty matrix so that the cleanup path can
    // free them safely no matter where an error occurs.
    for( int i=0; i<MAX_BLOCKS; i++ ){
        D_d[i] = empty;
        R_d[i] = empty;
    }

    if(  matrices==2 ||
         matrices==4 ||
         matrices==8 ||
         matrices==16 ||
         matrices==32 ||
         matrices==64 ||
         matrices==128 ){
        overlap = blocksize/matrices;
    }else if( matrices == 1){
        overlap = 0;
    }else{
        printf("error: overlap ratio not supported.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    // validated above: at most 128, so it fits into the arrays
    num_blocks = matrices;

    CHECK( magma_zmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah, &ACSR, Ah.storage_type, Magma_CSR, queue ));
    CHECK( magma_zmtransfer( ACSR, &A_d, Magma_CPU, Magma_DEV, queue ));

    // initial residual
    CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
    solver_par->init_res = residual;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t) residual;
    }

    // one D / R splitting per overlap offset
    for( int i=0; i<num_blocks; i++ ){
        CHECK( magma_zcsrsplit( i*overlap, blocksize, ACSR, &D, &R, queue ));
        CHECK( magma_zmtransfer( D, &D_d[i], Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_zmtransfer( R, &R_d[i], Magma_CPU, Magma_DEV, queue ));
        magma_zmfree(&D, queue );
        magma_zmfree(&R, queue );
    }

    // without verbose output all sweeps run in one batch; otherwise the
    // residual is sampled every `verbose` sweeps
    if( solver_par->verbose == 0 ){
        iterinc = solver_par->maxiter;
    }
    else{
        iterinc = solver_par->verbose;
    }
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // block-asynchronous iteration iterator
    do
    {
        tempo1 = magma_sync_wtime( queue );
        solver_par->numiter+= iterinc;
        for( int z=0; z<iterinc; z++){
            CHECK( magma_zbajac_csr_overlap( localiter, matrices, overlap,
                                             D_d, R_d, b, x, queue ));
        }
        tempo2 = magma_sync_wtime( queue );
        runtime += tempo2-tempo1;
        if ( solver_par->verbose > 0 ) {
            CHECK( magma_zresidualvec( A_d, b, *x, &r, &residual, queue));
            solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) residual;
            solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                = (real_Double_t) runtime;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    solver_par->runtime = runtime;
    CHECK( magma_zresidual( A_d, b, *x, &residual, queue));
    solver_par->final_res = residual;
    solver_par->numiter = solver_par->maxiter;

    if ( solver_par->init_res > solver_par->final_res ){
        info = MAGMA_SUCCESS;
    }
    else {
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_zmfree(&r, queue );
    magma_zmfree(&D, queue );
    magma_zmfree(&R, queue );
    for( int i=0; i<num_blocks; i++ ){
        magma_zmfree(&D_d[i], queue );
        magma_zmfree(&R_d[i], queue );
    }
    magma_zmfree(&A_d, queue );
    magma_zmfree(&ACSR, queue );
    magma_zmfree(&Ah, queue );

    solver_par->info = info;
    return info;
}   /* magma_zbaiter_overlap */
magma_int_t magma_zsymbilu( magma_z_matrix *A, magma_int_t levels, magma_z_matrix *L, magma_z_matrix *U, magma_queue_t queue ) { magma_int_t info = 0; magma_z_matrix A_copy={Magma_CSR}, B={Magma_CSR}; magma_z_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR}; if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){ CHECK( magma_zmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue )); CHECK( magma_zmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue )); // possibility to scale to unit diagonal //magma_zmscale( &B, Magma_UNITDIAG ); CHECK( magma_zmconvert( B, L, Magma_CSR, Magma_CSR , queue)); CHECK( magma_zmconvert( B, U, Magma_CSR, Magma_CSR, queue )); magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_free_cpu( L->col ); magma_free_cpu( U->col ); CHECK( magma_index_malloc_cpu( &L->col, num_lnnz )); CHECK( magma_index_malloc_cpu( &U->col, num_unnz )); magma_zsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col, L->row, L->col, U->row, U->col ); L->nnz = num_lnnz; U->nnz = num_unnz; magma_free_cpu( L->val ); magma_free_cpu( U->val ); CHECK( magma_zmalloc_cpu( &L->val, L->nnz )); CHECK( magma_zmalloc_cpu( &U->val, U->nnz )); for( magma_int_t i=0; i<L->nnz; i++ ) L->val[i] = MAGMA_Z_MAKE( 0.0, 0.0 ); for( magma_int_t i=0; i<U->nnz; i++ ) U->val[i] = MAGMA_Z_MAKE( 0.0, 0.0 ); // take the original values (scaled) as initial guess for L for(magma_int_t i=0; i<L->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){ if( L->col[k] == lcol ){ L->val[k] = B.val[j]; } } } } // take the original values (scaled) as initial guess for U for(magma_int_t i=0; i<U->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){ if( U->col[k] == lcol ){ U->val[k] = B.val[j]; } } } } magma_zmfree( &B, 
queue ); // fill A with the new structure; magma_free_cpu( A->col ); magma_free_cpu( A->val ); CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz )); CHECK( magma_zmalloc_cpu( &A->val, L->nnz+U->nnz )); A->nnz = L->nnz+U->nnz; magma_int_t z = 0; for(magma_int_t i=0; i<A->num_rows; i++){ A->row[i] = z; for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){ A->col[z] = L->col[j]; A->val[z] = L->val[j]; z++; } for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){ A->col[z] = U->col[j]; A->val[z] = U->val[j]; z++; } } A->row[A->num_rows] = z; // reset the values of A to the original entries for(magma_int_t i=0; i<A->num_rows; i++){ for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){ magma_index_t lcol = A_copy.col[j]; for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){ if( A->col[k] == lcol ){ A->val[k] = A_copy.val[j]; } } } } } else { magma_storage_t A_storage = A->storage_type; magma_location_t A_location = A->memory_location; CHECK( magma_zmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue )); CHECK( magma_zmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue )); CHECK( magma_zsymbilu( &CSRCOOA, levels, L, U, queue )); magma_zmfree( &hA, queue ); magma_zmfree( A, queue ); CHECK( magma_zmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue )); CHECK( magma_zmtransfer( hA, A, Magma_CPU, A_location, queue )); } cleanup: if( info != 0 ){ magma_zmfree( L, queue ); magma_zmfree( U, queue ); } magma_zmfree( &A_copy, queue ); magma_zmfree( &B, queue ); magma_zmfree( &hA, queue ); magma_zmfree( &CSRCOOA, queue ); return info; }
/**
    Purpose
    -------
    Computes an initial guess L for an iterative incomplete factorization.

    With AL the lower triangular part of A and AU its transpose, the product
    AL*AU is formed on the device. A diagonal matrix D is built with
    D_ii = 1/sqrt(|Re((AL*AU)_ii)|), and L = D*AL is returned in CSRCOO
    format on the host.

    Fixes relative to the previous version:
      - AU is now actually transferred to the device before the product
        (dAU used to be passed to the SpMM while still empty);
      - the diagonal of AL*AU is read before hALU is released (it used to be
        read after magma_zmfree);
      - diag_vals, diag_offset, hAL and hAU are released on every path.

    Arguments
    ---------
    @param[in]  A      input matrix (read as host CSR)
    @param[out] L      initial guess, CSRCOO
    @param[out] U      not written on success; freed on error
    @param[in]  queue  queue to execute in

    @return 0 on success, otherwise the error code of the failing step.
*/
magma_int_t
magma_zinitguess(
    magma_z_matrix A,
    magma_z_matrix *L,
    magma_z_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );

    magma_z_matrix hAL={Magma_CSR}, hAU={Magma_CSR}, dAL={Magma_CSR},
    dAU={Magma_CSR}, dALU={Magma_CSR}, hALU={Magma_CSR}, hD={Magma_CSR},
    dD={Magma_CSR}, dL={Magma_CSR}, hL={Magma_CSR};
    magma_int_t i,j;

    magma_int_t offdiags = 0;
    magma_index_t *diag_offset=NULL;
    magmaDoubleComplex *diag_vals=NULL;

    // need only lower triangular
    hAL.diagorder_type = Magma_VALUE;
    CHECK( magma_zmconvert( A, &hAL, Magma_CSR, Magma_CSRL, queue ));

    // upper triangular part as the transpose of the lower one
    CHECK( magma_z_cucsrtranspose( hAL, &hAU, queue ));

    CHECK( magma_zmtransfer( hAL, &dAL, Magma_CPU, Magma_DEV, queue ));
    // use the location the transpose reports for itself as the source
    CHECK( magma_zmtransfer( hAU, &dAU, hAU.memory_location, Magma_DEV, queue ));
    magma_zmfree( &hAU, queue );

    CHECK( magma_z_spmm( one, dAL, dAU, &dALU, queue ));
    CHECK( magma_zmtransfer( dALU, &hALU, Magma_DEV, Magma_CPU, queue ));

    magma_zmfree( &dAU, queue);
    magma_zmfree( &dALU, queue);

    // identity matrix as container for the diagonal scaling
    CHECK( magma_zmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));
    diag_offset[0] = 0;
    diag_vals[0] = MAGMA_Z_MAKE( 1.0, 0.0 );
    CHECK( magma_zmgenerator( hALU.num_rows, offdiags, diag_offset, diag_vals, &hD, queue ));

    // overwrite the identity with 1/sqrt(|diag(AL*AU)|); hALU must still be
    // alive here, so it is released only after this loop
    for(i=0; i<hALU.num_rows; i++){
        for(j=hALU.row[i]; j<hALU.row[i+1]; j++){
            if( hALU.col[j] == i ){
                hD.val[i] = MAGMA_Z_MAKE(
                        1.0 / sqrt(fabs(MAGMA_Z_REAL(hALU.val[j]))) , 0.0 );
            }
        }
    }
    magma_zmfree( &hALU, queue );

    CHECK( magma_zmtransfer( hD, &dD, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree( &hD, queue);

    // L = D * AL
    CHECK( magma_z_spmm( one, dD, dAL, &dL, queue ));
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );

    CHECK( magma_zmtransfer( dL, &hL, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( hL, L, Magma_CSR, Magma_CSRCOO, queue ));

cleanup:
    if( info !=0 ){
        magma_zmfree( L, queue );
        magma_zmfree( U, queue );
    }
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_zmfree( &hAL, queue );
    magma_zmfree( &hAU, queue );
    magma_zmfree( &dAU, queue);
    magma_zmfree( &dALU, queue);
    magma_zmfree( &dL, queue );
    magma_zmfree( &hL, queue );
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );
    magma_zmfree( &hD, queue);
    magma_zmfree( &hALU, queue );
    return info;
}
/**
    Purpose
    -------
    Computes the residual of an incomplete factorization, A - L*U.

    L is first given a unit diagonal (detected via L.row[1]: 1 means lower
    triangular with diagonal, 0 means strictly lower triangular). The product
    L*U is formed on the device and copied to LU on the host. On the sparsity
    pattern of A the entries of LU are then overwritten by
    Re(LU) - Re(A).

      *nonlinres = sqrt( sum over the pattern of A of (Re(LU) - Re(A))^2 )
      *res       = sqrt( sum over all entries of LU of Re(LU)^2 ), taken
                   after the overwrite above

    Only the real parts of the values enter the computation.

    Arguments
    ---------
    @param[in]  A          original matrix (host CSR)
    @param[in]  L          lower factor (host CSR)
    @param[in]  U          upper factor (host CSR)
    @param[out] LU         host matrix holding the difference described above
    @param[out] res        Frobenius-type norm over the pattern of LU
    @param[out] nonlinres  Frobenius-type norm over the pattern of A
    @param[in]  queue      queue to execute in

    @return 0 on success; MAGMA_ERR_NOT_SUPPORTED if L is neither lower nor
            strictly lower triangular (this used to be reported by a message
            only); otherwise the error code of the failing step. On error LU
            is freed.
*/
magma_int_t
magma_zilures(
    magma_z_matrix A,
    magma_z_matrix L,
    magma_z_matrix U,
    magma_z_matrix *LU,
    real_Double_t *res,
    real_Double_t *nonlinres,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex tmp;
    real_Double_t tmp2;
    magma_int_t i, j, k;

    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );

    magma_z_matrix LL={Magma_CSR}, L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR};

    // the sums below accumulate into the outputs, so start from zero instead
    // of relying on the caller to have cleared them
    *res = 0.0;
    *nonlinres = 0.0;

    if( L.row[1]==1 ){
        // lower triangular with unit diagonal
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L.row[1]==0 ){
        // strictly lower triangular: append a 1.0 to every row
        CHECK( magma_zmtransfer( L, &LL, Magma_CPU, Magma_CPU, queue ));

        // clear after free so a failed allocation cannot leave stale
        // pointers for the cleanup path
        magma_free_cpu( LL.col );
        LL.col = NULL;
        magma_free_cpu( LL.val );
        LL.val = NULL;

        LL.nnz = L.nnz+L.num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));

        magma_int_t z=0;
        for (i=0; i < L.num_rows; i++) {
            LL.row[i] = z;
            for (j=L.row[i]; j < L.row[i+1]; j++) {
                LL.val[z] = L.val[j];
                LL.col[z] = L.col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    CHECK( magma_zmtransfer( LL, &L_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_zmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree( &LL, queue );
    CHECK( magma_z_spmm( one, L_d, U_d, &LU_d, queue ));

    CHECK( magma_zmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue ));
    magma_zmfree( &L_d, queue );
    magma_zmfree( &U_d, queue );
    magma_zmfree( &LU_d, queue );

    // difference on the pattern of A, accumulated into nonlinres
    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            magma_index_t lcol = A.col[j];
            for(k=LU->row[i]; k<LU->row[i+1]; k++){
                if( LU->col[k] == lcol ){
                    tmp = MAGMA_Z_MAKE(
                        MAGMA_Z_REAL( LU->val[k] )- MAGMA_Z_REAL( A.val[j] )
                                                , 0.0 );
                    LU->val[k] = tmp;

                    tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(tmp) );
                    (*nonlinres) = (*nonlinres) + tmp2*tmp2;
                }
            }
        }
    }

    // norm over the complete pattern of LU
    for(i=0; i<LU->num_rows; i++){
        for(j=LU->row[i]; j<LU->row[i+1]; j++){
            tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(LU->val[j]) );
            (*res) = (*res) + tmp2* tmp2;
        }
    }

    (*res) = sqrt((*res));
    (*nonlinres) = sqrt((*nonlinres));

cleanup:
    if( info !=0 ){
        magma_zmfree( LU, queue );
    }
    magma_zmfree( &LL, queue );
    magma_zmfree( &L_d, queue );
    magma_zmfree( &U_d, queue );
    magma_zmfree( &LU_d, queue );
    return info;
}
/**
    Purpose
    -------
    Prepares the cuSPARSE triangular-solve analysis data for an ILU
    preconditioner.

    If precond->L is not yet on the device, L (unit diagonal) and U are
    extracted from precond->M on the host and transferred to precond->L and
    precond->U. The analysis information for both factors is then created and
    stored in precond->cuinfoL and precond->cuinfoU. If precond->maxiter < 50,
    the diagonals (precond->d, precond->d2) and work vectors (precond->work1,
    precond->work2) for iterative triangular solves are set up as well.

    Arguments
    ---------
    @param[in,out] precond  preconditioner structure
    @param[in]     queue    queue to execute in

    @return 0 on success, otherwise the error code of the failing step.
*/
extern "C" magma_int_t
magma_zcumilugeneratesolverinfo(
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;

    magma_z_matrix hA={Magma_CSR}, hL={Magma_CSR}, hU={Magma_CSR};

    if (precond->L.memory_location != Magma_DEV ){
        CHECK( magma_zmtransfer( precond->M, &hA,
                                 precond->M.memory_location, Magma_CPU, queue ));

        hL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue ));
        hU.diagorder_type = Magma_VALUE;
        CHECK( magma_zmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue ));
        CHECK( magma_zmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_zmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue ));

        // release the host copies early; cleanup frees them again for the
        // error paths above
        magma_zmfree(&hA, queue );
        magma_zmfree(&hL, queue );
        magma_zmfree(&hU, queue );
    }

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // lower factor: triangular, unit diagonal, zero based
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL ));

    // upper factor: triangular, explicit diagonal, zero based
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU ));

    if( precond->maxiter < 50 ){
        //prepare for iterative solves

        // extract the diagonal of L into precond->d
        CHECK( magma_zjacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_zvinit( &precond->work1, Magma_DEV, precond->U.num_rows, 1, MAGMA_Z_ZERO, queue ));

        // extract the diagonal of U into precond->d2
        CHECK( magma_zjacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_zvinit( &precond->work2, Magma_DEV, precond->U.num_rows, 1, MAGMA_Z_ZERO, queue ));
    }

cleanup:
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );
    // host temporaries would otherwise leak when a CHECK in the host branch fails
    magma_zmfree(&hA, queue );
    magma_zmfree(&hL, queue );
    magma_zmfree(&hU, queue );
    return info;
}
/**
    Purpose
    -------
    Prepares the transposed ILU factors and their cuSPARSE analysis data.

    precond->L and precond->U are copied to the host, conjugate-transposed in
    CSR, converted back to A's storage format (using A's blocksize and
    alignment) and stored on the device as precond->LT and precond->UT. The
    triangular-solve analysis information is stored in precond->cuinfoLT and
    precond->cuinfoUT.

    All transfer / conversion / transpose calls are now checked; previously
    their return codes were dropped and a failure went unnoticed until the
    cuSPARSE analysis.

    Arguments
    ---------
    @param[in]     A        supplies storage_type, blocksize and alignment
    @param[in,out] precond  preconditioner structure
    @param[in]     queue    queue to execute in

    @return 0 on success, otherwise the error code of the failing step.
*/
extern "C" magma_int_t
magma_zcumilusetup_transpose(
    magma_z_matrix A,
    magma_z_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_z_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR};

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrLT=NULL;
    cusparseMatDescr_t descrUT=NULL;

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // LT: conjugate transpose of L, built on the host
    CHECK( magma_zmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_zmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue ));
    magma_zmfree(&Ah2, queue );

    // UT: conjugate transpose of U, same procedure
    CHECK( magma_zmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_zmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue ));
    magma_zmfree(&Ah2, queue );

    // L^H is upper triangular with unit diagonal
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT ));
    CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows,
        precond->LT.nnz, descrLT,
        precond->LT.dval, precond->LT.drow, precond->LT.dcol, precond->cuinfoLT ));

    // U^H is lower triangular with explicit diagonal
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT ));
    CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows,
        precond->UT.nnz, descrUT,
        precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT ));

cleanup:
    cusparseDestroyMatDescr( descrLT );
    cusparseDestroyMatDescr( descrUT );
    cusparseDestroy( cusparseHandle );
    magma_zmfree(&Ah1, queue );
    magma_zmfree(&Ah2, queue );
    return info;
}
/**
    Purpose
    -------
    Extracts one row slice of a square host CSR matrix A.

    The rows are divided into num_slices contiguous slices of
    ceil(num_rows/num_slices) rows (the last ones may be shorter or empty).
    For slice number `slice`, covering rows [start, end):

      - B     has A's rows inside the slice and identity rows elsewhere;
      - ALOC  holds the slice entries with column in [start, end), with
              column indices shifted by -start;
      - ANLOC holds the slice entries with column outside [start, end),
              with the original column indices;
      - comm_i[c] is set to 1 and comm_v[c] accumulates |a_ic| for every
              column c referenced from outside the slice range.

    Arguments
    ---------
    @param[in]  num_slices  number of slices (>= 1)
    @param[in]  slice       slice to extract, 0 <= slice < num_slices
    @param[in]  A           square matrix, host CSR
    @param[out] B           slice embedded into an identity matrix
    @param[out] ALOC        local part of the slice
    @param[out] ANLOC       non-local part of the slice
    @param[out] comm_i      communication plan flags, length A.num_rows
    @param[out] comm_v      communication plan weights, length A.num_rows
    @param[out] start       first row of the slice
    @param[out] end         one past the last row of the slice
    @param[in]  queue       queue to execute in

    @return 0 on success; MAGMA_ERR_NOT_SUPPORTED for non-square input, for
            input that is not host CSR, or for an invalid slice selection;
            otherwise the error code of the failing allocation / conversion.
*/
extern "C" magma_int_t
magma_zmslice(
    magma_int_t num_slices,
    magma_int_t slice,
    magma_z_matrix A,
    magma_z_matrix *B,
    magma_z_matrix *ALOC,
    magma_z_matrix *ANLOC,
    magma_index_t *comm_i,
    magmaDoubleComplex *comm_v,
    magma_int_t *start,
    magma_int_t *end,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    if( A.num_rows != A.num_cols ){
        printf("%%  error: only supported for square matrices.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    // guards the division in magma_ceildiv and the row-pointer accesses below
    if( num_slices < 1 || slice < 0 || slice >= num_slices ){
        printf("%%  error: slice %d is not valid for %d slices.\n",
               int(slice), int(num_slices) );
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    if ( A.memory_location == Magma_CPU
            && A.storage_type == Magma_CSR ){
        // The outputs start as copies of A (which carries over the scalar
        // fields); the arrays that are rebuilt are released and cleared so
        // that no freed pointer survives if an allocation fails later on.
        CHECK( magma_zmconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( B->col );
        B->col = NULL;
        magma_free_cpu( B->val );
        B->val = NULL;

        CHECK( magma_zmconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ALOC->col );
        ALOC->col = NULL;
        magma_free_cpu( ALOC->row );
        ALOC->row = NULL;
        magma_free_cpu( ALOC->val );
        ALOC->val = NULL;

        CHECK( magma_zmconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ANLOC->col );
        ANLOC->col = NULL;
        magma_free_cpu( ANLOC->row );
        ANLOC->row = NULL;
        magma_free_cpu( ANLOC->val );
        ANLOC->val = NULL;

        magma_int_t i,j,k, nnz, nnz_loc=0, loc_row = 0, nnz_nloc = 0;
        magma_index_t col;
        magma_int_t size = magma_ceildiv( A.num_rows, num_slices );
        // slice*size can exceed num_rows for trailing slices; clamp so the
        // slice is empty instead of reaching past the row pointer
        magma_int_t lstart = min( slice*size, A.num_rows );
        magma_int_t lend = min( (slice+1)*size, A.num_rows );
        // correct size for last slice
        size = lend-lstart;
        CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
        CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );

        // count elements for slice - identity for rest
        nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
        CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
        CHECK( magma_zmalloc_cpu( &B->val, nnz ) );

        // for the communication plan
        for( i=0; i<A.num_rows; i++ ) {
            comm_i[i] = 0;
            comm_v[i] = MAGMA_Z_ZERO;
        }

        k=0;
        // first row pointer entry (the index used to be the stale loop
        // counter, i.e. A.num_rows, instead of 0)
        B->row[0] = 0;
        ALOC->row[0] = 0;
        ANLOC->row[0] = 0;

        // identity above slice
        for( i=0; i<lstart; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }

        // slice
        for( i=lstart; i<lend; i++ ) {
            B->row[i+1] = B->row[i] + (A.row[i+1]-A.row[i]);
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                B->val[k] = A.val[j];
                col = A.col[j];
                B->col[k] = col;
                // communication plan
                if( col<lstart || col>=lend ){
                    comm_i[ col ] = 1;
                    comm_v[ col ] = comm_v[ col ]
                            + MAGMA_Z_MAKE( MAGMA_Z_ABS( A.val[j] ), 0.0 );
                    nnz_nloc++;
                } else {
                    nnz_loc++;
                }
                k++;
            }
            loc_row++;
            ALOC->row[ loc_row ] = nnz_loc;
            ANLOC->row[ loc_row ] = nnz_nloc;
        }

        CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
        CHECK( magma_zmalloc_cpu( &ALOC->val, nnz_loc ) );
        ALOC->num_rows = size;
        ALOC->num_cols = size;
        ALOC->nnz = nnz_loc;

        CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
        CHECK( magma_zmalloc_cpu( &ANLOC->val, nnz_nloc ) );
        ANLOC->num_rows = size;
        ANLOC->num_cols = A.num_cols;
        ANLOC->nnz = nnz_nloc;

        // second pass: distribute the slice entries to ALOC / ANLOC
        nnz_loc = 0;
        nnz_nloc = 0;
        for( i=lstart; i<lend; i++ ) {
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                col = A.col[j];
                // insert only in local part in ALOC, nonlocal in ANLOC
                if( col<lstart || col>=lend ){
                    ANLOC->val[ nnz_nloc ] = A.val[j];
                    ANLOC->col[ nnz_nloc ] = col;
                    nnz_nloc++;
                } else {
                    ALOC->val[ nnz_loc ] = A.val[j];
                    ALOC->col[ nnz_loc ] = col-lstart;
                    nnz_loc++;
                }
            }
        }

        // identity below slice
        for( i=lend; i<A.num_rows; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }
        B->nnz = k;
        *start = lstart;
        *end = lend;
    }
    else {
        printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n",
                int(A.memory_location), int(A.storage_type) );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

cleanup:
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_zopts zopts; magma_queue_t queue=NULL; magma_queue_create( &queue ); real_Double_t res; magma_z_matrix Z={Magma_CSR}, Z2={Magma_CSR}, A={Magma_CSR}, A2={Magma_CSR}, AT={Magma_CSR}, AT2={Magma_CSR}, B={Magma_CSR}; int i=1; CHECK( magma_zparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_zm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_z_csr_mtx( &Z, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(Z.num_rows), int(Z.num_cols), int(Z.nnz) ); // convert to be non-symmetric CHECK( magma_zmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue )); CHECK( magma_zmconvert( Z, &B, Magma_CSR, Magma_CSRU, queue )); // transpose CHECK( magma_zmtranspose( A, &AT, queue )); // quite some conversions //ELL CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_ELL, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_ELL, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //ELLPACKT CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_ELLPACKT, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_ELLPACKT, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //ELLRT AT2.blocksize = 8; AT2.alignment = 8; CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_ELLRT, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_ELLRT, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //SELLP AT2.blocksize = 8; AT2.alignment = 8; CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_SELLP, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_SELLP, Magma_CSR, queue )); 
magma_zmfree(&AT2, queue ); //ELLD CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_ELLD, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_ELLD, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //CSRCOO CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_CSRCOO, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_CSRCOO, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //CSRLIST CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_CSRLIST, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_CSRLIST, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); //CSRD CHECK( magma_zmconvert( AT, &AT2, Magma_CSR, Magma_CSRD, queue )); magma_zmfree(&AT, queue ); CHECK( magma_zmconvert( AT2, &AT, Magma_CSRD, Magma_CSR, queue )); magma_zmfree(&AT2, queue ); // transpose CHECK( magma_zmtranspose( AT, &A2, queue )); CHECK( magma_zmdiff( A, A2, &res, queue)); printf("%% ||A-A2||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% conversion tester: ok\n"); else printf("%% conversion tester: failed\n"); CHECK( magma_zmlumerge( A2, B, &Z2, queue )); CHECK( magma_zmdiff( Z, Z2, &res, queue)); printf("%% ||Z-Z2||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% LUmerge tester: ok\n"); else printf("%% LUmerge tester: failed\n"); magma_zmfree(&A, queue ); magma_zmfree(&A2, queue ); magma_zmfree(&AT, queue ); magma_zmfree(&AT2, queue ); magma_zmfree(&B, queue ); magma_zmfree(&Z2, queue ); magma_zmfree(&Z, queue ); i++; } cleanup: magma_zmfree(&A, queue ); magma_zmfree(&A2, queue ); magma_zmfree(&AT, queue ); magma_zmfree(&AT2, queue ); magma_zmfree(&B, queue ); magma_zmfree(&Z2, queue ); magma_zmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_zopts zopts; magma_queue_t queue=NULL; magma_queue_create( /*devices[ opts->device ],*/ &queue ); magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); magma_z_matrix A={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_z_matrix x={Magma_CSR}, b={Magma_CSR}; int i=1; CHECK( magma_zparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; if ( zopts.solver_par.solver != Magma_PCG && zopts.solver_par.solver != Magma_PGMRES && zopts.solver_par.solver != Magma_PBICGSTAB && zopts.solver_par.solver != Magma_ITERREF && zopts.solver_par.solver != Magma_LOBPCG ) zopts.precond_par.solver = Magma_NONE; CHECK( magma_zsolverinfo_init( &zopts.solver_par, &zopts.precond_par, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_zm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_z_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", (int) A.num_rows,(int) A.num_cols,(int) A.nnz ); // for the eigensolver case zopts.solver_par.ev_length = A.num_rows; CHECK( magma_zeigensolverinfo_init( &zopts.solver_par, queue )); // scale matrix CHECK( magma_zmscale( &A, zopts.scaling, queue )); CHECK( magma_zmconvert( A, &B, Magma_CSR, zopts.output_format, queue )); CHECK( magma_zmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); // vectors and initial guess CHECK( magma_zvinit( &b, Magma_DEV, A.num_cols, 1, one, queue )); //magma_zvinit( &x, Magma_DEV, A.num_cols, 1, one, queue ); //magma_z_spmv( one, B_d, x, zero, b, queue ); // b = A x //magma_zmfree(&x, queue ); CHECK( magma_zvinit( &x, Magma_DEV, A.num_cols, 1, zero, queue )); info = 
magma_z_solver( B_d, b, &x, &zopts, queue ); if( info != 0 ){ printf("error: solver returned: %s (%d).\n", magma_strerror( info ), info ); } magma_zsolverinfo( &zopts.solver_par, &zopts.precond_par, queue ); magma_zmfree(&B_d, queue ); magma_zmfree(&B, queue ); magma_zmfree(&A, queue ); magma_zmfree(&x, queue ); magma_zmfree(&b, queue ); i++; } cleanup: magma_zmfree(&B_d, queue ); magma_zmfree(&B, queue ); magma_zmfree(&A, queue ); magma_zmfree(&x, queue ); magma_zmfree(&b, queue ); magma_zsolverinfo_free( &zopts.solver_par, &zopts.precond_par, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/**
    Solves A * x = b with an unpreconditioned QMR iteration
    (reported as solver type Magma_QMRMERGE).

    The vector updates are delegated to the helper routines
    magma_zqmr_1 ... magma_zqmr_5; the formulas noted next to each call are
    the ones the original authors documented for them.

    The conjugate transpose of A is built once before the iteration:
    device -> host, convert to CSR, conjugate-transpose, convert back to
    A.storage_type, host -> device. Every step of that pipeline is checked;
    a failure aborts the solve through the common cleanup path.

    @param[in]     A           system matrix; transferred from Magma_DEV, so it
                               is expected to reside on the device
    @param[in]     b           right-hand side (b.dval is read)
    @param[in,out] x           initial guess on entry; x->dval is handed to
                               magma_zqmr_4 / magma_zqmr_5, which per their
                               inline description accumulate the update into it
    @param[in,out] solver_par  reads rtol, atol, maxiter, verbose;
                               writes solver, numiter, spmv_count, init_res,
                               iter_res, final_res, runtime, info and, when
                               verbose > 0, res_vec[] and timing[]
    @param[in]     queue       queue passed to every MAGMA call

    @return the status code that is also stored in solver_par->info
*/
extern "C" magma_int_t
magma_zqmr_merge(
    magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_QMRMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // local variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO, c_one = MAGMA_Z_ONE;
    // solver variables
    double nom0, r0, res=0, nomb;
    magmaDoubleComplex rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one,
                        thet = c_one, thet1 = c_one, epsilon = c_one,
                        beta = c_one, delta = c_one, pde = c_one, rde = c_one,
                        gamm = c_one, gamm1 = c_one, psi = c_one;

    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    magma_z_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    // GPU workspace
    magma_z_matrix r={Magma_CSR}, r_tld={Magma_CSR},
                    v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                    d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR},
                    p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR};
    CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // solver setup: r = b - A x, then seed the work vectors with r
    CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_zcopy( dofs, r.dval, 1, r_tld.dval, 1, queue );
    magma_zcopy( dofs, r.dval, 1, y.dval, 1, queue );
    magma_zcopy( dofs, r.dval, 1, v.dval, 1, queue );
    magma_zcopy( dofs, r.dval, 1, wt.dval, 1, queue );
    magma_zcopy( dofs, r.dval, 1, z.dval, 1, queue );

    // transpose the matrix
    // Each step can fail (allocation, unsupported format); an unchecked
    // failure would leave AT empty and be used in the SpMV below.
    CHECK( magma_zmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_zmfree(&Ah2, queue );
    // carry over the layout parameters before converting back
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree(&Ah2, queue );

    nomb = magma_dznrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;   // avoid dividing by zero in the relative criterion
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        // initial guess already satisfies the tolerance
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    psi = magma_zsqrt( magma_zdotc( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_zsqrt( magma_zdotc( dofs, y.dval, 1, y.dval, 1, queue ));

    // v = y / rho
    // y = y / rho
    // w = wt / psi
    // z = z / psi
    magma_zqmr_1( r.num_rows, r.num_cols, rho, psi,
                  y.dval, z.dval, v.dval, w.dval, queue );

    //Chronometry
    // (declared without initializers: the error path jumps over them)
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( magma_z_isnan_inf( rho ) || magma_z_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // delta = z' * y;
        delta = magma_zdotc( dofs, z.dval, 1, y.dval, 1, queue );
        if( magma_z_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // no precond: yt = y, zt = z
        if( solver_par->numiter == 1 ){
            // p = y;
            // q = z;
            magma_zcopy( dofs, y.dval, 1, p.dval, 1, queue );
            magma_zcopy( dofs, z.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_Z_CONJ(delta/epsilon);
            // p = y - pde * p
            // q = z - rde * q
            magma_zqmr_2( r.num_rows, r.num_cols, pde, rde,
                          y.dval, z.dval, p.dval, q.dval, queue );
        }
        // rho and psi have not changed since the test at the top of the
        // iteration; the re-test is kept from the original code.
        if( magma_z_isnan_inf( rho ) || magma_z_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        CHECK( magma_z_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;

        // epsilon = q' * pt;
        epsilon = magma_zdotc( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;
        if( magma_z_isnan_inf( epsilon ) || magma_z_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        // v = pt - beta * v
        // y = v
        magma_zqmr_3( r.num_rows, r.num_cols, beta,
                      pt.dval, v.dval, y.dval, queue );

        rho1 = rho;
        // rho = norm(y);
        rho = magma_zsqrt( magma_zdotc( dofs, y.dval, 1, y.dval, 1, queue ));

        // wt = A' * q - beta' * w;
        CHECK( magma_z_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_zaxpy( dofs, - MAGMA_Z_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );

        // no precond: z = wt
        magma_zcopy( dofs, wt.dval, 1, z.dval, 1, queue );

        thet1 = thet;
        thet = rho / (gamm * MAGMA_Z_MAKE( MAGMA_Z_ABS(beta), 0.0 ));
        gamm1 = gamm;

        gamm = c_one / magma_zsqrt(c_one + thet*thet);
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);

        if( magma_z_isnan_inf( thet ) || magma_z_isnan_inf( gamm )
                                      || magma_z_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }

        if( solver_par->numiter == 1 ){
            // d = eta * p + pds * d;
            // s = eta * pt + pds * d;
            // x = x + d;
            // r = r - s;
            magma_zqmr_4( r.num_rows, r.num_cols, eta,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }
        else{
            pds = (thet1 * gamm) * (thet1 * gamm);
            // d = eta * p + pds * d;
            // s = eta * pt + pds * d;
            // x = x + d;
            // r = r - s;
            magma_zqmr_5( r.num_rows, r.num_cols, eta, pds,
                          p.dval, pt.dval, d.dval, s.dval,
                          x->dval, r.dval, queue );
        }

        // psi = norm(z);
        psi = magma_zsqrt( magma_zdotc( dofs, z.dval, 1, z.dval, 1, queue ) );

        res = magma_dznrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            // record every verbose-th iteration
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
        magma_zqmr_1( r.num_rows, r.num_cols, rho, psi,
                      y.dval, z.dval, v.dval, w.dval, queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    // recompute the true residual b - A x for the final report
    CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    // NOTE(review): before this point info is only assigned
    // MAGMA_NOTCONVERGED or MAGMA_DIVERGENCE here, so unless CHECK stores a
    // zero on success the first branch does not fire and the status is
    // decided by the residual comparison below - confirm against CHECK.
    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_zmfree(&r, queue );
    magma_zmfree(&r_tld, queue );
    magma_zmfree(&v, queue );
    magma_zmfree(&w, queue );
    magma_zmfree(&wt, queue );
    magma_zmfree(&d, queue );
    magma_zmfree(&s, queue );
    magma_zmfree(&z, queue );
    magma_zmfree(&q, queue );
    magma_zmfree(&p, queue );
    magma_zmfree(&pt, queue );
    magma_zmfree(&y, queue );
    magma_zmfree(&AT, queue );
    magma_zmfree(&Ah1, queue );
    magma_zmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_zqmr_merge */
extern "C" magma_int_t magma_zcumiccsetup( magma_z_matrix A, magma_z_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; #if CUDA_VERSION >= 7000 csric02Info_t info_M=NULL; void *pBuffer = NULL; #endif magma_z_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR}; CHECK( magma_zmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); U.diagorder_type = Magma_VALUE; CHECK( magma_zmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue )); // in case using fill-in if( precond->levels > 0 ){ magma_z_matrix hAL={Magma_CSR}, hAUt={Magma_CSR}; CHECK( magma_zsymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue )); magma_zmfree(&hAL, queue); magma_zmfree(&hAUt, queue); } CHECK( magma_zmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue )); magma_zmfree( &hACSR, queue ); CHECK( magma_zmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) )); // use kernel to manually check for zeros n the diagonal CHECK( magma_zdiagcheck( precond->M, queue ) ); #if CUDA_VERSION >= 7000 // this version has the bug fixed where a zero on the diagonal causes a crash CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) ); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); int buffersize; int structural_zero; int numerical_zero; CHECK_CUSPARSE( cusparseZcsric02_bufferSize( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, &buffersize ) ); CHECK( magma_malloc((void**)&pBuffer, buffersize) ); 
CHECK_CUSPARSE( cusparseZcsric02_analysis( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer )); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) ); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) ); CHECK_CUSPARSE( cusparseZcsric02( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) ); #else // this version contains the bug but is needed for backward compability CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); CHECK_CUSPARSE( cusparseZcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); #endif CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, 
precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseZcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_zmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_zmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_zjacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_zvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_Z_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_zjacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_zvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_Z_ZERO, queue )); } /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_zmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; magma_z_matrix hD, hR, hAt CHECK( magma_zcsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_zmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_zmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_zmfree(&hD, queue ); magma_zmfree(&hR, queue ); CHECK( magma_z_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_zcsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_zmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( 
magma_zmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_zmfree(&hD, queue ); magma_zmfree(&hR, queue ); magma_zmfree(&hA, queue ); magma_zmfree(&hAt, queue ); */ cleanup: #if CUDA_VERSION >= 7000 magma_free( pBuffer ); cusparseDestroyCsric02Info( info_M ); #endif cusparseDestroySolveAnalysisInfo( precond->cuinfo ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_zmfree(&U, queue ); magma_zmfree(&hA, queue ); return info; }
/**
    Solves A * x = b with a preconditioned BiCG iteration
    (reported as solver type Magma_PBICG).

    Each iteration applies the preconditioner from the left and the right to
    the residual r, and the transposed preconditioner to the shadow residual
    rt, then performs one SpMV with A and one with the conjugate transpose of
    A. The conjugate transpose is built once before the iteration
    (device -> host, CSR, conjugate-transpose, back to A.storage_type,
    host -> device); every step of that pipeline is checked.

    The iteration stops early on numerical breakdown, i.e. when rho_new or
    alpha becomes NaN/Inf. In that case x is not touched by the failing step
    and the early exit is NOT counted as convergence: the returned status is
    derived from the residuals, as it is after reaching maxiter.

    @param[in]     A            system matrix; transferred from Magma_DEV, so
                                it is expected to reside on the device
    @param[in]     b            right-hand side (b.dval is read)
    @param[in,out] x            initial guess on entry, updated in place
    @param[in,out] solver_par   reads rtol, atol, maxiter, verbose;
                                writes solver, numiter, spmv_count, init_res,
                                iter_res, final_res, runtime, info and, when
                                verbose > 0, res_vec[] and timing[]
    @param[in]     precond_par  preconditioner handed to
                                magma_z_applyprecond_left / _right
    @param[in]     queue        queue passed to every MAGMA call

    @return the status code that is also stored in solver_par->info
*/
extern "C" magma_int_t
magma_zpbicg(
    magma_z_matrix A, magma_z_matrix b, magma_z_matrix *x,
    magma_z_solver_par *solver_par,
    magma_z_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_PBICG;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // some useful variables
    magmaDoubleComplex c_zero = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;

    magma_int_t dofs = A.num_rows * b.num_cols;

    // set when the loop is left because of NaN/Inf; declared before the
    // first CHECK because the error path must not jump over an initializer
    magma_int_t breakdown = 0;

    // workspace
    magma_z_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR},
                z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR},
                yt={Magma_CSR}, qt={Magma_CSR};

    // need to transpose the matrix
    magma_z_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};

    CHECK( magma_zvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_zvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    // solver variables
    // (declared without initializers: the error path jumps over them)
    magmaDoubleComplex alpha, rho, beta, rho_new, ptq;
    double res, nomb, nom0, r0;

    // transpose the matrix
    // Each step can fail (allocation, unsupported format); an unchecked
    // failure would leave AT empty and be used in the SpMV below.
    CHECK( magma_zmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransposeconjugate( Ah2, &Ah1, queue ));
    magma_zmfree(&Ah2, queue );
    // carry over the layout parameters before converting back
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    CHECK( magma_zmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue ));
    magma_zmfree(&Ah1, queue );
    CHECK( magma_zmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree(&Ah2, queue );

    // solver setup
    CHECK( magma_zresidualvec( A, b, *x, &r, &nom0, queue));
    res = nom0;
    solver_par->init_res = nom0;
    magma_zcopy( dofs, r.dval, 1, rt.dval, 1, queue );              // rt = r
    rho_new = magma_zdotc( dofs, rt.dval, 1, r.dval, 1, queue );    // rho_new = <rt,r>
    rho = alpha = MAGMA_Z_MAKE( 1.0, 0. );

    nomb = magma_dznrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;   // avoid dividing by zero in the relative criterion
    }
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        // initial guess already satisfies the tolerance
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );

    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        // z  = preconditioned r  (left, then right)
        // zt = transposed-preconditioned rt (right, then left)
        CHECK( magma_z_applyprecond_left( MagmaNoTrans, A, r, &y, precond_par, queue ));
        CHECK( magma_z_applyprecond_right( MagmaNoTrans, A, y, &z, precond_par, queue ));
        CHECK( magma_z_applyprecond_right( MagmaTrans, A, rt, &yt, precond_par, queue ));
        CHECK( magma_z_applyprecond_left( MagmaTrans, A, yt, &zt, precond_par, queue ));

        rho = rho_new;
        rho_new = magma_zdotc( dofs, rt.dval, 1, z.dval, 1, queue );  // rho_new = <rt,z>
        if( magma_z_isnan_inf( rho_new ) ){
            info = MAGMA_DIVERGENCE;
            breakdown = 1;
            break;
        }

        if( solver_par->numiter==1 ){
            magma_zcopy( dofs, z.dval, 1 , p.dval, 1, queue );        // p  = z
            magma_zcopy( dofs, zt.dval, 1 , pt.dval, 1, queue );      // pt = zt
        } else {
            beta = rho_new/rho;
            magma_zscal( dofs, beta, p.dval, 1, queue );              // p = beta*p
            magma_zaxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue );   // p = z+beta*p
            magma_zscal( dofs, MAGMA_Z_CONJ(beta), pt.dval, 1, queue );  // pt = conj(beta)*pt
            magma_zaxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue ); // pt = zt+conj(beta)*pt
        }
        CHECK( magma_z_spmv( c_one, A, p, c_zero, q, queue ));        // q  = A  * p
        CHECK( magma_z_spmv( c_one, AT, pt, c_zero, qt, queue ));     // qt = AT * pt
        solver_par->spmv_count++;
        solver_par->spmv_count++;

        ptq = magma_zdotc( dofs, pt.dval, 1, q.dval, 1, queue );
        alpha = rho_new /ptq;
        // <pt,q> == 0 (or NaN) makes alpha non-finite; stop before it is
        // written into x, r and rt
        if( magma_z_isnan_inf( alpha ) ){
            info = MAGMA_DIVERGENCE;
            breakdown = 1;
            break;
        }

        magma_zaxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue );    // x = x + alpha*p
        magma_zaxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue );
                                                                      // r = r - alpha*q
        magma_zaxpy( dofs, c_neg_one * MAGMA_Z_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue );
                                                                      // rt = rt - conj(alpha)*qt

        res = magma_dznrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            // record every verbose-th iteration
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );

    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    // recompute the true residual b - A x for the final report
    CHECK( magma_zresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    // An early exit means convergence only if it was not a breakdown;
    // otherwise classify by comparing the residuals.
    if ( !breakdown && solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }

cleanup:
    magma_zmfree(&r, queue );
    magma_zmfree(&rt, queue );
    magma_zmfree(&p, queue );
    magma_zmfree(&pt, queue );
    magma_zmfree(&q, queue );
    magma_zmfree(&qt, queue );
    magma_zmfree(&y, queue );
    magma_zmfree(&yt, queue );
    magma_zmfree(&z, queue );
    magma_zmfree(&zt, queue );
    magma_zmfree(&AT, queue );
    magma_zmfree(&Ah1, queue );
    magma_zmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_zpbicg */
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_CHECK( magma_init() ); magma_print_environment(); magma_zopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_z_matrix A={Magma_CSR}, A2={Magma_CSR}, A3={Magma_CSR}, A4={Magma_CSR}, A5={Magma_CSR}; int i=1; TESTING_CHECK( magma_zparse_opts( argc, argv, &zopts, &i, queue )); while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); TESTING_CHECK( magma_zm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test TESTING_CHECK( magma_z_csr_mtx( &A, argv[i], queue )); } printf("%% matrix info: %lld-by-%lld with %lld nonzeros\n", (long long) A.num_rows, (long long) A.num_cols, (long long) A.nnz ); // filename for temporary matrix storage const char *filename = "testmatrix.mtx"; // write to file TESTING_CHECK( magma_zwrite_csrtomtx( A, filename, queue )); // read from file TESTING_CHECK( magma_z_csr_mtx( &A2, filename, queue )); // delete temporary matrix unlink( filename ); //visualize printf("A2:\n"); TESTING_CHECK( magma_zprint_matrix( A2, queue )); //visualize TESTING_CHECK( magma_zmconvert(A2, &A4, Magma_CSR, Magma_CSRL, queue )); printf("A4:\n"); TESTING_CHECK( magma_zprint_matrix( A4, queue )); TESTING_CHECK( magma_zmconvert(A4, &A5, Magma_CSR, Magma_ELL, queue )); printf("A5:\n"); TESTING_CHECK( magma_zprint_matrix( A5, queue )); // pass it to another application and back magma_int_t m, n; magma_index_t *row, *col; magmaDoubleComplex *val=NULL; TESTING_CHECK( magma_zcsrget( A2, &m, &n, &row, &col, &val, queue )); TESTING_CHECK( magma_zcsrset( m, n, row, col, val, &A3, queue )); TESTING_CHECK( magma_zmdiff( A, A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester IO: ok\n"); else printf("%% tester IO: failed\n"); 
TESTING_CHECK( magma_zmdiff( A, A3, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix interface: ok\n"); else printf("%% tester matrix interface: failed\n"); magma_zmfree(&A, queue ); magma_zmfree(&A2, queue ); magma_zmfree(&A4, queue ); magma_zmfree(&A5, queue ); i++; } magma_queue_destroy( queue ); TESTING_CHECK( magma_finalize() ); return info; }