extern "C" magma_int_t magma_dcumiccsetup( magma_d_matrix A, magma_d_preconditioner *precond, magma_queue_t queue ) { magma_int_t info = 0; cusparseHandle_t cusparseHandle=NULL; cusparseMatDescr_t descrA=NULL; cusparseMatDescr_t descrL=NULL; cusparseMatDescr_t descrU=NULL; #if CUDA_VERSION >= 7000 csric02Info_t info_M=NULL; void *pBuffer = NULL; #endif magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR}; CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue )); U.diagorder_type = Magma_VALUE; CHECK( magma_dmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue )); // in case using fill-in if( precond->levels > 0 ){ magma_d_matrix hAL={Magma_CSR}, hAUt={Magma_CSR}; CHECK( magma_dsymbilu( &hACSR, precond->levels, &hAL, &hAUt, queue )); magma_dmfree(&hAL, queue); magma_dmfree(&hAUt, queue); } CHECK( magma_dmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue )); magma_dmfree( &hACSR, queue ); CHECK( magma_dmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue )); // CUSPARSE context // CHECK_CUSPARSE( cusparseCreate( &cusparseHandle )); CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) )); // use kernel to manually check for zeros n the diagonal CHECK( magma_ddiagcheck( precond->M, queue ) ); #if CUDA_VERSION >= 7000 // this version has the bug fixed where a zero on the diagonal causes a crash CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) ); CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); int buffersize; int structural_zero; int numerical_zero; CHECK_CUSPARSE( cusparseDcsric02_bufferSize( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, &buffersize ) ); CHECK( magma_malloc((void**)&pBuffer, buffersize) ); 
CHECK_CUSPARSE( cusparseDcsric02_analysis( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer )); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) ); CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) ); CHECK_CUSPARSE( cusparseDcsric02( cusparseHandle, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) ); #else // this version contains the bug but is needed for backward compability CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); CHECK_CUSPARSE( cusparseDcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, descrA, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfo )); #endif CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL )); CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrL, precond->M.dval, precond->M.drow, precond->M.dcol, 
precond->cuinfoL )); CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU )); CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR )); CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT )); CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO )); CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER )); CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU )); CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows, precond->M.nnz, descrU, precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU )); if( precond->maxiter < 50 ){ //prepare for iterative solves // copy the matrix to precond->L and (transposed) to precond->U CHECK( magma_dmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue )); CHECK( magma_dmtranspose( precond->L, &(precond->U), queue )); // extract the diagonal of L into precond->d CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue )); CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); // extract the diagonal of U into precond->d2 CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue )); CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue )); } /* // to enable also the block-asynchronous iteration for the triangular solves CHECK( magma_dmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue )); hA.storage_type = Magma_CSR; magma_d_matrix hD, hR, hAt CHECK( magma_dcsrsplit( 256, hA, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue )); CHECK( magma_dmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); CHECK( magma_d_cucsrtranspose( hA, &hAt, queue )); CHECK( magma_dcsrsplit( 256, hAt, &hD, &hR, queue )); CHECK( magma_dmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue )); CHECK( 
magma_dmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&hD, queue ); magma_dmfree(&hR, queue ); magma_dmfree(&hA, queue ); magma_dmfree(&hAt, queue ); */ cleanup: #if CUDA_VERSION >= 7000 magma_free( pBuffer ); cusparseDestroyCsric02Info( info_M ); #endif cusparseDestroySolveAnalysisInfo( precond->cuinfo ); cusparseDestroyMatDescr( descrL ); cusparseDestroyMatDescr( descrU ); cusparseDestroyMatDescr( descrA ); cusparseDestroy( cusparseHandle ); magma_dmfree(&U, queue ); magma_dmfree(&hA, queue ); return info; }
/**
    Purpose
    -------
    Symbolic ILU with fill-in levels for a sparse matrix.

    For a CPU matrix in CSR format, the routine calls magma_dsymbolic_ilu to
    compute the sparsity patterns of L and U, initializes their values with
    the matching entries of A (all other entries are zero), and then replaces
    the column/value arrays of A by the row-wise concatenation of the L and U
    rows, with the values reset to the original entries of A where the
    positions match.
    For any other memory location or storage format, A is copied to the CPU,
    converted to CSR, processed recursively and converted/transferred back to
    its original format and location.

    Arguments
    ---------
    @param[in,out]  A       magma_d_matrix*, input matrix; on output carries
                            the extended sparsity pattern
    @param[in]      levels  magma_int_t, number of fill-in levels
    @param[out]     L       magma_d_matrix*, output lower factor pattern/values
    @param[out]     U       magma_d_matrix*, output upper factor pattern/values
    @param[in]      queue   magma_queue_t, queue to execute in

    @return  info, 0 on success. On failure L and U are freed before returning.
*/
magma_int_t
magma_dsymbilu(
    magma_d_matrix *A,
    magma_int_t levels,
    magma_d_matrix *L,
    magma_d_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_d_matrix A_copy={Magma_CSR}, B={Magma_CSR};
    magma_d_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR};

    if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){
        CHECK( magma_dmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue ));
        CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue ));

        // possibility to scale to unit diagonal
        //magma_dmscale( &B, Magma_UNITDIAG );

        CHECK( magma_dmconvert( B, L, Magma_CSR, Magma_CSR , queue));
        CHECK( magma_dmconvert( B, U, Magma_CSR, Magma_CSR, queue ));

        // sizes handed to the symbolic factorization; presumably an upper
        // bound on input that is overwritten with the actual count - TODO confirm
        magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;
        magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;

        // Replace the column arrays. Each pointer is reset right after the
        // free: if one of the allocations below fails, the other pointer must
        // not stay dangling, since the cleanup path frees L and U again.
        magma_free_cpu( L->col );
        L->col = NULL;
        magma_free_cpu( U->col );
        U->col = NULL;
        CHECK( magma_index_malloc_cpu( &L->col, num_lnnz ));
        CHECK( magma_index_malloc_cpu( &U->col, num_unnz ));

        magma_dsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col,
                                                L->row, L->col, U->row, U->col );
        L->nnz = num_lnnz;
        U->nnz = num_unnz;

        // replace the value arrays (same reasoning as for the column arrays)
        magma_free_cpu( L->val );
        L->val = NULL;
        magma_free_cpu( U->val );
        U->val = NULL;
        CHECK( magma_dmalloc_cpu( &L->val, L->nnz ));
        CHECK( magma_dmalloc_cpu( &U->val, U->nnz ));
        for( magma_int_t i=0; i<L->nnz; i++ ) {
            L->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );
        }
        for( magma_int_t i=0; i<U->nnz; i++ ) {
            U->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );
        }

        // take the original values (scaled) as initial guess for L
        for(magma_int_t i=0; i<L->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){
                    if( L->col[k] == lcol ){
                        L->val[k] = B.val[j];
                    }
                }
            }
        }

        // take the original values (scaled) as initial guess for U
        for(magma_int_t i=0; i<U->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){
                    if( U->col[k] == lcol ){
                        U->val[k] = B.val[j];
                    }
                }
            }
        }
        magma_dmfree( &B, queue );

        // fill A with the new structure;
        // the pointers are reset after the free so that the caller's matrix
        // never holds a dangling pointer if an allocation below fails
        magma_free_cpu( A->col );
        A->col = NULL;
        magma_free_cpu( A->val );
        A->val = NULL;
        CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz ));
        CHECK( magma_dmalloc_cpu( &A->val, L->nnz+U->nnz ));
        A->nnz = L->nnz+U->nnz;

        // row i of A is row i of L followed by row i of U
        magma_int_t z = 0;
        for(magma_int_t i=0; i<A->num_rows; i++){
            A->row[i] = z;
            for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                A->col[z] = L->col[j];
                A->val[z] = L->val[j];
                z++;
            }
            for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){
                A->col[z] = U->col[j];
                A->val[z] = U->val[j];
                z++;
            }
        }
        A->row[A->num_rows] = z;

        // reset the values of A to the original entries
        for(magma_int_t i=0; i<A->num_rows; i++){
            for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){
                magma_index_t lcol = A_copy.col[j];
                for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){
                    if( A->col[k] == lcol ){
                        A->val[k] = A_copy.val[j];
                    }
                }
            }
        }
    }
    else {
        // remember the original format/location so A can be restored to it
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue ));

        // CSRCOOA is a CPU CSR matrix, so this call takes the branch above
        CHECK( magma_dsymbilu( &CSRCOOA, levels, L, U, queue ));

        magma_dmfree( &hA, queue );
        magma_dmfree( A, queue );
        CHECK( magma_dmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue ));
        CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue ));
    }

cleanup:
    if( info != 0 ){
        magma_dmfree( L, queue );
        magma_dmfree( U, queue );
    }
    magma_dmfree( &A_copy, queue );
    magma_dmfree( &B, queue );
    magma_dmfree( &hA, queue );
    magma_dmfree( &CSRCOOA, queue );
    return info;
}