Exemplo n.º 1
0
magma_int_t
magma_dmLdiagadd(
    magma_d_matrix *L,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_d_matrix LL={Magma_CSR};

    if( L->row[1]==1 ){        // lower triangular with unit diagonal
        //printf("L lower triangular.\n");
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_dmconvert( *L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if( L->row[1]==0 ){ // strictly lower triangular
        //printf("L strictly lower triangular.\n");
        CHECK( magma_dmtransfer( *L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        magma_free_cpu( LL.val );
        LL.nnz = L->nnz+L->num_rows;
        CHECK( magma_dmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));
        magma_int_t z=0;
        for( magma_int_t i=0; i<L->num_rows; i++){
            LL.row[i] = z;
            for( magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                LL.val[z] = L->val[j];
                LL.col[z] = L->col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_D_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
        LL.nnz = z;
    }
    else{
        printf("error: L neither lower nor strictly lower triangular!\n");
    }
    magma_dmfree( L, queue );
    CHECK( magma_dmtransfer(LL, L, Magma_CPU, Magma_CPU, queue ));

cleanup:
    if( info != 0 ){
        magma_dmfree( L, queue );
    }
    magma_dmfree( &LL, queue );
    return info;
}
Exemplo n.º 2
0
magma_int_t
magma_dprint_vector(
    magma_d_matrix x,
    magma_int_t offset,
    magma_int_t  visulen,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_d_matrix y={Magma_CSR};
    
    //**************************************************************
    #define REAL
    double c_zero = MAGMA_D_ZERO;
    
    #ifdef COMPLEX
    #define magma_dprintval( tmp )       {                                  \
        if ( MAGMA_D_EQUAL( tmp, c_zero )) {                                \
            printf( "   0.              \n" );                                \
        }                                                                   \
        else {                                                              \
            printf( " %8.4f+%8.4fi\n",                                        \
                    MAGMA_D_REAL( tmp ), MAGMA_D_IMAG( tmp ));              \
        }                                                                   \
    }
    #else
    #define magma_dprintval( tmp )       {                                  \
        if ( MAGMA_D_EQUAL( tmp, c_zero )) {                                \
            printf( "   0.    \n" );                                          \
        }                                                                   \
        else {                                                              \
            printf( " %8.4f\n", MAGMA_D_REAL( tmp ));                         \
        }                                                                   \
    }
    #endif
    //**************************************************************
    
    printf("visualize entries %d - %d of vector ",
                    (int) offset, (int) (offset + visulen) );
    fflush(stdout);
    if ( x.memory_location == Magma_CPU ) {
        printf("located on CPU:\n");
        for( magma_int_t i=offset; i<offset + visulen; i++ )
            magma_dprintval(x.val[i]);
    }
    else if ( x.memory_location == Magma_DEV ) {
        printf("located on DEV:\n");
        CHECK( magma_dmtransfer( x, &y, Magma_DEV, Magma_CPU, queue ));
        for( magma_int_t i=offset; i<offset +  visulen; i++ )
            magma_dprintval(y.val[i]);


    }

cleanup:
    magma_free_cpu(y.val);
    return info;
}
Exemplo n.º 3
0
extern "C" magma_int_t
magma_dmshrink(
    magma_d_matrix A,
    magma_d_matrix *B,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, hB={Magma_CSR}, hBCSR={Magma_CSR};
     
    if( A.num_rows<=A.num_cols){
        if( A.memory_location == Magma_CPU && A.storage_type == Magma_CSR ){
            CHECK( magma_dmconvert( A, B, Magma_CSR, Magma_CSR, queue ));
            for(magma_int_t i=0; i<A.nnz; i++){
                if( B->col[i] >= A.num_rows ){
                    B->val[i] = MAGMA_D_ZERO;   
                }
            }
            CHECK( magma_dmcsrcompressor( B, queue ) );
            B->num_cols = B->num_rows;
        } else {
            CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
            CHECK( magma_dmconvert( hA, &hACSR, A.storage_type, Magma_CSR, queue ));
            CHECK( magma_dmshrink( hACSR, &hBCSR, queue ));
            CHECK( magma_dmconvert( hBCSR, &hB, Magma_CSR, A.storage_type, queue ));
            CHECK( magma_dmtransfer( hB, B, Magma_CPU, A.memory_location, queue ));
        }
    } else {
        printf("%% error: A has too many rows: m > n.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
cleanup:    
    magma_dmfree( &hA, queue );
    magma_dmfree( &hB, queue );
    magma_dmfree( &hACSR, queue );
    magma_dmfree( &hBCSR, queue );

    return info;
}
Exemplo n.º 4
0
extern "C" magma_int_t
magma_dmdiagadd(
    magma_d_matrix *A,
    double add,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_d_matrix hA={Magma_CSR}, CSRA={Magma_CSR};
    
    if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) {
        for( magma_int_t z=0; z<A->nnz; z++ ) {
            if ( A->col[z]== A->rowidx[z] ) {
                // add some identity matrix
                A->val[z] = A->val[z] +  add;
            }
        }
    }
    else {
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue ));

        CHECK( magma_dmdiagadd( &CSRA, add, queue ));

        magma_dmfree( &hA, queue );
        magma_dmfree( A, queue );
        CHECK( magma_dmconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue ));
        CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue ));
    }
    
cleanup:
    magma_dmfree( &hA, queue );
    magma_dmfree( &CSRA, queue );
    return info;
}
Exemplo n.º 5
0
magma_int_t
magma_dvget_dev(
    magma_d_matrix v,
    magma_int_t *m, magma_int_t *n,
    magmaDouble_ptr *val,
    magma_queue_t queue )
{
    magma_int_t info =0;
    
    magma_d_matrix v_DEV={Magma_CSR};
    
    if ( v.memory_location == Magma_DEV ) {
        *m = v.num_rows;
        *n = v.num_cols;
        *val = v.dval;
    } else {
        CHECK( magma_dmtransfer( v, &v_DEV, v.memory_location, Magma_DEV, queue ));
        CHECK( magma_dvget_dev( v_DEV, m, n, val, queue ));
    }
    
cleanup:
    magma_dmfree( &v_DEV, queue );
    return info;
}
Exemplo n.º 6
0
magma_int_t
magma_d_csr_mtx(
    magma_d_matrix *A,
    const char *filename,
    magma_queue_t queue )
{
    char buffer[ 1024 ];
    magma_int_t info = 0;

    int csr_compressor = 0;       // checks for zeros in original file
    
    magma_d_matrix B={Magma_CSR};

    magma_index_t *coo_col = NULL;
    magma_index_t *coo_row = NULL;
    double *coo_val = NULL;
    double *new_val = NULL;
    magma_index_t* new_row = NULL;
    magma_index_t* new_col = NULL;
    magma_int_t symmetric = 0;
    
    std::vector< std::pair< magma_index_t, double > > rowval;
    
    FILE *fid = NULL;
    MM_typecode matcode;
    fid = fopen(filename, "r");
    
    if (fid == NULL) {
        printf("%% Unable to open file %s\n", filename);
        info = MAGMA_ERR_NOT_FOUND;
        goto cleanup;
    }
    
    printf("%% Reading sparse matrix from file (%s):", filename);
    fflush(stdout);
    
    if (mm_read_banner(fid, &matcode) != 0) {
        printf("\n%% Could not process Matrix Market banner: %s.\n", matcode);
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if (!mm_is_valid(matcode)) {
        printf("\n%% Invalid Matrix Market file.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if ( ! ( ( mm_is_real(matcode)    ||
               mm_is_integer(matcode) ||
               mm_is_pattern(matcode) ||
               mm_is_real(matcode) ) &&
             mm_is_coordinate(matcode)  &&
             mm_is_sparse(matcode) ) )
    {
        mm_snprintf_typecode( buffer, sizeof(buffer), matcode );
        printf("\n%% Sorry, MAGMA-sparse does not support Market Market type: [%s]\n", buffer );
        printf("%% Only real-valued or pattern coordinate matrices are supported.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    magma_index_t num_rows, num_cols, num_nonzeros;
    if (mm_read_mtx_crd_size(fid, &num_rows, &num_cols, &num_nonzeros) != 0) {
        info = MAGMA_ERR_UNKNOWN;
        goto cleanup;
    }
    
    A->storage_type    = Magma_CSR;
    A->memory_location = Magma_CPU;
    A->num_rows        = num_rows;
    A->num_cols        = num_cols;
    A->nnz             = num_nonzeros;
    A->fill_mode       = MagmaFull;
    
    CHECK( magma_index_malloc_cpu( &coo_col, A->nnz ) );
    CHECK( magma_index_malloc_cpu( &coo_row, A->nnz ) );
    CHECK( magma_dmalloc_cpu( &coo_val, A->nnz ) );

    if (mm_is_real(matcode) || mm_is_integer(matcode)) {
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            double VAL;  // always read in a double and convert later if necessary
            
            fscanf(fid, " %d %d %lf \n", &ROW, &COL, &VAL);
            if ( VAL == 0 )
                csr_compressor = 1;
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( VAL, 0.);
        }
    } else if (mm_is_pattern(matcode) ) {
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            
            fscanf(fid, " %d %d \n", &ROW, &COL );
            
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( 1.0, 0.);
        }
    } else if (mm_is_real(matcode) ){
       for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            double VAL, VALC;  // always read in a double and convert later if necessary
            
            fscanf(fid, " %d %d %lf %lf\n", &ROW, &COL, &VAL, &VALC);
            
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( VAL, VALC);
        }
        // printf(" ...successfully read real matrix... ");
    } else {
        printf("\n%% Unrecognized data type\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    fclose(fid);
    fid = NULL;
    printf(" done. Converting to CSR:");
    fflush(stdout);
    
    A->sym = Magma_GENERAL;


    if( mm_is_symmetric(matcode) ) {
        symmetric = 1;
    }
    if ( mm_is_symmetric(matcode) || mm_is_symmetric(matcode) ) { 
                                        // duplicate off diagonal entries
        printf("\n%% Detected symmetric case.");
        A->sym = Magma_SYMMETRIC;
        magma_index_t off_diagonals = 0;
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            if (coo_row[i] != coo_col[i])
                ++off_diagonals;
        }
        magma_index_t true_nonzeros = 2*off_diagonals + (A->nnz - off_diagonals);
        
        //printf("%% total number of nonzeros: %d\n%%", int(A->nnz));

        CHECK( magma_index_malloc_cpu( &new_row, true_nonzeros ));
        CHECK( magma_index_malloc_cpu( &new_col, true_nonzeros ));
        CHECK( magma_dmalloc_cpu( &new_val, true_nonzeros ));
        
        magma_index_t ptr = 0;
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            if (coo_row[i] != coo_col[i]) {
                new_row[ptr] = coo_row[i];
                new_col[ptr] = coo_col[i];
                new_val[ptr] = coo_val[i];
                ptr++;
                new_col[ptr] = coo_row[i];
                new_row[ptr] = coo_col[i];
                new_val[ptr] = (symmetric == 0) ? coo_val[i] : conj(coo_val[i]);
                ptr++;
            } else {
                new_row[ptr] = coo_row[i];
                new_col[ptr] = coo_col[i];
                new_val[ptr] = coo_val[i];
                ptr++;
            }
        }
        
        magma_free_cpu(coo_row);
        magma_free_cpu(coo_col);
        magma_free_cpu(coo_val);

        coo_row = new_row;
        coo_col = new_col;
        coo_val = new_val;
        A->nnz = true_nonzeros;
        //printf("total number of nonzeros: %d\n", A->nnz);
    } // end symmetric case
    
    CHECK( magma_dmalloc_cpu( &A->val, A->nnz ));
    CHECK( magma_index_malloc_cpu( &A->col, A->nnz ));
    CHECK( magma_index_malloc_cpu( &A->row, A->num_rows+1 ));
    
    // original code from Nathan Bell and Michael Garland
    for (magma_index_t i = 0; i < num_rows; i++)
        (A->row)[i] = 0;
    
    for (magma_index_t i = 0; i < A->nnz; i++)
        (A->row)[coo_row[i]]++;
        
    // cumulative sum the nnz per row to get row[]
    magma_int_t cumsum;
    cumsum = 0;
    for(magma_int_t i = 0; i < num_rows; i++) {
        magma_index_t temp = (A->row)[i];
        (A->row)[i] = cumsum;
        cumsum += temp;
    }
    (A->row)[num_rows] = A->nnz;
    
    // write Aj,Ax into Bj,Bx
    for(magma_int_t i = 0; i < A->nnz; i++) {
        magma_index_t row_ = coo_row[i];
        magma_index_t dest = (A->row)[row_];
        (A->col)[dest] = coo_col[i];
        (A->val)[dest] = coo_val[i];
        (A->row)[row_]++;
    }    
    magma_free_cpu(coo_row);
    magma_free_cpu(coo_col);
    magma_free_cpu(coo_val);
    coo_row = NULL;
    coo_col = NULL;
    coo_val = NULL;

    int last;
    last = 0;
    for(int i = 0; i <= num_rows; i++) {
        int temp    = (A->row)[i];
        (A->row)[i] = last;
        last        = temp;
    }
    (A->row)[A->num_rows] = A->nnz;
    
    // sort column indices within each row
    // copy into vector of pairs (column index, value), sort by column index, then copy back
    for (magma_index_t k=0; k < A->num_rows; ++k) {
        int kk  = (A->row)[k];
        int len = (A->row)[k+1] - (A->row)[k];
        rowval.resize( len );
        for( int i=0; i < len; ++i ) {
            rowval[i] = std::make_pair( (A->col)[kk+i], (A->val)[kk+i] );
        }
        std::sort( rowval.begin(), rowval.end(), compare_first );
        for( int i=0; i < len; ++i ) {
            (A->col)[kk+i] = rowval[i].first;
            (A->val)[kk+i] = rowval[i].second;
        }
    }

    if ( csr_compressor > 0) { // run the CSR compressor to remove zeros
        //printf("removing zeros: ");
        CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue ));
        CHECK( magma_d_csr_compressor(
            &(A->val), &(A->row), &(A->col),
            &B.val, &B.row, &B.col, &B.num_rows, queue ));
        B.nnz = B.row[num_rows];
        //printf(" remaining nonzeros:%d ", B.nnz);
        magma_free_cpu( A->val );
        magma_free_cpu( A->row );
        magma_free_cpu( A->col );
        CHECK( magma_dmtransfer( B, A, Magma_CPU, Magma_CPU, queue ));
        //printf("done.\n");
    }
    A->true_nnz = A->nnz;
    printf(" done.\n");
cleanup:
    if ( fid != NULL ) {
        fclose( fid );
        fid = NULL;
    }
    magma_dmfree( &B, queue );
    magma_free_cpu(coo_row);
    magma_free_cpu(coo_col);
    magma_free_cpu(coo_val);
    return info;
}
Exemplo n.º 7
0
magma_int_t
magma_dprint_matrix(
    magma_d_matrix A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    //**************************************************************
    #define REAL
    
    #ifdef COMPLEX
    #define magma_dprintval( tmp )       {                                  \
        if ( MAGMA_D_EQUAL( tmp, c_zero )) {                                \
            printf( "   0.              " );                                \
        }                                                                   \
        else {                                                              \
            printf( " %8.4f+%8.4fi",                                        \
                    MAGMA_D_REAL( tmp ), MAGMA_D_IMAG( tmp ));              \
        }                                                                   \
    }
    #else
    #define magma_dprintval( tmp )       {                                  \
        if ( MAGMA_D_EQUAL( tmp, c_zero )) {                                \
            printf( "   0.    " );                                          \
        }                                                                   \
        else {                                                              \
            printf( " %8.4f", MAGMA_D_REAL( tmp ));                         \
        }                                                                   \
    }
    #endif
    //**************************************************************
    
    magma_index_t i, j, k;
    double c_zero = MAGMA_D_ZERO;
    magma_d_matrix C={Magma_CSR};

    if ( A.memory_location == Magma_CPU ) {
        printf("visualizing matrix of size %d x %d with %d nonzeros:\n",
            int(A.num_rows), int(A.num_cols), int(A.nnz));
        if ( A.storage_type == Magma_DENSE ) {
            for( i=0; i < (A.num_rows); i++ ) {
                for( j=0; j < A.num_cols; j++ ) {
                    magma_dprintval( A.val[i*(A.num_cols)+j] );
                }
                printf( "\n" );
            }
        }
        else if( A.num_cols < 8 || A.num_rows < 8 ) { 
            CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_DENSE, queue ));
            CHECK( magma_dprint_matrix(  C, queue ));
        }
        else if ( A.storage_type == Magma_CSR ) {
            // visualize only small matrices like dense
            if ( A.num_rows < 11 && A.num_cols < 11 ) {
                CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_DENSE, queue ));
                CHECK( magma_dprint_matrix(  C, queue ));
                magma_dmfree( &C, queue );
            }
            // otherwise visualize only coners
            else {
                // 4 beginning and 4 last elements of first four rows
                for( i=0; i < 4; i++ ) {
                    // upper left corner
                    for( j=0; j < 4; j++ ) {
                        double tmp = MAGMA_D_ZERO;
                        magma_index_t rbound = min( A.row[i]+4, A.row[i+1]);
                        magma_index_t lbound = max( A.row[i], A.row[i]);
                        for( k=lbound; k < rbound; k++ ) {
                            if ( A.col[k] == j ) {
                                tmp = A.val[k];
                            }
                        }
                        magma_dprintval( tmp );
                    }
                    if ( i == 0 ) {
                        printf( "    . . .    " );
                    } else {
                        printf( "             " );
                    }
                    // upper right corner
                    for( j=A.num_cols-4; j < A.num_cols; j++ ) {
                        double tmp = MAGMA_D_ZERO;
                        magma_index_t rbound = min( A.row[i+1], A.row[i+1]);
                        magma_index_t lbound = max( A.row[i+1]-4, A.row[i]);
                        for( k=lbound; k < rbound; k++ ) {
                            if ( A.col[k] == j ) {
                                tmp = A.val[k];
                            }
                        }
                        magma_dprintval( tmp );
                    }
                    printf( "\n");
                }
                printf( "     .                     .         .         .\n"
                        "     .                         .         .         .\n"
                        "     .                             .         .         .\n"
                        "     .                                 .         .         .\n" );
                for( i=A.num_rows-4; i < A.num_rows; i++ ) {
                    // lower left corner
                    for( j=0; j < 4; j++ ) {
                        double tmp = MAGMA_D_ZERO;
                        magma_index_t rbound = min( A.row[i]+4, A.row[i+1]);
                        magma_index_t lbound = max( A.row[i], A.row[i]);
                        for( k=lbound; k < rbound; k++ ) {
                            if ( A.col[k] == j ) {
                                tmp = A.val[k];
                            }
                        }
                        magma_dprintval( tmp );
                    }
                    printf( "             ");
                    // lower right corner
                    for( j=A.num_cols-4; j < A.num_cols; j++ ) {
                        double tmp = MAGMA_D_ZERO;
                        magma_index_t rbound = min( A.row[i+1], A.row[i+1]);
                        magma_index_t lbound = max( A.row[i+1]-4, A.row[i]);
                        for( k=lbound; k < rbound; k++ ) {
                            if ( A.col[k] == j ) {
                                tmp = A.val[k];
                            }
                        }
                        magma_dprintval( tmp );
                    }
                    printf( "\n");
                }
            }
        }
        else {
            CHECK( magma_dmconvert( A, &C, A.storage_type, Magma_CSR, queue ));
            CHECK( magma_dprint_matrix(  C, queue ));
        }
    }
    else {
        //magma_d_matrix C={Magma_CSR};
        CHECK( magma_dmtransfer( A, &C, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_dprint_matrix(  C, queue ));
    }

cleanup:
    magma_dmfree( &C, queue );
    return info;
}
Exemplo n.º 8
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main(  int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_dopts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );
    
    real_Double_t res;
    magma_d_matrix Z={Magma_CSR}, A={Magma_CSR}, AT={Magma_CSR}, 
    A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR};
    
    magma_index_t *comm_i=NULL;
    double *comm_v=NULL;
    magma_int_t start, end;
    
    int i=1;
    CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_dm_5stencil(  laplace_size, &Z, queue ));
        } else {                        // file-matrix test
            CHECK( magma_d_csr_mtx( &Z,  argv[i], queue ));
        }

        printf("%% matrix info: %d-by-%d with %d nonzeros\n",
                            int(Z.num_rows), int(Z.num_cols), int(Z.nnz) );
        
        // slice matrix
        CHECK( magma_index_malloc_cpu( &comm_i, Z.num_rows ) );
        CHECK( magma_dmalloc_cpu( &comm_v, Z.num_rows ) );
        
        CHECK( magma_dmslice( 1, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );

        CHECK( magma_dmslice( 9, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );
        
        CHECK( magma_dmslice( 9, 1, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );

        CHECK( magma_dmslice( 9, 8, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );
        
        
        // scale matrix
        CHECK( magma_dmscale( &Z, zopts.scaling, queue ));

        // remove nonzeros in matrix
        CHECK( magma_dmcsrcompressor( &Z, queue ));
        
        // convert to be non-symmetric
        CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ));
        
        // transpose
        CHECK( magma_dmtranspose( A, &AT, queue ));

        // convert, copy back and forth to check everything works

        CHECK( magma_dmconvert( AT, &B, Magma_CSR, zopts.output_format, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));
        magma_dmfree(&B, queue );
        CHECK( magma_dmcsrcompressor_gpu( &B_d, queue ));
        CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ));
        magma_dmfree(&B_d, queue );
        CHECK( magma_dmconvert( B, &AT, zopts.output_format,Magma_CSR, queue ));
        magma_dmfree(&B, queue );

        // transpose back
        CHECK( magma_dmtranspose( AT, &A2, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmdiff( A, A2, &res, queue));
        printf("%% ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("%% tester:  ok\n");
        else
            printf("%% tester:  failed\n");
        
        magma_free_cpu( comm_i );
        magma_free_cpu( comm_v );
        comm_i=NULL;
        comm_v=NULL;
        magma_dmfree(&A, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&Z, queue );

        i++;
    }

cleanup:
    magma_free_cpu( comm_i );
    magma_free_cpu( comm_v );
    magma_dmfree(&AT, queue );
    magma_dmfree(&A, queue );
    magma_dmfree(&B, queue );
    magma_dmfree(&B_d, queue );
    magma_dmfree(&A2, queue );
    magma_dmfree(&Z, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
Exemplo n.º 9
0
magma_int_t
magma_dinitguess(
    magma_d_matrix A,
    magma_d_matrix *L,
    magma_d_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    double one = MAGMA_D_MAKE( 1.0, 0.0 );
    
    magma_d_matrix hAL={Magma_CSR}, hAU={Magma_CSR}, dAL={Magma_CSR}, 
    dAU={Magma_CSR}, dALU={Magma_CSR}, hALU={Magma_CSR}, hD={Magma_CSR}, 
    dD={Magma_CSR}, dL={Magma_CSR}, hL={Magma_CSR};
    magma_int_t i,j;
    
    magma_int_t offdiags = 0;
    magma_index_t *diag_offset;
    double *diag_vals=NULL;

    // need only lower triangular
    hAL.diagorder_type = Magma_VALUE;
    CHECK( magma_dmconvert( A, &hAL, Magma_CSR, Magma_CSRL, queue ));
    //magma_dmconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );

    // need only upper triangular
    //magma_dmconvert( A, &hAU, Magma_CSR, Magma_CSRU );
    CHECK( magma_d_cucsrtranspose(  hAL, &hAU, queue ));
    //magma_dmconvert( hAU, &hAUCOO, Magma_CSR, Magma_CSRCOO );
    CHECK( magma_dmtransfer( hAL, &dAL, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_d_spmm( one, dAL, dAU, &dALU, queue ));
    CHECK( magma_dmtransfer( dALU, &hALU, Magma_DEV, Magma_CPU, queue ));

    magma_dmfree( &dAU, queue);
    magma_dmfree( &dALU, queue);


    CHECK( magma_dmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));
    diag_offset[0] = 0;
    diag_vals[0] = MAGMA_D_MAKE( 1.0, 0.0 );
    CHECK( magma_dmgenerator( hALU.num_rows, offdiags, diag_offset, diag_vals, &hD, queue ));
    magma_dmfree( &hALU, queue );

    
    for(i=0; i<hALU.num_rows; i++){
        for(j=hALU.row[i]; j<hALU.row[i+1]; j++){
            if( hALU.col[j] == i ){
                //printf("%d %d  %d == %d -> %f   -->", i, j, hALU.col[j], i, hALU.val[j]);
                hD.val[i] = MAGMA_D_MAKE(
                        1.0 / sqrt(fabs(MAGMA_D_REAL(hALU.val[j])))  , 0.0 );
                //printf("insert %f at %d\n", hD.val[i], i);
            }
        }
    }


    CHECK( magma_dmtransfer( hD, &dD, Magma_CPU, Magma_DEV, queue ));
    magma_dmfree( &hD, queue);

    CHECK( magma_d_spmm( one, dD, dAL, &dL, queue ));
    magma_dmfree( &dAL, queue );
    magma_dmfree( &dD, queue );



/*
    // check for diagonal = 1
    magma_d_matrix dLt={Magma_CSR}, dLL={Magma_CSR}, LL={Magma_CSR};
    CHECK( magma_d_cucsrtranspose(  dL, &dLt ));
    CHECK( magma_dcuspmm( dL, dLt, &dLL ));
    CHECK( magma_dmtransfer( dLL, &LL, Magma_DEV, Magma_CPU ));
    //for(i=0; i < hALU.num_rows; i++) {
    for(i=0; i < 100; i++) {
        for(j=hALU.row[i]; j < hALU.row[i+1]; j++) {
            if( hALU.col[j] == i ){
                printf("%d %d -> %f   -->", i, i, LL.val[j]);
            }
        }
    }
*/
    CHECK( magma_dmtransfer( dL, &hL, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_dmconvert( hL, L, Magma_CSR, Magma_CSRCOO, queue ));



cleanup:
    if( info !=0 ){
        magma_dmfree( L, queue  );
        magma_dmfree( U, queue  );
    }
    magma_dmfree( &dAU, queue);
    magma_dmfree( &dALU, queue);
    magma_dmfree( &dL, queue );
    magma_dmfree( &hL, queue );
    magma_dmfree( &dAL, queue );
    magma_dmfree( &dD, queue );
    magma_dmfree( &hD, queue);
    magma_dmfree( &hALU, queue );
    return info;
}
Exemplo n.º 10
0
magma_int_t
magma_dicres(
    magma_d_matrix A,
    magma_d_matrix C,
    magma_d_matrix CT,
    magma_d_matrix *LU,
    real_Double_t *res,
    real_Double_t *nonlinres,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    double tmp;
    real_Double_t tmp2;
    magma_int_t i,j,k;

    double one = MAGMA_D_MAKE( 1.0, 0.0 );
    
    magma_d_matrix L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR};
    
    *res = 0.0;
    *nonlinres = 0.0;

    CHECK( magma_dmtransfer( C, &L_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_dmtransfer( CT, &U_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue ));
    CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue ));

    magma_dmfree( &LU_d, queue );

    // compute Frobenius norm of A-LU
    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            magma_index_t lcol = A.col[j];
            for(k=LU->row[i]; k<LU->row[i+1]; k++){
                if( LU->col[k] == lcol ){

                    tmp = MAGMA_D_MAKE(
                        MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] )
                                                , 0.0 );
                    LU->val[k] = tmp;

                    tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(tmp) );
                    (*nonlinres) = (*nonlinres) + tmp2*tmp2;
                }
            }
        }
    }

    for(i=0; i<LU->num_rows; i++){
        for(j=LU->row[i]; j<LU->row[i+1]; j++){
            tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(LU->val[j]) );
            (*res) = (*res) + tmp2* tmp2;
        }
    }


    (*res) =  sqrt((*res));
    (*nonlinres) =  sqrt((*nonlinres));

cleanup:
    if( info !=0 ){
        magma_dmfree( LU, queue  );
    }
    magma_dmfree( &L_d, queue  );
    magma_dmfree( &U_d, queue  );
    magma_dmfree( &LU_d, queue  );
    return info;
}
Exemplo n.º 11
0
extern "C" magma_int_t
magma_dpbicg(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_d_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_PBICG;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;

    // some useful variables
    double c_zero = MAGMA_D_ZERO;
    double c_one  = MAGMA_D_ONE;
    double c_neg_one = MAGMA_D_NEG_ONE;
    
    magma_int_t dofs = A.num_rows * b.num_cols;

    // workspace
    magma_d_matrix r={Magma_CSR}, rt={Magma_CSR}, p={Magma_CSR}, pt={Magma_CSR}, 
                z={Magma_CSR}, zt={Magma_CSR}, q={Magma_CSR}, y={Magma_CSR}, 
                yt={Magma_CSR},  qt={Magma_CSR};
                
    // need to transpose the matrix
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &rt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &qt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &yt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver variables
    double alpha, rho, beta, rho_new, ptq;
    double res, nomb, nom0, r0;

        // transpose the matrix
    magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    
    // solver setup
    CHECK(  magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    res = nom0;
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, rt.dval, 1, queue );                  // rr = r
    rho_new = magma_ddot( dofs, rt.dval, 1, r.dval, 1, queue );             // rho=<rr,r>
    rho = alpha = MAGMA_D_MAKE( 1.0, 0. );

    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );


    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;

        CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, r, &y, precond_par, queue ));
        CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, y, &z, precond_par, queue ));
        CHECK( magma_d_applyprecond_right( MagmaTrans, A, rt, &yt, precond_par, queue ));
        CHECK( magma_d_applyprecond_left( MagmaTrans, A, yt, &zt, precond_par, queue ));
        //magma_dcopy( dofs, r.dval, 1 , y.dval, 1, queue );             // y=r
        //magma_dcopy( dofs, y.dval, 1 , z.dval, 1, queue );             // z=y
        //magma_dcopy( dofs, rt.dval, 1 , yt.dval, 1, queue );           // yt=rt
        //magma_dcopy( dofs, yt.dval, 1 , zt.dval, 1, queue );           // yt=rt
        
        rho= rho_new;
        rho_new = magma_ddot( dofs, rt.dval, 1, z.dval, 1, queue );  // rho=<rt,z>
        if( magma_d_isnan_inf( rho_new ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        if( solver_par->numiter==1 ){
            magma_dcopy( dofs, z.dval, 1 , p.dval, 1, queue );           // yt=rt
            magma_dcopy( dofs, zt.dval, 1 , pt.dval, 1, queue );           // zt=yt
        } else {
            beta = rho_new/rho;
            magma_dscal( dofs, beta, p.dval, 1, queue );                 // p = beta*p
            magma_daxpy( dofs, c_one , z.dval, 1 , p.dval, 1, queue );   // p = z+beta*p
            magma_dscal( dofs, MAGMA_D_CONJ(beta), pt.dval, 1, queue );   // pt = beta*pt
            magma_daxpy( dofs, c_one , zt.dval, 1 , pt.dval, 1, queue );  // pt = zt+beta*pt
        }
        CHECK( magma_d_spmv( c_one, A, p, c_zero, q, queue ));      // v = Ap
        CHECK( magma_d_spmv( c_one, AT, pt, c_zero, qt, queue ));   // v = Ap
        solver_par->spmv_count++;
        solver_par->spmv_count++;
        ptq = magma_ddot( dofs, pt.dval, 1, q.dval, 1, queue );
        alpha = rho_new /ptq;
        
        
        magma_daxpy( dofs, alpha, p.dval, 1 , x->dval, 1, queue );                // x=x+alpha*p
        magma_daxpy( dofs, c_neg_one * alpha, q.dval, 1 , r.dval, 1, queue );     // r=r+alpha*q
        magma_daxpy( dofs, c_neg_one * MAGMA_D_CONJ(alpha), qt.dval, 1 , rt.dval, 1, queue );     // r=r+alpha*q

        res = magma_dnrm2( dofs, r.dval, 1, queue );

        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK(  magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose==0 ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&rt, queue );
    magma_dmfree(&p, queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&q, queue );
    magma_dmfree(&qt, queue );
    magma_dmfree(&y, queue );
    magma_dmfree(&yt, queue );
    magma_dmfree(&z, queue );
    magma_dmfree(&zt, queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    solver_par->info = info;
    return info;
}   /* magma_dpbicg */
Exemplo n.º 12
0
extern "C" magma_int_t
magma_dmscale(
    magma_d_matrix *A,
    magma_scale_t scaling,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    double *tmp=NULL;
    
    magma_d_matrix hA={Magma_CSR}, CSRA={Magma_CSR};
    
    if( A->num_rows != A->num_cols && scaling != Magma_NOSCALE ){
        printf("%% warning: non-square matrix.\n");
        printf("%% Fallback: no scaling.\n");
        scaling = Magma_NOSCALE;
    } 
        
   
    if ( A->memory_location == Magma_CPU && A->storage_type == Magma_CSRCOO ) {
        if ( scaling == Magma_NOSCALE ) {
            // no scale
            ;
        }
        else if( A->num_rows == A->num_cols ){
            if ( scaling == Magma_UNITROW ) {
                // scale to unit rownorm
                CHECK( magma_dmalloc_cpu( &tmp, A->num_rows ));
                for( magma_int_t z=0; z<A->num_rows; z++ ) {
                    double s = MAGMA_D_MAKE( 0.0, 0.0 );
                    for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ )
                        s+= MAGMA_D_REAL(A->val[f])*MAGMA_D_REAL(A->val[f]);
                    tmp[z] = MAGMA_D_MAKE( 1.0/sqrt(  MAGMA_D_REAL( s )  ), 0.0 );
                }        
                for( magma_int_t z=0; z<A->nnz; z++ ) {
                    A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]];
                }
            }
            else if (scaling == Magma_UNITDIAG ) {
                // scale to unit diagonal
                CHECK( magma_dmalloc_cpu( &tmp, A->num_rows ));
                for( magma_int_t z=0; z<A->num_rows; z++ ) {
                    double s = MAGMA_D_MAKE( 0.0, 0.0 );
                    for( magma_int_t f=A->row[z]; f<A->row[z+1]; f++ ) {
                        if ( A->col[f]== z ) {
                            // add some identity matrix
                            //A->val[f] = A->val[f] +  MAGMA_D_MAKE( 100000.0, 0.0 );
                            s = A->val[f];
                        }
                    }
                    if ( s == MAGMA_D_MAKE( 0.0, 0.0 ) ){
                        printf("%%error: zero diagonal element.\n");
                        info = MAGMA_ERR;
                    }
                    tmp[z] = MAGMA_D_MAKE( 1.0/sqrt(  MAGMA_D_REAL( s )  ), 0.0 );
                }
                for( magma_int_t z=0; z<A->nnz; z++ ) {
                    A->val[z] = A->val[z] * tmp[A->col[z]] * tmp[A->rowidx[z]];
                }
            }
            else {
                printf( "%%error: scaling not supported.\n" );
                info = MAGMA_ERR_NOT_SUPPORTED;
            }
        }
        else {
            printf( "%%error: scaling not supported.\n" );
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &CSRA, hA.storage_type, Magma_CSRCOO, queue ));

        CHECK( magma_dmscale( &CSRA, scaling, queue ));

        magma_dmfree( &hA, queue );
        magma_dmfree( A, queue );
        CHECK( magma_dmconvert( CSRA, &hA, Magma_CSRCOO, A_storage, queue ));
        CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue ));
    }
    
cleanup:
    magma_free_cpu( tmp );
    magma_dmfree( &hA, queue );
    magma_dmfree( &CSRA, queue );
    return info;
}
Exemplo n.º 13
0
extern "C" magma_int_t
magma_dqmr_merge(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_QMRMERGE;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
    // solver variables
    double nom0, r0, res=0, nomb;
    double rho = c_one, rho1 = c_one, eta = -c_one , pds = c_one, 
                        thet = c_one, thet1 = c_one, epsilon = c_one, 
                        beta = c_one, delta = c_one, pde = c_one, rde = c_one,
                        gamm = c_one, gamm1 = c_one, psi = c_one;
    
    magma_int_t dofs = A.num_rows* b.num_cols;

    // need to transpose the matrix
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    // GPU workspace
    magma_d_matrix r={Magma_CSR}, r_tld={Magma_CSR},
                    v={Magma_CSR}, w={Magma_CSR}, wt={Magma_CSR},
                    d={Magma_CSR}, s={Magma_CSR}, z={Magma_CSR}, q={Magma_CSR}, 
                    p={Magma_CSR}, pt={Magma_CSR}, y={Magma_CSR};
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &r_tld, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &v, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &w, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &wt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &d, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &s, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &p, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &pt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &y, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // solver setup
    CHECK(  magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    magma_dcopy( dofs, r.dval, 1, r_tld.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, y.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, v.dval, 1, queue );  
    magma_dcopy( dofs, r.dval, 1, wt.dval, 1, queue );   
    magma_dcopy( dofs, r.dval, 1, z.dval, 1, queue );  
    
    // transpose the matrix
    magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    
    nomb = magma_dnrm2( dofs, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ));
    rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));
    
        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
    magma_dqmr_1(  
    r.num_rows, 
    r.num_cols, 
    rho,
    psi,
    y.dval, 
    z.dval,
    v.dval,
    w.dval,
    queue );
    
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
 
            // delta = z' * y;
        delta = magma_ddot( dofs, z.dval, 1, y.dval, 1, queue );
        
        if( magma_d_isnan_inf( delta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        // no precond: yt = y, zt = z
        //magma_dcopy( dofs, y.dval, 1, yt.dval, 1 );
        //magma_dcopy( dofs, z.dval, 1, zt.dval, 1 );
        
        if( solver_par->numiter == 1 ){
                // p = y;
                // q = z;
            magma_dcopy( dofs, y.dval, 1, p.dval, 1, queue );
            magma_dcopy( dofs, z.dval, 1, q.dval, 1, queue );
        }
        else{
            pde = psi * delta / epsilon;
            rde = rho * MAGMA_D_CONJ(delta/epsilon);
            
                // p = y - pde * p
                // q = z - rde * q
            magma_dqmr_2(  
            r.num_rows, 
            r.num_cols, 
            pde,
            rde,
            y.dval,
            z.dval,
            p.dval, 
            q.dval, 
            queue );
        }
        if( magma_d_isnan_inf( rho ) || magma_d_isnan_inf( psi ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        CHECK( magma_d_spmv( c_one, A, p, c_zero, pt, queue ));
        solver_par->spmv_count++;
            // epsilon = q' * pt;
        epsilon = magma_ddot( dofs, q.dval, 1, pt.dval, 1, queue );
        beta = epsilon / delta;

        if( magma_d_isnan_inf( epsilon ) || magma_d_isnan_inf( beta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
            // v = pt - beta * v
            // y = v
        magma_dqmr_3(  
        r.num_rows, 
        r.num_cols, 
        beta,
        pt.dval,
        v.dval,
        y.dval,
        queue );
        
        
        rho1 = rho;      
            // rho = norm(y);
        rho = magma_dsqrt( magma_ddot( dofs, y.dval, 1, y.dval, 1, queue ));
        
            // wt = A' * q - beta' * w;
        CHECK( magma_d_spmv( c_one, AT, q, c_zero, wt, queue ));
        solver_par->spmv_count++;
        magma_daxpy( dofs, - MAGMA_D_CONJ( beta ), w.dval, 1, wt.dval, 1, queue );  
        
                    // no precond: z = wt
        magma_dcopy( dofs, wt.dval, 1, z.dval, 1, queue );
        


        thet1 = thet;        
        thet = rho / (gamm * MAGMA_D_MAKE( MAGMA_D_ABS(beta), 0.0 ));
        gamm1 = gamm;        
        
        gamm = c_one / magma_dsqrt(c_one + thet*thet);        
        eta = - eta * rho1 * gamm * gamm / (beta * gamm1 * gamm1);        

        if( magma_d_isnan_inf( thet ) || magma_d_isnan_inf( gamm ) || magma_d_isnan_inf( eta ) ){
            info = MAGMA_DIVERGENCE;
            break;
        }
        
        if( solver_par->numiter == 1 ){
            
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_dqmr_4(  
            r.num_rows, 
            r.num_cols, 
            eta,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
        else{

            pds = (thet1 * gamm) * (thet1 * gamm);
            
                // d = eta * p + pds * d;
                // s = eta * pt + pds * d;
                // x = x + d;
                // r = r - s;
            magma_dqmr_5(  
            r.num_rows, 
            r.num_cols, 
            eta,
            pds,
            p.dval,
            pt.dval,
            d.dval, 
            s.dval, 
            x->dval, 
            r.dval, 
            queue );
        }
            // psi = norm(z);
        psi = magma_dsqrt( magma_ddot( dofs, z.dval, 1, z.dval, 1, queue ) );
        
        res = magma_dnrm2( dofs, r.dval, 1, queue );
        
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        
        // v = y / rho
        // y = y / rho
        // w = wt / psi
        // z = z / psi
        magma_dqmr_1(  
        r.num_rows, 
        r.num_cols, 
        rho,
        psi,
        y.dval, 
        z.dval,
        v.dval,
        w.dval,
        queue );

        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            break;
        }
 
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK(  magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&r_tld, queue );
    magma_dmfree(&v,  queue );
    magma_dmfree(&w,  queue );
    magma_dmfree(&wt, queue );
    magma_dmfree(&d,  queue );
    magma_dmfree(&s,  queue );
    magma_dmfree(&z,  queue );
    magma_dmfree(&q,  queue );
    magma_dmfree(&p,  queue );
    magma_dmfree(&pt, queue );
    magma_dmfree(&y,  queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );


    
    solver_par->info = info;
    return info;
}   /* magma_dqmr_merge */
Exemplo n.º 14
0
magma_int_t
magma_dcustomilusetup(
    magma_d_matrix A,
    magma_d_matrix b,
    magma_d_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;
    
    magma_d_matrix hA={Magma_CSR};
    char preconditionermatrix[255];
    
    // first L
    snprintf( preconditionermatrix, sizeof(preconditionermatrix),
                "precondL.mtx" );
    
    CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) );
    CHECK( magma_dmtransfer( hA, &precond->L, Magma_CPU, Magma_DEV , queue ));
    // extract the diagonal of L into precond->d
    CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue ));
    CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue ));

    magma_dmfree( &hA, queue );
    
    // now U
    snprintf( preconditionermatrix, sizeof(preconditionermatrix),
                "precondU.mtx" );

    CHECK( magma_d_csr_mtx( &hA, preconditionermatrix , queue) );
    CHECK( magma_dmtransfer( hA, &precond->U, Magma_CPU, Magma_DEV , queue ));
    // extract the diagonal of U into precond->d2
    CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue ));
    CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue ));


    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.val, precond->L.row, precond->L.col, precond->cuinfoL ));

    
    
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseDcsrsv_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.val, precond->U.row, precond->U.col, precond->cuinfoU ));

    
    cleanup:
        
    cusparseDestroy( cusparseHandle );
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseHandle=NULL;
    descrL=NULL;
    descrU=NULL;    
    magma_dmfree( &hA, queue );
    
    return info;
}
Exemplo n.º 15
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main(  int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_dopts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );

    real_Double_t res;
    magma_d_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, 
    B={Magma_CSR}, B_d={Magma_CSR};
    
    int i=1;
    real_Double_t start, end;
    CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_dm_5stencil(  laplace_size, &A, queue ));
        } else {                        // file-matrix test
            CHECK( magma_d_csr_mtx( &A,  argv[i], queue ));
        }

        printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n",
                            int(A.num_rows), int(A.num_cols), int(A.nnz) );

        // scale matrix
        CHECK( magma_dmscale( &A, zopts.scaling, queue ));

        // remove nonzeros in matrix
        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_dmcsrcompressor( &A, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 );
        // transpose
        CHECK( magma_dmtranspose( A, &AT, queue ));

        // convert, copy back and forth to check everything works
        CHECK( magma_dmconvert( AT, &B, Magma_CSR, Magma_CSR, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));
        magma_dmfree(&B, queue );

        start = magma_sync_wtime( queue );
        for (int j=0; j<10; j++)
            CHECK( magma_dmcsrcompressor_gpu( &B_d, queue ));
        end = magma_sync_wtime( queue );
        printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 );


        CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ));
        magma_dmfree(&B_d, queue );
        CHECK( magma_dmconvert( B, &AT, Magma_CSR, Magma_CSR, queue ));
        magma_dmfree(&B, queue );

        // transpose back
        CHECK( magma_dmtranspose( AT, &A2, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmdiff( A, A2, &res, queue ));
        printf("%% ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("%% tester matrix compressor:  ok\n");
        else
            printf("%% tester matrix compressor:  failed\n");

        magma_dmfree(&A, queue );
        magma_dmfree(&A2, queue );

        i++;
    }
    
cleanup:
    magma_dmfree(&AT, queue );
    magma_dmfree(&B, queue );
    magma_dmfree(&A, queue );
    magma_dmfree(&A2, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}
Exemplo n.º 16
0
extern "C" magma_int_t
magma_dlsqr(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_d_preconditioner *precond_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;
    
    // prepare solver feedback
    solver_par->solver = Magma_LSQR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    
    magma_int_t m = A.num_rows * b.num_cols;
    magma_int_t n = A.num_cols * b.num_cols;
    
    // local variables
    double c_zero = MAGMA_D_ZERO, c_one = MAGMA_D_ONE;
    // solver variables
    double s, nom0, r0, res=0, nomb, phibar, beta, alpha, c, rho, rhot, phi, thet, normr, normar, norma, sumnormd2, normd;

    // need to transpose the matrix
    magma_d_matrix AT={Magma_CSR}, Ah1={Magma_CSR}, Ah2={Magma_CSR};
    
    // GPU workspace
    magma_d_matrix r={Magma_CSR},
                    v={Magma_CSR}, z={Magma_CSR}, zt={Magma_CSR},
                    d={Magma_CSR}, vt={Magma_CSR}, q={Magma_CSR}, 
                    w={Magma_CSR}, u={Magma_CSR};
    CHECK( magma_dvinit( &r, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &v, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &z, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &d, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &vt,Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &q, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &w, Magma_DEV, A.num_cols, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &u, Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));
    CHECK( magma_dvinit( &zt,Magma_DEV, A.num_rows, b.num_cols, c_zero, queue ));

    
    // transpose the matrix
    magma_dmtransfer( A, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &AT, Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    

    
    // solver setup
    CHECK(  magma_dresidualvec( A, b, *x, &r, &nom0, queue));
    solver_par->init_res = nom0;
    nomb = magma_dnrm2( m, b.dval, 1, queue );
    if ( nomb == 0.0 ){
        nomb=1.0;
    }       
    if ( (r0 = nomb * solver_par->rtol) < ATOLERANCE ){
        r0 = ATOLERANCE;
    }
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nom0;
        solver_par->timing[0] = 0.0;
    }
    if ( nom0 < r0 ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }
    magma_dcopy( m, b.dval, 1, u.dval, 1, queue );  
    beta = magma_dnrm2( m, u.dval, 1, queue );
    magma_dscal( m, MAGMA_D_MAKE(1./beta, 0.0 ), u.dval, 1, queue );
    normr = beta;
    c = 1.0;
    s = 0.0;
    phibar = beta;
    CHECK( magma_d_spmv( c_one, AT, u, c_zero, v, queue ));
    
    if( precond_par->solver == Magma_NONE ){
        ;
    } else {
      CHECK( magma_d_applyprecond_right( MagmaTrans, A, v, &zt, precond_par, queue ));
      CHECK( magma_d_applyprecond_left( MagmaTrans, A, zt, &v, precond_par, queue ));
    }
    alpha = magma_dnrm2( n, v.dval, 1, queue );
    magma_dscal( n, MAGMA_D_MAKE(1./alpha, 0.0 ), v.dval, 1, queue );
    normar = alpha * beta;
    norma = 0;
    sumnormd2 = 0;
        
    //Chronometry
    real_Double_t tempo1, tempo2;
    tempo1 = magma_sync_wtime( queue );
    solver_par->numiter = 0;
    // start iteration
    do
    {
        solver_par->numiter++;
        if( precond_par->solver == Magma_NONE || A.num_rows != A.num_cols ) {
            magma_dcopy( n, v.dval, 1 , z.dval, 1, queue );    
        } else {
            CHECK( magma_d_applyprecond_left( MagmaNoTrans, A, v, &zt, precond_par, queue ));
            CHECK( magma_d_applyprecond_right( MagmaNoTrans, A, zt, &z, precond_par, queue ));
        }
        //CHECK( magma_d_spmv( c_one, A, z, MAGMA_D_MAKE(-alpha,0.0), u, queue ));
        CHECK( magma_d_spmv( c_one, A, z, c_zero, zt, queue ));
        magma_dscal( m, MAGMA_D_MAKE(-alpha, 0.0 ), u.dval, 1, queue ); 
        magma_daxpy( m, c_one, zt.dval, 1, u.dval, 1, queue );
        
        solver_par->spmv_count++;
        beta = magma_dnrm2( m, u.dval, 1, queue );
        magma_dscal( m, MAGMA_D_MAKE(1./beta, 0.0 ), u.dval, 1, queue ); 
        // norma = norm([norma alpha beta]);
        norma = sqrt(norma*norma + alpha*alpha + beta*beta );
        
        //lsvec( solver_par->numiter-1 ) = normar / norma;
        
        thet = -s * alpha;
        rhot = c * alpha;
        rho = sqrt( rhot * rhot + beta * beta );
        c = rhot / rho;
        s = - beta / rho;
        phi = c * phibar;
        phibar = s * phibar;
        
        // d = (z - thet * d) / rho;
        magma_dscal( n, MAGMA_D_MAKE(-thet, 0.0 ), d.dval, 1, queue ); 
        magma_daxpy( n, c_one, z.dval, 1, d.dval, 1, queue );
        magma_dscal( n, MAGMA_D_MAKE(1./rho, 0.0 ), d.dval, 1, queue );
        normd = magma_dnrm2( n, d.dval, 1, queue );
        sumnormd2 = sumnormd2 + normd*normd;
        
        // convergence check
        res = normr;        
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        // check for convergence in A*x=b
        if ( res/nomb <= solver_par->rtol || res <= solver_par->atol ){
            info = MAGMA_SUCCESS;
            break;
        }
        // check for convergence in min{|b-A*x|}
        if ( A.num_rows != A.num_cols &&
               ( normar/(norma*normr) <= solver_par->rtol || normar <= solver_par->atol ) ){
            printf("%% warning: quit from minimization convergence check.\n");
            info = MAGMA_SUCCESS;
            break;
        }
        
        magma_daxpy( n, MAGMA_D_MAKE( phi, 0.0 ), d.dval, 1, x->dval, 1, queue );
        normr = fabs(s) * normr;
        CHECK( magma_d_spmv( c_one, AT, u, c_zero, vt, queue ));
        solver_par->spmv_count++;
        if( precond_par->solver == Magma_NONE ){
            ;    
        } else {
            CHECK( magma_d_applyprecond_right( MagmaTrans, A, vt, &zt, precond_par, queue ));
            CHECK( magma_d_applyprecond_left( MagmaTrans, A, zt, &vt, precond_par, queue ));
        }

        magma_dscal( n, MAGMA_D_MAKE(-beta, 0.0 ), v.dval, 1, queue ); 
        magma_daxpy( n, c_one, vt.dval, 1, v.dval, 1, queue );
        alpha = magma_dnrm2( n, v.dval, 1, queue );
        magma_dscal( n, MAGMA_D_MAKE(1./alpha, 0.0 ), v.dval, 1, queue ); 
        normar = alpha * fabs(s*phi);
         
    }
    while ( solver_par->numiter+1 <= solver_par->maxiter );
    
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t) tempo2-tempo1;
    double residual;
    CHECK(  magma_dresidualvec( A, b, *x, &r, &residual, queue));
    solver_par->iter_res = res;
    solver_par->final_res = residual;

    if ( solver_par->numiter < solver_par->maxiter && info == MAGMA_SUCCESS ) {
        info = MAGMA_SUCCESS;
    } else if ( solver_par->init_res > solver_par->final_res ) {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_SLOW_CONVERGENCE;
        if( solver_par->iter_res < solver_par->rtol*solver_par->init_res ||
            solver_par->iter_res < solver_par->atol ) {
            info = MAGMA_SUCCESS;
        }
    }
    else {
        if ( solver_par->verbose > 0 ) {
            if ( (solver_par->numiter)%solver_par->verbose == c_zero ) {
                solver_par->res_vec[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) res;
                solver_par->timing[(solver_par->numiter)/solver_par->verbose]
                        = (real_Double_t) tempo2-tempo1;
            }
        }
        info = MAGMA_DIVERGENCE;
    }
    
cleanup:
    magma_dmfree(&r, queue );
    magma_dmfree(&v,  queue );
    magma_dmfree(&z,  queue );
    magma_dmfree(&zt, queue );
    magma_dmfree(&d,  queue );
    magma_dmfree(&vt,  queue );
    magma_dmfree(&q,  queue );
    magma_dmfree(&u,  queue );
    magma_dmfree(&w,  queue );
    magma_dmfree(&AT, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    
    solver_par->info = info;
    return info;
}   /* magma_dqmr */
Exemplo n.º 17
0
extern "C" magma_int_t
magma_didr(
    magma_d_matrix A, magma_d_matrix b, magma_d_matrix *x,
    magma_d_solver_par *solver_par,
    magma_queue_t queue )
{
    magma_int_t info = MAGMA_NOTCONVERGED;

    // prepare solver feedback
    solver_par->solver = Magma_IDR;
    solver_par->numiter = 0;
    solver_par->spmv_count = 0;
    solver_par->init_res = 0.0;
    solver_par->final_res = 0.0;
    solver_par->iter_res = 0.0;
    solver_par->runtime = 0.0;

    // constants
    const double c_zero = MAGMA_D_ZERO;
    const double c_one = MAGMA_D_ONE;
    const double c_n_one = MAGMA_D_NEG_ONE;

    // internal user parameters
    const magma_int_t smoothing = 1;   // 0 = disable, 1 = enable
    const double angle = 0.7;          // [0-1]

    // local variables
    magma_int_t iseed[4] = {0, 0, 0, 1};
    magma_int_t dof;
    magma_int_t s;
    magma_int_t distr;
    magma_int_t k, i, sk;
    magma_int_t innerflag;
    double residual;
    double nrm;
    double nrmb;
    double nrmr;
    double nrmt;
    double rho;
    double om;
    double tt;
    double tr;
    double gamma;
    double alpha;
    double mkk;
    double fk;

    // matrices and vectors
    magma_d_matrix dxs = {Magma_CSR};
    magma_d_matrix dr = {Magma_CSR}, drs = {Magma_CSR};
    magma_d_matrix dP = {Magma_CSR}, dP1 = {Magma_CSR};
    magma_d_matrix dG = {Magma_CSR};
    magma_d_matrix dU = {Magma_CSR};
    magma_d_matrix dM = {Magma_CSR};
    magma_d_matrix df = {Magma_CSR};
    magma_d_matrix dt = {Magma_CSR};
    magma_d_matrix dc = {Magma_CSR};
    magma_d_matrix dv = {Magma_CSR};
    magma_d_matrix dbeta = {Magma_CSR}, hbeta = {Magma_CSR};

    // chronometry
    real_Double_t tempo1, tempo2;

    // initial s space
    // TODO: add option for 's' (shadow space number)
    // Hack: uses '--restart' option as the shadow space number.
    //       This is not a good idea because the default value of restart option is used to detect
    //       if the user provided a custom restart. This means that if the default restart value
    //       is changed then the code will think it was the user (unless the default value is
    //       also updated in the 'if' statement below.
    s = 1;
    if ( solver_par->restart != 50 ) {
        if ( solver_par->restart > A.num_cols ) {
            s = A.num_cols;
        } else {
            s = solver_par->restart;
        }
    }
    solver_par->restart = s;

    // set max iterations
    solver_par->maxiter = min( 2 * A.num_cols, solver_par->maxiter );

    // check if matrix A is square
    if ( A.num_rows != A.num_cols ) {
        //printf("Matrix A is not square.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    // |b|
    nrmb = magma_dnrm2( b.num_rows, b.dval, 1, queue );
    if ( nrmb == 0.0 ) {
        magma_dscal( x->num_rows, MAGMA_D_ZERO, x->dval, 1, queue );
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    // r = b - A x
    CHECK( magma_dvinit( &dr, Magma_DEV, b.num_rows, 1, c_zero, queue ));
    CHECK( magma_dresidualvec( A, b, *x, &dr, &nrmr, queue ));
    
    // |r|
    solver_par->init_res = nrmr;
    solver_par->final_res = solver_par->init_res;
    solver_par->iter_res = solver_par->init_res;
    if ( solver_par->verbose > 0 ) {
        solver_par->res_vec[0] = (real_Double_t)nrmr;
    }

    // check if initial is guess good enough
    if ( nrmr <= solver_par->atol ||
        nrmr/nrmb <= solver_par->rtol ) {
        info = MAGMA_SUCCESS;
        goto cleanup;
    }

    // P = randn(n, s)
    // P = ortho(P)
//---------------------------------------
    // P = 0.0
    CHECK( magma_dvinit( &dP, Magma_CPU, A.num_cols, s, c_zero, queue ));

    // P = randn(n, s)
    distr = 3;        // 1 = unif (0,1), 2 = unif (-1,1), 3 = normal (0,1) 
    dof = dP.num_rows * dP.num_cols;
    lapackf77_dlarnv( &distr, iseed, &dof, dP.val );

    // transfer P to device
    CHECK( magma_dmtransfer( dP, &dP1, Magma_CPU, Magma_DEV, queue ));
    magma_dmfree( &dP, queue );

    // P = ortho(P1)
    if ( dP1.num_cols > 1 ) {
        // P = magma_dqr(P1), QR factorization
        CHECK( magma_dqr( dP1.num_rows, dP1.num_cols, dP1, dP1.ld, &dP, NULL, queue ));
    } else {
        // P = P1 / |P1|
        nrm = magma_dnrm2( dof, dP1.dval, 1, queue );
        nrm = 1.0 / nrm;
        magma_dscal( dof, nrm, dP1.dval, 1, queue );
        CHECK( magma_dmtransfer( dP1, &dP, Magma_DEV, Magma_DEV, queue ));
    }
    magma_dmfree( &dP1, queue );
//---------------------------------------

    // allocate memory for the scalar products
    CHECK( magma_dvinit( &hbeta, Magma_CPU, s, 1, c_zero, queue ));
    CHECK( magma_dvinit( &dbeta, Magma_DEV, s, 1, c_zero, queue ));

    // smoothing enabled
    if ( smoothing > 0 ) {
        // set smoothing solution vector
        CHECK( magma_dmtransfer( *x, &dxs, Magma_DEV, Magma_DEV, queue ));

        // set smoothing residual vector
        CHECK( magma_dmtransfer( dr, &drs, Magma_DEV, Magma_DEV, queue ));
    }

    // G(n,s) = 0
    CHECK( magma_dvinit( &dG, Magma_DEV, A.num_cols, s, c_zero, queue ));

    // U(n,s) = 0
    CHECK( magma_dvinit( &dU, Magma_DEV, A.num_cols, s, c_zero, queue ));

    // M(s,s) = I
    CHECK( magma_dvinit( &dM, Magma_DEV, s, s, c_zero, queue ));
    magmablas_dlaset( MagmaFull, s, s, c_zero, c_one, dM.dval, s, queue );

    // f = 0
    CHECK( magma_dvinit( &df, Magma_DEV, dP.num_cols, 1, c_zero, queue ));

    // t = 0
    CHECK( magma_dvinit( &dt, Magma_DEV, dr.num_rows, 1, c_zero, queue ));

    // c = 0
    CHECK( magma_dvinit( &dc, Magma_DEV, dM.num_cols, 1, c_zero, queue ));

    // v = 0
    CHECK( magma_dvinit( &dv, Magma_DEV, dr.num_rows, 1, c_zero, queue ));

    //--------------START TIME---------------
    // chronometry
    tempo1 = magma_sync_wtime( queue );
    if ( solver_par->verbose > 0 ) {
        solver_par->timing[0] = 0.0;
    }

    om = MAGMA_D_ONE;
    innerflag = 0;

    // start iteration
    do
    {
        solver_par->numiter++;
    
        // new RHS for small systems
        // f = P' r
        magmablas_dgemv( MagmaConjTrans, dP.num_rows, dP.num_cols, c_one, dP.dval, dP.ld, dr.dval, 1, c_zero, df.dval, 1, queue );

        // shadow space loop
        for ( k = 0; k < s; ++k ) {
            sk = s - k;
    
            // f(k:s) = M(k:s,k:s) c(k:s)
            magma_dcopyvector( sk, &df.dval[k], 1, &dc.dval[k], 1, queue );
            magma_dtrsv( MagmaLower, MagmaNoTrans, MagmaNonUnit, sk, &dM.dval[k*dM.ld+k], dM.ld, &dc.dval[k], 1, queue );

            // v = r - G(:,k:s) c(k:s)
            magma_dcopyvector( dr.num_rows, dr.dval, 1, dv.dval, 1, queue );
            magmablas_dgemv( MagmaNoTrans, dG.num_rows, sk, c_n_one, &dG.dval[k*dG.ld], dG.ld, &dc.dval[k], 1, c_one, dv.dval, 1, queue );

            // U(:,k) = om * v + U(:,k:s) c(k:s)
            magmablas_dgemv( MagmaNoTrans, dU.num_rows, sk, c_one, &dU.dval[k*dU.ld], dU.ld, &dc.dval[k], 1, om, dv.dval, 1, queue );
            magma_dcopyvector( dU.num_rows, dv.dval, 1, &dU.dval[k*dU.ld], 1, queue );

            // G(:,k) = A U(:,k)
            CHECK( magma_d_spmv( c_one, A, dv, c_zero, dv, queue ));
            solver_par->spmv_count++;
            magma_dcopyvector( dG.num_rows, dv.dval, 1, &dG.dval[k*dG.ld], 1, queue );

            // bi-orthogonalize the new basis vectors
            for ( i = 0; i < k; ++i ) {
                // alpha = P(:,i)' G(:,k)
                alpha = magma_ddot( dP.num_rows, &dP.dval[i*dP.ld], 1, &dG.dval[k*dG.ld], 1, queue );

                // alpha = alpha / M(i,i)
                magma_dgetvector( 1, &dM.dval[i*dM.ld+i], 1, &mkk, 1, queue );
                alpha = alpha / mkk;

                // G(:,k) = G(:,k) - alpha * G(:,i)
                magma_daxpy( dG.num_rows, -alpha, &dG.dval[i*dG.ld], 1, &dG.dval[k*dG.ld], 1, queue );

                // U(:,k) = U(:,k) - alpha * U(:,i)
                magma_daxpy( dU.num_rows, -alpha, &dU.dval[i*dU.ld], 1, &dU.dval[k*dU.ld], 1, queue );
            }

            // new column of M = P'G, first k-1 entries are zero
            // M(k:s,k) = P(:,k:s)' G(:,k)
            magmablas_dgemv( MagmaConjTrans, dP.num_rows, sk, c_one, &dP.dval[k*dP.ld], dP.ld, &dG.dval[k*dG.ld], 1, c_zero, &dM.dval[k*dM.ld+k], 1, queue );

            // check M(k,k) == 0
            magma_dgetvector( 1, &dM.dval[k*dM.ld+k], 1, &mkk, 1, queue );
            if ( MAGMA_D_EQUAL(mkk, MAGMA_D_ZERO) ) {
                innerflag = 1;
                info = MAGMA_DIVERGENCE;
                break;
            }

            // beta = f(k) / M(k,k)
            magma_dgetvector( 1, &df.dval[k], 1, &fk, 1, queue );
            hbeta.val[k] = fk / mkk;

            // check for nan
            if ( magma_d_isnan( hbeta.val[k] ) || magma_d_isinf( hbeta.val[k] )) {
                innerflag = 1;
                info = MAGMA_DIVERGENCE;
                break;
            }

            // r = r - beta * G(:,k)
            magma_daxpy( dr.num_rows, -hbeta.val[k], &dG.dval[k*dG.ld], 1, dr.dval, 1, queue );

            // smoothing disabled
            if ( smoothing <= 0 ) {
                // |r|
                nrmr = magma_dnrm2( dr.num_rows, dr.dval, 1, queue );

            // smoothing enabled
            } else {
                // x = x + beta * U(:,k)
                magma_daxpy( x->num_rows, hbeta.val[k], &dU.dval[k*dU.ld], 1, x->dval, 1, queue );

                // smoothing operation
//---------------------------------------
                // t = rs - r
                magma_dcopyvector( drs.num_rows, drs.dval, 1, dt.dval, 1, queue );
                magma_daxpy( dt.num_rows, c_n_one, dr.dval, 1, dt.dval, 1, queue );

                // t't
                // t'rs 
                tt = magma_ddot( dt.num_rows, dt.dval, 1, dt.dval, 1, queue );
                tr = magma_ddot( dt.num_rows, dt.dval, 1, drs.dval, 1, queue );

                // gamma = (t' * rs) / (t' * t)
                gamma = tr / tt;

                // rs = rs - gamma * (rs - r) 
                magma_daxpy( drs.num_rows, -gamma, dt.dval, 1, drs.dval, 1, queue );

                // xs = xs - gamma * (xs - x) 
                magma_dcopyvector( dxs.num_rows, dxs.dval, 1, dt.dval, 1, queue );
                magma_daxpy( dt.num_rows, c_n_one, x->dval, 1, dt.dval, 1, queue );
                magma_daxpy( dxs.num_rows, -gamma, dt.dval, 1, dxs.dval, 1, queue );

                // |rs|
                nrmr = magma_dnrm2( drs.num_rows, drs.dval, 1, queue );           
//---------------------------------------
            }

            // store current timing and residual
            if ( solver_par->verbose > 0 ) {
                tempo2 = magma_sync_wtime( queue );
                if ( (solver_par->numiter) % solver_par->verbose == 0 ) {
                    solver_par->res_vec[(solver_par->numiter) / solver_par->verbose]
                            = (real_Double_t)nrmr;
                    solver_par->timing[(solver_par->numiter) / solver_par->verbose]
                            = (real_Double_t)tempo2 - tempo1;
                }
            }

            // check convergence
            if ( nrmr <= solver_par->atol ||
                nrmr/nrmb <= solver_par->rtol ) {
                s = k + 1; // for the x-update outside the loop
                innerflag = 2;
                info = MAGMA_SUCCESS;
                break;
            }

            // non-last s iteration
            if ( (k + 1) < s ) {
                // f(k+1:s) = f(k+1:s) - beta * M(k+1:s,k)
                magma_daxpy( sk-1, -hbeta.val[k], &dM.dval[k*dM.ld+(k+1)], 1, &df.dval[k+1], 1, queue );
            }

        }

        // smoothing disabled
        if ( smoothing <= 0 && innerflag != 1 ) {
            // update solution approximation x
            // x = x + U(:,1:s) * beta(1:s)
            magma_dsetvector( s, hbeta.val, 1, dbeta.dval, 1, queue );
            magmablas_dgemv( MagmaNoTrans, dU.num_rows, s, c_one, dU.dval, dU.ld, dbeta.dval, 1, c_one, x->dval, 1, queue );
        }

        // check convergence or iteration limit or invalid result of inner loop
        if ( innerflag > 0 ) {
            break;
        }

        // t = A v
        // t = A r
        CHECK( magma_d_spmv( c_one, A, dr, c_zero, dt, queue ));
        solver_par->spmv_count++;

        // computation of a new omega
//---------------------------------------
        // |t|
        nrmt = magma_dnrm2( dt.num_rows, dt.dval, 1, queue );

        // t'r 
        tr = magma_ddot( dt.num_rows, dt.dval, 1, dr.dval, 1, queue );

        // rho = abs(t' * r) / (|t| * |r|))
        rho = MAGMA_D_ABS( MAGMA_D_REAL(tr) / (nrmt * nrmr) );

        // om = (t' * r) / (|t| * |t|)
        om = tr / (nrmt * nrmt);
        if ( rho < angle ) {
            om = (om * angle) / rho;
        }
//---------------------------------------
        if ( MAGMA_D_EQUAL(om, MAGMA_D_ZERO) ) {
            info = MAGMA_DIVERGENCE;
            break;
        }

        // update approximation vector
        // x = x + om * v
        // x = x + om * r
        magma_daxpy( x->num_rows, om, dr.dval, 1, x->dval, 1, queue );

        // update residual vector
        // r = r - om * t
        magma_daxpy( dr.num_rows, -om, dt.dval, 1, dr.dval, 1, queue );

        // smoothing disabled
        if ( smoothing <= 0 ) {
            // residual norm
            nrmr = magma_dnrm2( b.num_rows, dr.dval, 1, queue );

        // smoothing enabled
        } else {
            // smoothing operation
//---------------------------------------
            // t = rs - r
            magma_dcopyvector( drs.num_rows, drs.dval, 1, dt.dval, 1, queue );
            magma_daxpy( dt.num_rows, c_n_one, dr.dval, 1, dt.dval, 1, queue );

            // t't
            // t'rs
            tt = magma_ddot( dt.num_rows, dt.dval, 1, dt.dval, 1, queue );
            tr = magma_ddot( dt.num_rows, dt.dval, 1, drs.dval, 1, queue );

            // gamma = (t' * rs) / (|t| * |t|)
            gamma = tr / tt;

            // rs = rs - gamma * (rs - r) 
            magma_daxpy( drs.num_rows, -gamma, dt.dval, 1, drs.dval, 1, queue );

            // xs = xs - gamma * (xs - x) 
            magma_dcopyvector( dxs.num_rows, dxs.dval, 1, dt.dval, 1, queue );
            magma_daxpy( dt.num_rows, c_n_one, x->dval, 1, dt.dval, 1, queue );
            magma_daxpy( dxs.num_rows, -gamma, dt.dval, 1, dxs.dval, 1, queue );

            // |rs|
            nrmr = magma_dnrm2( b.num_rows, drs.dval, 1, queue );           
//---------------------------------------
        }

        // store current timing and residual
        if ( solver_par->verbose > 0 ) {
            tempo2 = magma_sync_wtime( queue );
            if ( (solver_par->numiter) % solver_par->verbose == 0 ) {
                solver_par->res_vec[(solver_par->numiter) / solver_par->verbose]
                        = (real_Double_t)nrmr;
                solver_par->timing[(solver_par->numiter) / solver_par->verbose]
                        = (real_Double_t)tempo2 - tempo1;
            }
        }

        // check convergence
        if ( nrmr <= solver_par->atol ||
            nrmr/nrmb <= solver_par->rtol ) { 
            info = MAGMA_SUCCESS;
            break;
        }
    }
    while ( solver_par->numiter + 1 <= solver_par->maxiter );

    // smoothing enabled
    if ( smoothing > 0 ) {
        // x = xs
        magma_dcopyvector( x->num_rows, dxs.dval, 1, x->dval, 1, queue );

        // r = rs
        magma_dcopyvector( dr.num_rows, drs.dval, 1, dr.dval, 1, queue );
    }

    // get last iteration timing
    tempo2 = magma_sync_wtime( queue );
    solver_par->runtime = (real_Double_t)tempo2 - tempo1;
//--------------STOP TIME----------------

    // get final stats
    solver_par->iter_res = nrmr;
    CHECK( magma_dresidualvec( A, b, *x, &dr, &residual, queue ));
    solver_par->final_res = residual;

    // set solver conclusion
    if ( info != MAGMA_SUCCESS && info != MAGMA_DIVERGENCE ) {
        if ( solver_par->init_res > solver_par->final_res ) {
            info = MAGMA_SLOW_CONVERGENCE;
        }
    }


cleanup:
    // free resources
    // smoothing enabled
    if ( smoothing > 0 ) {
        magma_dmfree( &dxs, queue );
        magma_dmfree( &drs, queue );
    }
    magma_dmfree( &dr, queue );
    magma_dmfree( &dP, queue );
    magma_dmfree( &dP1, queue );
    magma_dmfree( &dG, queue );
    magma_dmfree( &dU, queue );
    magma_dmfree( &dM, queue );
    magma_dmfree( &df, queue );
    magma_dmfree( &dt, queue );
    magma_dmfree( &dc, queue );
    magma_dmfree( &dv, queue );
    magma_dmfree( &dbeta, queue );
    magma_dmfree( &hbeta, queue );

    solver_par->info = info;
    return info;
    /* magma_didr */
}
Exemplo n.º 18
0
extern "C" magma_int_t
magma_dmlumerge(
    magma_d_matrix L,
    magma_d_matrix U,
    magma_d_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;    

    if( L.storage_type == Magma_CSR && U.storage_type == Magma_CSR ){
        if( L.memory_location == Magma_CPU && U.memory_location == Magma_CPU ){
            CHECK( magma_dmtransfer( L, A, Magma_CPU, Magma_CPU, queue ));
            magma_free_cpu( A->col );
            magma_free_cpu( A->val );
            // make sure it is strictly lower triangular
            magma_int_t z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    z++;
                }
            }
            A->nnz = z;
            // fill A with the new structure;
            CHECK( magma_index_malloc_cpu( &A->col, A->nnz ));
            CHECK( magma_dmalloc_cpu( &A->val, A->nnz ));
            z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                A->row[i] = z;
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        A->col[z] = L.col[j];
                        A->val[z] = L.val[j];
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    A->col[z] = U.col[j];
                    A->val[z] = U.val[j];
                    z++;
                }
            }
            A->row[A->num_rows] = z;
            A->nnz = z;
        }
        else {
            printf("error: matrix not on CPU.\n"); 
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
            printf("error: matrix in wrong format.\n"); 
            info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    if( info != 0 ){
        magma_dmfree( A, queue );
    }
    return info;
}
Exemplo n.º 19
0
magma_int_t
magma_dsymbilu(
    magma_d_matrix *A,
    magma_int_t levels,
    magma_d_matrix *L,
    magma_d_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_d_matrix A_copy={Magma_CSR}, B={Magma_CSR};
    magma_d_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR};
    
    if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){
        CHECK( magma_dmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue ));
        CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue ));

        // possibility to scale to unit diagonal
        //magma_dmscale( &B, Magma_UNITDIAG );

        CHECK( magma_dmconvert( B, L, Magma_CSR, Magma_CSR , queue));
        CHECK( magma_dmconvert( B, U, Magma_CSR, Magma_CSR, queue ));

        magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;
        magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;

        magma_free_cpu( L->col );
        magma_free_cpu( U->col );
        CHECK( magma_index_malloc_cpu( &L->col, num_lnnz ));
        CHECK( magma_index_malloc_cpu( &U->col, num_unnz ));

        magma_dsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col,
                                            L->row, L->col, U->row, U->col );
        L->nnz = num_lnnz;
        U->nnz = num_unnz;
        magma_free_cpu( L->val );
        magma_free_cpu( U->val );
        CHECK( magma_dmalloc_cpu( &L->val, L->nnz ));
        CHECK( magma_dmalloc_cpu( &U->val, U->nnz ));
        for( magma_int_t i=0; i<L->nnz; i++ )
            L->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );

        for( magma_int_t i=0; i<U->nnz; i++ )
            U->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );
        // take the original values (scaled) as initial guess for L
        for(magma_int_t i=0; i<L->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){
                    if( L->col[k] == lcol ){
                        L->val[k] =  B.val[j];
                    }
                }
            }
        }

        // take the original values (scaled) as initial guess for U
        for(magma_int_t i=0; i<U->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){
                    if( U->col[k] == lcol ){
                        U->val[k] =  B.val[j];
                    }
                }
            }
        }
        magma_dmfree( &B, queue );
        // fill A with the new structure;
        magma_free_cpu( A->col );
        magma_free_cpu( A->val );
        CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz ));
        CHECK( magma_dmalloc_cpu( &A->val, L->nnz+U->nnz ));
        A->nnz = L->nnz+U->nnz;
        
        magma_int_t z = 0;
        for(magma_int_t i=0; i<A->num_rows; i++){
            A->row[i] = z;
            for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                A->col[z] = L->col[j];
                A->val[z] = L->val[j];
                z++;
            }
            for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){
                A->col[z] = U->col[j];
                A->val[z] = U->val[j];
                z++;
            }
        }
        A->row[A->num_rows] = z;
        // reset the values of A to the original entries
        for(magma_int_t i=0; i<A->num_rows; i++){
            for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){
                magma_index_t lcol = A_copy.col[j];
                for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){
                    if( A->col[k] == lcol ){
                        A->val[k] =  A_copy.val[j];
                    }
                }
            }
        }
    }
    else {
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue ));

        CHECK( magma_dsymbilu( &CSRCOOA, levels, L, U, queue ));

        magma_dmfree( &hA, queue );
        magma_dmfree( A, queue );
        CHECK( magma_dmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue ));
        CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue ));
    }
    
cleanup:
    if( info != 0 ){
        magma_dmfree( L, queue );
        magma_dmfree( U, queue );
    }
    magma_dmfree( &A_copy, queue );
    magma_dmfree( &B, queue );
    magma_dmfree( &hA, queue );
    magma_dmfree( &CSRCOOA, queue );
    return info;
}
Exemplo n.º 20
0
extern "C" magma_int_t
magma_dcumilugeneratesolverinfo(
    magma_d_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;
    
    magma_d_matrix hA={Magma_CSR}, hL={Magma_CSR}, hU={Magma_CSR};
    
    if (precond->L.memory_location != Magma_DEV ){
        CHECK( magma_dmtransfer( precond->M, &hA,
        precond->M.memory_location, Magma_CPU, queue ));

        hL.diagorder_type = Magma_UNITY;
        CHECK( magma_dmconvert( hA, &hL , Magma_CSR, Magma_CSRL, queue ));
        hU.diagorder_type = Magma_VALUE;
        CHECK( magma_dmconvert( hA, &hU , Magma_CSR, Magma_CSRU, queue ));
        CHECK( magma_dmtransfer( hL, &(precond->L), Magma_CPU, Magma_DEV, queue ));
        CHECK( magma_dmtransfer( hU, &(precond->U), Magma_CPU, Magma_DEV, queue ));
        
        magma_dmfree(&hA, queue );
        magma_dmfree(&hL, queue );
        magma_dmfree(&hU, queue );
    }
    
    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));


    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->L.num_rows,
        precond->L.nnz, descrL,
        precond->L.dval, precond->L.drow, precond->L.dcol, precond->cuinfoL ));


    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->U.num_rows,
        precond->U.nnz, descrU,
        precond->U.dval, precond->U.drow, precond->U.dcol, precond->cuinfoU ));

    
    if( precond->maxiter < 50 ){
        //prepare for iterative solves

        // extract the diagonal of L into precond->d
        CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_dvinit( &precond->work1, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue ));
        
        // extract the diagonal of U into precond->d2
        CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_dvinit( &precond->work2, Magma_DEV, precond->U.num_rows, 1, MAGMA_D_ZERO, queue ));
    }
    
cleanup:
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroy( cusparseHandle );
     
    return info;
}
Exemplo n.º 21
0
extern "C" magma_int_t
magma_dcumilusetup_transpose(
    magma_d_matrix A,
    magma_d_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_d_matrix Ah1={Magma_CSR}, Ah2={Magma_CSR};
    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrLT=NULL;
    cusparseMatDescr_t descrUT=NULL;
    
    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));

    // transpose the matrix
    magma_dmtransfer( precond->L, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &(precond->LT), Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
    
    magma_dmtransfer( precond->U, &Ah1, Magma_DEV, Magma_CPU, queue );
    magma_dmconvert( Ah1, &Ah2, A.storage_type, Magma_CSR, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransposeconjugate( Ah2, &Ah1, queue );
    magma_dmfree(&Ah2, queue );
    Ah2.blocksize = A.blocksize;
    Ah2.alignment = A.alignment;
    magma_dmconvert( Ah1, &Ah2, Magma_CSR, A.storage_type, queue );
    magma_dmfree(&Ah1, queue );
    magma_dmtransfer( Ah2, &(precond->UT), Magma_CPU, Magma_DEV, queue );
    magma_dmfree(&Ah2, queue );
   
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrLT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrLT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrLT, CUSPARSE_DIAG_TYPE_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrLT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrLT, CUSPARSE_FILL_MODE_UPPER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoLT ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->LT.num_rows,
        precond->LT.nnz, descrLT,
        precond->LT.dval, precond->LT.drow, precond->LT.dcol, precond->cuinfoLT ));
    
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrUT ));
    CHECK_CUSPARSE( cusparseSetMatType( descrUT, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrUT, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrUT, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrUT, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoUT ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->UT.num_rows,
        precond->UT.nnz, descrUT,
        precond->UT.dval, precond->UT.drow, precond->UT.dcol, precond->cuinfoUT ));
cleanup:
    cusparseDestroyMatDescr( descrLT );
    cusparseDestroyMatDescr( descrUT );
    cusparseDestroy( cusparseHandle );
    magma_dmfree(&Ah1, queue );
    magma_dmfree(&Ah2, queue );

    return info;
}
Exemplo n.º 22
0
magma_int_t
magma_dnonlinres(
    magma_d_matrix A,
    magma_d_matrix L,
    magma_d_matrix U,
    magma_d_matrix *LU,
    real_Double_t *res,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    real_Double_t tmp2;
    magma_int_t i,j,k;
        
    double one = MAGMA_D_MAKE( 1.0, 0.0 );

    magma_d_matrix L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR}, A_t={Magma_CSR};

    CHECK( magma_dmtransfer( L, &L_d, Magma_CPU, Magma_DEV, queue  ));
    CHECK( magma_dmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue  ));
    CHECK( magma_dmtransfer( A, &A_t, Magma_CPU, Magma_CPU, queue  ));
    CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue ));

    CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue ));
    magma_dmfree( &L_d, queue  );
    magma_dmfree( &U_d, queue  );
    magma_dmfree( &LU_d, queue  );

    // compute Frobenius norm of A-LU
    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            magma_index_t lcol = A.col[j];
            double newval = MAGMA_D_MAKE(0.0, 0.0);
            for(k=LU->row[i]; k<LU->row[i+1]; k++){
                if( LU->col[k] == lcol ){
                    newval = MAGMA_D_MAKE(
                        MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] )
                                                , 0.0 );
                }
            }
            A_t.val[j] = newval;
        }
    }

    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(A_t.val[j]) );
            (*res) = (*res) + tmp2* tmp2;
        }
    }

    magma_dmfree( LU, queue  );
    magma_dmfree( &A_t, queue  );

    (*res) =  sqrt((*res));
    
cleanup:
    if( info !=0 ){
        magma_dmfree( LU, queue  );
    }
    magma_dmfree( &A_t, queue  );
    magma_dmfree( &L_d, queue  );
    magma_dmfree( &U_d, queue  );
    magma_dmfree( &LU_d, queue  );
    return info;
}
Exemplo n.º 23
0
extern "C" magma_int_t
magma_dcumiccsetup(
    magma_d_matrix A,
    magma_d_preconditioner *precond,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    cusparseHandle_t cusparseHandle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrL=NULL;
    cusparseMatDescr_t descrU=NULL;
#if CUDA_VERSION >= 7000
    csric02Info_t info_M=NULL;
    void *pBuffer = NULL;
#endif
    
    magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR}, U={Magma_CSR};
    CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
    U.diagorder_type = Magma_VALUE;
    CHECK( magma_dmconvert( hA, &hACSR, hA.storage_type, Magma_CSR, queue ));

    // in case using fill-in
    if( precond->levels > 0 ){
            magma_d_matrix hAL={Magma_CSR}, hAUt={Magma_CSR};
            CHECK( magma_dsymbilu( &hACSR, precond->levels, &hAL, &hAUt,  queue ));
            magma_dmfree(&hAL, queue);
            magma_dmfree(&hAUt, queue);
    }

    CHECK( magma_dmconvert( hACSR, &U, Magma_CSR, Magma_CSRL, queue ));
    magma_dmfree( &hACSR, queue );
    CHECK( magma_dmtransfer(U, &(precond->M), Magma_CPU, Magma_DEV, queue ));

    // CUSPARSE context //
    CHECK_CUSPARSE( cusparseCreate( &cusparseHandle ));
    CHECK_CUSPARSE( cusparseSetStream( cusparseHandle, queue->cuda_stream() ));
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &(precond->cuinfo) ));
    // use kernel to manually check for zeros n the diagonal
    CHECK( magma_ddiagcheck( precond->M, queue ) );
        
#if CUDA_VERSION >= 7000
    // this version has the bug fixed where a zero on the diagonal causes a crash
    CHECK_CUSPARSE( cusparseCreateCsric02Info(&info_M) );
    CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
    int buffersize;
    int structural_zero;
    int numerical_zero;
    
    CHECK_CUSPARSE(
    cusparseDcsric02_bufferSize( cusparseHandle,
                         precond->M.num_rows, precond->M.nnz, descrA,
                         precond->M.dval, precond->M.drow, precond->M.dcol,
                         info_M,
                         &buffersize ) );
    
    CHECK( magma_malloc((void**)&pBuffer, buffersize) );

    CHECK_CUSPARSE( cusparseDcsric02_analysis( cusparseHandle,
            precond->M.num_rows, precond->M.nnz, descrA,
            precond->M.dval, precond->M.drow, precond->M.dcol,
            info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer ));
    CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &numerical_zero ) );
    CHECK_CUSPARSE( cusparseXcsric02_zeroPivot( cusparseHandle, info_M, &structural_zero ) );

    CHECK_CUSPARSE(
    cusparseDcsric02( cusparseHandle,
                         precond->M.num_rows, precond->M.nnz, descrA,
                         precond->M.dval, precond->M.drow, precond->M.dcol,
                         info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer) );    

#else
    // this version contains the bug but is needed for backward compability
    CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_SYMMETRIC ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrA, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrA, CUSPARSE_FILL_MODE_LOWER ));
    
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
                CUSPARSE_OPERATION_NON_TRANSPOSE,
                precond->M.num_rows, precond->M.nnz, descrA,
                precond->M.dval, precond->M.drow, precond->M.dcol,
                precond->cuinfo ));
    CHECK_CUSPARSE( cusparseDcsric0( cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE,
                      precond->M.num_rows, descrA,
                      precond->M.dval,
                      precond->M.drow,
                      precond->M.dcol,
                      precond->cuinfo ));
#endif

    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrL ));
    CHECK_CUSPARSE( cusparseSetMatType( descrL, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrL, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrL, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrL, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_NON_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrL,
        precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoL ));
    CHECK_CUSPARSE( cusparseCreateMatDescr( &descrU ));
    CHECK_CUSPARSE( cusparseSetMatType( descrU, CUSPARSE_MATRIX_TYPE_TRIANGULAR ));
    CHECK_CUSPARSE( cusparseSetMatDiagType( descrU, CUSPARSE_DIAG_TYPE_NON_UNIT ));
    CHECK_CUSPARSE( cusparseSetMatIndexBase( descrU, CUSPARSE_INDEX_BASE_ZERO ));
    CHECK_CUSPARSE( cusparseSetMatFillMode( descrU, CUSPARSE_FILL_MODE_LOWER ));
    CHECK_CUSPARSE( cusparseCreateSolveAnalysisInfo( &precond->cuinfoU ));
    CHECK_CUSPARSE( cusparseDcsrsm_analysis( cusparseHandle,
        CUSPARSE_OPERATION_TRANSPOSE, precond->M.num_rows,
        precond->M.nnz, descrU,
        precond->M.dval, precond->M.drow, precond->M.dcol, precond->cuinfoU ));

    if( precond->maxiter < 50 ){
        //prepare for iterative solves
        
        // copy the matrix to precond->L and (transposed) to precond->U
        CHECK( magma_dmtransfer(precond->M, &(precond->L), Magma_DEV, Magma_DEV, queue ));
        CHECK( magma_dmtranspose( precond->L, &(precond->U), queue ));
        
        // extract the diagonal of L into precond->d
        CHECK( magma_djacobisetup_diagscal( precond->L, &precond->d, queue ));
        CHECK( magma_dvinit( &precond->work1, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue ));
        
        // extract the diagonal of U into precond->d2
        CHECK( magma_djacobisetup_diagscal( precond->U, &precond->d2, queue ));
        CHECK( magma_dvinit( &precond->work2, Magma_DEV, hA.num_rows, 1, MAGMA_D_ZERO, queue ));
    }



/*
    // to enable also the block-asynchronous iteration for the triangular solves
    CHECK( magma_dmtransfer( precond->M, &hA, Magma_DEV, Magma_CPU, queue ));
    hA.storage_type = Magma_CSR;

    magma_d_matrix hD, hR, hAt

    CHECK( magma_dcsrsplit( 256, hA, &hD, &hR, queue ));

    CHECK( magma_dmtransfer( hD, &precond->LD, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_dmtransfer( hR, &precond->L, Magma_CPU, Magma_DEV, queue ));

    magma_dmfree(&hD, queue );
    magma_dmfree(&hR, queue );

    CHECK( magma_d_cucsrtranspose(   hA, &hAt, queue ));

    CHECK( magma_dcsrsplit( 256, hAt, &hD, &hR, queue ));

    CHECK( magma_dmtransfer( hD, &precond->UD, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_dmtransfer( hR, &precond->U, Magma_CPU, Magma_DEV, queue ));
    
    magma_dmfree(&hD, queue );
    magma_dmfree(&hR, queue );
    magma_dmfree(&hA, queue );
    magma_dmfree(&hAt, queue );
*/

cleanup:
#if CUDA_VERSION >= 7000
    magma_free( pBuffer );
    cusparseDestroyCsric02Info( info_M );
#endif
    cusparseDestroySolveAnalysisInfo( precond->cuinfo );
    cusparseDestroyMatDescr( descrL );
    cusparseDestroyMatDescr( descrU );
    cusparseDestroyMatDescr( descrA );
    cusparseDestroy( cusparseHandle );
    magma_dmfree(&U, queue );
    magma_dmfree(&hA, queue );

    return info;
}
Exemplo n.º 24
0
magma_int_t
magma_dilures(
    magma_d_matrix A,
    magma_d_matrix L,
    magma_d_matrix U,
    magma_d_matrix *LU,
    real_Double_t *res,
    real_Double_t *nonlinres,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    double tmp;
    real_Double_t tmp2;
    magma_int_t i,j,k;
    
    double one = MAGMA_D_MAKE( 1.0, 0.0 );

    magma_d_matrix LL={Magma_CSR}, L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR};

    if( L.row[1]==1 ){        // lower triangular with unit diagonal
        //printf("L lower triangular.\n");
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_dmconvert( L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if( L.row[1]==0 ){ // strictly lower triangular
        //printf("L strictly lower triangular.\n");
        CHECK( magma_dmtransfer( L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        magma_free_cpu( LL.val );
        LL.nnz = L.nnz+L.num_rows;
        CHECK( magma_dmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));
        magma_int_t z=0;
        for( magma_int_t i=0; i<L.num_rows; i++){
            LL.row[i] = z;
            for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                LL.val[z] = L.val[j];
                LL.col[z] = L.col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_D_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
    }
    else{
        printf("error: L neither lower nor strictly lower triangular!\n");
    }

    CHECK( magma_dmtransfer( LL, &L_d, Magma_CPU, Magma_DEV, queue  ));
    CHECK( magma_dmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue  ));
    magma_dmfree( &LL, queue );
    CHECK( magma_d_spmm( one, L_d, U_d, &LU_d, queue ));



    CHECK( magma_dmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue ));
    magma_dmfree( &L_d, queue );
    magma_dmfree( &U_d, queue );
    magma_dmfree( &LU_d, queue );

    // compute Frobenius norm of A-LU
    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            magma_index_t lcol = A.col[j];
            for(k=LU->row[i]; k<LU->row[i+1]; k++){
                if( LU->col[k] == lcol ){

                    tmp = MAGMA_D_MAKE(
                        MAGMA_D_REAL( LU->val[k] )- MAGMA_D_REAL( A.val[j] )
                                                , 0.0 );
                    LU->val[k] = tmp;

                    tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(tmp) );
                    (*nonlinres) = (*nonlinres) + tmp2*tmp2;
                }

            }
        }
    }

    for(i=0; i<LU->num_rows; i++){
        for(j=LU->row[i]; j<LU->row[i+1]; j++){
            tmp2 = (real_Double_t) fabs( MAGMA_D_REAL(LU->val[j]) );
            (*res) = (*res) + tmp2* tmp2;
        }
    }

    (*res) =  sqrt((*res));
    (*nonlinres) =  sqrt((*nonlinres));

cleanup:
    if( info !=0 ){
        magma_dmfree( LU, queue  );
    }
    magma_dmfree( &LL, queue );
    magma_dmfree( &L_d, queue  );
    magma_dmfree( &U_d, queue  );
    magma_dmfree( &LU_d, queue  );
    return info;
}