Example #1
0
extern "C" magma_int_t
magma_zrowentries(
    magma_z_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_index_t *length=NULL;
    magma_index_t i,j, maxrowlength=0;
    
    // check whether matrix on CPU
    if ( A->memory_location == Magma_CPU ) {
        // CSR
        if ( A->storage_type == Magma_CSR ) {
            CHECK( magma_index_malloc_cpu( &length, A->num_rows));
            for( i=0; i<A->num_rows; i++ ) {
                length[i] = A->row[i+1]-A->row[i];
                if (length[i] > maxrowlength)
                     maxrowlength = length[i];
            }
            A->max_nnz_row = maxrowlength;
        }
        // Dense
        else if ( A->storage_type == Magma_DENSE ) {
            CHECK( magma_index_malloc_cpu( &length, A->num_rows));

            for( i=0; i<A->num_rows; i++ ) {
                length[i] = 0;
                for( j=0; j<A->num_cols; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[i*A->num_cols + j] ) != 0. )
                        length[i]++;
                    }
                if (length[i] > maxrowlength)
                     maxrowlength = length[i];
            }
            A->max_nnz_row = maxrowlength;
        }
    } // end CPU case

    else {
        printf("error: matrix not on CPU.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    magma_free( length );
    return info;
}
Example #2
0
magma_int_t
magma_zmLdiagadd(
    magma_z_matrix *L,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix LL={Magma_CSR};

    if( L->row[1]==1 ){        // lower triangular with unit diagonal
        //printf("L lower triangular.\n");
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( *L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L->row[1]==0 ){ // strictly lower triangular
        //printf("L strictly lower triangular.\n");
        CHECK( magma_zmtransfer( *L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        magma_free_cpu( LL.val );
        LL.nnz = L->nnz+L->num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));
        magma_int_t z=0;
        for( magma_int_t i=0; i<L->num_rows; i++){
            LL.row[i] = z;
            for( magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                LL.val[z] = L->val[j];
                LL.col[z] = L->col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
        LL.nnz = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
    }
    magma_zmfree( L, queue );
    CHECK( magma_zmtransfer(LL, L, Magma_CPU, Magma_CPU, queue ));

cleanup:
    if( info != 0 ){
        magma_zmfree( L, queue );
    }
    magma_zmfree( &LL, queue );
    return info;
}
Example #3
0
magma_int_t
magma_ddomainoverlap(
    magma_index_t num_rows,
    magma_int_t *num_indices,
    magma_index_t *rowptr,
    magma_index_t *colidx,
    magma_index_t *x,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_int_t blocksize=128;

    magma_int_t row=0, col=0, num_ind=0, offset=0;

    magma_index_t *tmp_x;
    CHECK( magma_index_malloc_cpu( &tmp_x, blocksize ));

    for(magma_int_t i=0; i<blocksize; i++ ){
        tmp_x[i] = -1;
    }
    
    for(magma_int_t i=0; i<num_rows; i++){
        row = i;
        for(magma_int_t j=rowptr[row]; j<rowptr[row+1]; j++){
            col = colidx[j];
            int doubleitem = 0;
            for(magma_int_t k=0; k<blocksize; k++){
              if( tmp_x[k] == col )
                  doubleitem = 1;
            }
            if( doubleitem == 0 ){
                tmp_x[num_ind] = col;
                num_ind++;
                (*num_indices)++;
            }
            if( num_ind == blocksize || j == rowptr[num_rows]-1 ){
                magma_dindexcopy( num_ind, offset, tmp_x, x, queue );
                offset=offset+num_ind;
                num_ind = 0;
                break;
            }
        }
    }

cleanup:
    magma_free_cpu( tmp_x );
    return info;
}
Example #4
0
magma_int_t
magma_zinitguess(
    magma_z_matrix A,
    magma_z_matrix *L,
    magma_z_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );
    
    magma_z_matrix hAL={Magma_CSR}, hAU={Magma_CSR}, dAL={Magma_CSR}, 
    dAU={Magma_CSR}, dALU={Magma_CSR}, hALU={Magma_CSR}, hD={Magma_CSR}, 
    dD={Magma_CSR}, dL={Magma_CSR}, hL={Magma_CSR};
    magma_int_t i,j;
    
    magma_int_t offdiags = 0;
    magma_index_t *diag_offset;
    magmaDoubleComplex *diag_vals=NULL;

    // need only lower triangular
    hAL.diagorder_type = Magma_VALUE;
    CHECK( magma_zmconvert( A, &hAL, Magma_CSR, Magma_CSRL, queue ));
    //magma_zmconvert( hAL, &hALCOO, Magma_CSR, Magma_CSRCOO );

    // need only upper triangular
    //magma_zmconvert( A, &hAU, Magma_CSR, Magma_CSRU );
    CHECK( magma_z_cucsrtranspose(  hAL, &hAU, queue ));
    //magma_zmconvert( hAU, &hAUCOO, Magma_CSR, Magma_CSRCOO );
    CHECK( magma_zmtransfer( hAL, &dAL, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_z_spmm( one, dAL, dAU, &dALU, queue ));
    CHECK( magma_zmtransfer( dALU, &hALU, Magma_DEV, Magma_CPU, queue ));

    magma_zmfree( &dAU, queue);
    magma_zmfree( &dALU, queue);


    CHECK( magma_zmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));
    diag_offset[0] = 0;
    diag_vals[0] = MAGMA_Z_MAKE( 1.0, 0.0 );
    CHECK( magma_zmgenerator( hALU.num_rows, offdiags, diag_offset, diag_vals, &hD, queue ));
    magma_zmfree( &hALU, queue );

    
    for(i=0; i<hALU.num_rows; i++){
        for(j=hALU.row[i]; j<hALU.row[i+1]; j++){
            if( hALU.col[j] == i ){
                //printf("%d %d  %d == %d -> %f   -->", i, j, hALU.col[j], i, hALU.val[j]);
                hD.val[i] = MAGMA_Z_MAKE(
                        1.0 / sqrt(fabs(MAGMA_Z_REAL(hALU.val[j])))  , 0.0 );
                //printf("insert %f at %d\n", hD.val[i], i);
            }
        }
    }


    CHECK( magma_zmtransfer( hD, &dD, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree( &hD, queue);

    CHECK( magma_z_spmm( one, dD, dAL, &dL, queue ));
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );



/*
    // check for diagonal = 1
    magma_z_matrix dLt={Magma_CSR}, dLL={Magma_CSR}, LL={Magma_CSR};
    CHECK( magma_z_cucsrtranspose(  dL, &dLt ));
    CHECK( magma_zcuspmm( dL, dLt, &dLL ));
    CHECK( magma_zmtransfer( dLL, &LL, Magma_DEV, Magma_CPU ));
    //for(i=0; i < hALU.num_rows; i++) {
    for(i=0; i < 100; i++) {
        for(j=hALU.row[i]; j < hALU.row[i+1]; j++) {
            if( hALU.col[j] == i ){
                printf("%d %d -> %f   -->", i, i, LL.val[j]);
            }
        }
    }
*/
    CHECK( magma_zmtransfer( dL, &hL, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( hL, L, Magma_CSR, Magma_CSRCOO, queue ));



cleanup:
    if( info !=0 ){
        magma_zmfree( L, queue  );
        magma_zmfree( U, queue  );
    }
    magma_zmfree( &dAU, queue);
    magma_zmfree( &dALU, queue);
    magma_zmfree( &dL, queue );
    magma_zmfree( &hL, queue );
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );
    magma_zmfree( &hD, queue);
    magma_zmfree( &hALU, queue );
    return info;
}
Example #5
0
magma_int_t
magma_zilures(
    magma_z_matrix A,
    magma_z_matrix L,
    magma_z_matrix U,
    magma_z_matrix *LU,
    real_Double_t *res,
    real_Double_t *nonlinres,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex tmp;
    real_Double_t tmp2;
    magma_int_t i, j, k;
    
    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );

    magma_z_matrix LL={Magma_CSR}, L_d={Magma_CSR}, U_d={Magma_CSR}, LU_d={Magma_CSR};

    if( L.row[1]==1 ){        // lower triangular with unit diagonal
        //printf("L lower triangular.\n");
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L.row[1]==0 ){ // strictly lower triangular
        //printf("L strictly lower triangular.\n");
        CHECK( magma_zmtransfer( L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        magma_free_cpu( LL.val );
        LL.nnz = L.nnz+L.num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));
        magma_int_t z=0;
        for (i=0; i < L.num_rows; i++) {
            LL.row[i] = z;
            for (j=L.row[i]; j < L.row[i+1]; j++) {
                LL.val[z] = L.val[j];
                LL.col[z] = L.col[j];
                z++;
            }
            // add unit diagonal
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
    }

    CHECK( magma_zmtransfer( LL, &L_d, Magma_CPU, Magma_DEV, queue  ));
    CHECK( magma_zmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue  ));
    magma_zmfree( &LL, queue );
    CHECK( magma_z_spmm( one, L_d, U_d, &LU_d, queue ));



    CHECK( magma_zmtransfer(LU_d, LU, Magma_DEV, Magma_CPU, queue ));
    magma_zmfree( &L_d, queue );
    magma_zmfree( &U_d, queue );
    magma_zmfree( &LU_d, queue );

    // compute Frobenius norm of A-LU
    for(i=0; i<A.num_rows; i++){
        for(j=A.row[i]; j<A.row[i+1]; j++){
            magma_index_t lcol = A.col[j];
            for(k=LU->row[i]; k<LU->row[i+1]; k++){
                if( LU->col[k] == lcol ){
                    tmp = MAGMA_Z_MAKE(
                        MAGMA_Z_REAL( LU->val[k] )- MAGMA_Z_REAL( A.val[j] )
                                                , 0.0 );
                    LU->val[k] = tmp;

                    tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(tmp) );
                    (*nonlinres) = (*nonlinres) + tmp2*tmp2;
                }
            }
        }
    }

    for(i=0; i<LU->num_rows; i++){
        for(j=LU->row[i]; j<LU->row[i+1]; j++){
            tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(LU->val[j]) );
            (*res) = (*res) + tmp2* tmp2;
        }
    }

    (*res) =  sqrt((*res));
    (*nonlinres) =  sqrt((*nonlinres));

cleanup:
    if( info !=0 ){
        magma_zmfree( LU, queue  );
    }
    magma_zmfree( &LL, queue );
    magma_zmfree( &L_d, queue  );
    magma_zmfree( &U_d, queue  );
    magma_zmfree( &LU_d, queue  );
    return info;
}
Example #6
0
magma_int_t
magma_cmgenerator(
    magma_int_t n,
    magma_int_t offdiags,
    magma_index_t *diag_offset,
    magmaFloatComplex *diag_vals,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_c_matrix B={Magma_CSR};
    
    B.val = NULL;
    B.col = NULL;
    B.row = NULL;
    B.rowidx = NULL;
    B.blockinfo = NULL;
    B.diag = NULL;
    B.dval = NULL;
    B.dcol = NULL;
    B.drow = NULL;
    B.drowidx = NULL;
    B.ddiag = NULL;
    B.list = NULL;
    B.dlist = NULL;
    
    B.num_rows = n;
    B.num_cols = n;
    B.fill_mode = MagmaFull;
    B.memory_location = Magma_CPU;
    B.storage_type = Magma_ELLPACKT;
    B.max_nnz_row = (2*offdiags+1);

    CHECK( magma_cmalloc_cpu( &B.val, B.max_nnz_row*n ));
    CHECK( magma_index_malloc_cpu( &B.col, B.max_nnz_row*n ));
    
    for( int i=0; i<n; i++ ) { // stride over rows
        // stride over the number of nonzeros in each row
        // left of diagonal
        for( int j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j ] = diag_vals[ offdiags - j ];
            B.col[ i*B.max_nnz_row + j ] = -1 * diag_offset[ offdiags-j ] + i;
        }
        // elements on the diagonal
        B.val[ i*B.max_nnz_row + offdiags ] = diag_vals[ 0 ];
        B.col[ i*B.max_nnz_row + offdiags ] = i;
        // right of diagonal
        for( int j=0; j<offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j + offdiags +1 ] = diag_vals[ j+1 ];
            B.col[ i*B.max_nnz_row + j + offdiags +1 ] = diag_offset[ j+1 ] + i;
        }
    }

    // set invalid entries to zero
    for( int i=0; i<n; i++ ) { // stride over rows
        for( int j=0; j<B.max_nnz_row; j++ ) { // nonzeros in every row
            if ( (B.col[i*B.max_nnz_row + j] < 0) ||
                    (B.col[i*B.max_nnz_row + j] >= n) ) {
                B.val[ i*B.max_nnz_row + j ] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    B.nnz = 0;

    for( int i=0; i<n; i++ ) { // stride over rows
        for( int j=0; j<B.max_nnz_row; j++ ) { // nonzeros in every row
            if ( MAGMA_C_REAL( B.val[i*B.max_nnz_row + j]) != 0.0 )
                B.nnz++;
        }
    }
    B.true_nnz = B.nnz;
    // converting it to CSR will remove the invalit entries completely
    CHECK( magma_cmconvert( B, A, Magma_ELLPACKT, Magma_CSR, queue ));

cleanup:
    if( info != 0 ){
        magma_cmfree( &B, queue );
    }
    return info;
}
Example #7
0
/**
    Purpose
    -------

    Computes the diameter of a sparse matrix and stores the value in diameter.


    Arguments
    ---------

    @param[in,out]
    A           magma_z_matrix*
                sparse matrix
    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @ingroup magmasparse_zaux
    ********************************************************************/
extern "C" magma_int_t
magma_zdiameter(
    magma_z_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_index_t i, j, tmp,  *dim=NULL, maxdim=0;
    
    // check whether matrix on CPU
    if ( A->memory_location == Magma_CPU ) {
        // CSR
        if ( A->storage_type == Magma_CSR ) {
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows));
            for( i=0; i<A->num_rows; i++ ) {
                dim[i] = 0;
                for( j=A->row[i]; j<A->row[i+1]; j++ ) {
                   // if ( MAGMA_Z_REAL(A->val[j]) > THRESHOLD ) {
                        tmp = abs( i - A->col[j] );
                        if ( tmp > dim[i] )
                            dim[i] = tmp;
                   // }
                }
                if ( dim[i] > maxdim )
                     maxdim = dim[i];
            }
            A->diameter = maxdim;
        }
        // Dense
        else if ( A->storage_type == Magma_DENSE ) {
            magma_index_t i, j, tmp,  *dim, maxdim=0;
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows));
            for( i=0; i<A->num_rows; i++ ) {
                dim[i] = 0;
                for( j=0; j<A->num_cols; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[i*A->num_cols + j] ) !=  0.0 ) {
                        tmp = abs( i -j );
                        if ( tmp > dim[i] )
                            dim[i] = tmp;
                    }
                }
                if ( dim[i] > maxdim )
                     maxdim = dim[i];
            }
            A->diameter = maxdim;
        }
        // ELLPACK
        else if ( A->storage_type == Magma_ELL ) {
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows));
            for( i=0; i<A->num_rows; i++ ) {
                dim[i] = 0;
                for( j=i*A->max_nnz_row; j<(i+1)*A->max_nnz_row; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[j] ) > THRESHOLD ) {
                        tmp = abs( i - A->col[j] );
                        if ( tmp > dim[i] )
                            dim[i] = tmp;
                    }
                }
                if ( dim[i] > maxdim )
                     maxdim = dim[i];
            }
            A->diameter = maxdim;
        }
        // ELL
        else if ( A->storage_type == Magma_ELL ) {
            printf("error:format not supported.\n");
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    } // end CPU case

    else {
        printf("error: matrix not on CPU.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    magma_free( &dim );
    return info;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Debugging file
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_d_solver_par solver_par;
    magma_d_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    precond_par.solver = Magma_JACOBI;

    magma_dsolverinfo_init( &solver_par, &precond_par );
    
    double one = MAGMA_D_MAKE(1.0, 0.0);
    double zero = MAGMA_D_MAKE(0.0, 0.0);

    magma_d_sparse_matrix A, B, B_d;
    magma_d_vector x, b;

    // generate matrix of desired structure and size
    magma_int_t n=10;   // size is n*n
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset;
    double *diag_vals;
    magma_dmalloc_cpu( &diag_vals, offdiags+1 );
    magma_index_malloc_cpu( &diag_offset, offdiags+1 );
    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;
    diag_vals[0] = MAGMA_D_MAKE( 4.0, 0.0 );
    diag_vals[1] = MAGMA_D_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_D_MAKE( -1.0, 0.0 );
    magma_dmgenerator( nn, offdiags, diag_offset, diag_vals, &A );

    // convert marix into desired format
    B.storage_type = Magma_SELLC;
    B.blocksize = 8;
    B.alignment = 8;
    // scale matrix
    magma_dmscale( &A, Magma_UNITDIAG );

    magma_d_mconvert( A, &B, Magma_CSR, B.storage_type );
    magma_d_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );

    // vectors and initial guess
    magma_d_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_d_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_d_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_d_vfree(&x);
    magma_d_vinit( &x, Magma_DEV, A.num_cols, zero );

    // solver
    magma_dpcg( B_d, b, &x, &solver_par, &precond_par );

    // solverinfo
    magma_dsolverinfo( &solver_par, &precond_par );

    magma_dsolverinfo_free( &solver_par, &precond_par );

    magma_d_mfree(&B_d);
    magma_d_mfree(&B);
    magma_d_mfree(&A); 
    magma_d_vfree(&x);
    magma_d_vfree(&b);

    TESTING_FINALIZE();
    return 0;
}
Example #9
0
magma_int_t
magma_dsymbolic_ilu(
  const magma_int_t levfill,                 /* level of fill */
  const magma_int_t n,                       /* order of matrix */
  magma_int_t *nzl,                          /* input-output */
  magma_int_t *nzu,                          /* input-output */
  const mwIndex *ia,
  const mwIndex *ja,    /* input */
  mwIndex *ial,
  mwIndex *jal,              /* output lower factor structure */
  mwIndex *iau,
  mwIndex *jau)              /* output upper factor structure */
{
    magma_int_t info = 0;
    
    magma_int_t i;
    magma_index_t *lnklst=NULL;
    magma_index_t *curlev=NULL;
    magma_index_t *levels=NULL;
    magma_index_t *iwork=NULL;
    
    magma_int_t knzl = 0;
    magma_int_t knzu = 0;

    CHECK( magma_index_malloc_cpu( &lnklst, n ));
    CHECK( magma_index_malloc_cpu( &curlev, n ));
    CHECK( magma_index_malloc_cpu( &levels, *nzu ));
    CHECK( magma_index_malloc_cpu( &iwork, n ));
    
    for(magma_int_t t=0; t<n; t++){
        lnklst[t] = 0;
        curlev[t] = 0;
        iwork[t] = 0;
    }

    for(magma_int_t t=0; t<*nzu; t++){
        levels[t] = 0;
    }

    ial[0] = 0;
    iau[0] = 0;

    for (i=0; i<n; i++) {
        //printf("check line %d\n", i);
        magma_int_t first, next, j;

        /* copy column indices of row into workspace and sort them */

        magma_int_t len = ia[i+1] - ia[i];
        next = 0;
        for (j=ia[i]; j<ia[i+1]; j++)
            iwork[next++] = ja[j];
        magma_dshell_sort(len, iwork);
        //printf("check2 line %d\n", i);
        /* construct implied linked list for row */

        first = iwork[0];
        curlev[first] = 0;

        for (j=0; j<=len-2; j++)
        {
            lnklst[iwork[j]] = iwork[j+1];
            curlev[iwork[j]] = 0;
        }
        // printf("check3 line %d iwork[len-1]:%d\n", i, iwork[len-1]);
        lnklst[iwork[len-1]] = n;
        curlev[iwork[len-1]] = 0;

        /* merge with rows in U */
        // printf("check4 line %d lnklst[iwork[len-1]]:%d\n", i, lnklst[iwork[len-1]]);
        next = first;
        // printf("next:%d (!<) first:%d\n", next, i);
        while (next < i)
        {
          //  printf("check line %d while %d\n", i, next);
            magma_int_t oldlst = next;
            magma_int_t nxtlst = lnklst[next];
            magma_int_t row = next;
            magma_int_t ii;

            /* scan row */

            for (ii=iau[row]+1; ii<iau[row+1]; /*nop*/)
            {
                if (jau[ii] < nxtlst)
                {
                    /* new fill-in */
                    magma_int_t newlev = curlev[row] + levels[ii] + 1;
                    if (newlev <= levfill)
                    {
                        lnklst[oldlst]  = jau[ii];
                        lnklst[jau[ii]] = nxtlst;
                        oldlst = jau[ii];
                        curlev[jau[ii]] = newlev;
                    }
                    ii++;
                }
                else if (jau[ii] == nxtlst)
                {
                    magma_int_t newlev;
                    oldlst = nxtlst;
                    nxtlst = lnklst[oldlst];
                    newlev = curlev[row] + levels[ii] + 1;
                    curlev[jau[ii]] = min( curlev[jau[ii]], newlev );
                    ii++;
                }
                else /* (jau[ii] > nxtlst) */
                {
                    oldlst = nxtlst;
                    nxtlst = lnklst[oldlst];
                }
            }
            next = lnklst[next];
        }
        
        /* gather the pattern magma_int_to L and U */
       // printf("check line5 %d\n", i);
        next = first;
        while (next < i)
        {
            if (knzl >= *nzl) {
                printf("ILU: STORAGE parameter value %d<%d too small.\n", int(*nzl), int(knzl));
                printf("Increase STORAGE parameter.\n");
                info = -1;
                goto cleanup;
            }
            jal[knzl++] = next;
            next = lnklst[next];
        }
        ial[i+1] = knzl;
        //  printf("check line6 %d\n", i);
        if (next != i)
        {
            printf("ILU structurally singular.\n");
            /*
                assert(knzu < *nzu);
                levels[knzu] = 2*n;
                jau[knzu++] = i;
            */
        }
        //  printf("check line7 %d\n", i);
        //                 printf("next:%d  n:%d \n", next, n);
        while (next < n)
        {
            if (knzu >= *nzu) {
                printf("ILU: STORAGE parameter value %d < %d too small.\n", int(*nzu), int(knzu));
                printf("Increase STORAGE parameter.\n");
                info = -1;
                goto cleanup;
            }
            // printf("1 knzu:%d  next:%d \n", knzu, next );
            levels[knzu] = curlev[next];
            //  printf("2 knzu:%d  next:%d \n", knzu, next );
            jau[knzu++] = next;
            //  printf("3 knzu:%d  next:%d \n", knzu, next );
            next = lnklst[next];
            //  printf("4 next:%d  n:%d \n", next, n);
        }
        iau[i+1] = knzu;
    }

    *nzl = knzl;
    *nzu = knzu;

   // printf("ende\n");

#if 0
    printf( "Actual nnz for ILU: %d\n", *nzl + *nzu );
#endif

cleanup:
    magma_free_cpu(lnklst);
    magma_free_cpu(curlev);
    magma_free_cpu(levels);
    magma_free_cpu(iwork);
    
    return info;
}
Example #10
0
extern "C" magma_int_t
magma_zmslice(
    magma_int_t num_slices,
    magma_int_t slice,
    magma_z_matrix A, 
    magma_z_matrix *B,
    magma_z_matrix *ALOC,
    magma_z_matrix *ANLOC,
    magma_index_t *comm_i,
    magmaDoubleComplex *comm_v,
    magma_int_t *start,
    magma_int_t *end,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    if( A.num_rows != A.num_cols ){
        printf("%%  error: only supported for square matrices.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if ( A.memory_location == Magma_CPU
            && A.storage_type == Magma_CSR ){
        CHECK( magma_zmconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( B->col );
        magma_free_cpu( B->val );
        CHECK( magma_zmconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ALOC->col );
        magma_free_cpu( ALOC->row );
        magma_free_cpu( ALOC->val );
        CHECK( magma_zmconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ANLOC->col );
        magma_free_cpu( ANLOC->row );
        magma_free_cpu( ANLOC->val );
        
        magma_int_t i,j,k, nnz, nnz_loc=0, loc_row = 0, nnz_nloc = 0;
        magma_index_t col;
        magma_int_t size = magma_ceildiv( A.num_rows, num_slices ); 
        magma_int_t lstart = slice*size;
        magma_int_t lend = min( (slice+1)*size, A.num_rows );
        // correct size for last slice
        size = lend-lstart;
        CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
        CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );
        
        // count elements for slice - identity for rest
        nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
        CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
        CHECK( magma_zmalloc_cpu( &B->val, nnz ) );         
        
        // for the communication plan
        for( i=0; i<A.num_rows; i++ ) {
            comm_i[i] = 0;
            comm_v[i] = MAGMA_Z_ZERO;
        }
        
        k=0;
        B->row[i] = 0;
        ALOC->row[0] = 0;
        ANLOC->row[0] = 0;
        // identity above slice
        for( i=0; i<lstart; i++ ) {
            B->row[i+1]   = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }
        
        // slice        
        for( i=lstart; i<lend; i++ ) {
            B->row[i+1]   = B->row[i] + (A.row[i+1]-A.row[i]);
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                B->val[k] = A.val[j];
                col = A.col[j];
                B->col[k] = col;
                // communication plan
                if( col<lstart || col>=lend ){
                    comm_i[ col ] = 1;
                    comm_v[ col ] = comm_v[ col ] 
                            + MAGMA_Z_MAKE( MAGMA_Z_ABS( A.val[j] ), 0.0 );
                    nnz_nloc++;
                } else {
                    nnz_loc++;   
                }
                k++;
            }
            loc_row++;
            ALOC->row[ loc_row ] = nnz_loc;
            ANLOC->row[ loc_row ] = nnz_nloc;
        }
        CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
        CHECK( magma_zmalloc_cpu( &ALOC->val, nnz_loc ) ); 
        ALOC->num_rows = size;
        ALOC->num_cols = size;
        ALOC->nnz = nnz_loc;
        
        CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
        CHECK( magma_zmalloc_cpu( &ANLOC->val, nnz_nloc ) ); 
        ANLOC->num_rows = size;
        ANLOC->num_cols = A.num_cols;
        ANLOC->nnz = nnz_nloc;
        
        nnz_loc = 0;
        nnz_nloc = 0;
        // local/nonlocal matrix        
        for( i=lstart; i<lend; i++ ) {
            for( j=A.row[i]; j<A.row[i+1]; j++ ){
                col = A.col[j];
                // insert only in local part in ALOC, nonlocal in ANLOC
                if( col<lstart || col>=lend ){
                    ANLOC->val[ nnz_nloc ] = A.val[j];
                    ANLOC->col[ nnz_nloc ] = col;  
                    nnz_nloc++;
                } else {
                    ALOC->val[ nnz_loc ] = A.val[j];
                    ALOC->col[ nnz_loc ] = col-lstart;  
                    nnz_loc++;
                }
            }
        }
        
        // identity below slice
        for( i=lend; i<A.num_rows; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }
        B->nnz = k;
        *start = lstart;
        *end = lend;
        
    }
    else {
        printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n", 
                int(A.memory_location), int(A.storage_type) );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    return info;
}
Example #11
0
magma_int_t
magma_d_csr_mtx(
    magma_d_matrix *A,
    const char *filename,
    magma_queue_t queue )
{
    char buffer[ 1024 ];
    magma_int_t info = 0;

    int csr_compressor = 0;       // checks for zeros in original file
    
    magma_d_matrix B={Magma_CSR};

    magma_index_t *coo_col = NULL;
    magma_index_t *coo_row = NULL;
    double *coo_val = NULL;
    double *new_val = NULL;
    magma_index_t* new_row = NULL;
    magma_index_t* new_col = NULL;
    magma_int_t symmetric = 0;
    
    std::vector< std::pair< magma_index_t, double > > rowval;
    
    FILE *fid = NULL;
    MM_typecode matcode;
    fid = fopen(filename, "r");
    
    if (fid == NULL) {
        printf("%% Unable to open file %s\n", filename);
        info = MAGMA_ERR_NOT_FOUND;
        goto cleanup;
    }
    
    printf("%% Reading sparse matrix from file (%s):", filename);
    fflush(stdout);
    
    if (mm_read_banner(fid, &matcode) != 0) {
        printf("\n%% Could not process Matrix Market banner: %s.\n", matcode);
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if (!mm_is_valid(matcode)) {
        printf("\n%% Invalid Matrix Market file.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    
    if ( ! ( ( mm_is_real(matcode)    ||
               mm_is_integer(matcode) ||
               mm_is_pattern(matcode) ||
               mm_is_real(matcode) ) &&
             mm_is_coordinate(matcode)  &&
             mm_is_sparse(matcode) ) )
    {
        mm_snprintf_typecode( buffer, sizeof(buffer), matcode );
        printf("\n%% Sorry, MAGMA-sparse does not support Market Market type: [%s]\n", buffer );
        printf("%% Only real-valued or pattern coordinate matrices are supported.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    magma_index_t num_rows, num_cols, num_nonzeros;
    if (mm_read_mtx_crd_size(fid, &num_rows, &num_cols, &num_nonzeros) != 0) {
        info = MAGMA_ERR_UNKNOWN;
        goto cleanup;
    }
    
    A->storage_type    = Magma_CSR;
    A->memory_location = Magma_CPU;
    A->num_rows        = num_rows;
    A->num_cols        = num_cols;
    A->nnz             = num_nonzeros;
    A->fill_mode       = MagmaFull;
    
    CHECK( magma_index_malloc_cpu( &coo_col, A->nnz ) );
    CHECK( magma_index_malloc_cpu( &coo_row, A->nnz ) );
    CHECK( magma_dmalloc_cpu( &coo_val, A->nnz ) );

    if (mm_is_real(matcode) || mm_is_integer(matcode)) {
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            double VAL;  // always read in a double and convert later if necessary
            
            fscanf(fid, " %d %d %lf \n", &ROW, &COL, &VAL);
            if ( VAL == 0 )
                csr_compressor = 1;
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( VAL, 0.);
        }
    } else if (mm_is_pattern(matcode) ) {
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            
            fscanf(fid, " %d %d \n", &ROW, &COL );
            
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( 1.0, 0.);
        }
    } else if (mm_is_real(matcode) ){
       for(magma_int_t i = 0; i < A->nnz; ++i) {
            magma_index_t ROW, COL;
            double VAL, VALC;  // always read in a double and convert later if necessary
            
            fscanf(fid, " %d %d %lf %lf\n", &ROW, &COL, &VAL, &VALC);
            
            coo_row[i] = ROW - 1;
            coo_col[i] = COL - 1;
            coo_val[i] = MAGMA_D_MAKE( VAL, VALC);
        }
        // printf(" ...successfully read real matrix... ");
    } else {
        printf("\n%% Unrecognized data type\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    fclose(fid);
    fid = NULL;
    printf(" done. Converting to CSR:");
    fflush(stdout);
    
    A->sym = Magma_GENERAL;


    if( mm_is_symmetric(matcode) ) {
        symmetric = 1;
    }
    if ( mm_is_symmetric(matcode) || mm_is_symmetric(matcode) ) { 
                                        // duplicate off diagonal entries
        printf("\n%% Detected symmetric case.");
        A->sym = Magma_SYMMETRIC;
        magma_index_t off_diagonals = 0;
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            if (coo_row[i] != coo_col[i])
                ++off_diagonals;
        }
        magma_index_t true_nonzeros = 2*off_diagonals + (A->nnz - off_diagonals);
        
        //printf("%% total number of nonzeros: %d\n%%", int(A->nnz));

        CHECK( magma_index_malloc_cpu( &new_row, true_nonzeros ));
        CHECK( magma_index_malloc_cpu( &new_col, true_nonzeros ));
        CHECK( magma_dmalloc_cpu( &new_val, true_nonzeros ));
        
        magma_index_t ptr = 0;
        for(magma_int_t i = 0; i < A->nnz; ++i) {
            if (coo_row[i] != coo_col[i]) {
                new_row[ptr] = coo_row[i];
                new_col[ptr] = coo_col[i];
                new_val[ptr] = coo_val[i];
                ptr++;
                new_col[ptr] = coo_row[i];
                new_row[ptr] = coo_col[i];
                new_val[ptr] = (symmetric == 0) ? coo_val[i] : conj(coo_val[i]);
                ptr++;
            } else {
                new_row[ptr] = coo_row[i];
                new_col[ptr] = coo_col[i];
                new_val[ptr] = coo_val[i];
                ptr++;
            }
        }
        
        magma_free_cpu(coo_row);
        magma_free_cpu(coo_col);
        magma_free_cpu(coo_val);

        coo_row = new_row;
        coo_col = new_col;
        coo_val = new_val;
        A->nnz = true_nonzeros;
        //printf("total number of nonzeros: %d\n", A->nnz);
    } // end symmetric case
    
    CHECK( magma_dmalloc_cpu( &A->val, A->nnz ));
    CHECK( magma_index_malloc_cpu( &A->col, A->nnz ));
    CHECK( magma_index_malloc_cpu( &A->row, A->num_rows+1 ));
    
    // original code from Nathan Bell and Michael Garland
    for (magma_index_t i = 0; i < num_rows; i++)
        (A->row)[i] = 0;
    
    for (magma_index_t i = 0; i < A->nnz; i++)
        (A->row)[coo_row[i]]++;
        
    // cumulative sum the nnz per row to get row[]
    magma_int_t cumsum;
    cumsum = 0;
    for(magma_int_t i = 0; i < num_rows; i++) {
        magma_index_t temp = (A->row)[i];
        (A->row)[i] = cumsum;
        cumsum += temp;
    }
    (A->row)[num_rows] = A->nnz;
    
    // write Aj,Ax into Bj,Bx
    for(magma_int_t i = 0; i < A->nnz; i++) {
        magma_index_t row_ = coo_row[i];
        magma_index_t dest = (A->row)[row_];
        (A->col)[dest] = coo_col[i];
        (A->val)[dest] = coo_val[i];
        (A->row)[row_]++;
    }    
    magma_free_cpu(coo_row);
    magma_free_cpu(coo_col);
    magma_free_cpu(coo_val);
    coo_row = NULL;
    coo_col = NULL;
    coo_val = NULL;

    int last;
    last = 0;
    for(int i = 0; i <= num_rows; i++) {
        int temp    = (A->row)[i];
        (A->row)[i] = last;
        last        = temp;
    }
    (A->row)[A->num_rows] = A->nnz;
    
    // sort column indices within each row
    // copy into vector of pairs (column index, value), sort by column index, then copy back
    for (magma_index_t k=0; k < A->num_rows; ++k) {
        int kk  = (A->row)[k];
        int len = (A->row)[k+1] - (A->row)[k];
        rowval.resize( len );
        for( int i=0; i < len; ++i ) {
            rowval[i] = std::make_pair( (A->col)[kk+i], (A->val)[kk+i] );
        }
        std::sort( rowval.begin(), rowval.end(), compare_first );
        for( int i=0; i < len; ++i ) {
            (A->col)[kk+i] = rowval[i].first;
            (A->val)[kk+i] = rowval[i].second;
        }
    }

    if ( csr_compressor > 0) { // run the CSR compressor to remove zeros
        //printf("removing zeros: ");
        CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue ));
        CHECK( magma_d_csr_compressor(
            &(A->val), &(A->row), &(A->col),
            &B.val, &B.row, &B.col, &B.num_rows, queue ));
        B.nnz = B.row[num_rows];
        //printf(" remaining nonzeros:%d ", B.nnz);
        magma_free_cpu( A->val );
        magma_free_cpu( A->row );
        magma_free_cpu( A->col );
        CHECK( magma_dmtransfer( B, A, Magma_CPU, Magma_CPU, queue ));
        //printf("done.\n");
    }
    A->true_nnz = A->nnz;
    printf(" done.\n");
cleanup:
    if ( fid != NULL ) {
        fclose( fid );
        fid = NULL;
    }
    magma_dmfree( &B, queue );
    magma_free_cpu(coo_row);
    magma_free_cpu(coo_col);
    magma_free_cpu(coo_val);
    return info;
}
Example #12
0
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix *D,
    magma_c_sparse_matrix *R,
    magma_queue_t queue )
{
    if (  A.memory_location == Magma_CPU &&
            (   A.storage_type == Magma_CSR ||
                A.storage_type == Magma_CSRCOO ) ) {

        magma_int_t i, k, j, nnz_diag, nnz_offd;
        
        magma_int_t stat_cpu = 0, stat_dev = 0;
        D->val = NULL;
        D->col = NULL;
        D->row = NULL;
        D->rowidx = NULL;
        D->blockinfo = NULL;
        D->diag = NULL;
        D->dval = NULL;
        D->dcol = NULL;
        D->drow = NULL;
        D->drowidx = NULL;
        D->ddiag = NULL;
        R->val = NULL;
        R->col = NULL;
        R->row = NULL;
        R->rowidx = NULL;
        R->blockinfo = NULL;
        R->diag = NULL;
        R->dval = NULL;
        R->dcol = NULL;
        R->drow = NULL;
        R->drowidx = NULL;
        R->ddiag = NULL;

        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices
        for( i=0; i<A.num_rows; i+=bsize )
            for( k=i; k<min(A.num_rows,i+bsize); k++ )
                for( j=A.row[k]; j<A.row[k+1]; j++ )
                if ( A.col[j] < i )
                    nnz_offd++;
                else if ( A.col[j] < i+bsize )
                    nnz_diag++;
                else
                    nnz_offd++;

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;

        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;

        stat_cpu += magma_cmalloc_cpu( &D->val, nnz_diag );
        stat_cpu += magma_index_malloc_cpu( &D->row, A.num_rows+1 );
        stat_cpu += magma_index_malloc_cpu( &D->col, nnz_diag );

        stat_cpu += magma_cmalloc_cpu( &R->val, nnz_offd );
        stat_cpu += magma_index_malloc_cpu( &R->row, A.num_rows+1 );
        stat_cpu += magma_index_malloc_cpu( &R->col, nnz_offd );
        
        if( stat_cpu != 0 ){
            magma_c_mfree( D, queue );
            magma_c_mfree( R, queue );
            return MAGMA_ERR_HOST_ALLOC;
        }
        

        // Fill up the new sparse matrices  
        D->row[0] = 0;
        R->row[0] = 0;

        nnz_offd = nnz_diag = 0;
        for( i=0; i<A.num_rows; i+=bsize) {
            for( k=i; k<min(A.num_rows,i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
     
                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;  
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // larger than diagonal remain as before
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right 
                        // to have room for the diagonal
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;  
                        nnz_offd++;
                    }
                }
            }
        }
        return MAGMA_SUCCESS; 
    }
    else {
        magma_c_sparse_matrix Ah, ACSR, DCSR, RCSR, Dh, Rh;
        magma_c_mtransfer( A, &Ah, A.memory_location, Magma_CPU, queue );
        magma_c_mconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue );

        magma_ccsrsplit( bsize, ACSR, &DCSR, &RCSR, queue );

        magma_c_mconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue );
        magma_c_mconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue );

        magma_c_mtransfer( Dh, D, Magma_CPU, A.memory_location, queue );
        magma_c_mtransfer( Rh, R, Magma_CPU, A.memory_location, queue );

        magma_c_mfree( &Ah, queue );
        magma_c_mfree( &ACSR, queue );
        magma_c_mfree( &Dh, queue );
        magma_c_mfree( &DCSR, queue );
        magma_c_mfree( &Rh, queue );
        magma_c_mfree( &RCSR, queue );

        return MAGMA_SUCCESS; 
    }
}
Example #13
0
extern "C" magma_int_t
magma_dmtransfer(
    magma_d_matrix A,
    magma_d_matrix *B,
    magma_location_t src,
    magma_location_t dst,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    B->val = NULL;
    B->diag = NULL;
    B->row = NULL;
    B->rowidx = NULL;
    B->col = NULL;
    B->blockinfo = NULL;
    B->dval = NULL;
    B->ddiag = NULL;
    B->drow = NULL;
    B->drowidx = NULL;
    B->dcol = NULL;
    B->diag = NULL;
    B->ddiag = NULL;
    B->list = NULL;
    B->dlist = NULL;
    

    // first case: copy matrix from host to device
    if ( src == Magma_CPU && dst == Magma_DEV ) {
        //CSR-type
        if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
                                        || A.storage_type == Magma_CSC
                                        || A.storage_type == Magma_CSRD
                                        || A.storage_type == Magma_CSRL
                                        || A.storage_type == Magma_CSRU ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            // data transfer
            magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue );
            magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue );
        }
        //COO-type
        else if ( A.storage_type == Magma_COO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drowidx, A.nnz ));
            // data transfer
            magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue );
            magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue );
        }
        //CSRCOO-type
        else if ( A.storage_type == Magma_CSRCOO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drowidx, A.nnz ));
            // data transfer
            magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue );
            magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue );
            magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue );
        }
        //ELL/ELLPACKT-type
        else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue );
        }
        //ELLD-type
        else if ( A.storage_type == Magma_ELLD ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue );
        }
        //ELLRT-type
        else if ( A.storage_type == Magma_ELLRT ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->alignment = A.alignment;
            magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment );
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows ));
            // data transfer
            magma_dsetvector( A.num_rows * rowlength, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.num_rows * rowlength, A.col, 1, B->dcol, 1, queue );
            magma_index_setvector( A.num_rows, A.row, 1, B->drow, 1, queue );
        }
        //SELLP-type
        else if ( A.storage_type == Magma_SELLP ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 ));
            // data transfer
            magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue );
            magma_index_setvector( A.numblocks + 1, A.row, 1, B->drow, 1, queue );
        }
        //BCSR-type
        else if ( A.storage_type == Magma_BCSR ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            magma_int_t size_b = A.blocksize;
            //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b );
                    // max number of blocks per row
            //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b );
            magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b );
                    // max number of blocks per column
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks ));
            CHECK( magma_index_malloc( &B->drow, r_blocks + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.numblocks ));
            // data transfer
            magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue );
            magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue );
            magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue );
        }
        //DENSE-type
        else if ( A.storage_type == Magma_DENSE ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->major = A.major;
            B->ld = A.ld;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols ));
            // data transfer
            magma_dsetvector( A.num_rows * A.num_cols, A.val, 1, B->dval, 1, queue );
        }
    }

    // second case: copy matrix from host to host
    else if ( src == Magma_CPU && dst == Magma_CPU ) {
        //CSR-type
        if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
                                        || A.storage_type == Magma_CSC
                                        || A.storage_type == Magma_CSRD
                                        || A.storage_type == Magma_CSRL
                                        || A.storage_type == Magma_CSRU ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            // data transfer
            for( magma_int_t i=0; i<A.nnz; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
            }
            for( magma_int_t i=0; i<A.num_rows+1; i++ ) {
                B->row[i] = A.row[i];
            }
        }
        //COO-type
        else if ( A.storage_type == Magma_COO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz ));
            // data transfer
            for( magma_int_t i=0; i<A.nnz; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
                B->rowidx[i] = A.rowidx[i];
            }
        }
        //CSRCOO-type
        else if ( A.storage_type == Magma_CSRCOO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz ));
            // data transfer
            for( magma_int_t i=0; i<A.nnz; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
                B->rowidx[i] = A.rowidx[i];
            }
            for( magma_int_t i=0; i<A.num_rows+1; i++ ) {
                B->row[i] = A.row[i];
            }
        }
        //ELL/ELLPACKT-type
        else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row ));
            // data transfer
            for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
            }
        }
        //ELLD-type
        else if ( A.storage_type == Magma_ELLD ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row ));
            // data transfer
            for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
            }
        }
        //ELLRT-type
        else if ( A.storage_type == Magma_ELLRT ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->alignment = A.alignment;
            //int threads_per_row = A.alignment;
            //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row );
            magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment );
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows ));
            CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows ));
            // data transfer
            for( magma_int_t i=0; i<A.num_rows*rowlength; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
            }
            for( magma_int_t i=0; i<A.num_rows; i++ ) {
                B->row[i] = A.row[i];
            }
        }
        //SELLP-type
        else if (  A.storage_type == Magma_SELLP ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->alignment = A.alignment;
            B->numblocks = A.numblocks;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 ));
            // data transfer
            for( magma_int_t i=0; i<A.nnz; i++ ) {
                B->val[i] = A.val[i];
                B->col[i] = A.col[i];
            }
            for( magma_int_t i=0; i<A.numblocks+1; i++ ) {
                B->row[i] = A.row[i];
            }
        }
        //BCSR-type
        else if ( A.storage_type == Magma_BCSR ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            magma_int_t size_b = A.blocksize;
            //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b );
                    // max number of blocks per row
            //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b );
            magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b );
                    // max number of blocks per column
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks ));
            CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.numblocks ));
            // data transfer
            //magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue );
            for( magma_int_t i=0; i<size_b*size_b*A.numblocks; i++ ) {
                B->dval[i] = A.val[i];
            }
            //magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue );
            for( magma_int_t i=0; i<r_blocks+1; i++ ) {
                B->drow[i] = A.row[i];
            }
            //magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue );
            for( magma_int_t i=0; i<A.numblocks; i++ ) {
                B->dcol[i] = A.col[i];
            }
        }
        //DENSE-type
        else if ( A.storage_type == Magma_DENSE ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->major = A.major;
            B->ld = A.ld;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols ));
            // data transfer
            for( magma_int_t i=0; i<A.num_rows*A.num_cols; i++ ) {
                B->val[i] = A.val[i];
            }
        }
    }

    // third case: copy matrix from device to host
    else if ( src == Magma_DEV && dst == Magma_CPU ) {
        //CSR-type
        if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
                                        || A.storage_type == Magma_CSC
                                        || A.storage_type == Magma_CSRD
                                        || A.storage_type == Magma_CSRL
                                        || A.storage_type == Magma_CSRU ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            // data transfer
            magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue );
            magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue );
        }
        //COO-type
        else if ( A.storage_type == Magma_COO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz ));
            // data transfer
            magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue );
            magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue );
        }
        //CSRCOO-type
        else if ( A.storage_type == Magma_CSRCOO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz ));
            // data transfer
            magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue );
            magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue );
            magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue );
        }
        //ELL/ELLPACKT-type
        else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue );
        }
        //ELLD-type
        else if (  A.storage_type == Magma_ELLD ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue );
        }
        //ELLRT-type
        else if ( A.storage_type == Magma_ELLRT ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->alignment = A.alignment;
            //int threads_per_row = A.alignment;
            //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row );
            magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment );
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows ));
            CHECK( magma_index_malloc_cpu( &B->row, A.num_rows ));
            CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows ));
            // data transfer
            magma_dgetvector( A.num_rows * rowlength, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.num_rows * rowlength, A.dcol, 1, B->col, 1, queue );
            magma_index_getvector( A.num_rows, A.drow, 1, B->row, 1, queue );
        }
        //SELLP-type
        else if ( A.storage_type == Magma_SELLP ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->col, A.nnz ));
            CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 ));
            // data transfer
            magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue );
            magma_index_getvector( A.numblocks + 1, A.drow, 1, B->row, 1, queue );
        }
        //BCSR-type
        else if ( A.storage_type == Magma_BCSR ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            magma_int_t size_b = A.blocksize;
            //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b );
                    // max number of blocks per row
            //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b );
            magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b );
                    // max number of blocks per column
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks ));
            CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 ));
            CHECK( magma_index_malloc_cpu( &B->col, A.numblocks ));
            // data transfer
            magma_dgetvector( size_b * size_b * A.numblocks, A.dval, 1, B->val, 1, queue );
            magma_index_getvector( r_blocks + 1, A.drow, 1, B->row, 1, queue );
            magma_index_getvector( A.numblocks, A.dcol, 1, B->col, 1, queue );
        }
        //DENSE-type
        else if ( A.storage_type == Magma_DENSE ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_CPU;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->major = A.major;
            B->ld = A.ld;
            // memory allocation
            CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols ));
            // data transfer
            magma_dgetvector( A.num_rows * A.num_cols, A.dval, 1, B->val, 1, queue );
        }
    }

    // fourth case: copy matrix from device to device
    else if ( src == Magma_DEV && dst == Magma_DEV ) {
        //CSR-type
        if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR
                                        || A.storage_type == Magma_CSC
                                        || A.storage_type == Magma_CSRD
                                        || A.storage_type == Magma_CSRL
                                        || A.storage_type == Magma_CSRU ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            // data transfer
            magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue );
            magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue );
        }
        //COO-type
        else if ( A.storage_type == Magma_COO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drowidx, A.nnz ));
            // data transfer
            magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue );
            magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue );
        }
        //CSRCOO-type
        else if ( A.storage_type == Magma_CSRCOO ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drowidx, A.nnz ));
            // data transfer
            magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue );
            magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue );
            magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue );
        }
        //ELL/ELLPACKT-type
        else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue );
        }
        //ELLD-type
        else if ( A.storage_type == Magma_ELLD ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row ));
            // data transfer
            magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue );
        }
        //ELLRT-type
        else if ( A.storage_type == Magma_ELLRT ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->alignment = A.alignment;
            //int threads_per_row = A.alignment;
            //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row );
            magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment );
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength ));
            CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength ));
            CHECK( magma_index_malloc( &B->drow, A.num_rows ));
            // data transfer
            magma_dcopyvector( A.num_rows * rowlength, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.num_rows * rowlength, A.dcol, 1, B->dcol, 1, queue );
            magma_index_copyvector( A.num_rows, A.drow, 1, B->drow, 1, queue );
        }
        //SELLP-type
        else if ( A.storage_type == Magma_SELLP ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.nnz ));
            CHECK( magma_index_malloc( &B->dcol, A.nnz ));
            CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 ));
            // data transfer
            magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue );
            magma_index_copyvector( A.numblocks + 1, A.drow, 1, B->drow, 1, queue );
        }
        //BCSR-type
        else if ( A.storage_type == Magma_BCSR ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->blocksize = A.blocksize;
            B->numblocks = A.numblocks;
            B->alignment = A.alignment;
            magma_int_t size_b = A.blocksize;
            //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b );
                    // max number of blocks per row
            //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b );
            magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b );
                    // max number of blocks per column
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks ));
            CHECK( magma_index_malloc( &B->drow, r_blocks + 1 ));
            CHECK( magma_index_malloc( &B->dcol, A.numblocks ));
            // data transfer
            magma_dcopyvector( size_b * size_b * A.numblocks, A.dval, 1, B->dval, 1, queue );
            magma_index_copyvector( r_blocks + 1, A.drow, 1, B->drow, 1, queue );
            magma_index_copyvector( A.numblocks, A.dcol, 1, B->dcol, 1, queue );
        }
        //DENSE-type
        else if ( A.storage_type == Magma_DENSE ) {
            // fill in information for B
            B->storage_type = A.storage_type;
            B->memory_location = Magma_DEV;
            B->sym = A.sym;
            B->diagorder_type = A.diagorder_type;
            B->fill_mode = A.fill_mode;
            B->num_rows = A.num_rows;
            B->num_cols = A.num_cols;
            B->nnz = A.nnz; B->true_nnz = A.true_nnz;
            B->max_nnz_row = A.max_nnz_row;
            B->diameter = A.diameter;
            B->major = A.major;
            B->ld = A.ld;
            // memory allocation
            CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols ));
            // data transfer
            magma_dcopyvector( A.num_rows * A.num_cols, A.dval, 1, B->dval, 1, queue );
        }
    }
    
    
cleanup:
    if( info != 0 ){
        magma_dmfree( B, queue );
    }
    return info;
}
Example #14
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Debugging file
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    magma_z_solver_par solver_par;
    magma_z_preconditioner precond_par;
    solver_par.epsilon = 10e-16;
    solver_par.maxiter = 1000;
    solver_par.verbose = 0;
    solver_par.restart = 30;
    solver_par.num_eigenvalues = 0;
    solver_par.ortho = Magma_CGS;
    
    magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0);
    magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0);

    magma_z_sparse_matrix A, B, B_d;
    magma_z_vector x, b;

    // generate matrix of desired structure and size
    magma_int_t n=100;   // size is n*n
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset;
    magmaDoubleComplex *diag_vals;
    magma_zmalloc_cpu( &diag_vals, offdiags+1 );
    magma_index_malloc_cpu( &diag_offset, offdiags+1 );
    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;
    diag_vals[0] = MAGMA_Z_MAKE( 4.1, 0.0 );
    diag_vals[1] = MAGMA_Z_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_Z_MAKE( -1.0, 0.0 );
    magma_zmgenerator( nn, offdiags, diag_offset, diag_vals, &A );

    // convert marix into desired format
    B.storage_type = Magma_SELLC;
    B.blocksize = 8;
    B.alignment = 8;
    // scale matrix
    magma_zmscale( &A, Magma_UNITDIAG );

    magma_z_mconvert( A, &B, Magma_CSR, B.storage_type );
    magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV );


    // test CG ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zcg_res( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    // test PCG IC ################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ICC;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpcg( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test BICGSTAB ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zbicgstab( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);
/*
    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PBICGSTAB ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);printf("here\n");
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test GMRES ####################################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // solver
    magma_zgmres( B_d, b, &x, &solver_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);

    // test PGMRES Jacobi ############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_JACOBI;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);*/

    // test PGMRES ILU ###############################
    // vectors and initial guess
    magma_z_vinit( &b, Magma_DEV, A.num_cols, one );
    magma_z_vinit( &x, Magma_DEV, A.num_cols, one );
    magma_z_spmv( one, B_d, x, zero, b );                 //  b = A x
    magma_z_vfree(&x);
    magma_z_vinit( &x, Magma_DEV, A.num_cols, zero );
    magma_zsolverinfo_init( &solver_par, &precond_par );
    // Preconditioner
    precond_par.solver = Magma_ILU;
    magma_z_precondsetup( B_d, b, &precond_par );
    // solver
    magma_zpgmres( B_d, b, &x, &solver_par, &precond_par );
    // solverinfo
    magma_zsolverinfo( &solver_par, &precond_par );
    if( solver_par.numiter > 150 ){
        printf("error: test not passed!\n"); exit(-1);
    }
    magma_zsolverinfo_free( &solver_par, &precond_par );
    magma_z_vfree(&x);
    magma_z_vfree(&b);


    printf("all tests passed.\n");




    magma_z_mfree(&B_d);
    magma_z_mfree(&B);
    magma_z_mfree(&A); 


    TESTING_FINALIZE();
    return 0;
}
extern "C" magma_int_t 
c_transpose_csr(    
    magma_int_t n_rows, 
    magma_int_t n_cols, 
    magma_int_t nnz, 
    magmaFloatComplex *val, 
    magma_index_t *row, 
    magma_index_t *col, 
    magma_int_t *new_n_rows, 
    magma_int_t *new_n_cols, 
    magma_int_t *new_nnz, 
    magmaFloatComplex **new_val, 
    magma_index_t **new_row, 
    magma_index_t **new_col,
    magma_queue_t queue )
{



  nnz = row[n_rows];
  *new_n_rows = n_cols;
  *new_n_cols = n_rows;
  *new_nnz = nnz;

  magmaFloatComplex ** valtemp;
  magma_index_t ** coltemp;
  valtemp = (magmaFloatComplex**)malloc((n_rows)*sizeof(magmaFloatComplex*));
  coltemp =(magma_index_t**)malloc((n_rows)*sizeof(magma_index_t*));

  // temporary 2-dimensional arrays valtemp/coltemp 
  // where val[i] is the array with the values of the i-th column of the matrix
  magma_index_t *nnztemp;
  magma_index_malloc_cpu( &nnztemp, n_rows );
  
  for( magma_int_t i=0; i<n_rows; i++ )
    nnztemp[i]=0;
  for( magma_int_t i=0; i<nnz; i++ )
    nnztemp[col[i]]++;    

  for( magma_int_t i=0; i<n_rows; i++ ){
    valtemp[i] = 
        (magmaFloatComplex*)malloc((nnztemp[i])*sizeof(magmaFloatComplex));
    coltemp[i] = (magma_index_t*)malloc(nnztemp[i]*sizeof(magma_index_t));
  }

  for( magma_int_t i=0; i<n_rows; i++ )
    nnztemp[i]=0;

  for( magma_int_t j=0; j<n_rows; j++ ){
    for( magma_int_t i=row[j]; i<row[j+1]; i++ ){
      valtemp[col[i]][nnztemp[col[i]]]=val[i];
      coltemp[col[i]][nnztemp[col[i]]]=j;
      nnztemp[col[i]]++;    
    }
  }

  //csr structure for transposed matrix
  *new_val = new magmaFloatComplex[nnz];
  *new_row = new magma_index_t[n_rows+1];
  *new_col = new magma_index_t[nnz];

  //fill the transposed csr structure
  magma_int_t nnztmp=0;
  (*new_row)[0]=0;
  for( magma_int_t j=0; j<n_rows; j++ ){
    for( magma_int_t i=0; i<nnztemp[j]; i++ ){
      (*new_val)[nnztmp]=valtemp[j][i];
      (*new_col)[nnztmp]=coltemp[j][i];
      nnztmp++;
    }
    (*new_row)[j+1]=nnztmp;
  }

//usually the temporary memory should be freed afterwards
//however, it does not work
/*
  for( magma_int_t j=0; j<n_rows; j++ ){
    free(valtemp[j]);
    free(coltemp[j]);
  }
  free(valtemp);free(coltemp);
      printf("check9\n");
    fflush(stdout);
*/

    magma_free_cpu( nnztemp );
    
    return MAGMA_SUCCESS;
}
Example #16
0
magma_int_t
magma_dsymbilu(
    magma_d_matrix *A,
    magma_int_t levels,
    magma_d_matrix *L,
    magma_d_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_d_matrix A_copy={Magma_CSR}, B={Magma_CSR};
    magma_d_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR};
    
    if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){
        CHECK( magma_dmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue ));
        CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue ));

        // possibility to scale to unit diagonal
        //magma_dmscale( &B, Magma_UNITDIAG );

        CHECK( magma_dmconvert( B, L, Magma_CSR, Magma_CSR , queue));
        CHECK( magma_dmconvert( B, U, Magma_CSR, Magma_CSR, queue ));

        magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;
        magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz;

        magma_free_cpu( L->col );
        magma_free_cpu( U->col );
        CHECK( magma_index_malloc_cpu( &L->col, num_lnnz ));
        CHECK( magma_index_malloc_cpu( &U->col, num_unnz ));

        magma_dsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col,
                                            L->row, L->col, U->row, U->col );
        L->nnz = num_lnnz;
        U->nnz = num_unnz;
        magma_free_cpu( L->val );
        magma_free_cpu( U->val );
        CHECK( magma_dmalloc_cpu( &L->val, L->nnz ));
        CHECK( magma_dmalloc_cpu( &U->val, U->nnz ));
        for( magma_int_t i=0; i<L->nnz; i++ )
            L->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );

        for( magma_int_t i=0; i<U->nnz; i++ )
            U->val[i] = MAGMA_D_MAKE( 0.0, 0.0 );
        // take the original values (scaled) as initial guess for L
        for(magma_int_t i=0; i<L->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){
                    if( L->col[k] == lcol ){
                        L->val[k] =  B.val[j];
                    }
                }
            }
        }

        // take the original values (scaled) as initial guess for U
        for(magma_int_t i=0; i<U->num_rows; i++){
            for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){
                magma_index_t lcol = B.col[j];
                for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){
                    if( U->col[k] == lcol ){
                        U->val[k] =  B.val[j];
                    }
                }
            }
        }
        magma_dmfree( &B, queue );
        // fill A with the new structure;
        magma_free_cpu( A->col );
        magma_free_cpu( A->val );
        CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz ));
        CHECK( magma_dmalloc_cpu( &A->val, L->nnz+U->nnz ));
        A->nnz = L->nnz+U->nnz;
        
        magma_int_t z = 0;
        for(magma_int_t i=0; i<A->num_rows; i++){
            A->row[i] = z;
            for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){
                A->col[z] = L->col[j];
                A->val[z] = L->val[j];
                z++;
            }
            for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){
                A->col[z] = U->col[j];
                A->val[z] = U->val[j];
                z++;
            }
        }
        A->row[A->num_rows] = z;
        // reset the values of A to the original entries
        for(magma_int_t i=0; i<A->num_rows; i++){
            for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){
                magma_index_t lcol = A_copy.col[j];
                for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){
                    if( A->col[k] == lcol ){
                        A->val[k] =  A_copy.val[j];
                    }
                }
            }
        }
    }
    else {
        magma_storage_t A_storage = A->storage_type;
        magma_location_t A_location = A->memory_location;
        CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue ));

        CHECK( magma_dsymbilu( &CSRCOOA, levels, L, U, queue ));

        magma_dmfree( &hA, queue );
        magma_dmfree( A, queue );
        CHECK( magma_dmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue ));
        CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue ));
    }
    
cleanup:
    if( info != 0 ){
        magma_dmfree( L, queue );
        magma_dmfree( U, queue );
    }
    magma_dmfree( &A_copy, queue );
    magma_dmfree( &B, queue );
    magma_dmfree( &hA, queue );
    magma_dmfree( &CSRCOOA, queue );
    return info;
}
Example #17
0
extern "C" magma_int_t
c_transpose_csr(
    magma_int_t n_rows,
    magma_int_t n_cols,
    magma_int_t nnz,
    magmaFloatComplex *values,
    magma_index_t *rowptr,
    magma_index_t *colind,
    magma_int_t *new_n_rows,
    magma_int_t *new_n_cols,
    magma_int_t *new_nnz,
    magmaFloatComplex **new_values,
    magma_index_t **new_rowptr,
    magma_index_t **new_colind,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    // easier to keep names straight if we convert CSR to CSC,
    // which is the same as tranposing CSR.
    magmaFloatComplex *csc_values=NULL;
    magma_index_t *csc_colptr=NULL, *csc_rowind=NULL;
    
    // i, j are actual row & col indices (0 <= i < nrows, 0 <= j < ncols).
    // k is index into col and values (0 <= k < nnz).
    magma_int_t i, j, k, total, tmp;
    
    CHECK( magma_cmalloc_cpu( &csc_values, nnz ) );
    CHECK( magma_index_malloc_cpu( &csc_colptr, n_cols + 1 ) );
    CHECK( magma_index_malloc_cpu( &csc_rowind, nnz ) );
    
    // example matrix
    // [ x x 0 x ]
    // [ x 0 x x ]
    // [ x x 0 0 ]
    // rowptr = [ 0 3 6, 8 ]
    // colind = [ 0 1 3; 0 2 3; 0 1 ]
    
    // sum up nnz in each original column
    // colptr = [ 3 2 1 2, X ]
    for( j=0; j < n_cols; ++j ) {
        csc_colptr[ j ] = 0;
    }
    for( k=0; k < nnz; ++k ) {
        csc_colptr[ colind[k] ]++;
    }
    
    // running sum to convert to new colptr
    // colptr = [ 0 3 5 6, 8 ]
    total = 0;
    for( j=0; j < n_cols; ++j ) {
        tmp = csc_colptr[ j ];
        csc_colptr[ j ] = total;
        total += tmp;
    }
    csc_colptr[ n_cols ] = total;
    assert( total == nnz );
    
    // copy row indices and values
    // this increments colptr until it effectively shifts left one
    // colptr = [ 3 5 6 8, 8 ]
    // rowind = [ 0 1 2; 0 2; 1; 0 1 ]
    for( i=0; i < n_rows; ++i ) {
        for( k=rowptr[i]; k < rowptr[i+1]; ++k ) {
            j = colind[k];
            csc_rowind[ csc_colptr[ j ] ] = i;
            csc_values[ csc_colptr[ j ] ] = values[k];
            csc_colptr[ j ]++;
        }
    }
    assert( csc_colptr[ n_cols-1 ] == nnz );
    
    // shift colptr right one
    // colptr = [ 0 3 5 6, 8 ]
    for( j=n_cols-1; j > 0; --j ) {
        csc_colptr[j] = csc_colptr[j-1];
    }
    csc_colptr[0] = 0;

    // save into output variables
    *new_n_rows = n_cols;
    *new_n_cols = n_rows;
    *new_nnz    = nnz;
    *new_values = csc_values;
    *new_rowptr = csc_colptr;
    *new_colind = csc_rowind;
    
cleanup:
    magma_free_cpu( csc_values );
    magma_free_cpu( csc_colptr );
    magma_free_cpu( csc_rowind );
    return info;
}
Example #18
0
extern "C" magma_int_t
magma_dmlumerge(    
    magma_d_sparse_matrix L, 
    magma_d_sparse_matrix U,
    magma_d_sparse_matrix *A,
    magma_queue_t queue ){
    if( L.storage_type == Magma_CSR && U.storage_type == Magma_CSR ){
        if( L.memory_location == Magma_CPU && U.memory_location == Magma_CPU ){
            
            magma_d_mtransfer( L, A, Magma_CPU, Magma_CPU, queue );
            magma_free_cpu( A->col );
            magma_free_cpu( A->val );
            // make sure it is strictly lower triangular
            magma_int_t z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    z++;
                }
            }
            A->nnz = z;
            // fill A with the new structure;
            magma_int_t stat_cpu = 0;
            stat_cpu += magma_index_malloc_cpu( &A->col, A->nnz );
            stat_cpu += magma_dmalloc_cpu( &A->val, A->nnz );
            if( stat_cpu != 0 ){
                magma_d_mfree( A, queue );
                printf("error: memory allocation.\n");
                return MAGMA_ERR_HOST_ALLOC;
            }
            z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                A->row[i] = z;
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        A->col[z] = L.col[j];
                        A->val[z] = L.val[j];
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    A->col[z] = U.col[j];
                    A->val[z] = U.val[j];
                    z++;
                }
            }
            A->row[A->num_rows] = z;
            A->nnz = z;
            return MAGMA_SUCCESS; 
        }
        else{
    
            printf("error: matrix not on CPU.\n"); 
    
            return MAGMA_SUCCESS; 
        }
    }
    else{
        
        printf("error: matrix not on CPU.\n"); 
    
        return MAGMA_SUCCESS; 
    }
}
Example #19
0
magma_int_t
magma_cm_27stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_int_t i,j,k;
    magma_c_matrix hA={Magma_CSR};

    
    // generate matrix of desired structure and size (3d 27-point stencil)
    magma_int_t nn = n*n*n;
    magma_int_t offdiags = 13;
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;
    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n-1;
    diag_offset[3] = n;
    diag_offset[4] = n+1;
    diag_offset[5] = n*n-n-1;
    diag_offset[6] = n*n-n;
    diag_offset[7] = n*n-n+1;
    diag_offset[8] = n*n-1;
    diag_offset[9] = n*n;
    diag_offset[10] = n*n+1;
    diag_offset[11] = n*n+n-1;
    diag_offset[12] = n*n+n;
    diag_offset[13] = n*n+n+1;

    diag_vals[0] = MAGMA_C_MAKE( 26.0, 0.0 );
    diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[3] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[4] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[5] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[6] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[7] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[8] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[9] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[10] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[11] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[12] = MAGMA_C_MAKE( -1.0, 0.0 );
    diag_vals[13] = MAGMA_C_MAKE( -1.0, 0.0 );
    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    for( i=0; i < n*n; i++ ) {
        for( j=0; j < n; j++ ) {
            magma_index_t row = i*n+j;
            for( k=hA.row[row]; k<hA.row[row+1]; k++) {
                if ((hA.col[k] == row-1 ||
                    hA.col[k] == row-n-1 ||
                    hA.col[k] == row+n-1 ||
                    hA.col[k] == row-n*n+n-1 ||
                    hA.col[k] == row+n*n-n-1 ||
                    hA.col[k] == row-n*n-1 ||
                    hA.col[k] == row+n*n-1 ||
                    hA.col[k] == row-n*n-n-1 ||
                    hA.col[k] == row+n*n+n-1 ) && (row+1)%n == 1 )
                        
                        hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
    
                if ((hA.col[k] == row+1 ||
                    hA.col[k] == row-n+1 ||
                    hA.col[k] == row+n+1 ||
                    hA.col[k] == row-n*n+n+1 ||
                    hA.col[k] == row+n*n-n+1 ||
                    hA.col[k] == row-n*n+1 ||
                    hA.col[k] == row+n*n+1 ||
                    hA.col[k] == row-n*n-n+1 ||
                    hA.col[k] == row+n*n+n+1 ) && (row)%n ==n-1 )
                        
                        hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }
    hA.true_nnz = hA.nnz;
    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
Example #20
0
extern "C" magma_int_t
magma_smlumerge(
    magma_s_matrix L,
    magma_s_matrix U,
    magma_s_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;    

    if( L.storage_type == Magma_CSR && U.storage_type == Magma_CSR ){
        if( L.memory_location == Magma_CPU && U.memory_location == Magma_CPU ){
            
            CHECK( magma_smtransfer( L, A, Magma_CPU, Magma_CPU, queue ));
            magma_free_cpu( A->col );
            magma_free_cpu( A->val );
            // make sure it is strictly lower triangular
            magma_int_t z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    z++;
                }
            }
            A->nnz = z;
            // fill A with the new structure;
            CHECK( magma_index_malloc_cpu( &A->col, A->nnz ));
            CHECK( magma_smalloc_cpu( &A->val, A->nnz ));
            z = 0;
            for(magma_int_t i=0; i<A->num_rows; i++){
                A->row[i] = z;
                for(magma_int_t j=L.row[i]; j<L.row[i+1]; j++){
                    if( L.col[j] < i ){// make sure it is strictly lower triangular
                        A->col[z] = L.col[j];
                        A->val[z] = L.val[j];
                        z++;
                    }
                }
                for(magma_int_t j=U.row[i]; j<U.row[i+1]; j++){
                    A->col[z] = U.col[j];
                    A->val[z] = U.val[j];
                    z++;
                }
            }
            A->row[A->num_rows] = z;
            A->nnz = z;
        }
        else{
            printf("error: matrix not on CPU.\n"); 
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    }
    else{
            printf("error: matrix in wrong format.\n"); 
            info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    if( info != 0 ){
        magma_smfree( A, queue );
    }
    return info;
}
Example #21
0
magma_int_t
magma_cm_5stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_int_t i,j,k;
    magma_c_matrix hA={Magma_CSR};
    
    // generate matrix of desired structure and size (2d 5-point stencil)
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;
    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;
    
    #define COMPLEX
    
    #ifdef COMPLEX
        // complex case
        diag_vals[0] = MAGMA_C_MAKE( 4.0, 4.0 );
        diag_vals[1] = MAGMA_C_MAKE( -1.0, -1.0 );
        diag_vals[2] = MAGMA_C_MAKE( -1.0, -1.0 );
        
    #else
        // real case
        diag_vals[0] = MAGMA_C_MAKE( 4.0, 0.0 );
        diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
        diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
    #endif
    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    for( i=0; i<n; i++ ) {
        for( j=0; j<n; j++ ) {
            magma_index_t row = i*n+j;
            for( k=hA.row[row]; k<hA.row[row+1]; k++) {
                if ((hA.col[k] == row-1 ) && (row+1)%n == 1 )
                    hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
    
                if ((hA.col[k] == row+1 ) && (row)%n ==n-1 )
                    hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));
    magma_cmcsrcompressor( A, queue );
    A->true_nnz = A->nnz;
    
cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
Example #22
0
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_matrix A,
    magma_c_matrix *D,
    magma_c_matrix *R,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    
    magma_int_t i, k, j, nnz_diag, nnz_offd;
    
    D->val = NULL;
    D->col = NULL;
    D->row = NULL;
    D->rowidx = NULL;
    D->blockinfo = NULL;
    D->diag = NULL;
    D->dval = NULL;
    D->dcol = NULL;
    D->drow = NULL;
    D->drowidx = NULL;
    D->ddiag = NULL;
    R->val = NULL;
    R->col = NULL;
    R->row = NULL;
    R->rowidx = NULL;
    R->blockinfo = NULL;
    R->diag = NULL;
    R->dval = NULL;
    R->dcol = NULL;
    R->drow = NULL;
    R->drowidx = NULL;
    R->ddiag = NULL;
    
    if (  A.memory_location == Magma_CPU &&
            (   A.storage_type == Magma_CSR ||
                A.storage_type == Magma_CSRCOO ) ) {



        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices
        for( i=0; i<A.num_rows; i+=bsize ){
            for( k=i; k<min(A.num_rows,i+bsize); k++ ){
                int check = 0;
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+bsize ){
                        if( A.col[j] == k ){
                            check = 1;
                        }
                        nnz_diag++;
                    }
                    else
                        nnz_offd++;
                }
                if( check == 0 ){
                    printf("error: matrix contains zero on diagonal at (%d,%d).\n", i, i);
                    info = -1;
                    goto cleanup;
                }
            }
        }

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;

        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;

        CHECK( magma_cmalloc_cpu( &D->val, nnz_diag ));
        CHECK( magma_index_malloc_cpu( &D->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &D->col, nnz_diag ));

        CHECK( magma_cmalloc_cpu( &R->val, nnz_offd ));
        CHECK( magma_index_malloc_cpu( &R->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &R->col, nnz_offd ));
        
        // Fill up the new sparse matrices
        D->row[0] = 0;
        R->row[0] = 0;

        nnz_offd = nnz_diag = 0;
        for( i=0; i<A.num_rows; i+=bsize) {
            for( k=i; k<min(A.num_rows,i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
     
                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // larger than diagonal remain as before
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right
                        // to have room for the diagonal
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
    }
    else {
        magma_c_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, DCSR={Magma_CSR}, RCSR={Magma_CSR}, Dh={Magma_CSR}, Rh={Magma_CSR};
        CHECK( magma_cmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_cmconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue ));

        CHECK( magma_ccsrsplit( bsize, ACSR, &DCSR, &RCSR, queue ));

        CHECK( magma_cmconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_cmconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue ));

        CHECK( magma_cmtransfer( Dh, D, Magma_CPU, A.memory_location, queue ));
        CHECK( magma_cmtransfer( Rh, R, Magma_CPU, A.memory_location, queue ));

        magma_cmfree( &Ah, queue );
        magma_cmfree( &ACSR, queue );
        magma_cmfree( &Dh, queue );
        magma_cmfree( &DCSR, queue );
        magma_cmfree( &Rh, queue );
        magma_cmfree( &RCSR, queue );
    }
cleanup:
    if( info != 0 ){
        magma_cmfree( D, queue );
        magma_cmfree( R, queue );
    }
    return info;
}
Example #23
0
/* ////////////////////////////////////////////////////////////////////////////
   -- testing any solver
*/
int main(  int argc, char** argv )
{
    magma_int_t info = 0;
    TESTING_INIT();

    magma_dopts zopts;
    magma_queue_t queue=NULL;
    magma_queue_create( 0, &queue );
    
    real_Double_t res;
    magma_d_matrix Z={Magma_CSR}, A={Magma_CSR}, AT={Magma_CSR}, 
    A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR};
    
    magma_index_t *comm_i=NULL;
    double *comm_v=NULL;
    magma_int_t start, end;
    
    int i=1;
    CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue ));

    B.blocksize = zopts.blocksize;
    B.alignment = zopts.alignment;

    while( i < argc ) {
        if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) {   // Laplace test
            i++;
            magma_int_t laplace_size = atoi( argv[i] );
            CHECK( magma_dm_5stencil(  laplace_size, &Z, queue ));
        } else {                        // file-matrix test
            CHECK( magma_d_csr_mtx( &Z,  argv[i], queue ));
        }

        printf("%% matrix info: %d-by-%d with %d nonzeros\n",
                            int(Z.num_rows), int(Z.num_cols), int(Z.nnz) );
        
        // slice matrix
        CHECK( magma_index_malloc_cpu( &comm_i, Z.num_rows ) );
        CHECK( magma_dmalloc_cpu( &comm_v, Z.num_rows ) );
        
        CHECK( magma_dmslice( 1, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );

        CHECK( magma_dmslice( 9, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );
        
        CHECK( magma_dmslice( 9, 1, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );

        CHECK( magma_dmslice( 9, 8, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) );    
        magma_dprint_matrix( A2, queue );
        magma_dprint_matrix( AT, queue );
        magma_dprint_matrix( B, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&AT, queue );
        magma_dmfree(&B, queue );
        
        
        // scale matrix
        CHECK( magma_dmscale( &Z, zopts.scaling, queue ));

        // remove nonzeros in matrix
        CHECK( magma_dmcsrcompressor( &Z, queue ));
        
        // convert to be non-symmetric
        CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ));
        
        // transpose
        CHECK( magma_dmtranspose( A, &AT, queue ));

        // convert, copy back and forth to check everything works

        CHECK( magma_dmconvert( AT, &B, Magma_CSR, zopts.output_format, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ));
        magma_dmfree(&B, queue );
        CHECK( magma_dmcsrcompressor_gpu( &B_d, queue ));
        CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ));
        magma_dmfree(&B_d, queue );
        CHECK( magma_dmconvert( B, &AT, zopts.output_format,Magma_CSR, queue ));
        magma_dmfree(&B, queue );

        // transpose back
        CHECK( magma_dmtranspose( AT, &A2, queue ));
        magma_dmfree(&AT, queue );
        CHECK( magma_dmdiff( A, A2, &res, queue));
        printf("%% ||A-B||_F = %8.2e\n", res);
        if ( res < .000001 )
            printf("%% tester:  ok\n");
        else
            printf("%% tester:  failed\n");
        
        magma_free_cpu( comm_i );
        magma_free_cpu( comm_v );
        comm_i=NULL;
        comm_v=NULL;
        magma_dmfree(&A, queue );
        magma_dmfree(&A2, queue );
        magma_dmfree(&Z, queue );

        i++;
    }

cleanup:
    magma_free_cpu( comm_i );
    magma_free_cpu( comm_v );
    magma_dmfree(&AT, queue );
    magma_dmfree(&A, queue );
    magma_dmfree(&B, queue );
    magma_dmfree(&B_d, queue );
    magma_dmfree(&A2, queue );
    magma_dmfree(&Z, queue );
    magma_queue_destroy( queue );
    TESTING_FINALIZE();
    return info;
}