/**
    Purpose
    -------
    Determines the largest number of entries stored in any single row of A
    and writes it to A->max_nnz_row.

    For CSR the row length is read off the row pointer. For dense storage an
    entry is counted when its real part is nonzero. Any other storage type on
    the CPU leaves A untouched and returns 0.

    Arguments
    ---------
    @param[in,out]
    A           magma_z_matrix*
                matrix to inspect; must have memory_location == Magma_CPU

    @param[in]
    queue       magma_queue_t
                Queue to execute in (not used by this routine).

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED when A is not on the
                CPU, otherwise whatever CHECK stored in info.
*/
extern "C" magma_int_t
magma_zrowentries(
    magma_z_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_index_t *length = NULL;
    magma_index_t i, j, maxrowlength = 0;

    if ( A->memory_location == Magma_CPU ) {
        if ( A->storage_type == Magma_CSR ) {
            CHECK( magma_index_malloc_cpu( &length, A->num_rows ));
            for ( i = 0; i < A->num_rows; i++ ) {
                length[i] = A->row[i+1] - A->row[i];
                if ( length[i] > maxrowlength ) {
                    maxrowlength = length[i];
                }
            }
            A->max_nnz_row = maxrowlength;
        }
        else if ( A->storage_type == Magma_DENSE ) {
            CHECK( magma_index_malloc_cpu( &length, A->num_rows ));
            for ( i = 0; i < A->num_rows; i++ ) {
                length[i] = 0;
                for ( j = 0; j < A->num_cols; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[i*A->num_cols + j] ) != 0. ) {
                        length[i]++;
                    }
                }
                if ( length[i] > maxrowlength ) {
                    maxrowlength = length[i];
                }
            }
            A->max_nnz_row = maxrowlength;
        }
    }
    else {
        printf("error: matrix not on CPU.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

cleanup:
    // length comes from magma_index_malloc_cpu, so it must go back through
    // the host deallocator (the device deallocator was used before).
    magma_free_cpu( length );
    return info;
}
/**
    Purpose
    -------
    Replaces the CPU CSR matrix L by a lower triangular matrix with a unit
    diagonal.

    The decision is taken from L->row[1]:
      - row[1] == 1: the first row stores one entry, so the diagonal is taken
        to be present; L is converted to Magma_CSRL with
        diagorder_type = Magma_UNITY.
      - row[1] == 0: the first row is empty, so L is taken to be strictly
        lower triangular; a 1.0 entry with column index i is appended to
        every row i.
      - anything else is rejected.

    Arguments
    ---------
    @param[in,out]
    L           magma_z_matrix*
                CSR matrix on the CPU; replaced on success, freed on failure

    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED when L is neither of the
                two accepted shapes, otherwise the code stored by CHECK.
*/
magma_int_t
magma_zmLdiagadd(
    magma_z_matrix *L,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix LL={Magma_CSR};

    if ( L->row[1] == 1 ) {
        // lower triangular, diagonal already stored
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( *L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L->row[1] == 0 ) {
        // strictly lower triangular: copy the header/row array, then rebuild
        // col/val with room for one extra entry per row
        CHECK( magma_zmtransfer( *L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        LL.col = NULL;      // keep cleanup safe if a re-allocation fails
        magma_free_cpu( LL.val );
        LL.val = NULL;
        LL.nnz = L->nnz + L->num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));

        magma_int_t z = 0;
        for ( magma_int_t i = 0; i < L->num_rows; i++ ) {
            LL.row[i] = z;
            for ( magma_int_t j = L->row[i]; j < L->row[i+1]; j++ ) {
                LL.val[z] = L->val[j];
                LL.col[z] = L->col[j];
                z++;
            }
            // append the unit diagonal as the last entry of the row
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
        LL.nnz = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
        // report the failure instead of silently handing back an empty L
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    magma_zmfree( L, queue );
    CHECK( magma_zmtransfer( LL, L, Magma_CPU, Magma_CPU, queue ));

cleanup:
    if ( info != 0 ) {
        magma_zmfree( L, queue );
    }
    magma_zmfree( &LL, queue );
    return info;
}
/**
    Purpose
    -------
    Walks the CSR pattern (rowptr, colidx) of the first num_rows rows and
    gathers column indices into x through a 128-entry staging buffer that is
    flushed with magma_dindexcopy.

    A column index is staged only when it does not occur in any slot of the
    staging buffer at that moment; each staged index also increments
    *num_indices. The buffer is flushed when it holds 128 indices or when the
    entry at position rowptr[num_rows]-1 has just been processed.

    NOTE(review): after a flush the scan of the current row stops (break) and
    the staging buffer keeps its old contents for later duplicate checks.
    This is preserved from the original; confirm it is intended.

    Arguments
    ---------
    @param[in]
    num_rows    magma_index_t
                number of rows to scan

    @param[in,out]
    num_indices magma_int_t*
                incremented once per staged index (never reset here)

    @param[in]
    rowptr      magma_index_t*
                CSR row pointer, num_rows+1 entries are read

    @param[in]
    colidx      magma_index_t*
                CSR column indices

    @param[in,out]
    x           magma_index_t*
                destination handed to magma_dindexcopy together with the
                running offset

    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, otherwise the code stored by CHECK.
*/
magma_int_t
magma_ddomainoverlap(
    magma_index_t num_rows,
    magma_int_t *num_indices,
    magma_index_t *rowptr,
    magma_index_t *colidx,
    magma_index_t *x,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_int_t blocksize = 128;
    magma_int_t row = 0, col = 0, num_ind = 0, offset = 0;
    // NULL so that cleanup is well defined even if the allocation below
    // fails without storing anything
    magma_index_t *tmp_x = NULL;

    CHECK( magma_index_malloc_cpu( &tmp_x, blocksize ));

    for ( magma_int_t i = 0; i < blocksize; i++ ) {
        tmp_x[i] = -1;
    }

    for ( magma_int_t i = 0; i < num_rows; i++ ) {
        row = i;
        for ( magma_int_t j = rowptr[row]; j < rowptr[row+1]; j++ ) {
            col = colidx[j];

            // linear search over the whole staging buffer
            int doubleitem = 0;
            for ( magma_int_t k = 0; k < blocksize; k++ ) {
                if ( tmp_x[k] == col ) {
                    doubleitem = 1;
                }
            }
            if ( doubleitem == 0 ) {
                tmp_x[num_ind] = col;
                num_ind++;
                (*num_indices)++;
            }

            if ( num_ind == blocksize || j == rowptr[num_rows]-1 ) {
                magma_dindexcopy( num_ind, offset, tmp_x, x, queue );
                offset = offset + num_ind;
                num_ind = 0;
                break;
            }
        }
    }

cleanup:
    magma_free_cpu( tmp_x );
    return info;
}
/**
    Purpose
    -------
    Builds an initial guess L from the lower triangular part AL of A:

        D = diag( 1 / sqrt( | Re( (AL * AL^T)_ii ) | ) ),    L = D * AL

    The product AL * AL^T and the scaling D * AL are computed on the device
    with magma_z_spmm; the result is returned in L in Magma_CSRCOO format.

    U is not written by this routine; it is only freed on failure.

    Arguments
    ---------
    @param[in]
    A           magma_z_matrix
                input matrix, CSR on the CPU

    @param[out]
    L           magma_z_matrix*
                scaled lower triangular factor (Magma_CSRCOO)

    @param[in,out]
    U           magma_z_matrix*
                untouched on success, freed on failure

    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, otherwise the code stored by CHECK.
*/
magma_int_t
magma_zinitguess(
    magma_z_matrix A,
    magma_z_matrix *L,
    magma_z_matrix *U,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );

    magma_z_matrix hAL={Magma_CSR}, hAU={Magma_CSR}, dAL={Magma_CSR},
    dAU={Magma_CSR}, dALU={Magma_CSR}, hALU={Magma_CSR}, hD={Magma_CSR},
    dD={Magma_CSR}, dL={Magma_CSR}, hL={Magma_CSR};
    magma_int_t i, j;

    magma_int_t offdiags = 0;
    magma_index_t *diag_offset = NULL;
    magmaDoubleComplex *diag_vals = NULL;

    // lower triangular part of A, diagonal values kept
    hAL.diagorder_type = Magma_VALUE;
    CHECK( magma_zmconvert( A, &hAL, Magma_CSR, Magma_CSRL, queue ));

    // upper triangular counterpart as the transpose of the lower part
    CHECK( magma_z_cucsrtranspose( hAL, &hAU, queue ));

    // both operands of the product have to be on the device; previously the
    // transpose was never transferred, so dAU entered the product empty
    CHECK( magma_zmtransfer( hAL, &dAL, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_zmtransfer( hAU, &dAU, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_z_spmm( one, dAL, dAU, &dALU, queue ));
    CHECK( magma_zmtransfer( dALU, &hALU, Magma_DEV, Magma_CPU, queue ));

    magma_zmfree( &dAU, queue );
    magma_zmfree( &dALU, queue );
    magma_zmfree( &hAU, queue );

    // identity matrix of matching size, to be overwritten with the scaling
    CHECK( magma_zmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));
    diag_offset[0] = 0;
    diag_vals[0] = MAGMA_Z_MAKE( 1.0, 0.0 );
    CHECK( magma_zmgenerator( hALU.num_rows, offdiags, diag_offset, diag_vals,
                              &hD, queue ));

    // hD(i,i) = 1 / sqrt( |Re( hALU(i,i) )| )
    for ( i = 0; i < hALU.num_rows; i++ ) {
        for ( j = hALU.row[i]; j < hALU.row[i+1]; j++ ) {
            if ( hALU.col[j] == i ) {
                hD.val[i] = MAGMA_Z_MAKE(
                    1.0 / sqrt(fabs(MAGMA_Z_REAL(hALU.val[j]))), 0.0 );
            }
        }
    }
    // hALU may only be released once the loop above is done reading it
    magma_zmfree( &hALU, queue );

    CHECK( magma_zmtransfer( hD, &dD, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree( &hD, queue );

    CHECK( magma_z_spmm( one, dD, dAL, &dL, queue ));
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );

    CHECK( magma_zmtransfer( dL, &hL, Magma_DEV, Magma_CPU, queue ));
    CHECK( magma_zmconvert( hL, L, Magma_CSR, Magma_CSRCOO, queue ));

cleanup:
    if ( info != 0 ) {
        magma_zmfree( L, queue );
        magma_zmfree( U, queue );
    }
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_zmfree( &hAL, queue );
    magma_zmfree( &hAU, queue );
    magma_zmfree( &dAU, queue );
    magma_zmfree( &dALU, queue );
    magma_zmfree( &dL, queue );
    magma_zmfree( &hL, queue );
    magma_zmfree( &dAL, queue );
    magma_zmfree( &dD, queue );
    magma_zmfree( &hD, queue );
    magma_zmfree( &hALU, queue );
    return info;
}
/**
    Purpose
    -------
    Forms LU = L * U on the device, copies it to the CPU and evaluates how far
    it is from A. Only real parts enter the differences and the norms.

    On return, every entry of LU whose position also occurs in A holds
    Re(LU) - Re(A); entries of LU outside the pattern of A keep their value.

        nonlinres = sqrt( sum of squared differences on the pattern of A )
        res       = sqrt( sum of squared real parts of all entries of LU
                          after the update )

    L is accepted in the same two shapes as in magma_zmLdiagadd: diagonal
    present (L.row[1] == 1) or strictly lower triangular (L.row[1] == 0), in
    which case a unit diagonal is appended row by row.

    Arguments
    ---------
    @param[in]
    A           magma_z_matrix
                reference matrix, CSR on the CPU

    @param[in]
    L           magma_z_matrix
                lower factor, CSR on the CPU

    @param[in]
    U           magma_z_matrix
                upper factor on the CPU

    @param[out]
    LU          magma_z_matrix*
                difference matrix as described above; freed on failure

    @param[out]
    res         real_Double_t*
                norm over the pattern of LU

    @param[out]
    nonlinres   real_Double_t*
                norm over the pattern of A

    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED for an unsupported L,
                otherwise the code stored by CHECK.
*/
magma_int_t
magma_zilures(
    magma_z_matrix A,
    magma_z_matrix L,
    magma_z_matrix U,
    magma_z_matrix *LU,
    real_Double_t *res,
    real_Double_t *nonlinres,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magmaDoubleComplex tmp;
    real_Double_t tmp2;
    magma_int_t i, j, k;

    magmaDoubleComplex one = MAGMA_Z_MAKE( 1.0, 0.0 );

    magma_z_matrix LL={Magma_CSR}, L_d={Magma_CSR}, U_d={Magma_CSR},
    LU_d={Magma_CSR};

    // both results are sums followed by a square root, so they must start
    // from zero rather than from whatever the caller left in them
    *res = 0.0;
    *nonlinres = 0.0;

    if ( L.row[1] == 1 ) {
        // lower triangular, diagonal already stored
        LL.diagorder_type = Magma_UNITY;
        CHECK( magma_zmconvert( L, &LL, Magma_CSR, Magma_CSRL, queue ));
    }
    else if ( L.row[1] == 0 ) {
        // strictly lower triangular: rebuild col/val with one extra entry
        // per row
        CHECK( magma_zmtransfer( L, &LL, Magma_CPU, Magma_CPU, queue ));
        magma_free_cpu( LL.col );
        LL.col = NULL;      // keep cleanup safe if a re-allocation fails
        magma_free_cpu( LL.val );
        LL.val = NULL;
        LL.nnz = L.nnz + L.num_rows;
        CHECK( magma_zmalloc_cpu( &LL.val, LL.nnz ));
        CHECK( magma_index_malloc_cpu( &LL.col, LL.nnz ));

        magma_int_t z = 0;
        for ( i = 0; i < L.num_rows; i++ ) {
            LL.row[i] = z;
            for ( j = L.row[i]; j < L.row[i+1]; j++ ) {
                LL.val[z] = L.val[j];
                LL.col[z] = L.col[j];
                z++;
            }
            // append the unit diagonal as the last entry of the row
            LL.val[z] = MAGMA_Z_MAKE(1.0, 0.0);
            LL.col[z] = i;
            z++;
        }
        LL.row[LL.num_rows] = z;
    }
    else {
        printf("error: L neither lower nor strictly lower triangular!\n");
        // do not continue with an empty LL
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    CHECK( magma_zmtransfer( LL, &L_d, Magma_CPU, Magma_DEV, queue ));
    CHECK( magma_zmtransfer( U, &U_d, Magma_CPU, Magma_DEV, queue ));
    magma_zmfree( &LL, queue );
    CHECK( magma_z_spmm( one, L_d, U_d, &LU_d, queue ));

    CHECK( magma_zmtransfer( LU_d, LU, Magma_DEV, Magma_CPU, queue ));
    magma_zmfree( &L_d, queue );
    magma_zmfree( &U_d, queue );
    magma_zmfree( &LU_d, queue );

    // subtract A from LU wherever both store an entry
    for ( i = 0; i < A.num_rows; i++ ) {
        for ( j = A.row[i]; j < A.row[i+1]; j++ ) {
            magma_index_t lcol = A.col[j];
            for ( k = LU->row[i]; k < LU->row[i+1]; k++ ) {
                if ( LU->col[k] == lcol ) {
                    tmp = MAGMA_Z_MAKE(
                        MAGMA_Z_REAL( LU->val[k] ) - MAGMA_Z_REAL( A.val[j] ),
                        0.0 );
                    LU->val[k] = tmp;

                    tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(tmp) );
                    (*nonlinres) = (*nonlinres) + tmp2*tmp2;
                }
            }
        }
    }

    // norm over every stored entry of the updated LU
    for ( i = 0; i < LU->num_rows; i++ ) {
        for ( j = LU->row[i]; j < LU->row[i+1]; j++ ) {
            tmp2 = (real_Double_t) fabs( MAGMA_Z_REAL(LU->val[j]) );
            (*res) = (*res) + tmp2*tmp2;
        }
    }

    (*res) = sqrt((*res));
    (*nonlinres) = sqrt((*nonlinres));

cleanup:
    if ( info != 0 ) {
        magma_zmfree( LU, queue );
    }
    magma_zmfree( &LL, queue );
    magma_zmfree( &L_d, queue );
    magma_zmfree( &U_d, queue );
    magma_zmfree( &LU_d, queue );
    return info;
}
/**
    Purpose
    -------
    Generates an n x n band matrix with constant diagonals and returns it in
    CSR format.

    The matrix is first assembled row by row in a temporary Magma_ELLPACKT
    matrix with 2*offdiags+1 slots per row: the mirrored off-diagonals to the
    left, the main diagonal in the middle, the off-diagonals to the right.
    Slots whose column index falls outside [0, n) are zeroed, and the
    temporary is then converted to CSR.

    Arguments
    ---------
    @param[in]
    n           magma_int_t
                number of rows and columns

    @param[in]
    offdiags    magma_int_t
                number of off-diagonals on each side of the main diagonal

    @param[in]
    diag_offset magma_index_t*
                offdiags+1 entries; entry 0 belongs to the main diagonal,
                entry k to the k-th off-diagonal (distance from the diagonal)

    @param[in]
    diag_vals   magmaFloatComplex*
                offdiags+1 entries; value used along the matching diagonal

    @param[out]
    A           magma_c_matrix*
                generated matrix in CSR format

    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, otherwise the code stored by CHECK.
*/
magma_int_t
magma_cmgenerator(
    magma_int_t n,
    magma_int_t offdiags,
    magma_index_t *diag_offset,
    magmaFloatComplex *diag_vals,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_c_matrix B={Magma_CSR};

    B.val = NULL;
    B.col = NULL;
    B.row = NULL;
    B.rowidx = NULL;
    B.blockinfo = NULL;
    B.diag = NULL;
    B.dval = NULL;
    B.dcol = NULL;
    B.drow = NULL;
    B.drowidx = NULL;
    B.ddiag = NULL;
    B.list = NULL;
    B.dlist = NULL;

    B.num_rows = n;
    B.num_cols = n;
    B.fill_mode = MagmaFull;
    B.memory_location = Magma_CPU;
    B.storage_type = Magma_ELLPACKT;
    B.max_nnz_row = (2*offdiags+1);

    CHECK( magma_cmalloc_cpu( &B.val, B.max_nnz_row*n ));
    CHECK( magma_index_malloc_cpu( &B.col, B.max_nnz_row*n ));

    for ( int i = 0; i < n; i++ ) {
        // left of the diagonal: outermost diagonal first
        for ( int j = 0; j < offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j ] = diag_vals[ offdiags - j ];
            B.col[ i*B.max_nnz_row + j ] = -1 * diag_offset[ offdiags-j ] + i;
        }
        // main diagonal
        B.val[ i*B.max_nnz_row + offdiags ] = diag_vals[ 0 ];
        B.col[ i*B.max_nnz_row + offdiags ] = i;
        // right of the diagonal
        for ( int j = 0; j < offdiags; j++ ) {
            B.val[ i*B.max_nnz_row + j + offdiags +1 ] = diag_vals[ j+1 ];
            B.col[ i*B.max_nnz_row + j + offdiags +1 ] = diag_offset[ j+1 ] + i;
        }
    }

    // zero the slots that point outside the matrix
    for ( int i = 0; i < n; i++ ) {
        for ( int j = 0; j < B.max_nnz_row; j++ ) {
            if ( (B.col[i*B.max_nnz_row + j] < 0) ||
                 (B.col[i*B.max_nnz_row + j] >= n) ) {
                B.val[ i*B.max_nnz_row + j ] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    // count slots whose real part is nonzero
    B.nnz = 0;
    for ( int i = 0; i < n; i++ ) {
        for ( int j = 0; j < B.max_nnz_row; j++ ) {
            if ( MAGMA_C_REAL( B.val[i*B.max_nnz_row + j] ) != 0.0 ) {
                B.nnz++;
            }
        }
    }
    B.true_nnz = B.nnz;

    // the conversion to CSR drops the zeroed slots
    CHECK( magma_cmconvert( B, A, Magma_ELLPACKT, Magma_CSR, queue ));

cleanup:
    // B is a pure temporary: release it on success as well as on failure
    // (it used to be released on failure only).
    // NOTE(review): relies on magma_cmconvert giving A its own storage.
    magma_cmfree( &B, queue );
    return info;
}
/**
    Purpose
    -------

    Computes the diameter of a sparse matrix and stores the value in
    A->diameter. The diameter is the largest distance |i - col| between a
    row index i and the column index of an entry stored in that row.

    Which entries count depends on the storage type:
      - Magma_CSR:      every stored entry
      - Magma_DENSE:    entries whose real part is nonzero
      - Magma_ELLPACKT: entries whose real part exceeds THRESHOLD
                        (row i occupies slots i*max_nnz_row ..
                        (i+1)*max_nnz_row-1)
      - Magma_ELL:      not supported
    Other storage types leave A untouched and return 0.

    Arguments
    ---------

    @param[in,out]
    A           magma_z_matrix*
                sparse matrix; must reside on the CPU
    @param[in]
    queue       magma_queue_t
                Queue to execute in.

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED for a matrix that is
                not on the CPU or is stored as Magma_ELL, otherwise the code
                stored by CHECK.

    @ingroup magmasparse_zaux
    ********************************************************************/

extern "C" magma_int_t
magma_zdiameter(
    magma_z_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_index_t i, j, tmp, *dim = NULL, maxdim = 0;

    if ( A->memory_location == Magma_CPU ) {
        if ( A->storage_type == Magma_CSR ) {
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows ));
            for ( i = 0; i < A->num_rows; i++ ) {
                dim[i] = 0;
                for ( j = A->row[i]; j < A->row[i+1]; j++ ) {
                    tmp = abs( i - A->col[j] );
                    if ( tmp > dim[i] ) {
                        dim[i] = tmp;
                    }
                }
                if ( dim[i] > maxdim ) {
                    maxdim = dim[i];
                }
            }
            A->diameter = maxdim;
        }
        else if ( A->storage_type == Magma_DENSE ) {
            // uses the function-level dim so that cleanup releases it (a
            // shadowing local declaration used to leak the buffer)
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows ));
            for ( i = 0; i < A->num_rows; i++ ) {
                dim[i] = 0;
                for ( j = 0; j < A->num_cols; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[i*A->num_cols + j] ) != 0.0 ) {
                        tmp = abs( i - j );
                        if ( tmp > dim[i] ) {
                            dim[i] = tmp;
                        }
                    }
                }
                if ( dim[i] > maxdim ) {
                    maxdim = dim[i];
                }
            }
            A->diameter = maxdim;
        }
        // row-wise ELLPACK layout; this test used to read Magma_ELL, which
        // made the "not supported" branch below unreachable
        else if ( A->storage_type == Magma_ELLPACKT ) {
            CHECK( magma_index_malloc_cpu( &dim, A->num_rows ));
            for ( i = 0; i < A->num_rows; i++ ) {
                dim[i] = 0;
                for ( j = i*A->max_nnz_row; j < (i+1)*A->max_nnz_row; j++ ) {
                    if ( MAGMA_Z_REAL( A->val[j] ) > THRESHOLD ) {
                        tmp = abs( i - A->col[j] );
                        if ( tmp > dim[i] ) {
                            dim[i] = tmp;
                        }
                    }
                }
                if ( dim[i] > maxdim ) {
                    maxdim = dim[i];
                }
            }
            A->diameter = maxdim;
        }
        else if ( A->storage_type == Magma_ELL ) {
            printf("error:format not supported.\n");
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        printf("error: matrix not on CPU.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

cleanup:
    // host buffer: pass the pointer itself to the host deallocator
    magma_free_cpu( dim );
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- Debugging file */ int main( int argc, char** argv) { TESTING_INIT(); magma_d_solver_par solver_par; magma_d_preconditioner precond_par; solver_par.epsilon = 10e-16; solver_par.maxiter = 1000; solver_par.verbose = 0; precond_par.solver = Magma_JACOBI; magma_dsolverinfo_init( &solver_par, &precond_par ); double one = MAGMA_D_MAKE(1.0, 0.0); double zero = MAGMA_D_MAKE(0.0, 0.0); magma_d_sparse_matrix A, B, B_d; magma_d_vector x, b; // generate matrix of desired structure and size magma_int_t n=10; // size is n*n magma_int_t nn = n*n; magma_int_t offdiags = 2; magma_index_t *diag_offset; double *diag_vals; magma_dmalloc_cpu( &diag_vals, offdiags+1 ); magma_index_malloc_cpu( &diag_offset, offdiags+1 ); diag_offset[0] = 0; diag_offset[1] = 1; diag_offset[2] = n; diag_vals[0] = MAGMA_D_MAKE( 4.0, 0.0 ); diag_vals[1] = MAGMA_D_MAKE( -1.0, 0.0 ); diag_vals[2] = MAGMA_D_MAKE( -1.0, 0.0 ); magma_dmgenerator( nn, offdiags, diag_offset, diag_vals, &A ); // convert marix into desired format B.storage_type = Magma_SELLC; B.blocksize = 8; B.alignment = 8; // scale matrix magma_dmscale( &A, Magma_UNITDIAG ); magma_d_mconvert( A, &B, Magma_CSR, B.storage_type ); magma_d_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // vectors and initial guess magma_d_vinit( &b, Magma_DEV, A.num_cols, one ); magma_d_vinit( &x, Magma_DEV, A.num_cols, one ); magma_d_spmv( one, B_d, x, zero, b ); // b = A x magma_d_vfree(&x); magma_d_vinit( &x, Magma_DEV, A.num_cols, zero ); // solver magma_dpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_dsolverinfo( &solver_par, &precond_par ); magma_dsolverinfo_free( &solver_par, &precond_par ); magma_d_mfree(&B_d); magma_d_mfree(&B); magma_d_mfree(&A); magma_d_vfree(&x); magma_d_vfree(&b); TESTING_FINALIZE(); return 0; }
/**
    Purpose
    -------
    Symbolic level-of-fill ILU factorization: determines the sparsity
    patterns of the strictly lower factor (ial, jal) and of the upper factor
    including the diagonal (iau, jau) for the CSR pattern (ia, ja).

    Rows are processed in order. For each row the sorted column indices are
    chained into an implicit linked list (lnklst; the value n terminates the
    list), the list is merged with the already computed U rows of all
    columns left of the diagonal, and fill-in is admitted while its level
    does not exceed levfill. The list is then split at the diagonal into the
    L part and the U part.

    The merge skips the first stored entry of each U row (it starts at
    iau[row]+1).

    Arguments
    ---------
    levfill     maximum admitted level of fill
    n           order of the matrix
    nzl         in: capacity of jal; out: number of entries written to jal
    nzu         in: capacity of jau; out: number of entries written to jau
    ia, ja      row pointer / column indices of the input pattern
    ial, jal    row pointer (n+1 entries) / column indices of L
    iau, jau    row pointer (n+1 entries) / column indices of U

    @return     0 on success, -1 when jal or jau is too small, otherwise the
                code stored by CHECK.
*/
magma_int_t
magma_dsymbolic_ilu(
    const magma_int_t levfill,      /* level of fill */
    const magma_int_t n,            /* order of matrix */
    magma_int_t *nzl,               /* input-output */
    magma_int_t *nzu,               /* input-output */
    const mwIndex *ia,
    const mwIndex *ja,              /* input */
    mwIndex *ial,
    mwIndex *jal,                   /* output lower factor structure */
    mwIndex *iau,
    mwIndex *jau)                   /* output upper factor structure */
{
    magma_int_t info = 0;

    magma_int_t i;
    magma_index_t *lnklst = NULL;
    magma_index_t *curlev = NULL;
    magma_index_t *levels = NULL;
    magma_index_t *iwork = NULL;

    magma_int_t knzl = 0;
    magma_int_t knzu = 0;

    CHECK( magma_index_malloc_cpu( &lnklst, n ));
    CHECK( magma_index_malloc_cpu( &curlev, n ));
    CHECK( magma_index_malloc_cpu( &levels, *nzu ));
    CHECK( magma_index_malloc_cpu( &iwork, n ));

    for ( magma_int_t t = 0; t < n; t++ ) {
        lnklst[t] = 0;
        curlev[t] = 0;
        iwork[t] = 0;
    }
    for ( magma_int_t t = 0; t < *nzu; t++ ) {
        levels[t] = 0;
    }

    ial[0] = 0;
    iau[0] = 0;

    for ( i = 0; i < n; i++ ) {
        magma_int_t first, next, j;

        /* copy column indices of row into workspace */
        magma_int_t len = ia[i+1] - ia[i];
        next = 0;
        for ( j = ia[i]; j < ia[i+1]; j++ ) {
            iwork[next++] = ja[j];
        }

        if ( len > 0 ) {
            magma_dshell_sort(len, iwork);

            /* construct implied linked list for row */
            first = iwork[0];
            for ( j = 0; j <= len-2; j++ ) {
                lnklst[iwork[j]] = iwork[j+1];
                curlev[iwork[j]] = 0;
            }
            lnklst[iwork[len-1]] = n;
            curlev[iwork[len-1]] = 0;
        }
        else {
            /* empty row: start from an empty list instead of reading a
               stale iwork[0] and indexing iwork[-1] */
            first = n;
        }

        /* merge with rows in U */
        next = first;
        while ( next < i ) {
            magma_int_t oldlst = next;
            magma_int_t nxtlst = lnklst[next];
            magma_int_t row = next;
            magma_int_t ii;

            /* scan row; ii only advances when jau[ii] has been consumed */
            for ( ii = iau[row]+1; ii < iau[row+1]; /*nop*/ ) {
                if ( jau[ii] < nxtlst ) {
                    /* new fill-in */
                    magma_int_t newlev = curlev[row] + levels[ii] + 1;
                    if ( newlev <= levfill ) {
                        lnklst[oldlst] = jau[ii];
                        lnklst[jau[ii]] = nxtlst;
                        oldlst = jau[ii];
                        curlev[jau[ii]] = newlev;
                    }
                    ii++;
                }
                else if ( jau[ii] == nxtlst ) {
                    /* entry already in the list: keep the smaller level */
                    magma_int_t newlev;
                    oldlst = nxtlst;
                    nxtlst = lnklst[oldlst];
                    newlev = curlev[row] + levels[ii] + 1;
                    curlev[jau[ii]] = min( curlev[jau[ii]], newlev );
                    ii++;
                }
                else { /* (jau[ii] > nxtlst) */
                    oldlst = nxtlst;
                    nxtlst = lnklst[oldlst];
                }
            }
            next = lnklst[next];
        }

        /* gather the pattern into L and U */
        next = first;
        while ( next < i ) {
            if ( knzl >= *nzl ) {
                printf("ILU: STORAGE parameter value %d<%d too small.\n",
                       int(*nzl), int(knzl));
                printf("Increase STORAGE parameter.\n");
                info = -1;
                goto cleanup;
            }
            jal[knzl++] = next;
            next = lnklst[next];
        }
        ial[i+1] = knzl;

        if ( next != i ) {
            /* no diagonal entry in this row; reported but not treated as
               an error */
            printf("ILU structurally singular.\n");
        }

        while ( next < n ) {
            if ( knzu >= *nzu ) {
                printf("ILU: STORAGE parameter value %d < %d too small.\n",
                       int(*nzu), int(knzu));
                printf("Increase STORAGE parameter.\n");
                info = -1;
                goto cleanup;
            }
            levels[knzu] = curlev[next];
            jau[knzu++] = next;
            next = lnklst[next];
        }
        iau[i+1] = knzu;
    }

    *nzl = knzl;
    *nzu = knzu;

cleanup:
    magma_free_cpu(lnklst);
    magma_free_cpu(curlev);
    magma_free_cpu(levels);
    magma_free_cpu(iwork);
    return info;
}
/**
    Purpose
    -------
    Extracts one row slice of the square CPU CSR matrix A.

    The rows are divided into num_slices contiguous slices of
    ceil(num_rows / num_slices) rows each (the last slice may be shorter or
    empty); [lstart, lend) denotes the rows of slice number `slice`.

      - B     has the size of A: rows inside the slice are copied from A,
              every other row holds a single 1 on the diagonal.
      - ALOC  holds the slice entries with lstart <= col < lend, with column
              indices shifted by -lstart (size x size).
      - ANLOC holds the remaining slice entries with their original column
              indices (size x num_cols).
      - comm_i[c] is set to 1 and comm_v[c] accumulates |value| for every
        column c referenced by an entry that goes to ANLOC.

    Arguments
    ---------
    @param[in]      num_slices  number of slices (>= 1)
    @param[in]      slice       index of the requested slice, 0-based
    @param[in]      A           square matrix, CSR on the CPU
    @param[out]     B           A restricted to the slice, identity elsewhere
    @param[out]     ALOC        local part of the slice
    @param[out]     ANLOC       nonlocal part of the slice
    @param[in,out]  comm_i      A.num_rows entries, reset and filled here
    @param[in,out]  comm_v      A.num_rows entries, reset and filled here
    @param[out]     start       first row of the slice (lstart)
    @param[out]     end         one past the last row of the slice (lend)
    @param[in]      queue       Queue to execute in.

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED for a non-square
                matrix, an invalid slice request or an unsupported
                format/location, otherwise the code stored by CHECK.
*/
extern "C" magma_int_t
magma_zmslice(
    magma_int_t num_slices,
    magma_int_t slice,
    magma_z_matrix A,
    magma_z_matrix *B,
    magma_z_matrix *ALOC,
    magma_z_matrix *ANLOC,
    magma_index_t *comm_i,
    magmaDoubleComplex *comm_v,
    magma_int_t *start,
    magma_int_t *end,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    if ( A.num_rows != A.num_cols ) {
        printf("%% error: only supported for square matrices.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }
    // a slice outside [0, num_slices) would index before or behind the
    // arrays below; num_slices < 1 would divide by zero
    if ( num_slices < 1 || slice < 0 || slice >= num_slices ) {
        printf("%% error: invalid slice %d of %d.\n",
               int(slice), int(num_slices) );
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    if ( A.memory_location == Magma_CPU && A.storage_type == Magma_CSR ) {
        // copy the headers from A, then drop the arrays that are rebuilt.
        // Freed pointers are reset so that a later failure does not leave
        // dangling pointers behind.
        CHECK( magma_zmconvert( A, B, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( B->col );
        B->col = NULL;
        magma_free_cpu( B->val );
        B->val = NULL;
        CHECK( magma_zmconvert( A, ALOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ALOC->col );
        ALOC->col = NULL;
        magma_free_cpu( ALOC->row );
        ALOC->row = NULL;
        magma_free_cpu( ALOC->val );
        ALOC->val = NULL;
        CHECK( magma_zmconvert( A, ANLOC, Magma_CSR, Magma_CSR, queue ) );
        magma_free_cpu( ANLOC->col );
        ANLOC->col = NULL;
        magma_free_cpu( ANLOC->row );
        ANLOC->row = NULL;
        magma_free_cpu( ANLOC->val );
        ANLOC->val = NULL;

        magma_int_t i, j, k, nnz, nnz_loc = 0, loc_row = 0, nnz_nloc = 0;
        magma_index_t col;
        magma_int_t size = magma_ceildiv( A.num_rows, num_slices );
        // with rounded-up slice sizes a trailing slice can start behind the
        // last row; clamp so that it becomes an empty slice
        magma_int_t lstart = min( slice*size, A.num_rows );
        magma_int_t lend = min( (slice+1)*size, A.num_rows );
        // correct size for last slice
        size = lend - lstart;
        CHECK( magma_index_malloc_cpu( &ALOC->row, size+1 ) );
        CHECK( magma_index_malloc_cpu( &ANLOC->row, size+1 ) );

        // entries of the slice plus one diagonal entry for every other row
        nnz = A.row[ lend ] - A.row[ lstart ] + ( A.num_rows - size );
        CHECK( magma_index_malloc_cpu( &B->col, nnz ) );
        CHECK( magma_zmalloc_cpu( &B->val, nnz ) );

        // reset the communication plan
        for ( i = 0; i < A.num_rows; i++ ) {
            comm_i[i] = 0;
            comm_v[i] = MAGMA_Z_ZERO;
        }

        k = 0;
        // first row pointer (the index used to be the stale loop counter,
        // i.e. num_rows)
        B->row[0] = 0;
        ALOC->row[0] = 0;
        ANLOC->row[0] = 0;

        // identity above slice
        for ( i = 0; i < lstart; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }

        // slice: copy into B, count local/nonlocal entries per row
        for ( i = lstart; i < lend; i++ ) {
            B->row[i+1] = B->row[i] + (A.row[i+1]-A.row[i]);
            for ( j = A.row[i]; j < A.row[i+1]; j++ ) {
                B->val[k] = A.val[j];
                col = A.col[j];
                B->col[k] = col;
                // communication plan
                if ( col < lstart || col >= lend ) {
                    comm_i[ col ] = 1;
                    comm_v[ col ] = comm_v[ col ]
                        + MAGMA_Z_MAKE( MAGMA_Z_ABS( A.val[j] ), 0.0 );
                    nnz_nloc++;
                } else {
                    nnz_loc++;
                }
                k++;
            }
            loc_row++;
            ALOC->row[ loc_row ] = nnz_loc;
            ANLOC->row[ loc_row ] = nnz_nloc;
        }

        CHECK( magma_index_malloc_cpu( &ALOC->col, nnz_loc ) );
        CHECK( magma_zmalloc_cpu( &ALOC->val, nnz_loc ) );
        ALOC->num_rows = size;
        ALOC->num_cols = size;
        ALOC->nnz = nnz_loc;

        CHECK( magma_index_malloc_cpu( &ANLOC->col, nnz_nloc ) );
        CHECK( magma_zmalloc_cpu( &ANLOC->val, nnz_nloc ) );
        ANLOC->num_rows = size;
        ANLOC->num_cols = A.num_cols;
        ANLOC->nnz = nnz_nloc;

        nnz_loc = 0;
        nnz_nloc = 0;
        // second pass over the slice: fill the local/nonlocal matrices
        for ( i = lstart; i < lend; i++ ) {
            for ( j = A.row[i]; j < A.row[i+1]; j++ ) {
                col = A.col[j];
                if ( col < lstart || col >= lend ) {
                    ANLOC->val[ nnz_nloc ] = A.val[j];
                    ANLOC->col[ nnz_nloc ] = col;
                    nnz_nloc++;
                } else {
                    ALOC->val[ nnz_loc ] = A.val[j];
                    ALOC->col[ nnz_loc ] = col-lstart;
                    nnz_loc++;
                }
            }
        }

        // identity below slice
        for ( i = lend; i < A.num_rows; i++ ) {
            B->row[i+1] = B->row[i]+1;
            B->val[k] = MAGMA_Z_ONE;
            B->col[k] = i;
            k++;
        }
        B->nnz = k;
        *start = lstart;
        *end = lend;
    }
    else {
        printf("error: mslice only supported for CSR matrices on the CPU: %d %d.\n",
                int(A.memory_location), int(A.storage_type) );
        info = MAGMA_ERR_NOT_SUPPORTED;
    }
cleanup:
    return info;
}
magma_int_t magma_d_csr_mtx( magma_d_matrix *A, const char *filename, magma_queue_t queue ) { char buffer[ 1024 ]; magma_int_t info = 0; int csr_compressor = 0; // checks for zeros in original file magma_d_matrix B={Magma_CSR}; magma_index_t *coo_col = NULL; magma_index_t *coo_row = NULL; double *coo_val = NULL; double *new_val = NULL; magma_index_t* new_row = NULL; magma_index_t* new_col = NULL; magma_int_t symmetric = 0; std::vector< std::pair< magma_index_t, double > > rowval; FILE *fid = NULL; MM_typecode matcode; fid = fopen(filename, "r"); if (fid == NULL) { printf("%% Unable to open file %s\n", filename); info = MAGMA_ERR_NOT_FOUND; goto cleanup; } printf("%% Reading sparse matrix from file (%s):", filename); fflush(stdout); if (mm_read_banner(fid, &matcode) != 0) { printf("\n%% Could not process Matrix Market banner: %s.\n", matcode); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if (!mm_is_valid(matcode)) { printf("\n%% Invalid Matrix Market file.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } if ( ! 
( ( mm_is_real(matcode) || mm_is_integer(matcode) || mm_is_pattern(matcode) || mm_is_real(matcode) ) && mm_is_coordinate(matcode) && mm_is_sparse(matcode) ) ) { mm_snprintf_typecode( buffer, sizeof(buffer), matcode ); printf("\n%% Sorry, MAGMA-sparse does not support Market Market type: [%s]\n", buffer ); printf("%% Only real-valued or pattern coordinate matrices are supported.\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } magma_index_t num_rows, num_cols, num_nonzeros; if (mm_read_mtx_crd_size(fid, &num_rows, &num_cols, &num_nonzeros) != 0) { info = MAGMA_ERR_UNKNOWN; goto cleanup; } A->storage_type = Magma_CSR; A->memory_location = Magma_CPU; A->num_rows = num_rows; A->num_cols = num_cols; A->nnz = num_nonzeros; A->fill_mode = MagmaFull; CHECK( magma_index_malloc_cpu( &coo_col, A->nnz ) ); CHECK( magma_index_malloc_cpu( &coo_row, A->nnz ) ); CHECK( magma_dmalloc_cpu( &coo_val, A->nnz ) ); if (mm_is_real(matcode) || mm_is_integer(matcode)) { for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; double VAL; // always read in a double and convert later if necessary fscanf(fid, " %d %d %lf \n", &ROW, &COL, &VAL); if ( VAL == 0 ) csr_compressor = 1; coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( VAL, 0.); } } else if (mm_is_pattern(matcode) ) { for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; fscanf(fid, " %d %d \n", &ROW, &COL ); coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( 1.0, 0.); } } else if (mm_is_real(matcode) ){ for(magma_int_t i = 0; i < A->nnz; ++i) { magma_index_t ROW, COL; double VAL, VALC; // always read in a double and convert later if necessary fscanf(fid, " %d %d %lf %lf\n", &ROW, &COL, &VAL, &VALC); coo_row[i] = ROW - 1; coo_col[i] = COL - 1; coo_val[i] = MAGMA_D_MAKE( VAL, VALC); } // printf(" ...successfully read real matrix... 
"); } else { printf("\n%% Unrecognized data type\n"); info = MAGMA_ERR_NOT_SUPPORTED; goto cleanup; } fclose(fid); fid = NULL; printf(" done. Converting to CSR:"); fflush(stdout); A->sym = Magma_GENERAL; if( mm_is_symmetric(matcode) ) { symmetric = 1; } if ( mm_is_symmetric(matcode) || mm_is_symmetric(matcode) ) { // duplicate off diagonal entries printf("\n%% Detected symmetric case."); A->sym = Magma_SYMMETRIC; magma_index_t off_diagonals = 0; for(magma_int_t i = 0; i < A->nnz; ++i) { if (coo_row[i] != coo_col[i]) ++off_diagonals; } magma_index_t true_nonzeros = 2*off_diagonals + (A->nnz - off_diagonals); //printf("%% total number of nonzeros: %d\n%%", int(A->nnz)); CHECK( magma_index_malloc_cpu( &new_row, true_nonzeros )); CHECK( magma_index_malloc_cpu( &new_col, true_nonzeros )); CHECK( magma_dmalloc_cpu( &new_val, true_nonzeros )); magma_index_t ptr = 0; for(magma_int_t i = 0; i < A->nnz; ++i) { if (coo_row[i] != coo_col[i]) { new_row[ptr] = coo_row[i]; new_col[ptr] = coo_col[i]; new_val[ptr] = coo_val[i]; ptr++; new_col[ptr] = coo_row[i]; new_row[ptr] = coo_col[i]; new_val[ptr] = (symmetric == 0) ? 
coo_val[i] : conj(coo_val[i]); ptr++; } else { new_row[ptr] = coo_row[i]; new_col[ptr] = coo_col[i]; new_val[ptr] = coo_val[i]; ptr++; } } magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); coo_row = new_row; coo_col = new_col; coo_val = new_val; A->nnz = true_nonzeros; //printf("total number of nonzeros: %d\n", A->nnz); } // end symmetric case CHECK( magma_dmalloc_cpu( &A->val, A->nnz )); CHECK( magma_index_malloc_cpu( &A->col, A->nnz )); CHECK( magma_index_malloc_cpu( &A->row, A->num_rows+1 )); // original code from Nathan Bell and Michael Garland for (magma_index_t i = 0; i < num_rows; i++) (A->row)[i] = 0; for (magma_index_t i = 0; i < A->nnz; i++) (A->row)[coo_row[i]]++; // cumulative sum the nnz per row to get row[] magma_int_t cumsum; cumsum = 0; for(magma_int_t i = 0; i < num_rows; i++) { magma_index_t temp = (A->row)[i]; (A->row)[i] = cumsum; cumsum += temp; } (A->row)[num_rows] = A->nnz; // write Aj,Ax into Bj,Bx for(magma_int_t i = 0; i < A->nnz; i++) { magma_index_t row_ = coo_row[i]; magma_index_t dest = (A->row)[row_]; (A->col)[dest] = coo_col[i]; (A->val)[dest] = coo_val[i]; (A->row)[row_]++; } magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); coo_row = NULL; coo_col = NULL; coo_val = NULL; int last; last = 0; for(int i = 0; i <= num_rows; i++) { int temp = (A->row)[i]; (A->row)[i] = last; last = temp; } (A->row)[A->num_rows] = A->nnz; // sort column indices within each row // copy into vector of pairs (column index, value), sort by column index, then copy back for (magma_index_t k=0; k < A->num_rows; ++k) { int kk = (A->row)[k]; int len = (A->row)[k+1] - (A->row)[k]; rowval.resize( len ); for( int i=0; i < len; ++i ) { rowval[i] = std::make_pair( (A->col)[kk+i], (A->val)[kk+i] ); } std::sort( rowval.begin(), rowval.end(), compare_first ); for( int i=0; i < len; ++i ) { (A->col)[kk+i] = rowval[i].first; (A->val)[kk+i] = rowval[i].second; } } if ( csr_compressor > 0) { // run the CSR compressor to 
remove zeros //printf("removing zeros: "); CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue )); CHECK( magma_d_csr_compressor( &(A->val), &(A->row), &(A->col), &B.val, &B.row, &B.col, &B.num_rows, queue )); B.nnz = B.row[num_rows]; //printf(" remaining nonzeros:%d ", B.nnz); magma_free_cpu( A->val ); magma_free_cpu( A->row ); magma_free_cpu( A->col ); CHECK( magma_dmtransfer( B, A, Magma_CPU, Magma_CPU, queue )); //printf("done.\n"); } A->true_nnz = A->nnz; printf(" done.\n"); cleanup: if ( fid != NULL ) { fclose( fid ); fid = NULL; } magma_dmfree( &B, queue ); magma_free_cpu(coo_row); magma_free_cpu(coo_col); magma_free_cpu(coo_val); return info; }
/**
    Purpose
    -------
    Splits a matrix into the block-diagonal part D and the remainder R,
    using square diagonal blocks of size bsize (the last block may be
    smaller).

    For a CSR or CSRCOO matrix on the CPU the split is done directly:
      - an entry of row k in block [i, i+bsize) goes to D if its column lies
        in the same index range, otherwise to R;
      - D is tagged Magma_CSRD. If row k stores its diagonal entry, that
        entry is placed first in the row of D and the entries left of it are
        shifted by one slot; a row without diagonal entry is copied in
        order. This assumes the column indices of each row are sorted
        ascending.

    For any other format or location, A is moved to the CPU, converted to
    CSR, split, and both parts are converted and moved back to the format
    and location of A.

    Arguments
    ---------
    @param[in]   bsize   size of the diagonal blocks (>= 1)
    @param[in]   A       matrix to split
    @param[out]  D       block-diagonal part
    @param[out]  R       remainder
    @param[in]   queue   Queue to execute in.

    @return     MAGMA_SUCCESS, MAGMA_ERR_NOT_SUPPORTED for bsize < 1,
                MAGMA_ERR_HOST_ALLOC if an allocation fails, or the code
                returned by a failing transfer/conversion.
*/
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_sparse_matrix A,
    magma_c_sparse_matrix *D,
    magma_c_sparse_matrix *R,
    magma_queue_t queue )
{
    // the block loops advance by bsize and would never terminate otherwise
    if ( bsize < 1 ) {
        return MAGMA_ERR_NOT_SUPPORTED;
    }

    if ( A.memory_location == Magma_CPU &&
         ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) ) {
        magma_int_t i, k, j, nnz_diag, nnz_offd;
        magma_int_t stat_cpu = 0;

        D->val = NULL;
        D->col = NULL;
        D->row = NULL;
        D->rowidx = NULL;
        D->blockinfo = NULL;
        D->diag = NULL;
        D->dval = NULL;
        D->dcol = NULL;
        D->drow = NULL;
        D->drowidx = NULL;
        D->ddiag = NULL;
        R->val = NULL;
        R->col = NULL;
        R->row = NULL;
        R->rowidx = NULL;
        R->blockinfo = NULL;
        R->diag = NULL;
        R->dval = NULL;
        R->dcol = NULL;
        R->drow = NULL;
        R->drowidx = NULL;
        R->ddiag = NULL;

        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices
        for ( i = 0; i < A.num_rows; i += bsize ) {
            for ( k = i; k < min(A.num_rows, i+bsize); k++ ) {
                for ( j = A.row[k]; j < A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        nnz_diag++;
                    }
                    else {
                        nnz_offd++;
                    }
                }
            }
        }

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;

        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;

        stat_cpu += magma_cmalloc_cpu( &D->val, nnz_diag );
        stat_cpu += magma_index_malloc_cpu( &D->row, A.num_rows+1 );
        stat_cpu += magma_index_malloc_cpu( &D->col, nnz_diag );

        stat_cpu += magma_cmalloc_cpu( &R->val, nnz_offd );
        stat_cpu += magma_index_malloc_cpu( &R->row, A.num_rows+1 );
        stat_cpu += magma_index_malloc_cpu( &R->col, nnz_offd );

        if ( stat_cpu != 0 ) {
            magma_c_mfree( D, queue );
            magma_c_mfree( R, queue );
            return MAGMA_ERR_HOST_ALLOC;
        }

        // Fill up the new sparse matrices
        D->row[0] = 0;
        R->row[0] = 0;

        nnz_offd = nnz_diag = 0;
        for ( i = 0; i < A.num_rows; i += bsize ) {
            for ( k = i; k < min(A.num_rows, i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];

                // Only reserve the leading slot when row k really stores a
                // diagonal entry. Shifting unconditionally left the slot
                // unset and let the next entry overwrite the shifted one.
                magma_int_t has_diag = 0;
                for ( j = A.row[k]; j < A.row[k+1]; j++ ) {
                    if ( A.col[j] == k ) {
                        has_diag = 1;
                        break;
                    }
                }

                for ( j = A.row[k]; j < A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // larger than diagonal remain as before
                        if ( A.col[j] > k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j] == k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right
                        // to have room for the diagonal (if there is one)
                        else {
                            D->val[nnz_diag+has_diag] = A.val[ j ];
                            D->col[nnz_diag+has_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
        return MAGMA_SUCCESS;
    }
    else {
        // Detour over CPU/CSR. Every step is checked; on failure only the
        // temporaries created so far are released.
        magma_int_t info;
        magma_c_sparse_matrix Ah, ACSR, DCSR, RCSR, Dh, Rh;

        info = magma_c_mtransfer( A, &Ah, A.memory_location, Magma_CPU, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto done;
        }
        info = magma_c_mconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto free_Ah;
        }
        // on failure the recursive call has already released DCSR and RCSR
        info = magma_ccsrsplit( bsize, ACSR, &DCSR, &RCSR, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto free_ACSR;
        }
        info = magma_c_mconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto free_split;
        }
        info = magma_c_mconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto free_Dh;
        }
        info = magma_c_mtransfer( Dh, D, Magma_CPU, A.memory_location, queue );
        if ( info != MAGMA_SUCCESS ) {
            goto free_Rh;
        }
        info = magma_c_mtransfer( Rh, R, Magma_CPU, A.memory_location, queue );
        if ( info != MAGMA_SUCCESS ) {
            magma_c_mfree( D, queue );
        }

    free_Rh:
        magma_c_mfree( &Rh, queue );
    free_Dh:
        magma_c_mfree( &Dh, queue );
    free_split:
        magma_c_mfree( &DCSR, queue );
        magma_c_mfree( &RCSR, queue );
    free_ACSR:
        magma_c_mfree( &ACSR, queue );
    free_Ah:
        magma_c_mfree( &Ah, queue );
    done:
        return info;
    }
}
extern "C" magma_int_t magma_dmtransfer( magma_d_matrix A, magma_d_matrix *B, magma_location_t src, magma_location_t dst, magma_queue_t queue ) { magma_int_t info = 0; B->val = NULL; B->diag = NULL; B->row = NULL; B->rowidx = NULL; B->col = NULL; B->blockinfo = NULL; B->dval = NULL; B->ddiag = NULL; B->drow = NULL; B->drowidx = NULL; B->dcol = NULL; B->diag = NULL; B->ddiag = NULL; B->list = NULL; B->dlist = NULL; // first case: copy matrix from host to device if ( src == Magma_CPU && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( 
A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = 
A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dsetvector( A.num_rows * rowlength, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * rowlength, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.num_rows, A.row, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz 
)); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.numblocks + 1, A.row, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * 
A.num_cols )); // data transfer magma_dsetvector( A.num_rows * A.num_cols, A.val, 1, B->dval, 1, queue ); } } // second case: copy matrix from host to host else if ( src == Magma_CPU && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = 
A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLRT-type else if ( 
A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer for( magma_int_t i=0; i<A.num_rows*rowlength; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows; i++ ) { B->row[i] = A.row[i]; } } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; B->numblocks = A.numblocks; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.numblocks+1; i++ ) { B->row[i] = A.row[i]; } } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = 
A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer //magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); for( magma_int_t i=0; i<size_b*size_b*A.numblocks; i++ ) { B->dval[i] = A.val[i]; } //magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); for( magma_int_t i=0; i<r_blocks+1; i++ ) { B->drow[i] = A.row[i]; } //magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); for( magma_int_t i=0; i<A.numblocks; i++ ) { B->dcol[i] = A.col[i]; } } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.num_cols; i++ ) { B->val[i] = A.val[i]; } } } // third case: copy matrix from device to host else if ( src == Magma_DEV && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || 
A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter 
= A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } 
//ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer magma_dgetvector( A.num_rows * rowlength, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * rowlength, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.num_rows, A.drow, 1, B->row, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.numblocks + 1, A.drow, 1, B->row, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B 
B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer magma_dgetvector( size_b * size_b * A.numblocks, A.dval, 1, B->val, 1, queue ); magma_index_getvector( r_blocks + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.numblocks, A.dcol, 1, B->col, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer magma_dgetvector( A.num_rows * A.num_cols, A.dval, 1, B->val, 1, queue ); } } // fourth case: copy matrix from device to device else if ( src == Magma_DEV && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || 
A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); 
CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = 
Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dcopyvector( A.num_rows * rowlength, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * rowlength, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.num_rows, A.drow, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.numblocks + 1, A.drow, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; 
B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dcopyvector( size_b * size_b * A.numblocks, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( r_blocks + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.numblocks, A.dcol, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols )); // data transfer magma_dcopyvector( A.num_rows * A.num_cols, A.dval, 1, B->dval, 1, queue ); } } cleanup: if( info != 0 ){ magma_dmfree( B, queue ); } return info; }
/* //////////////////////////////////////////////////////////////////////////// -- Debugging file */ int main( int argc, char** argv) { TESTING_INIT(); magma_z_solver_par solver_par; magma_z_preconditioner precond_par; solver_par.epsilon = 10e-16; solver_par.maxiter = 1000; solver_par.verbose = 0; solver_par.restart = 30; solver_par.num_eigenvalues = 0; solver_par.ortho = Magma_CGS; magmaDoubleComplex one = MAGMA_Z_MAKE(1.0, 0.0); magmaDoubleComplex zero = MAGMA_Z_MAKE(0.0, 0.0); magma_z_sparse_matrix A, B, B_d; magma_z_vector x, b; // generate matrix of desired structure and size magma_int_t n=100; // size is n*n magma_int_t nn = n*n; magma_int_t offdiags = 2; magma_index_t *diag_offset; magmaDoubleComplex *diag_vals; magma_zmalloc_cpu( &diag_vals, offdiags+1 ); magma_index_malloc_cpu( &diag_offset, offdiags+1 ); diag_offset[0] = 0; diag_offset[1] = 1; diag_offset[2] = n; diag_vals[0] = MAGMA_Z_MAKE( 4.1, 0.0 ); diag_vals[1] = MAGMA_Z_MAKE( -1.0, 0.0 ); diag_vals[2] = MAGMA_Z_MAKE( -1.0, 0.0 ); magma_zmgenerator( nn, offdiags, diag_offset, diag_vals, &A ); // convert marix into desired format B.storage_type = Magma_SELLC; B.blocksize = 8; B.alignment = 8; // scale matrix magma_zmscale( &A, Magma_UNITDIAG ); magma_z_mconvert( A, &B, Magma_CSR, B.storage_type ); magma_z_mtransfer( B, &B_d, Magma_CPU, Magma_DEV ); // test CG #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zcg_res( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG Jacobi 
############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG IC ################################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ICC; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PCG IC ################################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ICC; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpcg( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( 
&solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test BICGSTAB #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zbicgstab( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PBICGSTAB Jacobi ############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); /* // test PBICGSTAB ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; 
magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PBICGSTAB ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x);printf("here\n"); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpbicgstab( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test GMRES #################################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // solver magma_zgmres( B_d, b, &x, &solver_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); // test PGMRES Jacobi ############################ // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( 
&solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_JACOBI; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpgmres( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b);*/ // test PGMRES ILU ############################### // vectors and initial guess magma_z_vinit( &b, Magma_DEV, A.num_cols, one ); magma_z_vinit( &x, Magma_DEV, A.num_cols, one ); magma_z_spmv( one, B_d, x, zero, b ); // b = A x magma_z_vfree(&x); magma_z_vinit( &x, Magma_DEV, A.num_cols, zero ); magma_zsolverinfo_init( &solver_par, &precond_par ); // Preconditioner precond_par.solver = Magma_ILU; magma_z_precondsetup( B_d, b, &precond_par ); // solver magma_zpgmres( B_d, b, &x, &solver_par, &precond_par ); // solverinfo magma_zsolverinfo( &solver_par, &precond_par ); if( solver_par.numiter > 150 ){ printf("error: test not passed!\n"); exit(-1); } magma_zsolverinfo_free( &solver_par, &precond_par ); magma_z_vfree(&x); magma_z_vfree(&b); printf("all tests passed.\n"); magma_z_mfree(&B_d); magma_z_mfree(&B); magma_z_mfree(&A); TESTING_FINALIZE(); return 0; }
/**
 * Transpose a host CSR matrix into a freshly allocated CSR matrix.
 *
 * The transpose is built with a counting sort over the column indices, so
 * no per-column scratch buffers are needed (the previous implementation
 * sized its scratch space and the new row pointer by n_rows although they
 * are indexed by column, and never released the scratch buffers).
 *
 * @param n_rows      number of rows of the input matrix
 * @param n_cols      number of columns of the input matrix
 * @param nnz         ignored on input; overwritten with row[n_rows]
 * @param val         input values, row[n_rows] entries
 * @param row         input row pointer, n_rows+1 entries
 * @param col         input column indices, each expected in [0, n_cols)
 * @param new_n_rows  receives n_cols
 * @param new_n_cols  receives n_rows
 * @param new_nnz     receives row[n_rows]
 * @param new_val     receives the transposed values (allocated with new[])
 * @param new_row     receives the transposed row pointer, n_cols+1 entries
 *                    (allocated with new[])
 * @param new_col     receives the transposed column indices (allocated with new[])
 * @param queue       unused
 *
 * @return MAGMA_SUCCESS
 */
extern "C" magma_int_t
c_transpose_csr(
    magma_int_t n_rows,
    magma_int_t n_cols,
    magma_int_t nnz,
    magmaFloatComplex *val,
    magma_index_t *row,
    magma_index_t *col,
    magma_int_t *new_n_rows,
    magma_int_t *new_n_cols,
    magma_int_t *new_nnz,
    magmaFloatComplex **new_val,
    magma_index_t **new_row,
    magma_index_t **new_col,
    magma_queue_t queue )
{
    // the row pointer, not the argument, defines the number of stored entries
    nnz = row[n_rows];

    *new_n_rows = n_cols;
    *new_n_cols = n_rows;
    *new_nnz    = nnz;

    // csr structure for the transposed matrix; new[] is kept so that the
    // allocator seen by existing callers does not change
    *new_val = new magmaFloatComplex[nnz];
    *new_row = new magma_index_t[n_cols+1];
    *new_col = new magma_index_t[nnz];

    magma_index_t *ptr = *new_row;

    // count the entries of every input column: ptr[c+1] = #entries in column c
    for( magma_int_t c=0; c<=n_cols; c++ ) {
        ptr[c] = 0;
    }
    for( magma_int_t k=0; k<nnz; k++ ) {
        ptr[ col[k]+1 ]++;
    }

    // running sum: ptr[c] becomes the first output slot of column c
    for( magma_int_t c=0; c<n_cols; c++ ) {
        ptr[c+1] += ptr[c];
    }

    // scatter row by row; ptr[c] serves as the write cursor of column c, so
    // entries inside one output row keep the order of the input rows
    for( magma_int_t r=0; r<n_rows; r++ ) {
        for( magma_int_t k=row[r]; k<row[r+1]; k++ ) {
            magma_index_t dst = ptr[ col[k] ]++;
            (*new_val)[dst] = val[k];
            (*new_col)[dst] = r;
        }
    }

    // every cursor now sits at the start of the next column: shift right by
    // one to restore the row pointer
    for( magma_int_t c=n_cols; c>0; c-- ) {
        ptr[c] = ptr[c-1];
    }
    ptr[0] = 0;

    return MAGMA_SUCCESS;
}
magma_int_t magma_dsymbilu( magma_d_matrix *A, magma_int_t levels, magma_d_matrix *L, magma_d_matrix *U, magma_queue_t queue ) { magma_int_t info = 0; magma_d_matrix A_copy={Magma_CSR}, B={Magma_CSR}; magma_d_matrix hA={Magma_CSR}, CSRCOOA={Magma_CSR}; if( A->memory_location == Magma_CPU && A->storage_type == Magma_CSR ){ CHECK( magma_dmtransfer( *A, &A_copy, Magma_CPU, Magma_CPU, queue )); CHECK( magma_dmtransfer( *A, &B, Magma_CPU, Magma_CPU, queue )); // possibility to scale to unit diagonal //magma_dmscale( &B, Magma_UNITDIAG ); CHECK( magma_dmconvert( B, L, Magma_CSR, Magma_CSR , queue)); CHECK( magma_dmconvert( B, U, Magma_CSR, Magma_CSR, queue )); magma_int_t num_lnnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_int_t num_unnz = (levels > 0 ) ? B.nnz/2*(2*levels+50) : B.nnz; magma_free_cpu( L->col ); magma_free_cpu( U->col ); CHECK( magma_index_malloc_cpu( &L->col, num_lnnz )); CHECK( magma_index_malloc_cpu( &U->col, num_unnz )); magma_dsymbolic_ilu( levels, A->num_rows, &num_lnnz, &num_unnz, B.row, B.col, L->row, L->col, U->row, U->col ); L->nnz = num_lnnz; U->nnz = num_unnz; magma_free_cpu( L->val ); magma_free_cpu( U->val ); CHECK( magma_dmalloc_cpu( &L->val, L->nnz )); CHECK( magma_dmalloc_cpu( &U->val, U->nnz )); for( magma_int_t i=0; i<L->nnz; i++ ) L->val[i] = MAGMA_D_MAKE( 0.0, 0.0 ); for( magma_int_t i=0; i<U->nnz; i++ ) U->val[i] = MAGMA_D_MAKE( 0.0, 0.0 ); // take the original values (scaled) as initial guess for L for(magma_int_t i=0; i<L->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=L->row[i]; k<L->row[i+1]; k++){ if( L->col[k] == lcol ){ L->val[k] = B.val[j]; } } } } // take the original values (scaled) as initial guess for U for(magma_int_t i=0; i<U->num_rows; i++){ for(magma_int_t j=B.row[i]; j<B.row[i+1]; j++){ magma_index_t lcol = B.col[j]; for(magma_int_t k=U->row[i]; k<U->row[i+1]; k++){ if( U->col[k] == lcol ){ U->val[k] = B.val[j]; } } } } magma_dmfree( &B, 
queue ); // fill A with the new structure; magma_free_cpu( A->col ); magma_free_cpu( A->val ); CHECK( magma_index_malloc_cpu( &A->col, L->nnz+U->nnz )); CHECK( magma_dmalloc_cpu( &A->val, L->nnz+U->nnz )); A->nnz = L->nnz+U->nnz; magma_int_t z = 0; for(magma_int_t i=0; i<A->num_rows; i++){ A->row[i] = z; for(magma_int_t j=L->row[i]; j<L->row[i+1]; j++){ A->col[z] = L->col[j]; A->val[z] = L->val[j]; z++; } for(magma_int_t j=U->row[i]; j<U->row[i+1]; j++){ A->col[z] = U->col[j]; A->val[z] = U->val[j]; z++; } } A->row[A->num_rows] = z; // reset the values of A to the original entries for(magma_int_t i=0; i<A->num_rows; i++){ for(magma_int_t j=A_copy.row[i]; j<A_copy.row[i+1]; j++){ magma_index_t lcol = A_copy.col[j]; for(magma_int_t k=A->row[i]; k<A->row[i+1]; k++){ if( A->col[k] == lcol ){ A->val[k] = A_copy.val[j]; } } } } } else { magma_storage_t A_storage = A->storage_type; magma_location_t A_location = A->memory_location; CHECK( magma_dmtransfer( *A, &hA, A->memory_location, Magma_CPU, queue )); CHECK( magma_dmconvert( hA, &CSRCOOA, hA.storage_type, Magma_CSR, queue )); CHECK( magma_dsymbilu( &CSRCOOA, levels, L, U, queue )); magma_dmfree( &hA, queue ); magma_dmfree( A, queue ); CHECK( magma_dmconvert( CSRCOOA, &hA, Magma_CSR, A_storage, queue )); CHECK( magma_dmtransfer( hA, A, Magma_CPU, A_location, queue )); } cleanup: if( info != 0 ){ magma_dmfree( L, queue ); magma_dmfree( U, queue ); } magma_dmfree( &A_copy, queue ); magma_dmfree( &B, queue ); magma_dmfree( &hA, queue ); magma_dmfree( &CSRCOOA, queue ); return info; }
/**
 * Transpose a host CSR matrix by converting it to CSC, which is the CSR
 * representation of the transpose.
 *
 * On success the three newly allocated arrays are handed over to the
 * caller through new_values / new_rowptr / new_colind; they are released
 * here only when an error occurred. (Previously they were freed on every
 * path, which left the caller with dangling pointers.)
 *
 * @param n_rows      number of rows of the input matrix
 * @param n_cols      number of columns of the input matrix
 * @param nnz         number of stored entries
 * @param values      input values, nnz entries
 * @param rowptr      input row pointer, n_rows+1 entries
 * @param colind      input column indices, nnz entries
 * @param new_n_rows  receives n_cols
 * @param new_n_cols  receives n_rows
 * @param new_nnz     receives nnz
 * @param new_values  receives the transposed values
 * @param new_rowptr  receives the transposed row pointer, n_cols+1 entries
 * @param new_colind  receives the transposed column indices
 * @param queue       unused
 *
 * @return 0 on success, otherwise the error code of the failing allocation;
 *         the output arguments are not written on failure.
 */
extern "C" magma_int_t
c_transpose_csr(
    magma_int_t n_rows,
    magma_int_t n_cols,
    magma_int_t nnz,
    magmaFloatComplex *values,
    magma_index_t *rowptr,
    magma_index_t *colind,
    magma_int_t *new_n_rows,
    magma_int_t *new_n_cols,
    magma_int_t *new_nnz,
    magmaFloatComplex **new_values,
    magma_index_t **new_rowptr,
    magma_index_t **new_colind,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    // easier to keep names straight if we convert CSR to CSC,
    // which is the same as transposing CSR.
    magmaFloatComplex *csc_values=NULL;
    magma_index_t *csc_colptr=NULL, *csc_rowind=NULL;

    // i, j are actual row & col indices (0 <= i < nrows, 0 <= j < ncols).
    // k is index into col and values (0 <= k < nnz).
    magma_int_t i, j, k, total, tmp;

    CHECK( magma_cmalloc_cpu( &csc_values, nnz ) );
    CHECK( magma_index_malloc_cpu( &csc_colptr, n_cols + 1 ) );
    CHECK( magma_index_malloc_cpu( &csc_rowind, nnz ) );

    // example matrix
    // [ x x 0 x ]
    // [ x 0 x x ]
    // [ x x 0 0 ]
    // rowptr = [ 0 3 6, 8 ]
    // colind = [ 0 1 3; 0 2 3; 0 1 ]

    // sum up nnz in each original column
    // colptr = [ 3 2 1 2, X ]
    for( j=0; j < n_cols; ++j ) {
        csc_colptr[ j ] = 0;
    }
    for( k=0; k < nnz; ++k ) {
        csc_colptr[ colind[k] ]++;
    }

    // running sum to convert to new colptr
    // colptr = [ 0 3 5 6, 8 ]
    total = 0;
    for( j=0; j < n_cols; ++j ) {
        tmp = csc_colptr[ j ];
        csc_colptr[ j ] = total;
        total += tmp;
    }
    csc_colptr[ n_cols ] = total;
    assert( total == nnz );

    // copy row indices and values
    // this increments colptr until it effectively shifts left one
    // colptr = [ 3 5 6 8, 8 ]
    // rowind = [ 0 1 2; 0 2; 1; 0 1 ]
    for( i=0; i < n_rows; ++i ) {
        for( k=rowptr[i]; k < rowptr[i+1]; ++k ) {
            j = colind[k];
            csc_rowind[ csc_colptr[ j ] ] = i;
            csc_values[ csc_colptr[ j ] ] = values[k];
            csc_colptr[ j ]++;
        }
    }
    // guard the index: with n_cols == 0 there is no last column to inspect
    assert( n_cols == 0 || csc_colptr[ n_cols-1 ] == nnz );

    // shift colptr right one
    // colptr = [ 0 3 5 6, 8 ]
    for( j=n_cols-1; j > 0; --j ) {
        csc_colptr[j] = csc_colptr[j-1];
    }
    csc_colptr[0] = 0;

    // save into output variables; ownership of the arrays moves to the caller
    *new_n_rows = n_cols;
    *new_n_cols = n_rows;
    *new_nnz    = nnz;
    *new_values = csc_values;
    *new_rowptr = csc_colptr;
    *new_colind = csc_rowind;

cleanup:
    if( info != 0 ){
        // only a failed call still owns the (partially) allocated buffers
        magma_free_cpu( csc_values );
        magma_free_cpu( csc_colptr );
        magma_free_cpu( csc_rowind );
    }
    return info;
}
/**
 * Merge a lower factor L and an upper factor U (both host CSR) into one
 * CSR matrix A. For every row, A receives the entries of L that lie
 * strictly below the diagonal, followed by all entries of U in that row.
 *
 * @param L      lower factor, CSR on the CPU
 * @param U      upper factor, CSR on the CPU
 * @param A      output matrix; initialised as a copy of L, then its col/val
 *               arrays are replaced by the merged data
 * @param queue  queue passed on to the MAGMA helper routines
 *
 * @return MAGMA_SUCCESS on success,
 *         MAGMA_ERR_NOT_SUPPORTED if L or U is not CSR or not on the CPU
 *         (previously reported as success),
 *         MAGMA_ERR_HOST_ALLOC if the merged arrays cannot be allocated,
 *         or the error code of the initial matrix copy.
 */
extern "C" magma_int_t
magma_dmlumerge(
    magma_d_sparse_matrix L,
    magma_d_sparse_matrix U,
    magma_d_sparse_matrix *A,
    magma_queue_t queue )
{
    if( L.storage_type != Magma_CSR || U.storage_type != Magma_CSR ){
        printf("error: matrix in wrong format.\n");
        return MAGMA_ERR_NOT_SUPPORTED;
    }
    if( L.memory_location != Magma_CPU || U.memory_location != Magma_CPU ){
        printf("error: matrix not on CPU.\n");
        return MAGMA_ERR_NOT_SUPPORTED;
    }

    // start from a copy of L so that A inherits its dimensions and row array
    magma_int_t info = magma_d_mtransfer( L, A, Magma_CPU, Magma_CPU, queue );
    if( info != 0 ){
        return info;
    }

    magma_free_cpu( A->col );
    A->col = NULL;
    magma_free_cpu( A->val );
    A->val = NULL;

    // count: strictly lower triangular part of L plus all of U
    magma_int_t z = 0;
    for( magma_int_t i=0; i<A->num_rows; i++ ){
        for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++ ){
            if( L.col[j] < i ){
                z++;
            }
        }
        z += U.row[i+1] - U.row[i];
    }
    A->nnz = z;

    // fill A with the new structure
    magma_int_t stat_cpu = 0;
    stat_cpu += magma_index_malloc_cpu( &A->col, A->nnz );
    stat_cpu += magma_dmalloc_cpu( &A->val, A->nnz );
    if( stat_cpu != 0 ){
        magma_d_mfree( A, queue );
        printf("error: memory allocation.\n");
        return MAGMA_ERR_HOST_ALLOC;
    }

    z = 0;
    for( magma_int_t i=0; i<A->num_rows; i++ ){
        A->row[i] = z;
        for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++ ){
            if( L.col[j] < i ){ // keep only the strictly lower triangular part
                A->col[z] = L.col[j];
                A->val[z] = L.val[j];
                z++;
            }
        }
        for( magma_int_t j=U.row[i]; j<U.row[i+1]; j++ ){
            A->col[z] = U.col[j];
            A->val[z] = U.val[j];
            z++;
        }
    }
    A->row[A->num_rows] = z;
    A->nnz = z;

    return MAGMA_SUCCESS;
}
/**
 * Build the matrix of a 3D 27-point stencil on an n x n x n grid.
 *
 * A band matrix of size n^3 with 13 off-diagonals is requested from
 * magma_cmgenerator() (diagonal value 26, all off-diagonal values -1).
 * Afterwards the entries that would couple the first grid point of a line
 * to a "left" neighbour, or the last grid point of a line to a "right"
 * neighbour, are set to zero. The result is copied to A in CSR format.
 *
 * @param n      number of grid points per dimension
 * @param A      output matrix
 * @param queue  queue passed on to the MAGMA helper routines
 *
 * @return 0 on success, otherwise the error code of the failing helper.
 */
magma_int_t
magma_cm_27stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t line, pos, k, t;
    magma_int_t dz, dy;
    magma_c_matrix hA={Magma_CSR};

    // generate matrix of desired structure and size (3d 27-point stencil)
    magma_int_t nn = n*n*n;
    magma_int_t offdiags = 13;
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;
    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    // Enumerate the 27 neighbour shifts (dz,dy,dx) in lexicographic order;
    // shift number 13 is the centre (0,0,0), so numbers 13..26 yield the
    // main diagonal followed by the 13 non-negative offsets
    // 1, n-1, n, n+1, n*n-n-1, ..., n*n+n+1.
    for( t=13; t<=26; t++ ) {
        magma_int_t sz = t/9 - 1;
        magma_int_t sy = (t/3)%3 - 1;
        magma_int_t sx = t%3 - 1;
        diag_offset[t-13] = sz*n*n + sy*n + sx;
        diag_vals[t-13] = ( t == 13 ) ? MAGMA_C_MAKE( 26.0, 0.0 )
                                      : MAGMA_C_MAKE( -1.0, 0.0 );
    }
    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    for( line=0; line < n*n; line++ ) {
        for( pos=0; pos < n; pos++ ) {
            magma_index_t row = line*n+pos;
            // first / last point of a grid line
            bool at_left  = ( (row+1)%n == 1 );
            bool at_right = ( (row)%n == n-1 );
            if( !at_left && !at_right ) {
                continue;
            }
            for( k=hA.row[row]; k<hA.row[row+1]; k++ ) {
                bool wraps = false;
                // the nine neighbours one step to the left (base-1) and
                // one step to the right (base+1) of the current point
                for( dz=-1; dz<=1 && !wraps; dz++ ) {
                    for( dy=-1; dy<=1 && !wraps; dy++ ) {
                        magma_int_t base = row + dz*n*n + dy*n;
                        if( at_left && hA.col[k] == base-1 ) {
                            wraps = true;
                        }
                        if( at_right && hA.col[k] == base+1 ) {
                            wraps = true;
                        }
                    }
                }
                if( wraps ) {
                    hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
                }
            }
        }
    }
    hA.true_nnz = hA.nnz;
    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
/**
 * Merge a lower factor L and an upper factor U (both host CSR) into one
 * CSR matrix A. For every row, A receives the entries of L that lie
 * strictly below the diagonal, followed by all entries of U in that row.
 *
 * A->col and A->val are reset to NULL right after they are released, so a
 * failing reallocation cannot make the cleanup path free them a second time.
 *
 * @param L      lower factor, CSR on the CPU
 * @param U      upper factor, CSR on the CPU
 * @param A      output matrix; initialised as a copy of L, then its col/val
 *               arrays are replaced by the merged data
 * @param queue  queue passed on to the MAGMA helper routines
 *
 * @return 0 on success, MAGMA_ERR_NOT_SUPPORTED for a wrong format or
 *         location, otherwise the error code of the failing helper.
 *         A is freed on failure.
 */
extern "C" magma_int_t
magma_smlumerge(
    magma_s_matrix L,
    magma_s_matrix U,
    magma_s_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    if( L.storage_type == Magma_CSR && U.storage_type == Magma_CSR ){
        if( L.memory_location == Magma_CPU && U.memory_location == Magma_CPU ){
            // start from a copy of L so that A inherits its dimensions and row array
            CHECK( magma_smtransfer( L, A, Magma_CPU, Magma_CPU, queue ));
            magma_free_cpu( A->col );
            A->col = NULL;
            magma_free_cpu( A->val );
            A->val = NULL;

            // count: strictly lower triangular part of L plus all of U
            magma_int_t z = 0;
            for( magma_int_t i=0; i<A->num_rows; i++ ){
                for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++ ){
                    if( L.col[j] < i ){
                        z++;
                    }
                }
                z += U.row[i+1] - U.row[i];
            }
            A->nnz = z;

            // fill A with the new structure
            CHECK( magma_index_malloc_cpu( &A->col, A->nnz ));
            CHECK( magma_smalloc_cpu( &A->val, A->nnz ));
            z = 0;
            for( magma_int_t i=0; i<A->num_rows; i++ ){
                A->row[i] = z;
                for( magma_int_t j=L.row[i]; j<L.row[i+1]; j++ ){
                    if( L.col[j] < i ){ // keep only the strictly lower triangular part
                        A->col[z] = L.col[j];
                        A->val[z] = L.val[j];
                        z++;
                    }
                }
                for( magma_int_t j=U.row[i]; j<U.row[i+1]; j++ ){
                    A->col[z] = U.col[j];
                    A->val[z] = U.val[j];
                    z++;
                }
            }
            A->row[A->num_rows] = z;
            A->nnz = z;
        }
        else {
            printf("error: matrix not on CPU.\n");
            info = MAGMA_ERR_NOT_SUPPORTED;
        }
    }
    else {
        printf("error: matrix in wrong format.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

cleanup:
    if( info != 0 ){
        magma_smfree( A, queue );
    }
    return info;
}
/**
 * Build the matrix of a 2D 5-point stencil on an n x n grid.
 *
 * A band matrix of size n^2 with off-diagonals at distance 1 and n is
 * requested from magma_cmgenerator(). Entries that would couple the first
 * point of a grid line to its left neighbour, or the last point of a grid
 * line to its right neighbour, are set to zero. The result is copied to A
 * in CSR format and passed through magma_cmcsrcompressor().
 *
 * The compressor's return value is now checked, so a failure there is
 * reported to the caller instead of being dropped.
 *
 * @param n      number of grid points per dimension
 * @param A      output matrix
 * @param queue  queue passed on to the MAGMA helper routines
 *
 * @return 0 on success, otherwise the error code of the failing helper.
 */
magma_int_t
magma_cm_5stencil(
    magma_int_t n,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t i,j,k;
    magma_c_matrix hA={Magma_CSR};

    // generate matrix of desired structure and size (2d 5-point stencil)
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset=NULL;
    magmaFloatComplex *diag_vals=NULL;
    CHECK( magma_cmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;

    #define COMPLEX

    #ifdef COMPLEX
        // complex case
        diag_vals[0] = MAGMA_C_MAKE( 4.0, 4.0 );
        diag_vals[1] = MAGMA_C_MAKE( -1.0, -1.0 );
        diag_vals[2] = MAGMA_C_MAKE( -1.0, -1.0 );
    #else
        // real case
        diag_vals[0] = MAGMA_C_MAKE( 4.0, 0.0 );
        diag_vals[1] = MAGMA_C_MAKE( -1.0, 0.0 );
        diag_vals[2] = MAGMA_C_MAKE( -1.0, 0.0 );
    #endif
    CHECK( magma_cmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    for( i=0; i<n; i++ ) {
        for( j=0; j<n; j++ ) {
            magma_index_t row = i*n+j;
            for( k=hA.row[row]; k<hA.row[row+1]; k++ ) {
                // first point of a grid line: drop the coupling to row-1
                if ((hA.col[k] == row-1 ) && (row+1)%n == 1 )
                    hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );

                // last point of a grid line: drop the coupling to row+1
                if ((hA.col[k] == row+1 ) && (row)%n ==n-1 )
                    hA.val[k] = MAGMA_C_MAKE( 0.0, 0.0 );
            }
        }
    }

    CHECK( magma_cmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));
    CHECK( magma_cmcsrcompressor( A, queue ));
    A->true_nnz = A->nnz;

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_cmfree( &hA, queue );
    return info;
}
/**
 * Split a matrix A into D + R, where D holds the entries inside the
 * diagonal blocks of size bsize and R holds everything else.
 *
 * For host CSR / CSRCOO input the split is done directly. Within every row
 * of D the diagonal entry is stored first; entries left of the diagonal are
 * shifted one slot to the right to make room for it. Every row must contain
 * a diagonal entry, otherwise the routine fails with info = -1.
 * Any other location or format is moved to the host, converted to CSR,
 * split recursively and converted / transferred back.
 *
 * Changes against the previous revision: the missing-diagonal message names
 * the offending row (it used to print the first row of the block), and the
 * temporaries of the conversion path are released on every exit, not only
 * on success.
 *
 * @param bsize  size of the diagonal blocks
 * @param A      input matrix
 * @param D      output: block-diagonal part, diagonal entry first in each row
 * @param R      output: off-block-diagonal part
 * @param queue  queue passed on to the MAGMA helper routines
 *
 * @return 0 on success, -1 if a row has no diagonal entry, otherwise the
 *         error code of the failing helper. D and R are freed on failure.
 */
extern "C" magma_int_t
magma_ccsrsplit(
    magma_int_t bsize,
    magma_c_matrix A,
    magma_c_matrix *D,
    magma_c_matrix *R,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_int_t i, k, j, nnz_diag, nnz_offd;

    // temporaries of the conversion path; declared up front so that the
    // cleanup section can release them on every exit
    magma_c_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, DCSR={Magma_CSR},
                   RCSR={Magma_CSR}, Dh={Magma_CSR}, Rh={Magma_CSR};

    D->val = NULL;
    D->col = NULL;
    D->row = NULL;
    D->rowidx = NULL;
    D->blockinfo = NULL;
    D->diag = NULL;
    D->dval = NULL;
    D->dcol = NULL;
    D->drow = NULL;
    D->drowidx = NULL;
    D->ddiag = NULL;
    R->val = NULL;
    R->col = NULL;
    R->row = NULL;
    R->rowidx = NULL;
    R->blockinfo = NULL;
    R->diag = NULL;
    R->dval = NULL;
    R->dcol = NULL;
    R->drow = NULL;
    R->drowidx = NULL;
    R->ddiag = NULL;

    if ( A.memory_location == Magma_CPU &&
            ( A.storage_type == Magma_CSR ||
              A.storage_type == Magma_CSRCOO ) ) {

        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices
        for( i=0; i<A.num_rows; i+=bsize ){
            for( k=i; k<min(A.num_rows,i+bsize); k++ ){
                int check = 0;
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+bsize ){
                        if( A.col[j] == k ){
                            check = 1;
                        }
                        nnz_diag++;
                    }
                    else
                        nnz_offd++;
                }
                if( check == 0 ){
                    // row k is the one without a diagonal entry
                    printf("error: matrix contains zero on diagonal at (%d,%d).\n", int(k), int(k));
                    info = -1;
                    goto cleanup;
                }
            }
        }

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;

        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;

        CHECK( magma_cmalloc_cpu( &D->val, nnz_diag ));
        CHECK( magma_index_malloc_cpu( &D->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &D->col, nnz_diag ));

        CHECK( magma_cmalloc_cpu( &R->val, nnz_offd ));
        CHECK( magma_index_malloc_cpu( &R->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &R->col, nnz_offd ));

        // Fill up the new sparse matrices
        D->row[0] = 0;
        R->row[0] = 0;

        nnz_offd = nnz_diag = 0;
        for( i=0; i<A.num_rows; i+=bsize) {
            for( k=i; k<min(A.num_rows,i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];

                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // larger than diagonal remain as before
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right
                        // to have room for the diagonal
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
    }
    else {
        // bring A to host CSR, split there, then restore format and location
        CHECK( magma_cmtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_cmconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue ));

        CHECK( magma_ccsrsplit( bsize, ACSR, &DCSR, &RCSR, queue ));

        CHECK( magma_cmconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_cmconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue ));

        CHECK( magma_cmtransfer( Dh, D, Magma_CPU, A.memory_location, queue ));
        CHECK( magma_cmtransfer( Rh, R, Magma_CPU, A.memory_location, queue ));
    }

cleanup:
    magma_cmfree( &Ah, queue );
    magma_cmfree( &ACSR, queue );
    magma_cmfree( &Dh, queue );
    magma_cmfree( &DCSR, queue );
    magma_cmfree( &Rh, queue );
    magma_cmfree( &RCSR, queue );
    if( info != 0 ){
        magma_cmfree( D, queue );
        magma_cmfree( R, queue );
    }
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix Z={Magma_CSR}, A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_index_t *comm_i=NULL; double *comm_v=NULL; magma_int_t start, end; int i=1; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &Z, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(Z.num_rows), int(Z.num_cols), int(Z.nnz) ); // slice matrix CHECK( magma_index_malloc_cpu( &comm_i, Z.num_rows ) ); CHECK( magma_dmalloc_cpu( &comm_v, Z.num_rows ) ); CHECK( magma_dmslice( 1, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 1, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 8, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, 
queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); // scale matrix CHECK( magma_dmscale( &Z, zopts.scaling, queue )); // remove nonzeros in matrix CHECK( magma_dmcsrcompressor( &Z, queue )); // convert to be non-symmetric CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue )); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, zopts.output_format, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, zopts.output_format,Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, A2, &res, queue)); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester: ok\n"); else printf("%% tester: failed\n"); magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); comm_i=NULL; comm_v=NULL; magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); i++; } cleanup: magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); magma_dmfree(&AT, queue ); magma_dmfree(&A, queue ); magma_dmfree(&B, queue ); magma_dmfree(&B_d, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }