/**
 * Describes caller-supplied CSR arrays as a magma_c_matrix without copying.
 *
 * The three array pointers are stored in A as-is (A->drow, A->dcol, A->dval);
 * no memory is allocated here. The number of nonzeros is taken from entry m
 * of the row-pointer array, which is fetched with magma_index_getvector.
 *
 * @param[in]  m      number of rows
 * @param[in]  n      number of columns
 * @param[in]  row    row-pointer array; entry row[m] is read as nnz
 * @param[in]  col    column-index array
 * @param[in]  val    value array
 * @param[out] A      descriptor to fill; marked Magma_CSR / Magma_DEV
 * @param[in]  queue  queue used for the one-element read of row[m]
 *
 * @return MAGMA_SUCCESS (always).
 */
magma_int_t
magma_ccsrset_gpu(
    magma_int_t m,
    magma_int_t n,
    magmaIndex_ptr row,
    magmaIndex_ptr col,
    magmaFloatComplex_ptr val,
    magma_c_matrix *A,
    magma_queue_t queue )
{
    // Entry m of the row pointer holds the total number of stored entries.
    magma_index_t last_rowptr;
    magma_index_getvector( 1, row + m, 1, &last_rowptr, 1, queue );

    A->storage_type    = Magma_CSR;
    A->memory_location = Magma_DEV;

    A->num_rows = m;
    A->num_cols = n;
    A->nnz      = (magma_int_t) last_rowptr;

    // Borrow the caller's arrays; nothing is copied.
    A->drow = row;
    A->dcol = col;
    A->dval = val;

    return MAGMA_SUCCESS;
}
/**
 * Computes AB = alpha * A + beta * B for two sparse matrices with the
 * cuSPARSE csrgeam routines.
 *
 * Both inputs must live on the device (Magma_DEV) and be stored as Magma_CSR
 * or Magma_CSRCOO; anything else is rejected. The result is assembled in a
 * temporary matrix C, handed to magma_z_mtransfer to populate AB, and the
 * temporary is then released.
 *
 * @param[in]  alpha  scalar for A; cuSPARSE is put in host pointer mode
 *                    before it is used
 * @param[in]  A      first operand (its drow/dcol/dval arrays are read)
 * @param[in]  beta   scalar for B; same pointer mode as alpha
 * @param[in]  B      second operand (its drow/dcol/dval arrays are read)
 * @param[out] AB     receives the sum via magma_z_mtransfer
 * @param[in]  queue  passed to cusparseSetStream and to the MAGMA helpers
 *
 * @return MAGMA_SUCCESS on success,
 *         MAGMA_ERR_NOT_SUPPORTED for an unsupported location/format,
 *         MAGMA_ERR_DEVICE_ALLOC if a device allocation fails,
 *         otherwise the value returned by magma_z_mtransfer.
 */
extern "C" magma_int_t
magma_zcuspaxpy(
    magmaDoubleComplex *alpha, magma_z_sparse_matrix A,
    magmaDoubleComplex *beta,  magma_z_sparse_matrix B,
    magma_z_sparse_matrix *AB,
    magma_queue_t queue )
{
    if ( A.memory_location != Magma_DEV || B.memory_location != Magma_DEV
         || ( A.storage_type != Magma_CSR && A.storage_type != Magma_CSRCOO )
         || ( B.storage_type != Magma_CSR && B.storage_type != Magma_CSRCOO ) ) {
        printf("error: CSRSPAXPY only supported on device and CSR format.\n");
        return MAGMA_ERR_NOT_SUPPORTED;
    }

    // Everything is declared up front so that `goto cleanup` never jumps
    // over an initialisation.
    magma_int_t info = MAGMA_SUCCESS;
    magma_int_t stat_dev = 0;
    int setup_failed = 0;

    // cuSPARSE reports the nonzero count as an index-sized integer, so it is
    // received in a variable of that type rather than through a cast of the
    // magma_int_t field C.nnz.
    magma_index_t nnz_c = 0;

    cusparseHandle_t handle = NULL;
    cusparseMatDescr_t descrA = NULL;
    cusparseMatDescr_t descrB = NULL;
    cusparseMatDescr_t descrC = NULL;

    magma_z_sparse_matrix C;
    C.num_rows = A.num_rows;
    C.num_cols = A.num_cols;
    C.nnz = 0;
    C.storage_type = A.storage_type;
    C.memory_location = A.memory_location;
    C.val = NULL;
    C.col = NULL;
    C.row = NULL;
    C.rowidx = NULL;
    C.blockinfo = NULL;
    C.diag = NULL;
    C.dval = NULL;
    C.dcol = NULL;
    C.drow = NULL;
    C.drowidx = NULL;
    C.ddiag = NULL;

    // CUSPARSE context //
    // NOTE(review): setup failures are only reported, as before; the routine
    // still continues. Consider returning a dedicated error code here.
    if ( cusparseCreate( &handle ) != 0 )
        printf("error in Handle.\n");
    cusparseSetStream( handle, queue );

    setup_failed  = ( cusparseCreateMatDescr( &descrA ) != 0 );
    setup_failed |= ( cusparseCreateMatDescr( &descrB ) != 0 );
    setup_failed |= ( cusparseCreateMatDescr( &descrC ) != 0 );
    if ( setup_failed )
        printf("error in MatrDescr.\n");

    setup_failed  = ( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    setup_failed |= ( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    setup_failed |= ( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    if ( setup_failed )
        printf("error in MatrType.\n");

    setup_failed  = ( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    setup_failed |= ( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    setup_failed |= ( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    if ( setup_failed )
        printf("error in IndexBase.\n");

    // Host pointer mode: alpha, beta and &nnz_c are host addresses.
    cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST );

    // The row pointer of C must exist before the symbolic pass fills it.
    if ( magma_index_malloc( &C.drow, (A.num_rows + 1) ) != 0 ) {
        info = MAGMA_ERR_DEVICE_ALLOC;
        goto cleanup;
    }

    // Symbolic pass: writes the row pointer of C and the total nonzero count.
    // The output row pointer is the device buffer C.drow (C.row is NULL).
    cusparseXcsrgeamNnz( handle, A.num_rows, A.num_cols,
                         descrA, A.nnz, A.drow, A.dcol,
                         descrB, B.nnz, B.drow, B.dcol,
                         descrC, C.drow, &nnz_c );
    C.nnz = (magma_int_t) nnz_c;

    stat_dev += magma_index_malloc( &C.dcol, C.nnz );
    stat_dev += magma_zmalloc( &C.dval, C.nnz );
    if ( stat_dev != 0 ) {
        info = MAGMA_ERR_DEVICE_ALLOC;
        goto cleanup;
    }

    // Numeric pass: fills the column indices and values of C.
    cusparseZcsrgeam( handle, A.num_rows, A.num_cols,
                      alpha,
                      descrA, A.nnz, A.dval, A.drow, A.dcol,
                      beta,
                      descrB, B.nnz, B.dval, B.drow, B.dcol,
                      descrC, C.dval, C.drow, C.dcol );

    info = magma_z_mtransfer( C, AB, Magma_DEV, Magma_DEV, queue );

cleanup:
    // Single exit: the cuSPARSE objects and the temporary C are released on
    // both the success and the failure paths.
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroyMatDescr( descrC );
    cusparseDestroy( handle );
    // end CUSPARSE context //
    magma_z_mfree( &C, queue );
    return info;
}
/**
 * Computes the sparse product AB = A * B with the cuSPARSE csrgemm routines.
 *
 * Both inputs must live on the device (Magma_DEV) and be stored as Magma_CSR
 * or Magma_CSRCOO; anything else is rejected. The product is assembled in a
 * temporary matrix C, handed to magma_d_mtransfer to populate AB, and the
 * temporary is then released.
 *
 * The product dimensions are taken from the operands: C has A.num_rows rows
 * and B.num_cols columns, and the inner dimension is A.num_cols. For square
 * operands of equal size this matches the earlier behaviour, which used
 * A.num_rows for all three dimensions.
 *
 * @param[in]  A   left operand (its row/col/val arrays are passed to cuSPARSE)
 * @param[in]  B   right operand (its row/col/val arrays are passed to cuSPARSE)
 * @param[out] AB  receives the product via magma_d_mtransfer
 *
 * @return MAGMA_SUCCESS on success,
 *         MAGMA_ERR_NOT_SUPPORTED for an unsupported location/format,
 *         MAGMA_ERR_DEVICE_ALLOC if a device allocation fails,
 *         otherwise the value returned by magma_d_mtransfer.
 */
extern "C" magma_int_t
magma_dcuspmm(
    magma_d_sparse_matrix A,
    magma_d_sparse_matrix B,
    magma_d_sparse_matrix *AB )
{
    if ( A.memory_location != Magma_DEV || B.memory_location != Magma_DEV
         || ( A.storage_type != Magma_CSR && A.storage_type != Magma_CSRCOO )
         || ( B.storage_type != Magma_CSR && B.storage_type != Magma_CSRCOO ) ) {
        printf("error: CSRMM only supported on device and CSR format.\n");
        return MAGMA_ERR_NOT_SUPPORTED;
    }

    // Everything is declared up front so that `goto cleanup` never jumps
    // over an initialisation.
    magma_int_t info = MAGMA_SUCCESS;
    int setup_failed = 0;

    // cuSPARSE reports the nonzero count as an index-sized integer, so it is
    // received in a variable of that type rather than through a cast of the
    // magma_int_t field C.nnz.
    magma_index_t nnz_c = 0;

    cusparseHandle_t handle = NULL;
    cusparseMatDescr_t descrA = NULL;
    cusparseMatDescr_t descrB = NULL;
    cusparseMatDescr_t descrC = NULL;

    magma_d_sparse_matrix C;
    C.num_rows = A.num_rows;
    C.num_cols = B.num_cols;
    C.nnz = 0;
    C.storage_type = A.storage_type;
    C.memory_location = A.memory_location;
    // Start from NULL so the cleanup path never frees an indeterminate pointer.
    C.val = NULL;
    C.row = NULL;
    C.col = NULL;

    // CUSPARSE context //
    // NOTE(review): setup failures are only reported, as before; the routine
    // still continues. Consider returning a dedicated error code here.
    if ( cusparseCreate( &handle ) != 0 )
        printf("error in Handle.\n");

    setup_failed  = ( cusparseCreateMatDescr( &descrA ) != 0 );
    setup_failed |= ( cusparseCreateMatDescr( &descrB ) != 0 );
    setup_failed |= ( cusparseCreateMatDescr( &descrC ) != 0 );
    if ( setup_failed )
        printf("error in MatrDescr.\n");

    setup_failed  = ( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    setup_failed |= ( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    setup_failed |= ( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL ) != 0 );
    if ( setup_failed )
        printf("error in MatrType.\n");

    setup_failed  = ( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    setup_failed |= ( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    setup_failed |= ( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO ) != 0 );
    if ( setup_failed )
        printf("error in IndexBase.\n");

    // Host pointer mode: &nnz_c is a host address.
    cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST );

    // The row pointer of C must exist before the symbolic pass fills it.
    if ( magma_index_malloc( &C.row, (A.num_rows + 1) ) != 0 ) {
        info = MAGMA_ERR_DEVICE_ALLOC;
        goto cleanup;
    }

    // Symbolic pass: C is (A.num_rows x B.num_cols), inner size A.num_cols.
    cusparseXcsrgemmNnz( handle,
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         CUSPARSE_OPERATION_NON_TRANSPOSE,
                         A.num_rows, B.num_cols, A.num_cols,
                         descrA, A.nnz, A.row, A.col,
                         descrB, B.nnz, B.row, B.col,
                         descrC, C.row, &nnz_c );
    C.nnz = (magma_int_t) nnz_c;

    if ( magma_index_malloc( &C.col, C.nnz ) != 0
         || magma_dmalloc( &C.val, C.nnz ) != 0 ) {
        info = MAGMA_ERR_DEVICE_ALLOC;
        goto cleanup;
    }

    // Numeric pass: fills the column indices and values of C.
    cusparseDcsrgemm( handle,
                      CUSPARSE_OPERATION_NON_TRANSPOSE,
                      CUSPARSE_OPERATION_NON_TRANSPOSE,
                      A.num_rows, B.num_cols, A.num_cols,
                      descrA, A.nnz, A.val, A.row, A.col,
                      descrB, B.nnz, B.val, B.row, B.col,
                      descrC, C.val, C.row, C.col );

    info = magma_d_mtransfer( C, AB, Magma_DEV, Magma_DEV );

cleanup:
    // Single exit: the cuSPARSE objects and the temporary C are released on
    // both the success and the failure paths.
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroyMatDescr( descrC );
    cusparseDestroy( handle );
    // end CUSPARSE context //
    magma_d_mfree( &C );
    return info;
}
/**
 * Computes the sparse product AB = A * B with the cuSPARSE csrgemm routines.
 *
 * Both inputs must live on the device (Magma_DEV) and be stored as Magma_CSR
 * or Magma_CSRCOO; otherwise MAGMA_ERR_NOT_SUPPORTED is returned. The product
 * is assembled in a temporary matrix C of size A.num_rows x B.num_cols,
 * handed to magma_zmtransfer to populate AB, and the temporary is released.
 *
 * Error handling relies on the CHECK / CHECK_CUSPARSE macros, which are
 * expected to record the failure in `info` and jump to `cleanup`
 * (NOTE(review): macro definitions are not visible here - confirm).
 *
 * @param[in]  A      left operand (drow/dcol/dval are passed to cuSPARSE)
 * @param[in]  B      right operand (drow/dcol/dval are passed to cuSPARSE)
 * @param[out] AB     receives the product via magma_zmtransfer
 * @param[in]  queue  supplies the CUDA stream for cuSPARSE and is
 *                    synchronised before the result is transferred
 *
 * @return 0 on success, MAGMA_ERR_NOT_SUPPORTED for an unsupported
 *         location/format, otherwise the code set by a failing CHECK.
 */
extern "C" magma_int_t
magma_zcuspmm(
    magma_z_matrix A,
    magma_z_matrix B,
    magma_z_matrix *AB,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    magma_z_matrix C={Magma_CSR};
    C.num_rows = A.num_rows;
    C.num_cols = B.num_cols;
    C.storage_type = A.storage_type;
    C.memory_location = A.memory_location;
    C.fill_mode = MagmaFull;

    C.val = NULL;
    C.col = NULL;
    C.row = NULL;
    C.rowidx = NULL;
    C.blockinfo = NULL;
    C.diag = NULL;
    C.dval = NULL;
    C.dcol = NULL;
    C.drow = NULL;
    C.drowidx = NULL;
    C.ddiag = NULL;

    // cuSPARSE reports the nonzero count as an index-sized integer, so it is
    // received in a variable of that type rather than through a cast of the
    // magma_int_t field C.nnz. Declared here so no CHECK jumps across it.
    magma_index_t nnz_c = 0;

    cusparseHandle_t handle=NULL;
    cusparseMatDescr_t descrA=NULL;
    cusparseMatDescr_t descrB=NULL;
    cusparseMatDescr_t descrC=NULL;

    if (    A.memory_location == Magma_DEV
         && B.memory_location == Magma_DEV
         && ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO )
         && ( B.storage_type == Magma_CSR || B.storage_type == Magma_CSRCOO ) )
    {
        // CUSPARSE context //
        CHECK_CUSPARSE( cusparseCreate( &handle ));
        CHECK_CUSPARSE( cusparseSetStream( handle, queue->cuda_stream() ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrA ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrB ));
        CHECK_CUSPARSE( cusparseCreateMatDescr( &descrC ));
        CHECK_CUSPARSE( cusparseSetMatType( descrA, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrB, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatType( descrC, CUSPARSE_MATRIX_TYPE_GENERAL ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrA, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrB, CUSPARSE_INDEX_BASE_ZERO ));
        CHECK_CUSPARSE( cusparseSetMatIndexBase( descrC, CUSPARSE_INDEX_BASE_ZERO ));

        // Host pointer mode: &nnz_c is a host address.
        CHECK_CUSPARSE( cusparseSetPointerMode( handle, CUSPARSE_POINTER_MODE_HOST ));

        // Symbolic pass: fills the row pointer of C and the nonzero count.
        CHECK( magma_index_malloc( &C.drow, (A.num_rows + 1) ));
        CHECK_CUSPARSE( cusparseXcsrgemmNnz( handle,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            A.num_rows, B.num_cols, A.num_cols,
                            descrA, A.nnz, A.drow, A.dcol,
                            descrB, B.nnz, B.drow, B.dcol,
                            descrC, C.drow, &nnz_c ));
        C.nnz = (magma_int_t) nnz_c;

        // Numeric pass: fills the column indices and values of C.
        CHECK( magma_index_malloc( &C.dcol, C.nnz ));
        CHECK( magma_zmalloc( &C.dval, C.nnz ));
        CHECK_CUSPARSE( cusparseZcsrgemm( handle,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            CUSPARSE_OPERATION_NON_TRANSPOSE,
                            A.num_rows, B.num_cols, A.num_cols,
                            descrA, A.nnz, A.dval, A.drow, A.dcol,
                            descrB, B.nnz, B.dval, B.drow, B.dcol,
                            descrC, C.dval, C.drow, C.dcol ));
        // end CUSPARSE context //

        // Wait for the queue before handing C over.
        magma_queue_sync( queue );
        CHECK( magma_zmtransfer( C, AB, Magma_DEV, Magma_DEV, queue ));
    }
    else {
        info = MAGMA_ERR_NOT_SUPPORTED;
    }

cleanup:
    // Reached on success, on unsupported input and from every failing CHECK.
    cusparseDestroyMatDescr( descrA );
    cusparseDestroyMatDescr( descrB );
    cusparseDestroyMatDescr( descrC );
    cusparseDestroy( handle );
    magma_zmfree( &C, queue );
    return info;
}
extern "C" magma_int_t magma_dmtransfer( magma_d_matrix A, magma_d_matrix *B, magma_location_t src, magma_location_t dst, magma_queue_t queue ) { magma_int_t info = 0; B->val = NULL; B->diag = NULL; B->row = NULL; B->rowidx = NULL; B->col = NULL; B->blockinfo = NULL; B->dval = NULL; B->ddiag = NULL; B->drow = NULL; B->drowidx = NULL; B->dcol = NULL; B->diag = NULL; B->ddiag = NULL; B->list = NULL; B->dlist = NULL; // first case: copy matrix from host to device if ( src == Magma_CPU && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( 
A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.nnz, A.rowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = 
A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dsetvector( A.num_rows * A.max_nnz_row, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * A.max_nnz_row, A.col, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dsetvector( A.num_rows * rowlength, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.num_rows * rowlength, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.num_rows, A.row, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz 
)); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dsetvector( A.nnz, A.val, 1, B->dval, 1, queue ); magma_index_setvector( A.nnz, A.col, 1, B->dcol, 1, queue ); magma_index_setvector( A.numblocks + 1, A.row, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * 
A.num_cols )); // data transfer magma_dsetvector( A.num_rows * A.num_cols, A.val, 1, B->dval, 1, queue ); } } // second case: copy matrix from host to host else if ( src == Magma_CPU && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = 
A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; B->rowidx[i] = A.rowidx[i]; } for( magma_int_t i=0; i<A.num_rows+1; i++ ) { B->row[i] = A.row[i]; } } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.max_nnz_row; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } } //ELLRT-type else if ( 
A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer for( magma_int_t i=0; i<A.num_rows*rowlength; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.num_rows; i++ ) { B->row[i] = A.row[i]; } } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; B->numblocks = A.numblocks; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer for( magma_int_t i=0; i<A.nnz; i++ ) { B->val[i] = A.val[i]; B->col[i] = A.col[i]; } for( magma_int_t i=0; i<A.numblocks+1; i++ ) { B->row[i] = A.row[i]; } } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = 
A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer //magma_dsetvector( size_b * size_b * A.numblocks, A.val, 1, B->dval, 1, queue ); for( magma_int_t i=0; i<size_b*size_b*A.numblocks; i++ ) { B->dval[i] = A.val[i]; } //magma_index_setvector( r_blocks + 1, A.row, 1, B->drow, 1, queue ); for( magma_int_t i=0; i<r_blocks+1; i++ ) { B->drow[i] = A.row[i]; } //magma_index_setvector( A.numblocks, A.col, 1, B->dcol, 1, queue ); for( magma_int_t i=0; i<A.numblocks; i++ ) { B->dcol[i] = A.col[i]; } } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer for( magma_int_t i=0; i<A.num_rows*A.num_cols; i++ ) { B->val[i] = A.val[i]; } } } // third case: copy matrix from device to host else if ( src == Magma_DEV && dst == Magma_CPU ) { //CSR-type if ( A.storage_type == Magma_CSR || 
A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter 
= A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->rowidx, A.nnz )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.nnz, A.drowidx, 1, B->rowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc_cpu( &B->col, A.num_rows * A.max_nnz_row )); // data transfer magma_dgetvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->col, 1, queue ); } 
//ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc_cpu( &B->val, rowlength * A.num_rows )); CHECK( magma_index_malloc_cpu( &B->row, A.num_rows )); CHECK( magma_index_malloc_cpu( &B->col, rowlength * A.num_rows )); // data transfer magma_dgetvector( A.num_rows * rowlength, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.num_rows * rowlength, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.num_rows, A.drow, 1, B->row, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.nnz )); CHECK( magma_index_malloc_cpu( &B->col, A.nnz )); CHECK( magma_index_malloc_cpu( &B->row, A.numblocks + 1 )); // data transfer magma_dgetvector( A.nnz, A.dval, 1, B->val, 1, queue ); magma_index_getvector( A.nnz, A.dcol, 1, B->col, 1, queue ); magma_index_getvector( A.numblocks + 1, A.drow, 1, B->row, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B 
B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc_cpu( &B->val, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc_cpu( &B->row, r_blocks + 1 )); CHECK( magma_index_malloc_cpu( &B->col, A.numblocks )); // data transfer magma_dgetvector( size_b * size_b * A.numblocks, A.dval, 1, B->val, 1, queue ); magma_index_getvector( r_blocks + 1, A.drow, 1, B->row, 1, queue ); magma_index_getvector( A.numblocks, A.dcol, 1, B->col, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_CPU; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc_cpu( &B->val, A.num_rows * A.num_cols )); // data transfer magma_dgetvector( A.num_rows * A.num_cols, A.dval, 1, B->val, 1, queue ); } } // fourth case: copy matrix from device to device else if ( src == Magma_DEV && dst == Magma_DEV ) { //CSR-type if ( A.storage_type == Magma_CSR || A.storage_type == Magma_CUCSR || A.storage_type == Magma_CSC || A.storage_type == Magma_CSRD || A.storage_type == Magma_CSRL || 
A.storage_type == Magma_CSRU ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); } //COO-type else if ( A.storage_type == Magma_COO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //CSRCOO-type else if ( A.storage_type == Magma_CSRCOO ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.num_rows + 1 )); 
CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drowidx, A.nnz )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.nnz, A.drowidx, 1, B->drowidx, 1, queue ); } //ELL/ELLPACKT-type else if ( A.storage_type == Magma_ELLPACKT || A.storage_type == Magma_ELL ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLD-type else if ( A.storage_type == Magma_ELLD ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.max_nnz_row )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * A.max_nnz_row )); // data transfer magma_dcopyvector( A.num_rows * A.max_nnz_row, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * A.max_nnz_row, A.dcol, 1, B->dcol, 1, queue ); } //ELLRT-type else if ( A.storage_type == Magma_ELLRT ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = 
Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->alignment = A.alignment; //int threads_per_row = A.alignment; //int rowlength = magma_roundup( A.max_nnz_row, threads_per_row ); magma_int_t rowlength = magma_roundup( A.max_nnz_row, A.alignment ); // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->dcol, A.num_rows * rowlength )); CHECK( magma_index_malloc( &B->drow, A.num_rows )); // data transfer magma_dcopyvector( A.num_rows * rowlength, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.num_rows * rowlength, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.num_rows, A.drow, 1, B->drow, 1, queue ); } //SELLP-type else if ( A.storage_type == Magma_SELLP ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; // memory allocation CHECK( magma_dmalloc( &B->dval, A.nnz )); CHECK( magma_index_malloc( &B->dcol, A.nnz )); CHECK( magma_index_malloc( &B->drow, A.numblocks + 1 )); // data transfer magma_dcopyvector( A.nnz, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( A.nnz, A.dcol, 1, B->dcol, 1, queue ); magma_index_copyvector( A.numblocks + 1, A.drow, 1, B->drow, 1, queue ); } //BCSR-type else if ( A.storage_type == Magma_BCSR ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; 
B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->blocksize = A.blocksize; B->numblocks = A.numblocks; B->alignment = A.alignment; magma_int_t size_b = A.blocksize; //magma_int_t c_blocks = ceil( (float)A.num_cols / (float)size_b ); // max number of blocks per row //magma_int_t r_blocks = ceil( (float)A.num_rows / (float)size_b ); magma_int_t r_blocks = magma_ceildiv( A.num_rows, size_b ); // max number of blocks per column // memory allocation CHECK( magma_dmalloc( &B->dval, size_b * size_b * A.numblocks )); CHECK( magma_index_malloc( &B->drow, r_blocks + 1 )); CHECK( magma_index_malloc( &B->dcol, A.numblocks )); // data transfer magma_dcopyvector( size_b * size_b * A.numblocks, A.dval, 1, B->dval, 1, queue ); magma_index_copyvector( r_blocks + 1, A.drow, 1, B->drow, 1, queue ); magma_index_copyvector( A.numblocks, A.dcol, 1, B->dcol, 1, queue ); } //DENSE-type else if ( A.storage_type == Magma_DENSE ) { // fill in information for B B->storage_type = A.storage_type; B->memory_location = Magma_DEV; B->sym = A.sym; B->diagorder_type = A.diagorder_type; B->fill_mode = A.fill_mode; B->num_rows = A.num_rows; B->num_cols = A.num_cols; B->nnz = A.nnz; B->true_nnz = A.true_nnz; B->max_nnz_row = A.max_nnz_row; B->diameter = A.diameter; B->major = A.major; B->ld = A.ld; // memory allocation CHECK( magma_dmalloc( &B->dval, A.num_rows * A.num_cols )); // data transfer magma_dcopyvector( A.num_rows * A.num_cols, A.dval, 1, B->dval, 1, queue ); } } cleanup: if( info != 0 ){ magma_dmfree( B, queue ); } return info; }