/**
    Splits a sparse matrix A into A = D + R, where D contains the
    block-diagonal part and R everything outside the diagonal blocks.
    The first diagonal block has size 'offset'; all following blocks
    have size 'bsize'. D is returned in CSRD format (the diagonal
    element is stored FIRST in each row), R in plain CSR.

    Works directly when A is a CPU matrix in CSR/CSRCOO storage;
    otherwise it transfers/converts A to CPU-CSR, recurses, and
    converts/transfers the results back.

    @param[in]  offset  magma_int_t     size of the first diagonal block
    @param[in]  bsize   magma_int_t     size of the remaining diagonal blocks
    @param[in]  A       magma_s_matrix  input matrix
    @param[out] D       magma_s_matrix* block-diagonal part (Magma_CSRD)
    @param[out] R       magma_s_matrix* off-block-diagonal part (Magma_CSR)
    @param[in]  queue   magma_queue_t   queue for internal operations

    @return magma_int_t  0 on success; -1 if a diagonal block row has no
                         stored diagonal entry (D and R are freed then).
*/
extern "C" magma_int_t
magma_scsrsplit(
    magma_int_t offset,
    magma_int_t bsize,
    magma_s_matrix A,
    magma_s_matrix *D,
    magma_s_matrix *R,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t i, k, j, nnz_diag, nnz_offd;

    // Null all fields so that the cleanup path can safely free D and R
    // even when allocation fails part-way through.
    D->val = NULL;
    D->col = NULL;
    D->row = NULL;
    D->rowidx = NULL;
    D->blockinfo = NULL;
    D->diag = NULL;
    D->dval = NULL;
    D->dcol = NULL;
    D->drow = NULL;
    D->drowidx = NULL;
    D->ddiag = NULL;
    R->val = NULL;
    R->col = NULL;
    R->row = NULL;
    R->rowidx = NULL;
    R->blockinfo = NULL;
    R->diag = NULL;
    R->dval = NULL;
    R->dcol = NULL;
    R->drow = NULL;
    R->drowidx = NULL;
    R->ddiag = NULL;

    if ( A.memory_location == Magma_CPU &&
            ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) )
    {
        nnz_diag = nnz_offd = 0;
        // Pass 1: count the nonzeros going into D and R, and verify that
        // every row inside a diagonal block has an explicit diagonal entry.
        // This first loop covers only the leading block of size 'offset'
        // (the step i+=offset makes the body execute exactly once, i=0).
        for( i=0; i<offset; i+=offset ){
            for( k=i; k<min(A.num_rows,i+offset); k++ ){
                int check = 0;
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+offset ){
                        if( A.col[j] == k ){
                            check = 1;
                        }
                        nnz_diag++;
                    }
                    else
                        nnz_offd++;
                }
                if( check == 0 ){
                    // FIX: report the actual row k missing its diagonal
                    // entry, not the block start i.
                    printf("error: matrix contains zero on diagonal at (%d,%d).\n", int(k), int(k));
                    info = -1;
                    goto cleanup;
                }
            }
        }
        // i == offset here; remaining rows are split into blocks of 'bsize'.
        magma_int_t ii = i;
        for( i=ii; i<A.num_rows; i+=bsize ){
            for( k=i; k<min(A.num_rows,i+bsize); k++ ){
                int check = 0;
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+bsize ){
                        if( A.col[j] == k ){
                            check = 1;
                        }
                        nnz_diag++;
                    }
                    else
                        nnz_offd++;
                }
                if( check == 0 ){
                    // FIX: same as above — print the offending row k.
                    printf("error: matrix contains zero on diagonal at (%d,%d).\n", int(k), int(k));
                    info = -1;
                    goto cleanup;
                }
            }
        }

        // Allocate memory for the new matrices.
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;
        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;
        CHECK( magma_smalloc_cpu( &D->val, nnz_diag ));
        CHECK( magma_index_malloc_cpu( &D->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &D->col, nnz_diag ));
        CHECK( magma_smalloc_cpu( &R->val, nnz_offd ));
        CHECK( magma_index_malloc_cpu( &R->row, A.num_rows+1 ));
        CHECK( magma_index_malloc_cpu( &R->col, nnz_offd ));

        // Pass 2: fill D and R. Within each row of D the diagonal entry is
        // written first (at D->row[k]); sub-diagonal entries are shifted one
        // slot right (nnz_diag+1) to keep that slot reserved.
        D->row[0] = 0;
        R->row[0] = 0;
        nnz_offd = nnz_diag = 0;
        // First block of size 'offset' (body runs once, i=0, as in pass 1).
        for( i=0; i<offset; i+=offset) {
            for( k=i; k<min(A.num_rows,i+offset); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+offset ) {
                        // entries right of the diagonal keep their order
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // the diagonal entry is written first in the row
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // entries left of the diagonal are shifted one to
                        // the right to leave room for the diagonal slot
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
        // Remaining blocks of size 'bsize'.
        ii = i;
        for( i=ii; i<A.num_rows; i+=bsize) {
            for( k=i; k<min(A.num_rows,i+bsize); k++ ) {
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
                for( j=A.row[k]; j<A.row[k+1]; j++ ) {
                    if ( A.col[j] < i ) {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ) {
                        // entries right of the diagonal keep their order
                        if ( A.col[j]>k ) {
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // the diagonal entry is written first in the row
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // entries left of the diagonal are shifted one to
                        // the right to leave room for the diagonal slot
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
    }
    else {
        // Fallback: bring A to CPU/CSR, recurse, convert results back.
        // NOTE(review): if a CHECK fails mid-sequence, the temporaries
        // allocated so far are not freed (cleanup only frees D and R) —
        // consider moving these frees under the cleanup label.
        magma_s_matrix Ah={Magma_CSR}, ACSR={Magma_CSR}, DCSR={Magma_CSR},
            RCSR={Magma_CSR}, Dh={Magma_CSR}, Rh={Magma_CSR};
        CHECK( magma_smtransfer( A, &Ah, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_smconvert( Ah, &ACSR, A.storage_type, Magma_CSR, queue ));

        CHECK( magma_scsrsplit( offset, bsize, ACSR, &DCSR, &RCSR, queue ));

        CHECK( magma_smconvert( DCSR, &Dh, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_smconvert( RCSR, &Rh, Magma_CSR, A.storage_type, queue ));

        CHECK( magma_smtransfer( Dh, D, Magma_CPU, A.memory_location, queue ));
        CHECK( magma_smtransfer( Rh, R, Magma_CPU, A.memory_location, queue ));

        magma_smfree( &Ah, queue );
        magma_smfree( &ACSR, queue );
        magma_smfree( &Dh, queue );
        magma_smfree( &DCSR, queue );
        magma_smfree( &Rh, queue );
        magma_smfree( &RCSR, queue );
    }
cleanup:
    if( info != 0 ){
        magma_smfree( D, queue );
        magma_smfree( R, queue );
    }
    return info;
}
// Legacy interface: splits A = D + R with a uniform diagonal block size
// 'bsize'. D receives the block-diagonal part in CSRD format (diagonal
// element stored FIRST in each row), R the off-block-diagonal part in CSR.
// Works in place for CPU matrices in CSR/CSRCOO storage; otherwise
// transfers/converts to CPU-CSR, recurses, and converts back.
//
// NOTE(review): unlike the newer variant, this version does not check the
// return codes of the CPU allocations below, and it does not detect a
// missing diagonal entry — D's diagonal slot is then left unwritten.
magma_int_t
magma_scsrsplit( magma_int_t bsize,
                 magma_s_sparse_matrix A,
                 magma_s_sparse_matrix *D,
                 magma_s_sparse_matrix *R ){
    if( A.memory_location == Magma_CPU &&
            ( A.storage_type == Magma_CSR || A.storage_type == Magma_CSRCOO ) ){
        magma_int_t i, k, j, nnz_diag, nnz_offd;
        nnz_diag = nnz_offd = 0;
        // Count the new number of nonzeroes in the two matrices:
        // columns inside [i, i+bsize) belong to D, all others to R.
        for( i=0; i<A.num_rows; i+=bsize )
            for( k=i; k<min(A.num_rows,i+bsize); k++ )
                for( j=A.row[k]; j<A.row[k+1]; j++ )
                    if ( A.col[j] < i )
                        nnz_offd++;
                    else if ( A.col[j] < i+bsize )
                        nnz_diag++;
                    else
                        nnz_offd++;

        // Allocate memory for the new matrices
        D->storage_type = Magma_CSRD;
        D->memory_location = A.memory_location;
        D->num_rows = A.num_rows;
        D->num_cols = A.num_cols;
        D->nnz = nnz_diag;
        R->storage_type = Magma_CSR;
        R->memory_location = A.memory_location;
        R->num_rows = A.num_rows;
        R->num_cols = A.num_cols;
        R->nnz = nnz_offd;
        // NOTE(review): return values of these allocations are ignored —
        // on failure the fill loops below would dereference NULL.
        magma_smalloc_cpu( &D->val, nnz_diag );
        magma_index_malloc_cpu( &D->row, A.num_rows+1 );
        magma_index_malloc_cpu( &D->col, nnz_diag );
        magma_smalloc_cpu( &R->val, nnz_offd );
        magma_index_malloc_cpu( &R->row, A.num_rows+1 );
        magma_index_malloc_cpu( &R->col, nnz_offd );

        // Fill up the new sparse matrices. Within each row of D the
        // diagonal entry goes to slot D->row[k]; sub-diagonal entries are
        // written one slot right (nnz_diag+1) to keep that slot reserved.
        D->row[0] = 0;
        R->row[0] = 0;
        nnz_offd = nnz_diag = 0;
        for( i=0; i<A.num_rows; i+=bsize){
            for( k=i; k<min(A.num_rows,i+bsize); k++ ){
                D->row[k+1] = D->row[k];
                R->row[k+1] = R->row[k];
                for( j=A.row[k]; j<A.row[k+1]; j++ ){
                    if ( A.col[j] < i ){
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                    else if ( A.col[j] < i+bsize ){
                        // larger than diagonal remain as before
                        if ( A.col[j]>k ){
                            D->val[nnz_diag] = A.val[ j ];
                            D->col[nnz_diag] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // diagonal is written first
                        else if ( A.col[j]==k ) {
                            D->val[D->row[k]] = A.val[ j ];
                            D->col[D->row[k]] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        // smaller than diagonal are shifted one to the right
                        // to have room for the diagonal
                        else {
                            D->val[nnz_diag+1] = A.val[ j ];
                            D->col[nnz_diag+1] = A.col[ j ];
                            D->row[k+1]++;
                        }
                        nnz_diag++;
                    }
                    else {
                        R->val[nnz_offd] = A.val[j];
                        R->col[nnz_offd] = A.col[j];
                        R->row[k+1]++;
                        nnz_offd++;
                    }
                }
            }
        }
        return MAGMA_SUCCESS;
    }
    else{
        // Fallback: convert to CPU/CSR, recurse, convert results back.
        magma_s_sparse_matrix Ah, ACSR, DCSR, RCSR, Dh, Rh;
        magma_s_mtransfer( A, &Ah, A.memory_location, Magma_CPU );
        magma_s_mconvert( Ah, &ACSR, A.storage_type, Magma_CSR );

        magma_scsrsplit( bsize, ACSR, &DCSR, &RCSR );

        magma_s_mconvert( DCSR, &Dh, Magma_CSR, A.storage_type );
        magma_s_mconvert( RCSR, &Rh, Magma_CSR, A.storage_type );

        magma_s_mtransfer( Dh, D, Magma_CPU, A.memory_location );
        magma_s_mtransfer( Rh, R, Magma_CPU, A.memory_location );

        magma_s_mfree( &Ah );
        magma_s_mfree( &ACSR );
        magma_s_mfree( &Dh );
        magma_s_mfree( &DCSR );
        magma_s_mfree( &Rh );
        magma_s_mfree( &RCSR );

        return MAGMA_SUCCESS;
    }
}