/**
    Purpose
    -------
    Shrinks a matrix A with m <= n columns to its leading square part:
    every entry whose column index is >= A.num_rows is set to zero, the
    result is passed through magma_dmcsrcompressor, and the column count
    of B is set equal to its row count.

    A matrix that is not already a CSR matrix in CPU memory is first moved
    to the CPU and converted to CSR, shrunk there, and the result is then
    converted / transferred back to the storage type and memory location
    of A.

    Arguments
    ---------
    @param[in]
    A           magma_d_matrix
                input matrix; must satisfy num_rows <= num_cols

    @param[out]
    B           magma_d_matrix*
                shrunk matrix

    @param[in]
    queue       magma_queue_t
                queue to execute in

    @return     0 on success, MAGMA_ERR_NOT_SUPPORTED if A has more rows
                than columns, otherwise the error code of the first
                failing MAGMA call.
*/
extern "C" magma_int_t
magma_dmshrink(
    magma_d_matrix A,
    magma_d_matrix *B,
    magma_queue_t queue )
{
    magma_int_t info = 0;

    // Host-side temporaries; only populated on the round-trip path below.
    magma_d_matrix hA={Magma_CSR}, hACSR={Magma_CSR},
                   hB={Magma_CSR}, hBCSR={Magma_CSR};

    // Guard: only wide (or square) matrices can be shrunk.
    if ( A.num_rows > A.num_cols ) {
        printf("%% error: A has too many rows: m > n.\n");
        info = MAGMA_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    if ( A.memory_location != Magma_CPU || A.storage_type != Magma_CSR ) {
        // Round trip: bring A to CPU/CSR, shrink recursively, restore
        // the original storage type and memory location.
        CHECK( magma_dmtransfer( A, &hA, A.memory_location, Magma_CPU, queue ));
        CHECK( magma_dmconvert( hA, &hACSR, A.storage_type, Magma_CSR, queue ));
        CHECK( magma_dmshrink( hACSR, &hBCSR, queue ));
        CHECK( magma_dmconvert( hBCSR, &hB, Magma_CSR, A.storage_type, queue ));
        CHECK( magma_dmtransfer( hB, B, Magma_CPU, A.memory_location, queue ));
    }
    else {
        // Work on a CSR copy of A.
        CHECK( magma_dmconvert( A, B, Magma_CSR, Magma_CSR, queue ));

        // Zero every entry located right of the leading square block.
        for ( magma_int_t k = 0; k < A.nnz; ++k ) {
            if ( B->col[k] >= A.num_rows ) {
                B->val[k] = MAGMA_D_ZERO;
            }
        }

        // Presumably drops the explicit zeros created above.
        CHECK( magma_dmcsrcompressor( B, queue ) );
        B->num_cols = B->num_rows;
    }

cleanup:
    magma_dmfree( &hA, queue );
    magma_dmfree( &hB, queue );
    magma_dmfree( &hACSR, queue );
    magma_dmfree( &hBCSR, queue );
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { TESTING_INIT(); magma_dopts zopts; magma_queue_t queue; magma_queue_create( /*devices[ opts->device ],*/ &queue ); int i=1; magma_dparse_opts( argc, argv, &zopts, &i, queue ); real_Double_t res; magma_d_sparse_matrix Z, A, AT, A2, B, B_d; B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); magma_dm_5stencil( laplace_size, &Z, queue ); } else { // file-matrix test magma_d_csr_mtx( &Z, argv[i], queue ); } printf( "# matrix info: %d-by-%d with %d nonzeros\n", (int) Z.num_rows,(int) Z.num_cols,(int) Z.nnz ); // scale matrix magma_dmscale( &Z, zopts.scaling, queue ); // remove nonzeros in matrix magma_dmcsrcompressor( &Z, queue ); // convert to be non-symmetric magma_d_mconvert( Z, &A, Magma_CSR, Magma_CSRL, queue ); // transpose magma_d_mtranspose( A, &AT, queue ); // convert, copy back and forth to check everything works magma_d_mconvert( AT, &B, Magma_CSR, zopts.output_format, queue ); magma_d_mfree(&AT, queue ); magma_d_mtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue ); magma_d_mfree(&B, queue ); magma_dmcsrcompressor_gpu( &B_d, queue ); magma_d_mtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue ); magma_d_mfree(&B_d, queue ); magma_d_mconvert( B, &AT, zopts.output_format,Magma_CSR, queue ); magma_d_mfree(&B, queue ); // transpose back magma_d_mtranspose( AT, &A2, queue ); magma_d_mfree(&AT, queue ); magma_dmdiff( A, A2, &res, queue); printf("# ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("# tester: ok\n"); else printf("# tester: failed\n"); magma_d_mfree(&A, queue ); magma_d_mfree(&A2, queue ); magma_d_mfree(&Z, queue ); i++; } magma_queue_destroy( queue ); TESTING_FINALIZE(); return 0; }
/**
    Purpose
    -------
    Generates the n^2-by-n^2 matrix of a 2D 5-point stencil on an n-by-n
    grid: 4 on the main diagonal and -1 on the diagonals with offsets 1
    and n. Couplings that would connect the last grid point of one grid
    row with the first grid point of the next one are set to zero, and the
    result is passed through magma_dmcsrcompressor.

    Arguments
    ---------
    @param[in]
    n           magma_int_t
                number of grid points per dimension

    @param[out]
    A           magma_d_matrix*
                generated matrix in CSR format; true_nnz is set to nnz

    @param[in]
    queue       magma_queue_t
                queue to execute in

    @return     0 on success, otherwise the error code of the first
                failing MAGMA call.
*/
magma_int_t
magma_dm_5stencil(
    magma_int_t n,
    magma_d_matrix *A,
    magma_queue_t queue )
{
    magma_int_t info = 0;
    magma_int_t i,j,k;
    magma_d_matrix hA={Magma_CSR};

    // generate matrix of desired structure and size (2d 5-point stencil)
    magma_int_t nn = n*n;
    magma_int_t offdiags = 2;
    magma_index_t *diag_offset=NULL;
    double *diag_vals=NULL;
    CHECK( magma_dmalloc_cpu( &diag_vals, offdiags+1 ));
    CHECK( magma_index_malloc_cpu( &diag_offset, offdiags+1 ));

    diag_offset[0] = 0;
    diag_offset[1] = 1;
    diag_offset[2] = n;

    #define REAL

    #ifdef COMPLEX
        // complex case
        diag_vals[0] = MAGMA_D_MAKE( 4.0, 4.0 );
        diag_vals[1] = MAGMA_D_MAKE( -1.0, -1.0 );
        diag_vals[2] = MAGMA_D_MAKE( -1.0, -1.0 );
    #else
        // real case
        diag_vals[0] = MAGMA_D_MAKE( 4.0, 0.0 );
        diag_vals[1] = MAGMA_D_MAKE( -1.0, 0.0 );
        diag_vals[2] = MAGMA_D_MAKE( -1.0, 0.0 );
    #endif

    CHECK( magma_dmgenerator( nn, offdiags, diag_offset, diag_vals, &hA, queue ));

    // now set some entries to zero (boundary...)
    for( i=0; i<n; i++ ) {
        for( j=0; j<n; j++ ) {
            magma_index_t row = i*n+j;
            for( k=hA.row[row]; k<hA.row[row+1]; k++) {
                // first point of a grid row: no coupling to the previous point
                if ((hA.col[k] == row-1 ) && (row+1)%n == 1 )
                    hA.val[k] = MAGMA_D_MAKE( 0.0, 0.0 );

                // last point of a grid row: no coupling to the next point
                if ((hA.col[k] == row+1 ) && (row)%n ==n-1 )
                    hA.val[k] = MAGMA_D_MAKE( 0.0, 0.0 );
            }
        }
    }

    CHECK( magma_dmconvert( hA, A, Magma_CSR, Magma_CSR, queue ));

    // A compressor failure must be reported to the caller instead of being
    // silently ignored (true_nnz would otherwise be set from a matrix in an
    // unknown state and 0 returned).
    CHECK( magma_dmcsrcompressor( A, queue ));
    A->true_nnz = A->nnz;

cleanup:
    magma_free_cpu( diag_vals );
    magma_free_cpu( diag_offset );
    magma_dmfree( &hA, queue );
    return info;
}
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; int i=1; real_Double_t start, end; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &A, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &A, argv[i], queue )); } printf( "\n# matrix info: %d-by-%d with %d nonzeros\n\n", int(A.num_rows), int(A.num_cols), int(A.nnz) ); // scale matrix CHECK( magma_dmscale( &A, zopts.scaling, queue )); // remove nonzeros in matrix start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor( &A, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA CPU: %.2e seconds.\n", (end-start)/10 ); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); start = magma_sync_wtime( queue ); for (int j=0; j<10; j++) CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); end = magma_sync_wtime( queue ); printf( " > MAGMA GPU: %.2e seconds.\n", (end-start)/10 ); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, Magma_CSR, Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, 
A2, &res, queue )); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester matrix compressor: ok\n"); else printf("%% tester matrix compressor: failed\n"); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); i++; } cleanup: magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }
/* //////////////////////////////////////////////////////////////////////////// -- testing any solver */ int main( int argc, char** argv ) { magma_int_t info = 0; TESTING_INIT(); magma_dopts zopts; magma_queue_t queue=NULL; magma_queue_create( 0, &queue ); real_Double_t res; magma_d_matrix Z={Magma_CSR}, A={Magma_CSR}, AT={Magma_CSR}, A2={Magma_CSR}, B={Magma_CSR}, B_d={Magma_CSR}; magma_index_t *comm_i=NULL; double *comm_v=NULL; magma_int_t start, end; int i=1; CHECK( magma_dparse_opts( argc, argv, &zopts, &i, queue )); B.blocksize = zopts.blocksize; B.alignment = zopts.alignment; while( i < argc ) { if ( strcmp("LAPLACE2D", argv[i]) == 0 && i+1 < argc ) { // Laplace test i++; magma_int_t laplace_size = atoi( argv[i] ); CHECK( magma_dm_5stencil( laplace_size, &Z, queue )); } else { // file-matrix test CHECK( magma_d_csr_mtx( &Z, argv[i], queue )); } printf("%% matrix info: %d-by-%d with %d nonzeros\n", int(Z.num_rows), int(Z.num_cols), int(Z.nnz) ); // slice matrix CHECK( magma_index_malloc_cpu( &comm_i, Z.num_rows ) ); CHECK( magma_dmalloc_cpu( &comm_v, Z.num_rows ) ); CHECK( magma_dmslice( 1, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 0, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 1, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); CHECK( magma_dmslice( 9, 8, Z, &A2, &AT, &B, comm_i, comm_v, &start, &end, queue ) ); magma_dprint_matrix( A2, 
queue ); magma_dprint_matrix( AT, queue ); magma_dprint_matrix( B, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&AT, queue ); magma_dmfree(&B, queue ); // scale matrix CHECK( magma_dmscale( &Z, zopts.scaling, queue )); // remove nonzeros in matrix CHECK( magma_dmcsrcompressor( &Z, queue )); // convert to be non-symmetric CHECK( magma_dmconvert( Z, &A, Magma_CSR, Magma_CSRL, queue )); // transpose CHECK( magma_dmtranspose( A, &AT, queue )); // convert, copy back and forth to check everything works CHECK( magma_dmconvert( AT, &B, Magma_CSR, zopts.output_format, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmtransfer( B, &B_d, Magma_CPU, Magma_DEV, queue )); magma_dmfree(&B, queue ); CHECK( magma_dmcsrcompressor_gpu( &B_d, queue )); CHECK( magma_dmtransfer( B_d, &B, Magma_DEV, Magma_CPU, queue )); magma_dmfree(&B_d, queue ); CHECK( magma_dmconvert( B, &AT, zopts.output_format,Magma_CSR, queue )); magma_dmfree(&B, queue ); // transpose back CHECK( magma_dmtranspose( AT, &A2, queue )); magma_dmfree(&AT, queue ); CHECK( magma_dmdiff( A, A2, &res, queue)); printf("%% ||A-B||_F = %8.2e\n", res); if ( res < .000001 ) printf("%% tester: ok\n"); else printf("%% tester: failed\n"); magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); comm_i=NULL; comm_v=NULL; magma_dmfree(&A, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); i++; } cleanup: magma_free_cpu( comm_i ); magma_free_cpu( comm_v ); magma_dmfree(&AT, queue ); magma_dmfree(&A, queue ); magma_dmfree(&B, queue ); magma_dmfree(&B_d, queue ); magma_dmfree(&A2, queue ); magma_dmfree(&Z, queue ); magma_queue_destroy( queue ); TESTING_FINALIZE(); return info; }