STKUNIT_UNIT_TEST(UnitTestLinsysFunctions, test1)
{
    static const size_t spatial_dimension = 3;

    MPI_Barrier( MPI_COMM_WORLD );
    MPI_Comm comm = MPI_COMM_WORLD;
    //First create and fill MetaData and BulkData objects:

    const unsigned bucket_size = 100; //for a real application mesh, bucket_size would be much bigger...

    stk::mesh::fem::FEMMetaData fem_meta;
    stk::mesh::fem::FEMMetaData fem_meta2;
    fem_meta.FEM_initialize(spatial_dimension);
    fem_meta2.FEM_initialize(spatial_dimension);

    stk::mesh::MetaData & meta_data = stk::mesh::fem::FEMMetaData::get_meta_data(fem_meta);
    stk::mesh::MetaData & meta_data2 = stk::mesh::fem::FEMMetaData::get_meta_data(fem_meta2);

    const stk::mesh::EntityRank element_rank = fem_meta.element_rank();

    stk::mesh::BulkData bulk_data( meta_data, comm, bucket_size );
    stk::mesh::BulkData bulk_data2( meta_data2, comm, bucket_size );

    //create a boundary-condition part for testing later:
    stk::mesh::Part& bcpart = fem_meta.declare_part("bcpart");

    fill_utest_mesh_meta_data( fem_meta );

    bool use_temperature=false;
    fill_utest_mesh_meta_data( fem_meta2, use_temperature );

    fill_utest_mesh_bulk_data( bulk_data );
    fill_utest_mesh_bulk_data( bulk_data2 );

    //Set owner-processors to lowest-sharing (stk::mesh defaults to
    //highest-sharing). Highest-sharing ownership doesn't match the way the
    //fei library assigns ownership of shared nodes for vectors etc.
    stk::mesh::set_owners<stk::mesh::LowestRankSharingProcOwns>( bulk_data );

    //put a node in our boundary-condition part. arbitrarily choose the
    //first locally-owned node:

    bulk_data.modification_begin();

    std::vector<stk::mesh::Entity*> local_nodes;
    stk::mesh::Selector select_owned(meta_data.locally_owned_part());
    stk::mesh::get_selected_entities(select_owned,
                                     bulk_data.buckets(NODE_RANK),
                                     local_nodes);

    stk::mesh::EntityId bc_node_id = 0;

    if (local_nodes.size() > 0) {
        stk::mesh::PartVector partvector;
        partvector.push_back(&bcpart);
        bulk_data.change_entity_parts(*local_nodes[0], partvector);
        bc_node_id = stk::linsys::impl::entityid_to_int(local_nodes[0]->identifier());
    }

    bulk_data.modification_end();

    //Verify the mesh: count the locally-owned/shared elements and nodes in block_1.
    stk::mesh::Selector selector = ( meta_data.locally_owned_part() | meta_data.globally_shared_part() ) & *meta_data.get_part("block_1");
    std::vector<unsigned> count;
    stk::mesh::count_entities(selector, bulk_data, count);

    STKUNIT_ASSERT_EQUAL( count[element_rank], (unsigned)4 );
    STKUNIT_ASSERT_EQUAL( count[NODE_RANK], (unsigned)20 );

    ScalarField* temperature_field = meta_data.get_field<ScalarField>("temperature");

    //Create a fei Factory and stk::linsys::LinearSystem object:

    fei::SharedPtr<fei::Factory> factory(new Factory_Trilinos(comm));

    stk::linsys::LinearSystem ls(comm, factory);

    stk::linsys::add_connectivities(ls, element_rank, NODE_RANK,
                                    *temperature_field, selector, bulk_data);

    fei::SharedPtr<fei::MatrixGraph> matgraph = ls.get_fei_MatrixGraph();
    int num_blocks = matgraph->getNumConnectivityBlocks();

    STKUNIT_ASSERT_EQUAL( num_blocks, (int)1 );

    //Synchronize the DOF mappings and graph structure across processors,
    //then create the underlying fei::LinearSystem:
    ls.synchronize_mappings_and_structure();
    ls.create_fei_LinearSystem();

    //put 0 throughout the matrix and 3 throughout the rhs:
    fei::SharedPtr<fei::Matrix> mat = ls.get_fei_LinearSystem()->getMatrix();
    mat->putScalar(0);
    ls.get_fei_LinearSystem()->getRHS()->putScalar(3.0);

    //put 10 on the matrix diagonal to ensure it will be easy to solve later.
    fei::SharedPtr<fei::VectorSpace> vspace = ls.get_fei_LinearSystem()->getRHS()->getVectorSpace();
    int numLocalRows = vspace->getNumIndices_Owned();
    std::vector<int> local_rows(numLocalRows);
    vspace->getIndices_Owned(numLocalRows, &local_rows[0], numLocalRows);

    for(size_t i=0; i<local_rows.size(); ++i) {
        int col = local_rows[i];
        double coef = 10;
        double* coefPtr = &coef;
        mat->sumIn(1, &local_rows[i], 1, &col, &coefPtr);
    }
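    //(After this loop the matrix is 10 on every locally-owned diagonal entry
    // and 0 elsewhere, so the system below is trivially solvable.)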

    //now we'll impose a dirichlet bc on our one-node bcpart:
    stk::linsys::dirichlet_bc(ls, bulk_data, bcpart, NODE_RANK,
                              *temperature_field, 0, 9.0);

    ls.finalize_assembly();

    //now confirm that the rhs value for the equation corresponding to our
    //bc node is 9.0:

    fei::SharedPtr<fei::Vector> rhsvec = ls.get_fei_LinearSystem()->getRHS();
    double rhs_bc_val = 0;
    int bc_eqn_index = ls.get_DofMapper().get_global_index(NODE_RANK,
                       bc_node_id, *temperature_field);
    rhsvec->copyOut(1, &bc_eqn_index, &rhs_bc_val);

    bool bc_val_is_correct = std::abs(rhs_bc_val - 9.0) < 1.e-13;
    STKUNIT_ASSERT( bc_val_is_correct );

    stk::linsys::copy_vector_to_mesh( *rhsvec, ls.get_DofMapper(), bulk_data);

    stk::mesh::Entity* bc_node = bulk_data.get_entity(NODE_RANK, local_nodes[0]->identifier());

    stk::mesh::FieldTraits<ScalarField>::data_type* bc_node_data = stk::mesh::field_data(*temperature_field, *bc_node);

    bool bc_node_data_is_correct = std::abs(bc_node_data[0] - 9.0) < 1.e-13;
    STKUNIT_ASSERT( bc_node_data_is_correct );

    //now make sure we get a throw if we use the wrong bulk-data (that doesn't have the
    //temperature field defined)
    STKUNIT_ASSERT_THROW(stk::linsys::copy_vector_to_mesh( *rhsvec, ls.get_DofMapper(), bulk_data2), std::runtime_error);

    //obtain and zero the solution vector
    fei::SharedPtr<fei::Vector> solnvec = ls.get_fei_LinearSystem()->getSolutionVector();
    solnvec->putScalar(0);

    //copy the vector of zeros into the mesh:
    stk::linsys::copy_vector_to_mesh( *solnvec, ls.get_DofMapper(), bulk_data);

    //assert that our bc node's data is now zero.
    bc_node_data_is_correct = std::abs(bc_node_data[0]) < 1.e-13;
    STKUNIT_ASSERT( bc_node_data_is_correct );

    //call the linear-system solve function.
    //(note that when we add options to the solve method, we'll need to enhance this
    //testing to exercise various specific solves.)
    Teuchos::ParameterList params;

    int status = 0;
    ls.solve(status, params);

    //copy the solution-vector into the mesh:
    stk::linsys::copy_vector_to_mesh( *solnvec, ls.get_DofMapper(), bulk_data);

    //now assert that the value 9 (bc value) produced by the solve is in this
    //node's data.
    //note that we use a loose tolerance, because the default solver tolerance
    //is (I think) only 1.e-6.
    bc_node_data_is_correct = std::abs(bc_node_data[0] - 9.0) < 1.e-6;
    STKUNIT_ASSERT( bc_node_data_is_correct );
}
Example #2
///  The computed inverse is stored in the matrix ai
///  n_global: the order of the matrix
static void inv_driver(blas_idx_t n_global)
{

    auto grid = std::make_shared<blacs_grid_t>();

    // Create an NxN random matrix A
    auto a = block_cyclic_mat_t::random(grid, n_global, n_global);

    // Create an NxN matrix to hold A^{-1}
    auto ai = block_cyclic_mat_t::constant(grid, n_global, n_global);

    // Copy A to A^{-1} since it will be overwritten during factorization
    std::copy_n(a->local_data(), a->local_size(), ai->local_data());

    MPI_Barrier (MPI_COMM_WORLD);

    double t0 = MPI_Wtime();
    
    // Factorize the copy of A held in ai. ScaLAPACK's pdgetrf_ requires ipiv
    // to hold at least local_rows + row_block_size entries; the +100 is slack.
    blas_idx_t ia = 1, ja = 1;
    std::vector<blas_idx_t> ipiv(a->local_rows() + a->row_block_size() + 100);
    blas_idx_t info;

    // The name decodes as D-GE-TRF: D means double-precision entries, GE a
    // general matrix, and TRF a triangular factorization, i.e. the usual LU
    // decomposition.
    pdgetrf_(n_global, n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        info);
    assert(info == 0);
    double t_factor = MPI_Wtime() - t0;

    // Compute A^{-1} based on the LU factorization

    // Compute workspace for double and integer work arrays on each process
    blas_idx_t lwork  = 10;
    blas_idx_t liwork = 10;
    std::vector<double>     work (lwork); 
    std::vector<blas_idx_t> iwork(liwork);

    lwork = liwork = -1;   

    // Workspace query: with lwork = liwork = -1, pdgetri_ does no computation
    // and instead returns the optimal sizes in work[0] and iwork[0].
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    lwork  = static_cast<blas_idx_t>(work[0]);
    liwork = static_cast<blas_idx_t>(iwork[0]);
    work.resize(lwork);
    iwork.resize(liwork);

    // Now compute the inverse
    t0 = MPI_Wtime();
    pdgetri_(n_global, 
        ai->local_data(), ia, ja, ai->descriptor(), 
        ipiv.data(), 
        work.data(), lwork, iwork.data(), liwork, info);
    assert(info == 0);
    double t_solve = MPI_Wtime() - t0;

    // Verify that the inverse is correct using A*A^{-1} = I
    auto identity = block_cyclic_mat_t::diagonal(grid, n_global, n_global);

    // Overwrite 'identity' with A * A^{-1} - I and verify that its norm is small
    char nein = 'N';
    double alpha = 1.0, beta = -1.0;
    pdgemm_(nein, nein, n_global, n_global, n_global, alpha, 
        a->local_data() , ia, ja, a->descriptor(),
        ai->local_data(), ia, ja, ai->descriptor(),
        beta,
        identity->local_data(), ia, ja, identity->descriptor());

    // Compute 1-norm of the result
    char norm='1';
    work.resize(identity->local_cols());
    double err = pdlange_(norm, n_global, n_global, 
        identity->local_data(), ia, ja, identity->descriptor(), work.data());
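
    //(For norm='1', pdlange_ returns the maximum absolute column sum of the
    // residual; it needs one workspace entry per local column, hence the
    // resize of 'work' above.)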

    double t_total = t_factor + t_solve;
    double t_glob;
    MPI_Reduce(&t_total, &t_glob, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

    if (grid->iam() == 0) 
    {
        double gflops = getri_flops(n_global)/t_glob/grid->nprocs();
        printf("\n"
            "MATRIX INVERSE BENCHMARK SUMMARY\n"
            "================================\n"
            "N = %d\tNP = %d\tNP_ROW = %d\tNP_COL = %d\n"
            "Time for PxGETRF + PxGETRI = %10.7f seconds\tGflops/Proc = %10.7f, Error = %f\n",
            n_global, grid->nprocs(), grid->nprows(), grid->npcols(), 
            t_glob, gflops, err);
        fflush(stdout);
    }
}
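
// A minimal, hypothetical driver sketch showing how inv_driver might be
// invoked: the matrix order comes from the command line (default 1000), and
// MPI is initialized before any BLACS grid is created. Assumes <cstdlib>
// (for std::atoll) is included and that blacs_grid_t releases its grid in
// its destructor, before MPI_Finalize runs.
int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    blas_idx_t n = (argc > 1) ? static_cast<blas_idx_t>(std::atoll(argv[1]))
                              : 1000;
    inv_driver(n);

    MPI_Finalize();
    return 0;
}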