/// Compute the number of column entries of every matrix row generated by the updatable nodes.
///
/// Rows are emitted variable by variable, then node by node, then once per equation of that
/// variable (the same loop nesting create_map_data uses for the updatable nodes). Every row that
/// belongs to node i receives total_nb_eq * (starting_indices[i+1] - starting_indices[i]) entries.
/// Nodes whose isUpdatable() flag is not set contribute no rows.
///
/// @param cp                   communication pattern, only its isUpdatable() flags are read
/// @param variables            supplies the number of variables, the length of each variable and the total number of equations
/// @param starting_indices     per-node offsets, must contain exactly one more entry than there are nodes
/// @param num_indices_per_row  output, the row sizes are appended to the current content
void create_nb_indices_per_row(cf3::common::PE::CommPattern& cp, const VariablesDescriptor& variables, const std::vector<Uint>& starting_indices, std::vector<int>& num_indices_per_row )
{
  const Uint nb_vars = variables.nb_vars();
  const Uint total_nb_eq = variables.size();
  const Uint nb_nodes_for_rank = cp.isUpdatable().size();

  cf3_assert(nb_nodes_for_rank+1 == starting_indices.size());

  num_indices_per_row.reserve(nb_nodes_for_rank*total_nb_eq);

  for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx)
  {
    const Uint neq = variables.var_length(var_idx);

    for(Uint i = 0; i != nb_nodes_for_rank; ++i)
    {
      if(!cp.isUpdatable()[i])
        continue;

      // All equations of a node share the same row size, so compute it once per node
      const int row_size = static_cast<int>(total_nb_eq*(starting_indices[i+1]-starting_indices[i]));
      for(Uint j = 0; j != neq; ++j)
      {
        num_indices_per_row.push_back(row_size);
      }
    }
  }
}
/// (Re)build the vector storage, its communication pattern and the wrapping Epetra objects.
///
/// @param cp                     communication pattern of the nodes, forwarded to create_map_data
/// @param vars                   variable layout, vars.size() becomes the number of equations
/// @param periodic_links_nodes   forwarded unchanged to create_map_data
/// @param periodic_links_active  forwarded unchanged to create_map_data
void TrilinosVector::create_blocked(common::PE::CommPattern& cp, const VariablesDescriptor& vars, const std::vector<Uint>& periodic_links_nodes, const std::vector<bool>& periodic_links_active)
{
  // Tear down a previously built vector first
  if (m_is_created)
  {
    destroy();
  }

  // Intermediate data filled in by create_map_data
  std::vector<int> global_element_ids;
  std::vector<Uint> element_ranks;
  int nb_my_global_elements = 0;
  create_map_data(cp, vars, m_p2m, global_element_ids, element_ranks, nb_my_global_elements, periodic_links_nodes, periodic_links_active);

  // One value per global element id
  m_data.resize(global_element_ids.size());

  // The communication pattern wants the GIDs as Uint
  std::vector<Uint> gids(global_element_ids.begin(), global_element_ids.end());

  // Replace any communication pattern left over from an earlier build
  if(is_not_null(get_child("CommPattern")))
  {
    remove_component("CommPattern");
  }
  m_comm_pattern = common::allocate_component<common::PE::CommPattern>("CommPattern");
  m_comm_pattern->insert("gid",gids,1,false);
  m_comm_pattern->setup(Handle<common::PE::CommWrapper>(m_comm_pattern->get_child("gid")),my_ranks_alias(element_ranks));
  m_comm_pattern->insert(name(), m_data, true);

  // map (it is actually a block map rather than a row map, to involve ghosts)
  m_map = Teuchos::rcp(new Epetra_Map(-1,nb_my_global_elements,&global_element_ids[0],0,m_comm));

  // The Epetra vector is a view on m_data, it does not own the values
  m_vec = Teuchos::rcp(new Epetra_Vector(View, *m_map, &m_data[0]));

  // Class state
  m_neq = vars.size();
  m_blockrow_size = cp.isUpdatable().size();
  m_is_created = true;
}
void TrilinosCrsMatrix::create_blocked(common::PE::CommPattern& cp, const VariablesDescriptor& vars, const std::vector< Uint >& node_connectivity, const std::vector< Uint >& starting_indices, Vector& solution, Vector& rhs, const std::vector<Uint>& periodic_links_nodes, const std::vector<bool>& periodic_links_active) { // if already created if (m_is_created) destroy(); // Copy node connectivity m_node_connectivity.resize(node_connectivity.size()); m_starting_indices.resize(starting_indices.size()); std::copy(node_connectivity.begin(), node_connectivity.end(), m_node_connectivity.begin()); std::copy(starting_indices.begin(), starting_indices.end(), m_starting_indices.begin()); const Uint total_nb_eq = vars.size(); // prepare intermediate data std::vector<int> my_global_elements; std::vector<Uint> my_ranks; create_map_data(cp, vars, m_p2m, my_global_elements, my_ranks, m_num_my_elements, periodic_links_nodes, periodic_links_active); std::vector<int> num_indices_per_row; num_indices_per_row.reserve(m_num_my_elements); std::vector<int> indices_per_row; create_indices_per_row(cp, vars, node_connectivity, starting_indices, m_p2m, num_indices_per_row, indices_per_row, periodic_links_nodes, periodic_links_active); m_converted_indices.resize(*std::max_element(num_indices_per_row.begin(), num_indices_per_row.end())); // rowmap, ghosts not present Epetra_Map rowmap(-1,m_num_my_elements,&my_global_elements[0],0,m_comm); // colmap, has ghosts at the end Epetra_Map colmap(-1,my_global_elements.size(),&my_global_elements[0],0,m_comm); my_global_elements.clear(); // no longer needed // Create the graph, using static profile for performance Epetra_CrsGraph graph(Copy, rowmap, colmap, &num_indices_per_row[0], true); // Fill the graph int row_start = 0; cf3_assert(num_indices_per_row.size() == m_num_my_elements); for(int i = 0; i != m_num_my_elements; ++i) { const int row_nb_elems = num_indices_per_row[i]; cf3_assert( (row_start + row_nb_elems) <= indices_per_row.size() ); 
TRILINOS_THROW(graph.InsertMyIndices(i, row_nb_elems, &indices_per_row[row_start])); row_start += row_nb_elems; } TRILINOS_THROW(graph.FillComplete()); TRILINOS_THROW(graph.OptimizeStorage()); // create matrix m_mat=Teuchos::rcp(new Epetra_CrsMatrix(Copy, graph)); TRILINOS_THROW(m_mat->FillComplete()); TRILINOS_THROW(m_mat->OptimizeStorage()); // set class properties m_is_created=true; m_neq=total_nb_eq; CFdebug << "Rank " << common::PE::Comm::instance().rank() << ": Created a " << m_mat->NumGlobalCols() << " x " << m_mat->NumGlobalRows() << " trilinos matrix with " << m_mat->NumGlobalNonzeros() << " non-zero elements and " << m_num_my_elements << " local rows" << CFendl; }
void TrilinosCrsMatrix::create_blocked(common::PE::CommPattern& cp, const VariablesDescriptor& vars, const std::vector< Uint >& node_connectivity, const std::vector< Uint >& starting_indices, Vector& solution, Vector& rhs) { // if already created if (m_is_created) destroy(); const Uint total_nb_eq = vars.size(); // prepare intermediate data std::vector<int> num_indices_per_row; std::vector<int> my_global_elements; create_map_data(cp, vars, m_p2m, my_global_elements, m_num_my_elements); create_nb_indices_per_row(cp, vars, starting_indices, num_indices_per_row); // rowmap, ghosts not present Epetra_Map rowmap(-1,m_num_my_elements,&my_global_elements[0],0,m_comm); // colmap, has ghosts at the end const Uint nb_nodes_for_rank = cp.isUpdatable().size(); Epetra_Map colmap(-1,nb_nodes_for_rank*total_nb_eq,&my_global_elements[0],0,m_comm); my_global_elements.clear(); // Create the graph, using static profile for performance Epetra_CrsGraph graph(Copy, rowmap, colmap, &num_indices_per_row[0], true); // Fill the graph int max_nb_row_entries=0; for(int i = 0; i != nb_nodes_for_rank; ++i) { const int nb_row_nodes = starting_indices[i+1] - starting_indices[i]; max_nb_row_entries = nb_row_nodes > max_nb_row_entries ? 
nb_row_nodes : max_nb_row_entries; } m_converted_indices.resize(max_nb_row_entries*total_nb_eq); for(int i = 0; i != nb_nodes_for_rank; ++i) { if(cp.isUpdatable()[i]) { const Uint columns_begin = starting_indices[i]; const Uint columns_end = starting_indices[i+1]; for(Uint j = columns_begin; j != columns_end; ++j) { const Uint column = j-columns_begin; const Uint node_idx = node_connectivity[j]*total_nb_eq; for(int k = 0; k != total_nb_eq; ++k) { m_converted_indices[column*total_nb_eq+k] = m_p2m[node_idx+k]; } } for(int k = 0; k != total_nb_eq; ++k) { const int row = m_p2m[i*total_nb_eq+k]; TRILINOS_THROW(graph.InsertMyIndices(row, static_cast<int>(total_nb_eq*(columns_end - columns_begin)), &m_converted_indices[0])); } } } TRILINOS_THROW(graph.FillComplete()); TRILINOS_THROW(graph.OptimizeStorage()); // create matrix m_mat=Teuchos::rcp(new Epetra_CrsMatrix(Copy, graph)); TRILINOS_THROW(m_mat->FillComplete()); TRILINOS_THROW(m_mat->OptimizeStorage()); // set class properties m_is_created=true; m_neq=total_nb_eq; CFdebug << "Rank " << common::PE::Comm::instance().rank() << ": Created a " << m_mat->NumGlobalCols() << " x " << m_mat->NumGlobalRows() << " trilinos matrix with " << m_mat->NumGlobalNonzeros() << " non-zero elements and " << m_num_my_elements << " local rows" << CFendl; }
void create_map_data(common::PE::CommPattern& cp, const VariablesDescriptor& variables, std::vector< int >& p2m, std::vector< int >& my_global_elements, int& num_my_elements) { // get global ids vector int *gid=(int*)cp.gid()->pack(); num_my_elements = 0; const Uint nb_vars = variables.nb_vars(); const Uint total_nb_eq = variables.size(); const Uint nb_nodes_for_rank = cp.isUpdatable().size(); my_global_elements.reserve(nb_nodes_for_rank*total_nb_eq); // Get the maximum gid, for per-equation blocked storage int local_max_gid = 0; int global_nb_gid = 0; for(Uint i = 0; i != nb_nodes_for_rank; ++i) local_max_gid = gid[i] > local_max_gid ? gid[i] : local_max_gid; common::PE::Comm::instance().all_reduce(common::PE::max(), &local_max_gid, 1, &global_nb_gid); ++global_nb_gid; // number of GIDs is the maximum + 1 CFdebug << "Number of GIDs: " << global_nb_gid << CFendl; for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx) { const Uint neq = variables.var_length(var_idx); const Uint var_offset = variables.offset(var_idx); const int var_start_gid = var_offset * global_nb_gid; for (int i=0; i<nb_nodes_for_rank; i++) { if (cp.isUpdatable()[i]) { num_my_elements += neq; const int start_gid = var_start_gid + gid[i]*neq; for(int j = 0; j != neq; ++j) { my_global_elements.push_back(start_gid+j); } } } } // process local to matrix local numbering mapper const int nb_local_nodes = num_my_elements / total_nb_eq; const int nb_ghosts = nb_nodes_for_rank - nb_local_nodes; p2m.resize(nb_nodes_for_rank*total_nb_eq); for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx) { const Uint neq = variables.var_length(var_idx); const Uint var_offset = variables.offset(var_idx); int iupd=nb_local_nodes*var_offset; int ighost=num_my_elements + nb_ghosts*var_offset; int p_idx = 0; for (int i=0; i<nb_nodes_for_rank; ++i) { const int p_start = i*total_nb_eq+var_offset; if (cp.isUpdatable()[i]) { for(Uint j = 0; j != neq; ++j) p2m[p_start + j] = iupd++; } else { for(Uint j = 0; j != neq; ++j) p2m[p_start 
+ j] = ighost++; } } } // append the ghosts at the end of the element list for(Uint var_idx = 0; var_idx != nb_vars; ++var_idx) { const Uint neq = variables.var_length(var_idx); const Uint var_offset = variables.offset(var_idx); const int var_start_gid = var_offset * global_nb_gid; for (int i=0; i<nb_nodes_for_rank; i++) { if (!cp.isUpdatable()[i]) { const int start_gid = var_start_gid + gid[i]*neq; for(int j = 0; j != neq; ++j) my_global_elements.push_back(start_gid+j); } } } delete[] gid; }