template <class DerivedMat>
void
AbstractConcreteMatrixAdapter<Epetra_RowMatrix, DerivedMat>::getGlobalRowCopy_impl(
  global_ordinal_t row,
  const ArrayView<global_ordinal_t>& indices,
  const ArrayView<scalar_t>& vals,
  size_t& nnz) const
{
  using Teuchos::as;

  local_ordinal_t local_row = this->row_map_->getLocalElement(row);
  int nnz_ret = 0;
  int rowmatrix_return_val
    = this->mat_->ExtractMyRowCopy(as<int>(local_row),
                                   as<int>(std::min(indices.size(), vals.size())),
                                   nnz_ret,
                                   vals.getRawPtr(),
                                   indices.getRawPtr());
  TEUCHOS_TEST_FOR_EXCEPTION(
    rowmatrix_return_val != 0, std::runtime_error,
    "Epetra_RowMatrix object returned error code "
    << rowmatrix_return_val << " from ExtractMyRowCopy.");
  nnz = as<size_t>(nnz_ret);

  // Epetra_RowMatrix::ExtractMyRowCopy returns local column
  // indices, so transform these into global indices.
  for (size_t i = 0; i < nnz; ++i) {
    indices[i] = this->col_map_->getGlobalElement(indices[i]);
  }
}
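// --------------------------------------------------------------------------
// Usage sketch for the row-copy pattern above (illustrative only, not from
// the source; `Adapter` stands for any adapter exposing getGlobalRowCopy
// with this signature, and `maxRowEntries` is an assumed upper bound on the
// row length).  The caller presizes both views, and the method reports the
// number of entries it actually wrote through `nnz`.
template <class Adapter>
void printGlobalRow(const Adapter& adapter,
                    typename Adapter::global_ordinal_t row,
                    size_t maxRowEntries)
{
  Teuchos::Array<typename Adapter::global_ordinal_t> inds(maxRowEntries);
  Teuchos::Array<typename Adapter::scalar_t> vals(maxRowEntries);
  size_t nnz = 0;
  adapter.getGlobalRowCopy(row, inds(), vals(), nnz); // inds() yields an ArrayView
  for (size_t k = 0; k < nnz; ++k)
    std::cout << inds[k] << " -> " << vals[k] << std::endl;
}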
RCP<Epetra_CrsMatrix> UserInputForTests::getEpetraCrsMatrix()
{
  if (M_.is_null())
    throw std::runtime_error("could not read mtx file");
  RCP<Epetra_CrsGraph> egraph = getEpetraCrsGraph();
  eM_ = rcp(new Epetra_CrsMatrix(Copy, *egraph));

  size_t maxRow = M_->getNodeMaxNumRowEntries();
  int nrows = egraph->NumMyRows();
  int base = egraph->IndexBase();
  const Epetra_BlockMap &rowMap = egraph->RowMap();
  const Epetra_BlockMap &colMap = egraph->ColMap();
  Array<int> colGid(maxRow);

  for (int i=0; i < nrows; i++) {
    ArrayView<const int> colLid;
    ArrayView<const scalar_t> nz;
    M_->getLocalRowView(i+base, colLid, nz);
    size_t rowSize = colLid.size();
    int rowGid = rowMap.GID(i+base);
    for (size_t j=0; j < rowSize; j++) {
      colGid[j] = colMap.GID(colLid[j]);
    }
    eM_->InsertGlobalValues(rowGid, rowSize, nz.getRawPtr(), colGid.getRawPtr());
  }
  eM_->FillComplete();
  return eM_;
}
template<class EpetraGlobalOrdinal>
void EpetraCrsMatrixT<EpetraGlobalOrdinal>::getLocalRowCopy(
  LocalOrdinal LocalRow,
  const ArrayView<LocalOrdinal> &Indices,
  const ArrayView<Scalar> &Values,
  size_t &NumEntries) const
{
  XPETRA_MONITOR("EpetraCrsMatrixT::getLocalRowCopy");
  int numEntries = -1;
  XPETRA_ERR_CHECK(mtx_->ExtractMyRowCopy(LocalRow, Indices.size(), numEntries,
                                          Values.getRawPtr(), Indices.getRawPtr()));
  NumEntries = numEntries;
}
template <class T>
inline void CUDANodeMemoryModel::copyFromBuffer(
  size_t size, const ArrayRCP<const T> &buffSrc, const ArrayView<T> &hostDest)
{
  CHECK_COMPUTE_BUFFER(buffSrc);
  TEUCHOS_TEST_FOR_EXCEPTION(
    (size_t)buffSrc.size() < size, std::runtime_error,
    "CUDANodeMemoryModel::copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name()
    << ">: invalid copy.  Device source buffer has size " << buffSrc.size()
    << ", which is less than the requested copy size " << size << ".");
  TEUCHOS_TEST_FOR_EXCEPTION(
    (size_t)hostDest.size() < size, std::runtime_error,
    "CUDANodeMemoryModel::copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name()
    << ">: invalid copy.  Host destination buffer has size " << hostDest.size()
    << ", which is less than the requested copy size " << size << ".");
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
  ++numCopiesD2H_;
  bytesCopiedD2H_ += size * sizeof(T);
#endif
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
  std::cerr << "copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name()
            << "> of size " << sizeof(T) * size << std::endl;
#endif
  cudaError_t err = cudaMemcpy(hostDest.getRawPtr(), buffSrc.getRawPtr(),
                               size * sizeof(T), cudaMemcpyDeviceToHost);
  TEUCHOS_TEST_FOR_EXCEPTION(
    cudaSuccess != err, std::runtime_error,
    "Kokkos::CUDANodeMemoryModel::copyFromBuffer<"
    << Teuchos::TypeNameTraits<T>::name()
    << ">(): cudaMemcpy() returned error: " << cudaGetErrorString(err));
}
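// --------------------------------------------------------------------------
// Hypothetical usage sketch for this node memory API (not from the source;
// assumes a node type with the classic allocBuffer/copyFromBuffer members
// shown above).  copyFromBuffer size-checks both extents, so the host and
// device buffers must each hold at least n elements.
template <class Node>
void fetchFromDevice(Node& node, size_t n)
{
  Teuchos::ArrayRCP<double> devBuf = node.template allocBuffer<double>(n);
  // ... device kernels fill devBuf here ...
  Teuchos::Array<double> host(n);
  node.template copyFromBuffer<double>(n, devBuf, host()); // D2H, size-checked
}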
template<typename Ordinal>
void MpiComm<Ordinal>::readySend(
  const ArrayView<const char> &sendBuffer, const int destRank) const
{
  TEUCHOS_COMM_TIME_MONITOR(
    "Teuchos::MpiComm<" << OrdinalTraits<Ordinal>::name() << ">::readySend(...)");
#ifdef TEUCHOS_DEBUG
  TEST_FOR_EXCEPTION(
    !(0 <= destRank && destRank < size_), std::logic_error,
    "Error, destRank = " << destRank << " is not in the range [0,"
    << size_ - 1 << "]!");
#endif // TEUCHOS_DEBUG
#ifdef TEUCHOS_MPI_COMM_DUMP
  if (show_dump) {
    dumpBuffer<Ordinal, char>(
      "Teuchos::MpiComm<Ordinal>::readySend(...)",
      "sendBuffer", sendBuffer.size(), sendBuffer);
  }
#endif // TEUCHOS_MPI_COMM_DUMP
  MPI_Rsend(
    const_cast<char*>(sendBuffer.getRawPtr()), sendBuffer.size(),
    MPI_CHAR, destRank, tag_, *rawMpiComm_);
  // ToDo: What about error handling???
}
template<class T2, class T1>
REFCOUNTPTR_INLINE
Teuchos::ArrayView<T2>
Teuchos::av_const_cast(const ArrayView<T1>& p1)
{
  T2 *ptr2 = const_cast<T2*>(p1.getRawPtr());
  return ArrayView<T2>(ptr2, p1.size());
  // Note: Above is just fine even if p1.get()==NULL!
}
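// --------------------------------------------------------------------------
// Usage sketch (illustrative, not from the source): av_const_cast recovers a
// non-const view from a const one.  This is only safe when the underlying
// storage really is non-const, e.g. when passing data to a legacy API that
// takes a non-const pointer but does not modify it.
void avConstCastExample()
{
  Teuchos::Array<double> a(4, 1.0);
  Teuchos::ArrayView<const double> cav = a();  // read-only view of a
  Teuchos::ArrayView<double> av = Teuchos::av_const_cast<double>(cav);
  av[0] = 2.0;  // writes through to a[0]
}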
int order(const RCP<OrderingSolution<typename Adapter::lno_t,
                                     typename Adapter::gno_t> > &solution)
{
#ifndef HAVE_ZOLTAN2_AMD
  throw std::runtime_error(
    "BUILD ERROR: AMD requested but not compiled into Zoltan2.\n"
    "Please set CMake flag Zoltan2_ENABLE_AMD:BOOL=ON.");
#else
  typedef typename Adapter::lno_t lno_t;
  typedef typename Adapter::gno_t gno_t;
  typedef typename Adapter::scalar_t scalar_t;

  int ierr = 0;
  const size_t nVtx = model->getLocalNumVertices();
  //cout << "Local num vertices" << nVtx << endl;

  ArrayView<const gno_t> edgeIds;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > wgts; // wgts are ignored in AMD
  model->getEdgeList(edgeIds, offsets, wgts);

  AMDTraits<lno_t> AMDobj;
  double Control[AMD_CONTROL];
  double Info[AMD_INFO];
  amd_defaults(Control);
  amd_control(Control);

  lno_t *perm;
  perm = (lno_t *) (solution->getPermutationRCP().getRawPtr());

  lno_t result = AMDobj.order(nVtx, offsets.getRawPtr(),
                              edgeIds.getRawPtr(), perm, Control, Info);
  if (result != AMD_OK && result != AMD_OK_BUT_JUMBLED)
    ierr = -1;

  solution->setHavePerm(true);
  return ierr;
#endif
}
void GlobalMPISession::allGather(int localVal, const ArrayView<int> &allVals)
{
  justInTimeInitialize();
  TEUCHOS_ASSERT_EQUALITY(allVals.size(), getNProc());
#ifdef HAVE_MPI
  MPI_Allgather(&localVal, 1, MPI_INT,
                allVals.getRawPtr(), 1, MPI_INT, MPI_COMM_WORLD);
#else
  allVals[0] = localVal;
#endif
}
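// --------------------------------------------------------------------------
// Usage sketch (illustrative, not from the source): every rank contributes
// one int and receives the full list.  allVals must be presized to
// getNProc(), or the TEUCHOS_ASSERT_EQUALITY above fires.
int exampleAllGather(int argc, char* argv[])
{
  Teuchos::GlobalMPISession session(&argc, &argv);
  Teuchos::Array<int> allRanks(Teuchos::GlobalMPISession::getNProc());
  Teuchos::GlobalMPISession::allGather(
    Teuchos::GlobalMPISession::getRank(), allRanks());
  return 0;
}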
template<class T>
void ArrayView<T>::assign(const ArrayView<const T>& array) const
{
  debug_assert_valid_ptr();
  debug_assert_not_null();
  if (this->getRawPtr() == array.getRawPtr() && this->size() == array.size())
    return; // Assignment to self
  debug_assert_in_range(0, array.size());
  std::copy(array.begin(), array.end(), this->begin());
  // Note: Above, in debug mode, the iterators are range checked!  In
  // optimized mode, these are raw pointers which should run very fast!
}
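// --------------------------------------------------------------------------
// Usage sketch (illustrative, not from the source): assign() deep-copies
// element values into the memory the view points at, whereas operator=
// rebinds the view to different memory.
void assignVsRebind()
{
  Teuchos::Array<int> src(3, 7), dst(3, 0);
  Teuchos::ArrayView<int> dview = dst();
  dview.assign(src());  // dst now holds {7, 7, 7}
  dview = src();        // dview now views src's data; dst is unchanged
}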
template<class EpetraGlobalOrdinal>
void EpetraCrsMatrixT<EpetraGlobalOrdinal>::replaceLocalValues(
  LocalOrdinal localRow,
  const ArrayView<const LocalOrdinal> &indices,
  const ArrayView<const Scalar> &values)
{
  XPETRA_MONITOR("EpetraCrsMatrixT::replaceLocalValues");
  {
    const std::string tfecfFuncName("replaceLocalValues");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      !isFillActive(), std::runtime_error,
      ": Fill must be active in order to call this method.  If you have already "
      "called fillComplete(), you need to call resumeFill() before you can "
      "replace values.");
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
      values.size() != indices.size(), std::runtime_error,
      ": values.size() must equal indices.size().");
  }
  XPETRA_ERR_CHECK(mtx_->ReplaceMyValues(localRow, indices.size(),
                                         values.getRawPtr(), indices.getRawPtr()));
}
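// --------------------------------------------------------------------------
// Usage sketch (illustrative, not from the source): because
// replaceLocalValues requires the fill-active state, a matrix that has
// already been fillComplete'd must be bracketed with
// resumeFill()/fillComplete().  MatrixType stands for any Xpetra/Tpetra-style
// CRS matrix with these members.
template <class MatrixType, class LO, class SC>
void replaceRowValues(MatrixType& A, LO localRow,
                      const Teuchos::ArrayView<const LO>& cols,
                      const Teuchos::ArrayView<const SC>& vals)
{
  A.resumeFill();                              // re-enter the fill-active state
  A.replaceLocalValues(localRow, cols, vals);  // entries must already exist
  A.fillComplete();                            // finalize again before use
}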
template<class T2, class T1>
REFCOUNTPTR_INLINE
Teuchos::ArrayView<T2>
Teuchos::av_reinterpret_cast(const ArrayView<T1>& p1)
{
  typedef typename ArrayView<T1>::size_type size_type;
  const int sizeOfT1 = sizeof(T1);
  const int sizeOfT2 = sizeof(T2);
  size_type size2 = (p1.size()*sizeOfT1) / sizeOfT2;
  T2 *ptr2 = reinterpret_cast<T2*>(p1.getRawPtr());
  return ArrayView<T2>(
    ptr2, size2
#ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
    , arcp_reinterpret_cast<T2>(p1.access_private_arcp())
#endif
    );
  // Note: Above is just fine even if p1.get()==NULL!
}
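// --------------------------------------------------------------------------
// Usage sketch (illustrative, not from the source): the resulting view's
// length is rescaled by sizeof(T1)/sizeof(T2), so four ints reinterpret to
// sixteen chars when int is four bytes.
void avReinterpretCastExample()
{
  Teuchos::Array<int> a(4, 0);
  Teuchos::ArrayView<char> bytes = Teuchos::av_reinterpret_cast<char>(a());
  assert((size_t) bytes.size() == (size_t) a.size() * sizeof(int));
}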
static inline void ASSIGN_SCOTCH_NUM_ARRAY(
  SCOTCH_Num **a,
  ArrayView<const SCOTCH_Num> &b,
  const RCP<const Environment> &env)
{
  if (b.size() > 0)
    *a = const_cast<SCOTCH_Num *>(b.getRawPtr());
  else {
    *a = NULL;
    // Note:  the Scotch manual says that if any rank has a non-NULL array,
    //        every process must have a non-NULL array.  In practice,
    //        however, this condition is not needed for the arrays we use.
    //        For now, we'll set these arrays to NULL, because if we
    //        allocated a dummy value here, we'd have to track whether or
    //        not we can free it.  KDD 1/23/14
  }
}
template<typename Ordinal>
RCP<CommRequest> MpiComm<Ordinal>::ireceive(
  const ArrayView<char> &recvBuffer, const int sourceRank) const
{
  TEUCHOS_COMM_TIME_MONITOR(
    "Teuchos::MpiComm<" << OrdinalTraits<Ordinal>::name() << ">::ireceive(...)");
#ifdef TEUCHOS_DEBUG
  assertRank(sourceRank, "sourceRank");
#endif // TEUCHOS_DEBUG
  MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
  MPI_Irecv(
    const_cast<char*>(recvBuffer.getRawPtr()), recvBuffer.size(), MPI_CHAR,
    sourceRank, tag_, *rawMpiComm_, &rawMpiRequest);
  return mpiCommRequest(rawMpiRequest);
  // ToDo: What about MPI error handling???
}
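// --------------------------------------------------------------------------
// Usage pattern (illustrative, not from the source; written against the
// nonmember Teuchos comm helpers, in which CommRequest is templated on the
// ordinal type; error handling omitted): post the nonblocking receive before
// the matching send, then wait on the returned request.
void exchangeExample(const Teuchos::Comm<int>& comm,
                     const char* myData, int msgSize,
                     int srcRank, int dstRank)
{
  Teuchos::ArrayRCP<char> buf(msgSize);
  Teuchos::RCP<Teuchos::CommRequest<int> > req =
    Teuchos::ireceive<int, char>(comm, buf, srcRank);
  Teuchos::send<int, char>(comm, msgSize, myData, dstRank);
  Teuchos::wait(comm, Teuchos::outArg(req));  // blocks until buf is filled
}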
RCP<Epetra_CrsGraph> UserInputForTests::getEpetraCrsGraph()
{
  if (M_.is_null())
    throw std::runtime_error("could not read mtx file");
  RCP<const tcrsGraph_t> tgraph = M_->getCrsGraph();
  RCP<const Tpetra::Map<lno_t, gno_t> > trowMap = tgraph->getRowMap();
  RCP<const Tpetra::Map<lno_t, gno_t> > tcolMap = tgraph->getColMap();

  int nElts = static_cast<int>(trowMap->getGlobalNumElements());
  int nMyElts = static_cast<int>(trowMap->getNodeNumElements());
  int base = trowMap->getIndexBase();
  ArrayView<const int> gids = trowMap->getNodeElementList();

  Epetra_BlockMap erowMap(nElts, nMyElts, gids.getRawPtr(), 1, base, *ecomm_);

  Array<int> rowSize(nMyElts);
  for (int i=0; i < nMyElts; i++) {
    rowSize[i] = static_cast<int>(M_->getNumEntriesInLocalRow(i+base));
  }

  size_t maxRow = M_->getNodeMaxNumRowEntries();
  Array<int> colGids(maxRow);
  ArrayView<const int> colLid;

  eG_ = rcp(new Epetra_CrsGraph(Copy, erowMap, rowSize.getRawPtr(), true));

  for (int i=0; i < nMyElts; i++) {
    tgraph->getLocalRowView(i+base, colLid);
    for (int j=0; j < colLid.size(); j++)
      colGids[j] = tcolMap->getGlobalElement(colLid[j]);
    eG_->InsertGlobalIndices(gids[i], rowSize[i], colGids.getRawPtr());
  }
  eG_->FillComplete();
  return eG_;
}
template <typename Adapter, typename MachineRep>
void globalWeightedCutsMessagesHopsByPart(
  const RCP<const Environment> &env,
  const RCP<const Comm<int> > &comm,
  const RCP<const GraphModel<typename Adapter::base_adapter_t> > &graph,
  const ArrayView<const typename Adapter::part_t> &parts,
  typename Adapter::part_t &numParts,
  ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > > &metrics,
  ArrayRCP<typename Adapter::scalar_t> &globalSums,
  const RCP<const MachineRep> machine)
{
  env->debug(DETAILED_STATUS, "Entering globalWeightedCutsMessagesHopsByPart");

  //////////////////////////////////////////////////////////
  // Initialize return values

  typedef typename Adapter::lno_t t_lno_t;
  typedef typename Adapter::gno_t t_gno_t;
  typedef typename Adapter::scalar_t t_scalar_t;
  typedef typename Adapter::part_t part_t;
  typedef typename Adapter::node_t t_node_t;
  typedef typename Zoltan2::GraphModel<typename Adapter::base_adapter_t>::input_t
    t_input_t;

  t_lno_t localNumVertices = graph->getLocalNumVertices();
  t_gno_t globalNumVertices = graph->getGlobalNumVertices();
  t_lno_t localNumEdges = graph->getLocalNumEdges();

  ArrayView<const t_gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  typedef GraphMetrics<t_scalar_t> mv_t;

  // get the edge ids and weights
  ArrayView<const t_gno_t> edgeIds;
  ArrayView<const t_lno_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);

  std::vector<t_scalar_t> edge_weights;
  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  int numMetrics = 4;  // "edge cuts", messages, hops, weighted hops
  if (numWeightPerEdge)
    numMetrics += numWeightPerEdge * 2;  // "weight n", weighted hops per weight n

  // add some more metrics to the array
  typedef typename ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > >::size_type
    array_size_type;
  metrics.resize(metrics.size() + numMetrics);
  for (array_size_type n = metrics.size() - numMetrics; n < metrics.size(); ++n) {
    mv_t * newMetric = new mv_t;                                  // allocate the new memory
    env->localMemoryAssertion(__FILE__, __LINE__, 1, newMetric);  // check errors
    metrics[n] = rcp(newMetric);                                  // create the new members
  }
  // MDM - this is most likely temporary to preserve the format here - we are
  // now filling a larger array so we may not have started at 0
  array_size_type next = metrics.size() - numMetrics;

  std::vector<part_t> e_parts(localNumEdges);
#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1) {
    Zoltan_DD_Struct *dd = NULL;
    MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
    int size_gnot = Zoltan2::TPL_Traits<ZOLTAN_ID_PTR, t_gno_t>::NUM_ID;
    int debug_level = 0;
    Zoltan_DD_Create(&dd, mpicomm, size_gnot, 0, sizeof(part_t),
                     localNumVertices, debug_level);

    ZOLTAN_ID_PTR ddnotneeded = NULL;  // Local IDs not needed
    Zoltan_DD_Update(dd, (ZOLTAN_ID_PTR) Ids.getRawPtr(), ddnotneeded,
                     (char *) &(parts[0]), NULL, int(localNumVertices));
    Zoltan_DD_Find(dd, (ZOLTAN_ID_PTR) edgeIds.getRawPtr(), ddnotneeded,
                   (char *) &(e_parts[0]), NULL, localNumEdges, NULL);
    Zoltan_DD_Destroy(&dd);
  } else
#endif
  {
    // else everything is local.
    // We need a global id to local index conversion; it does not exist up to
    // this point, so we need to create one.
    std::map<t_gno_t, t_lno_t> global_id_to_local_index;
    for (t_lno_t i = 0; i < localNumVertices; ++i) {
      // at the local index i we have the global index Ids[i],
      // so map Ids[i] to i.
      global_id_to_local_index[Ids[i]] = i;
    }
    for (t_lno_t i = 0; i < localNumEdges; ++i) {
      t_gno_t ei = edgeIds[i];  // ei is the global index of the neighbor
      part_t p = parts[global_id_to_local_index[ei]];
      e_parts[i] = p;
    }
  }

  RCP<const Teuchos::Comm<int> > tcomm = comm;

  env->timerStart(MACRO_TIMERS, "Communication Graph Create");
  {
    // get the vertices in each part in my part.
    std::vector<t_lno_t> part_begins(numParts, -1);
    std::vector<t_lno_t> part_nexts(localNumVertices, -1);

    // cluster vertices according to their parts;
    // create the local part graph.
    for (t_lno_t i = 0; i < localNumVertices; ++i) {
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }

    for (int weight_index = -1; weight_index < numWeightPerEdge; ++weight_index) {
      // MD: these two should be part_t,
      // but we don't want to recompile Tpetra from the beginning.
      // This can be changed when the directory is updated.
      typedef t_lno_t local_part_type;
      typedef t_gno_t global_part_type;

      typedef Tpetra::Map<local_part_type, global_part_type, t_node_t> map_t;
      Teuchos::RCP<const map_t> map = Teuchos::rcp(new map_t(numParts, 0, tcomm));

      typedef Tpetra::CrsMatrix<t_scalar_t, local_part_type, global_part_type,
                                t_node_t> tcrsMatrix_t;
      Teuchos::RCP<tcrsMatrix_t> tMatrix(new tcrsMatrix_t(map, 0));

      std::vector<global_part_type> part_neighbors(numParts);
      std::vector<t_scalar_t> part_neighbor_weights(numParts, 0);
      std::vector<t_scalar_t> part_neighbor_weights_ordered(numParts);

      // coarsen for all vertices in my part in order with parts.
      for (global_part_type i = 0; i < (global_part_type) numParts; ++i) {
        part_t num_neighbor_parts = 0;
        // get part i, and the first vertex in this part, v.
        t_lno_t v = part_begins[i];
        while (v != -1) {
          // now get the neighbors of v.
          for (t_lno_t j = offsets[v]; j < offsets[v+1]; ++j) {
            // get the part of the second vertex.
            part_t ep = e_parts[j];
            t_scalar_t ew = 1;
            if (weight_index > -1) {
              ew = e_wgts[weight_index][j];
            }
            // add it to my local part neighbors for part i.
            if (part_neighbor_weights[ep] < 0.00001) {
              part_neighbors[num_neighbor_parts++] = ep;
            }
            part_neighbor_weights[ep] += ew;
          }
          v = part_nexts[v];
        }

        // now get the part list.
        for (t_lno_t j = 0; j < num_neighbor_parts; ++j) {
          part_t neighbor_part = part_neighbors[j];
          part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
          part_neighbor_weights[neighbor_part] = 0;
        }

        // insert it into the Tpetra CrsMatrix.
        if (num_neighbor_parts > 0) {
          Teuchos::ArrayView<const global_part_type>
            destinations(&(part_neighbors[0]), num_neighbor_parts);
          Teuchos::ArrayView<const t_scalar_t>
            vals(&(part_neighbor_weights_ordered[0]), num_neighbor_parts);
          tMatrix->insertGlobalValues(i, destinations, vals);
        }
      }
      tMatrix->fillComplete();

      local_part_type num_local_parts = map->getNodeNumElements();

      Array<global_part_type> Indices;
      Array<t_scalar_t> Values;

      t_scalar_t max_edge_cut = 0;
      t_scalar_t total_edge_cut = 0;
      global_part_type max_message = 0;
      global_part_type total_message = 0;

      global_part_type total_hop_count = 0;
      t_scalar_t total_weighted_hop_count = 0;
      global_part_type max_hop_count = 0;
      t_scalar_t max_weighted_hop_count = 0;

      for (local_part_type i = 0; i < num_local_parts; i++) {
        const global_part_type globalRow = map->getGlobalElement(i);
        size_t NumEntries = tMatrix->getNumEntriesInGlobalRow(globalRow);
        Indices.resize(NumEntries);
        Values.resize(NumEntries);
        tMatrix->getGlobalRowCopy(globalRow, Indices(), Values(), NumEntries);

        t_scalar_t part_edge_cut = 0;
        global_part_type part_messages = 0;
        for (size_t j = 0; j < NumEntries; j++) {
          if (Indices[j] != globalRow) {
            part_edge_cut += Values[j];
            part_messages += 1;

            typename MachineRep::machine_pcoord_t hop_count = 0;
            machine->getHopCount(globalRow, Indices[j], hop_count);

            global_part_type hop_counts = hop_count;
            t_scalar_t weighted_hop_counts = hop_count * Values[j];

            total_hop_count += hop_counts;
            total_weighted_hop_count += weighted_hop_counts;

            if (hop_counts > max_hop_count) {
              max_hop_count = hop_counts;
            }
            if (weighted_hop_counts > max_weighted_hop_count) {
              max_weighted_hop_count = weighted_hop_counts;
            }
          }
        }
        if (part_edge_cut > max_edge_cut) {
          max_edge_cut = part_edge_cut;
        }
        total_edge_cut += part_edge_cut;
        if (part_messages > max_message) {
          max_message = part_messages;
        }
        total_message += part_messages;
      }

      t_scalar_t g_max_edge_cut = 0;
      t_scalar_t g_total_edge_cut = 0;
      global_part_type g_max_message = 0;
      global_part_type g_total_message = 0;

      global_part_type g_total_hop_count = 0;
      t_scalar_t g_total_weighted_hop_count = 0;
      global_part_type g_max_hop_count = 0;
      t_scalar_t g_max_weighted_hop_count = 0;

      try {
        Teuchos::reduceAll<int, t_scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_edge_cut, &g_max_edge_cut);
        Teuchos::reduceAll<int, global_part_type>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_message, &g_max_message);
        Teuchos::reduceAll<int, global_part_type>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_hop_count, &g_max_hop_count);
        Teuchos::reduceAll<int, t_scalar_t>(*comm, Teuchos::REDUCE_MAX, 1,
          &max_weighted_hop_count, &g_max_weighted_hop_count);

        Teuchos::reduceAll<int, t_scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_edge_cut, &g_total_edge_cut);
        Teuchos::reduceAll<int, global_part_type>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_message, &g_total_message);
        Teuchos::reduceAll<int, global_part_type>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_hop_count, &g_total_hop_count);
        Teuchos::reduceAll<int, t_scalar_t>(*comm, Teuchos::REDUCE_SUM, 1,
          &total_weighted_hop_count, &g_total_weighted_hop_count);
      }
      Z2_THROW_OUTSIDE_ERROR(*env);

      if (weight_index == -1) {
        metrics[next]->setName("md edge cuts");
      }
      else {
        std::ostringstream oss;
        oss << "md weight " << weight_index;
        metrics[next]->setName(oss.str());
      }
      metrics[next]->setMetricValue("global maximum", g_max_edge_cut);
      metrics[next]->setMetricValue("global sum", g_total_edge_cut);
      next++;

      if (weight_index == -1) {
        metrics[next]->setName("message");
        metrics[next]->setMetricValue("global maximum", g_max_message);
        metrics[next]->setMetricValue("global sum", g_total_message);
        next++;
      }

      if (weight_index == -1) {
        metrics[next]->setName("hops");
        metrics[next]->setMetricValue("global maximum", g_max_hop_count);
        metrics[next]->setMetricValue("global sum", g_total_hop_count);
        next++;
      }

      std::ostringstream oss;
      oss << "weighted hops" << weight_index;
      metrics[next]->setName(oss.str());
      metrics[next]->setMetricValue("global maximum", g_max_weighted_hop_count);
      metrics[next]->setMetricValue("global sum", g_total_weighted_hop_count);
      next++;
    }
  }
  env->timerStop(MACRO_TIMERS, "Communication Graph Create");
  env->debug(DETAILED_STATUS, "Exiting globalWeightedCutsMessagesHopsByPart");
}
template <class LocalOrdinal, class GlobalOrdinal, class Node>
void Export<LocalOrdinal, GlobalOrdinal, Node>::
setupSamePermuteExport (Teuchos::Array<GlobalOrdinal>& exportGIDs)
{
  using Teuchos::arcp;
  using Teuchos::Array;
  using Teuchos::ArrayRCP;
  using Teuchos::ArrayView;
  using Teuchos::as;
  using Teuchos::null;
  typedef LocalOrdinal LO;
  typedef GlobalOrdinal GO;
  typedef typename ArrayView<const GO>::size_type size_type;
  const Map<LO,GO,Node>& source = * (getSourceMap ());
  const Map<LO,GO,Node>& target = * (getTargetMap ());
  ArrayView<const GO> sourceGIDs = source.getNodeElementList ();
  ArrayView<const GO> targetGIDs = target.getNodeElementList ();

#ifdef HAVE_TPETRA_DEBUG
  ArrayView<const GO> rawSrcGids = sourceGIDs;
  ArrayView<const GO> rawTgtGids = targetGIDs;
#else
  const GO* const rawSrcGids = sourceGIDs.getRawPtr ();
  const GO* const rawTgtGids = targetGIDs.getRawPtr ();
#endif // HAVE_TPETRA_DEBUG
  const size_type numSrcGids = sourceGIDs.size ();
  const size_type numTgtGids = targetGIDs.size ();
  const size_type numGids = std::min (numSrcGids, numTgtGids);

  // Compute numSameIDs_: the number of initial GIDs that are the
  // same (and occur in the same order) in both Maps.  The point of
  // numSameIDs_ is for the common case of an Export where all the
  // overlapping GIDs are at the end of the source Map, but
  // otherwise the source and target Maps are the same.  This allows
  // a fast contiguous copy for the initial "same IDs."
  size_type numSameGids = 0;
  for ( ; numSameGids < numGids &&
          rawSrcGids[numSameGids] == rawTgtGids[numSameGids];
        ++numSameGids)
    {} // third clause of 'for' does everything
  ExportData_->numSameIDs_ = numSameGids;

  // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
  // exportLIDs_.  The first two arrays are IDs to be permuted, and
  // the latter two arrays are IDs to be sent out ("exported"),
  // called "export" IDs.
  //
  // IDs to permute are in both the source and target Maps, which
  // means we don't have to send or receive them, but we do have to
  // rearrange (permute) them in general.  IDs to send are in the
  // source Map, but not in the target Map.
  exportGIDs.resize (0);
  Array<LO>& permuteToLIDs = ExportData_->permuteToLIDs_;
  Array<LO>& permuteFromLIDs = ExportData_->permuteFromLIDs_;
  Array<LO>& exportLIDs = ExportData_->exportLIDs_;
  const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
  const LO numSrcLids = as<LO> (numSrcGids);
  // Iterate over the source Map's LIDs, since we only need to do
  // GID -> LID lookups for the target Map.
  for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
    const GO curSrcGid = rawSrcGids[srcLid];
    // getLocalElement() returns LINVALID if the GID isn't in the
    // target Map.  This saves us a lookup (which
    // isNodeGlobalElement() would do).
    const LO tgtLid = target.getLocalElement (curSrcGid);
    if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
      permuteToLIDs.push_back (tgtLid);
      permuteFromLIDs.push_back (srcLid);
    } else {
      exportGIDs.push_back (curSrcGid);
      exportLIDs.push_back (srcLid);
    }
  }

  // exportLIDs_ is the list of this process' LIDs that it has to
  // send out.  Since this is an Export, and therefore the target
  // Map is nonoverlapping, we know that each export LID only needs
  // to be sent to one process.  However, the source Map may be
  // overlapping, so multiple processes might send to the same LID
  // on a receiving process.
  TPETRA_ABUSE_WARNING(
    getNumExportIDs() > 0 && ! source.isDistributed(),
    std::runtime_error,
    "::setupSamePermuteExport(): Source has export LIDs but Source is not "
    "distributed globally." << std::endl
    << "Exporting to a submap of the target map.");

  // Compute exportPIDs_ ("outgoing" process IDs).
  //
  // For each GID in exportGIDs (GIDs to which this process must
  // send), find its corresponding owning process (a.k.a. "image")
  // ID in the target Map.  Store these process IDs in
  // exportPIDs_.  These are the process IDs to which the Export
  // needs to send data.
  //
  // We only need to do this if the source Map is distributed;
  // otherwise, the Export doesn't have to perform any
  // communication.
  if (source.isDistributed ()) {
    ExportData_->exportPIDs_.resize(exportGIDs.size ());
    // This call will assign any GID in the target Map with no
    // corresponding process ID a fake process ID of -1.  We'll use
    // this below to remove exports for processes that don't exist.
    const LookupStatus lookup =
      target.getRemoteIndexList (exportGIDs(),
                                 ExportData_->exportPIDs_ ());
    TPETRA_ABUSE_WARNING( lookup == IDNotPresent, std::runtime_error,
      "::setupSamePermuteExport(): The source Map has GIDs not found "
      "in the target Map.");

    // Get rid of process IDs not in the target Map.  This prevents
    // exporting to GIDs which don't belong to any process in the
    // target Map.
    if (lookup == IDNotPresent) {
      const size_type numInvalidExports =
        std::count_if (ExportData_->exportPIDs_().begin(),
                       ExportData_->exportPIDs_().end(),
                       std::bind1st (std::equal_to<int>(), -1));
      // count the number of valid exports and the total number of exports
      const size_type totalNumExports = ExportData_->exportPIDs_.size();
      if (numInvalidExports == totalNumExports) {
        // all exports are invalid; we have no exports; we can delete all exports
        exportGIDs.resize(0);
        ExportData_->exportLIDs_.resize(0);
        ExportData_->exportPIDs_.resize(0);
      }
      else {
        // some exports are valid; we need to keep the valid exports.
        // Pack and resize.
        size_type numValidExports = 0;
        for (size_type e = 0; e < totalNumExports; ++e) {
          if (ExportData_->exportPIDs_[e] != -1) {
            exportGIDs[numValidExports] = exportGIDs[e];
            ExportData_->exportLIDs_[numValidExports] = ExportData_->exportLIDs_[e];
            ExportData_->exportPIDs_[numValidExports] = ExportData_->exportPIDs_[e];
            ++numValidExports;
          }
        }
        exportGIDs.resize (numValidExports);
        ExportData_->exportLIDs_.resize (numValidExports);
        ExportData_->exportPIDs_.resize (numValidExports);
      }
    }
  }
} // setupSamePermuteExport()
template <typename User>
size_t removeUndesiredEdges(
  const RCP<const Environment> &env,
  int myRank,
  bool removeSelfEdges,
  bool removeOffProcessEdges,
  bool removeOffGroupEdges,
  ArrayView<const typename InputTraits<User>::zgid_t> &gids,
  ArrayView<const typename InputTraits<User>::zgid_t> &gidNbors,
  ArrayView<const int> &procIds,
  ArrayView<StridedData<typename InputTraits<User>::lno_t,
                        typename InputTraits<User>::scalar_t> > &edgeWeights,
  ArrayView<const typename InputTraits<User>::lno_t> &offsets,
  ArrayRCP<const typename InputTraits<User>::zgid_t> &newGidNbors, // out
  typename InputTraits<User>::scalar_t **&newWeights,              // out
  ArrayRCP<const typename InputTraits<User>::lno_t> &newOffsets)   // out
{
  typedef typename InputTraits<User>::zgid_t zgid_t;
  typedef typename InputTraits<User>::scalar_t scalar_t;
  typedef typename InputTraits<User>::lno_t lno_t;
  size_t numKeep = 0;

  size_t numVtx = offsets.size() - 1;
  size_t numNbors = gidNbors.size();

  env->localInputAssertion(__FILE__, __LINE__, "need more input",
    (!removeSelfEdges ||
      gids.size() >=
        static_cast<typename ArrayView<const zgid_t>::size_type>(numVtx)) &&
    (!removeOffProcessEdges ||
      procIds.size() >=
        static_cast<typename ArrayView<const int>::size_type>(numNbors)) &&
    (!removeOffGroupEdges ||
      procIds.size() >=
        static_cast<typename ArrayView<const int>::size_type>(numNbors)),
    BASIC_ASSERTION);

  // initialize the edge weight array
  newWeights = NULL;
  int eDim = edgeWeights.size();

  // count desired edges
  lno_t *offs = new lno_t [numVtx + 1];
  env->localMemoryAssertion(__FILE__, __LINE__, numVtx+1, offs);
  for (size_t i = 0; i < numVtx+1; i++)
    offs[i] = 0;
  ArrayRCP<const lno_t> offArray = arcp(offs, 0, numVtx+1, true);

  const lno_t *allOffs = offsets.getRawPtr();
  const zgid_t *allIds = gidNbors.getRawPtr();

  const zgid_t *vtx = NULL;
  const int *proc = NULL;

  if (gids.size() > 0)
    vtx = gids.getRawPtr();

  if (procIds.size() > 0)
    proc = procIds.getRawPtr();

  offs[0] = 0;
  for (size_t i=0; i < numVtx; i++) {
    offs[i+1] = 0;
    zgid_t vid = vtx ? vtx[i] : zgid_t(0);
    for (lno_t j=allOffs[i]; j < allOffs[i+1]; j++) {
      int owner = proc ? proc[j] : 0;
      bool keep = (!removeSelfEdges || vid != allIds[j]) &&
                  (!removeOffProcessEdges || owner == myRank) &&
                  (!removeOffGroupEdges || owner >= 0);
      if (keep)
        offs[i+1]++;
    }
  }

  // from counters to offsets
  for (size_t i=1; i < numVtx; i++)
    offs[i+1] += offs[i];

  numKeep = offs[numVtx];

  // do we need a new neighbor list?
  if (numNbors == numKeep) {
    newGidNbors = Teuchos::arcpFromArrayView(gidNbors);
    newOffsets = Teuchos::arcpFromArrayView(offsets);
    return numNbors;
  }
  else if (numKeep == 0) {
    newGidNbors = ArrayRCP<const zgid_t>(Teuchos::null);
    newOffsets = offArray;
    return 0;
  }

  // Build the subset neighbor lists (id, weight, and offset).
  zgid_t *newGids = new zgid_t [numKeep];
  env->localMemoryAssertion(__FILE__, __LINE__, numKeep, newGids);

  newGidNbors = arcp(newGids, 0, numKeep, true);
  newOffsets = offArray;

  if (eDim > 0) {
    newWeights = new scalar_t * [eDim];
    env->localMemoryAssertion(__FILE__, __LINE__, eDim, newWeights);

    if (numKeep) {
      for (int w=0; w < eDim; w++) {
        newWeights[w] = new scalar_t [numKeep];
        env->localMemoryAssertion(__FILE__, __LINE__, numKeep, newWeights[w]);
      }
    }
    else {
      for (int w=0; w < eDim; w++)
        newWeights[w] = NULL;
    }
  }

  size_t next = 0;
  for (size_t i=0; i < numVtx && next < numKeep; i++) {
    zgid_t vid = vtx ? vtx[i] : zgid_t(0);
    for (lno_t j=allOffs[i]; j < allOffs[i+1]; j++) {
      int owner = proc ? proc[j] : 0;
      bool keep = (!removeSelfEdges || vid != allIds[j]) &&
                  (!removeOffProcessEdges || owner == myRank) &&
                  (!removeOffGroupEdges || owner >= 0);
      if (keep) {
        newGids[next] = allIds[j];
        for (int w=0; w < eDim; w++) {
          newWeights[w][next] = edgeWeights[w][j];
        }
        next++;
        if (next == numKeep)
          break;
      } // if (keep)
    }
  }
  return numKeep;
}
void testIdentifierModel(std::string fname, zgno_t xdim, zgno_t ydim, zgno_t zdim,
  const RCP<const Comm<int> > &comm, bool consecutiveIds)
{
  int rank = comm->getRank();
  int fail = 0, gfail = 0;

  std::bitset<Zoltan2::NUM_MODEL_FLAGS> modelFlags = 0;
  if (consecutiveIds)
    modelFlags.set(Zoltan2::IDS_MUST_BE_GLOBALLY_CONSECUTIVE);

  RCP<const Zoltan2::Environment> env = rcp(new Zoltan2::Environment);

  //////////////////////////////////////////////////////////////
  // Use a Tpetra::CrsMatrix for the user data.
  //////////////////////////////////////////////////////////////
  typedef Tpetra::CrsMatrix<zscalar_t, zlno_t, zgno_t> tcrsMatrix_t;

  UserInputForTests *uinput;
  if (fname.size() > 0)
    uinput = new UserInputForTests(testDataFilePath, fname, comm, true);
  else
    uinput = new UserInputForTests(xdim, ydim, zdim, string(""), comm, true, true);

  RCP<tcrsMatrix_t> M = uinput->getUITpetraCrsMatrix();
  zlno_t nLocalIds = M->getNodeNumRows();
  zgno_t nGlobalIds = M->getGlobalNumRows();

  ArrayView<const zgno_t> idList = M->getRowMap()->getNodeElementList();
  std::set<zgno_t> idSet(idList.begin(), idList.end());

  //////////////////////////////////////////////////////////////
  // Create an IdentifierModel with this input
  //////////////////////////////////////////////////////////////
  typedef Zoltan2::XpetraCrsMatrixAdapter<tcrsMatrix_t> adapter_t;
  typedef Zoltan2::MatrixAdapter<tcrsMatrix_t> base_adapter_t;
  typedef Zoltan2::StridedData<zlno_t, zscalar_t> input_t;

  RCP<const adapter_t> ia = Teuchos::rcp(new adapter_t(M));

  Zoltan2::IdentifierModel<base_adapter_t> *model = NULL;
  RCP<const base_adapter_t> base_ia =
    Teuchos::rcp_dynamic_cast<const base_adapter_t>(ia);

  try {
    model = new Zoltan2::IdentifierModel<base_adapter_t>(
      base_ia, env, comm, modelFlags);
  }
  catch (std::exception &e) {
    std::cerr << rank << ") " << e.what() << std::endl;
    fail = 1;
  }
  gfail = globalFail(comm, fail);
  if (gfail)
    printFailureCode(comm, fail);

  // Test the IdentifierModel interface

  if (model->getLocalNumIdentifiers() != size_t(nLocalIds)) {
    std::cerr << rank << ") getLocalNumIdentifiers "
              << model->getLocalNumIdentifiers() << " " << nLocalIds << std::endl;
    fail = 2;
  }

  if (!fail && model->getGlobalNumIdentifiers() != size_t(nGlobalIds)) {
    std::cerr << rank << ") getGlobalNumIdentifiers "
              << model->getGlobalNumIdentifiers() << " " << nGlobalIds << std::endl;
    fail = 3;
  }

  gfail = globalFail(comm, fail);
  if (gfail)
    printFailureCode(comm, fail);

  ArrayView<const zgno_t> gids;
  ArrayView<input_t> wgts;

  model->getIdentifierList(gids, wgts);

  if (!fail && gids.size() != nLocalIds) {
    std::cerr << rank << ") getIdentifierList IDs "
              << gids.size() << " " << nLocalIds << std::endl;
    fail = 5;
  }

  if (!fail && wgts.size() != 0) {
    std::cerr << rank << ") getIdentifierList Weights "
              << wgts.size() << " " << 0 << std::endl;
    fail = 6;
  }

  for (zlno_t i = 0; !fail && i < nLocalIds; i++) {
    std::set<zgno_t>::iterator next = idSet.find(gids[i]);
    if (next == idSet.end()) {
      std::cerr << rank << ") getIdentifierList gid not found "
                << gids[i] << std::endl;
      fail = 7;
    }
  }

  if (!fail && consecutiveIds) {
    bool inARow = Zoltan2::IdentifierTraits<zgno_t>::areConsecutive(
      gids.getRawPtr(), nLocalIds);
    if (!inARow) {
      std::cerr << rank << ") getIdentifierList not consecutive" << std::endl;
      fail = 8;
    }
  }

  gfail = globalFail(comm, fail);
  if (gfail)
    printFailureCode(comm, fail);

  delete model;
  delete uinput;
}
void testCoordinateModel(std::string &fname, int nWeights,
  const RCP<const Comm<int> > &comm, bool consecutiveIds,
  bool nodeZeroHasAll, bool printInfo)
{
  int fail = 0, gfail = 0;

  if (printInfo) {
    cout << "Test: " << fname << endl;
    cout << "Num Weights: " << nWeights;
    cout << " want consec ids: " << consecutiveIds;
    cout << " proc 0 has all: " << nodeZeroHasAll;
    cout << endl;
  }

  //////////////////////////////////////////////////////////////
  // Input data
  //////////////////////////////////////////////////////////////
  typedef Tpetra::MultiVector<scalar_t, lno_t, gno_t, node_t> mv_t;

  RCP<UserInputForTests> uinput;
  try {
    uinput = rcp(new UserInputForTests(testDataFilePath, fname, comm, true));
  }
  catch (std::exception &e) {
    fail = 1;
  }
  TEST_FAIL_AND_EXIT(*comm, !fail, "input constructor", 1);

  RCP<mv_t> coords;
  try {
    coords = uinput->getUICoordinates();
  }
  catch (std::exception &e) {
    fail = 2;
  }
  TEST_FAIL_AND_EXIT(*comm, !fail, "getting coordinates", 1);

  int coordDim = coords->getNumVectors();
  TEST_FAIL_AND_EXIT(*comm, coordDim <= 3, "dim 3 at most", 1);

  const scalar_t *x = NULL, *y = NULL, *z = NULL;
  x = coords->getData(0).getRawPtr();
  if (coordDim > 1) {
    y = coords->getData(1).getRawPtr();
    if (coordDim > 2)
      z = coords->getData(2).getRawPtr();
  }

  // Check that these coordinates are correct.
  int nLocalIds = coords->getLocalLength();
  ArrayView<const gno_t> idList = coords->getMap()->getNodeElementList();

  int nGlobalIds = 0;
  if (nodeZeroHasAll) {
    if (comm->getRank() > 0) {
      x = y = z = NULL;
      nLocalIds = 0;
    }
    else {
      nGlobalIds = nLocalIds;
    }
    Teuchos::broadcast<int, int>(*comm, 0, &nGlobalIds);
  }
  else {
    nGlobalIds = coords->getGlobalLength();
  }

  Array<ArrayRCP<const scalar_t> > coordWeights(nWeights);

  if (nLocalIds > 0) {
    for (int wdim = 0; wdim < nWeights; wdim++) {
      scalar_t *w = new scalar_t[nLocalIds];
      for (int i = 0; i < nLocalIds; i++) {
        w[i] = ((i % 2) + 1) + wdim;
      }
      coordWeights[wdim] = Teuchos::arcp(w, 0, nLocalIds);
    }
  }

  //////////////////////////////////////////////////////////////
  // Create a BasicVectorAdapter object.
  //////////////////////////////////////////////////////////////
  typedef Zoltan2::BasicVectorAdapter<mv_t> ia_t;
  typedef Zoltan2::VectorAdapter<mv_t> base_ia_t;

  RCP<ia_t> ia;

  if (nWeights == 0) {  // use the simpler constructor
    try {
      ia = rcp(new ia_t(nLocalIds, idList.getRawPtr(), x, y, z));
    }
    catch (std::exception &e) {
      fail = 3;
    }
  }
  else {
    std::vector<const scalar_t *> values, weights;
    std::vector<int> valueStrides, weightStrides;  // default is 1
    values.push_back(x);
    if (y) {
      values.push_back(y);
      if (z)
        values.push_back(z);
    }
    for (int wdim = 0; wdim < nWeights; wdim++) {
      weights.push_back(coordWeights[wdim].getRawPtr());
    }
    try {
      ia = rcp(new ia_t(nLocalIds, idList.getRawPtr(),
                        values, valueStrides, weights, weightStrides));
    }
    catch (std::exception &e) {
      fail = 4;
    }
  }

  RCP<base_ia_t> base_ia = Teuchos::rcp_implicit_cast<base_ia_t>(ia);
  TEST_FAIL_AND_EXIT(*comm, !fail, "making input adapter", 1);

  //////////////////////////////////////////////////////////////
  // Create a CoordinateModel with this input
  //////////////////////////////////////////////////////////////
  typedef Zoltan2::StridedData<lno_t, scalar_t> input_t;
  typedef std::bitset<Zoltan2::NUM_MODEL_FLAGS> modelFlags_t;
  typedef Zoltan2::CoordinateModel<base_ia_t> model_t;
  modelFlags_t modelFlags;

  if (consecutiveIds)
    modelFlags.set(Zoltan2::IDS_MUST_BE_GLOBALLY_CONSECUTIVE);

  RCP<const Zoltan2::Environment> env = rcp(new Zoltan2::Environment);
  RCP<model_t> model;

  try {
    model = rcp(new model_t(base_ia.getRawPtr(), env, comm, modelFlags));
  }
  catch (std::exception &e) {
    fail = 5;
  }
  TEST_FAIL_AND_EXIT(*comm, !fail, "making model", 1);

  // Test the CoordinateModel interface

  if (model->getCoordinateDim() != coordDim)
    fail = 6;

  if (!fail && model->getLocalNumCoordinates() != size_t(nLocalIds))
    fail = 7;

  if (!fail && model->getGlobalNumCoordinates() != size_t(nGlobalIds))
    fail = 8;

  if (!fail && model->getNumWeightsPerCoordinate() != nWeights)
    fail = 9;

  gfail = globalFail(comm, fail);
  if (gfail)
    printFailureCode(comm, fail);

  ArrayView<const gno_t> gids;
  ArrayView<input_t> xyz;
  ArrayView<input_t> wgts;

  model->getCoordinates(gids, xyz, wgts);

  if (!fail && gids.size() != nLocalIds)
    fail = 10;

  for (int i = 0; !fail && i < nLocalIds; i++) {
    if (gids[i] != idList[i])
      fail = 11;
  }

  if (!fail && wgts.size() != nWeights)
    fail = 12;

  const scalar_t *vals[3] = {x, y, z};

  for (int dim = 0; !fail && dim < coordDim; dim++) {
    for (int i = 0; !fail && i < nLocalIds; i++) {
      if (xyz[dim][i] != vals[dim][i])
        fail = 13;
    }
  }

  for (int wdim = 0; !fail && wdim < nWeights; wdim++) {
    for (int i = 0; !fail && i < nLocalIds; i++) {
      if (wgts[wdim][i] != coordWeights[wdim][i])
        fail = 14;
    }
  }

  if (!fail && consecutiveIds) {
    bool inARow = Zoltan2::IdentifierTraits<gno_t>::areConsecutive(
      gids.getRawPtr(), nLocalIds);
    if (!inARow)
      fail = 15;
  }

  gfail = globalFail(comm, fail);
  if (gfail)
    printFailureCode(comm, fail);
}
template<class EpetraGlobalOrdinal>
void EpetraCrsMatrixT<EpetraGlobalOrdinal>::insertLocalValues(
  LocalOrdinal localRow,
  const ArrayView<const LocalOrdinal> &cols,
  const ArrayView<const Scalar> &vals)
{
  XPETRA_MONITOR("EpetraCrsMatrixT::insertLocalValues");
  XPETRA_ERR_CHECK(mtx_->InsertMyValues(localRow, vals.size(),
                                        vals.getRawPtr(), cols.getRawPtr()));
}
int main(int argc, char *argv[])
{
  // MEMORY_CHECK(true, "Before initializing MPI");

  Teuchos::GlobalMPISession session(&argc, &argv, NULL);
  RCP<const Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int rank = comm->getRank();
  int nprocs = comm->getSize();

  MEMORY_CHECK(rank==0, "After initializing MPI");

  if (rank==0)
    cout << "Number of processes: " << nprocs << endl;

  // Default values
  double numGlobalCoords = 1000;
  int numTestCuts = 1;
  int nWeights = 0;
  string timingType("no_timers");
  string debugLevel("basic_status");
  string memoryOn("memoryOn");
  string memoryOff("memoryOff");
  string memoryProcs("0");
  bool doMemory = false;
  int numGlobalParts = nprocs;

  CommandLineProcessor commandLine(false, true);
  commandLine.setOption("size", &numGlobalCoords,
    "Approximate number of global coordinates.");
  commandLine.setOption("testCuts", &numTestCuts,
    "Number of test cuts to make when looking for bisector.");
  commandLine.setOption("numParts", &numGlobalParts,
    "Number of parts (default is one per proc).");
  commandLine.setOption("nWeights", &nWeights,
    "Number of weights per coordinate, zero implies uniform weights.");

  string balanceCount("balance_object_count");
  string balanceWeight("balance_object_weight");
  string mcnorm1("multicriteria_minimize_total_weight");
  string mcnorm2("multicriteria_balance_total_maximum");
  string mcnorm3("multicriteria_minimize_maximum_weight");

  string objective(balanceWeight);  // default

  string doc(balanceCount);
  doc.append(": ignore weights\n");
  doc.append(balanceWeight);
  doc.append(": balance on first weight\n");
  doc.append(mcnorm1);
  doc.append(": given multiple weights, balance their total.\n");
  doc.append(mcnorm3);
  doc.append(": given multiple weights, balance the maximum for each coordinate.\n");
  doc.append(mcnorm2);
  doc.append(": given multiple weights, balance the L2 norm of the weights.\n");

  commandLine.setOption("objective", &objective, doc.c_str());
  commandLine.setOption("timers", &timingType,
    "no_timers, micro_timers, macro_timers, both_timers, test_timers");
  commandLine.setOption("debug", &debugLevel,
    "no_status, basic_status, detailed_status, verbose_detailed_status");
  commandLine.setOption(memoryOn.c_str(), memoryOff.c_str(), &doMemory,
    "do memory profiling");
  commandLine.setOption("memoryProcs", &memoryProcs,
    "list of processes that output memory usage");

  CommandLineProcessor::EParseCommandLineReturn rc =
    commandLine.parse(argc, argv);

  if (rc != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) {
    if (rc == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) {
      if (rank==0) cout << "PASS" << endl;
      return 1;
    }
    else {
      if (rank==0) cout << "FAIL" << endl;
      return 0;
    }
  }

  //MEMORY_CHECK(doMemory && rank==0, "After processing parameters");

  zgno_t globalSize = static_cast<zgno_t>(numGlobalCoords);

  RCP<tMVector_t> coordinates = getMeshCoordinates(comm, globalSize);
  size_t numLocalCoords = coordinates->getLocalLength();

#if 0
  comm->barrier();
  for (int p=0; p < nprocs; p++) {
    if (p==rank) {
      cout << "Rank " << rank << ", " << numLocalCoords << " coords" << endl;
      const zscalar_t *x = coordinates->getData(0).getRawPtr();
      const zscalar_t *y = coordinates->getData(1).getRawPtr();
      const zscalar_t *z = coordinates->getData(2).getRawPtr();
      for (zlno_t i=0; i < numLocalCoords; i++)
        cout << " " << x[i] << " " << y[i] << " " << z[i] << endl;
    }
    cout.flush();
    comm->barrier();
  }
#endif

  Array<ArrayRCP<zscalar_t> > weights(nWeights);

  if (nWeights > 0) {
    int wt = 0;
    zscalar_t scale = 1.0;
    for (int i=0; i < nWeights; i++) {
      weights[i] =
        makeWeights(comm, numLocalCoords, weightTypes(wt++), scale, rank);
      if (wt == numWeightTypes) {
        wt = 0;
        scale++;
      }
    }
  }

  MEMORY_CHECK(doMemory && rank==0, "After creating input");

  // Create an input adapter.
  const RCP<const tMap_t> &coordmap = coordinates->getMap();
  ArrayView<const zgno_t> ids = coordmap->getNodeElementList();
  const zgno_t *globalIds = ids.getRawPtr();

  size_t localCount = coordinates->getLocalLength();
  typedef Zoltan2::BasicVectorAdapter<tMVector_t> inputAdapter_t;
  RCP<inputAdapter_t> ia;

  if (nWeights == 0) {
    ia = rcp(new inputAdapter_t(localCount, globalIds,
      coordinates->getData(0).getRawPtr(),
      coordinates->getData(1).getRawPtr(),
      coordinates->getData(2).getRawPtr(),
      1, 1, 1));
  }
  else {
    vector<const zscalar_t *> values(3);
    for (int i=0; i < 3; i++)
      values[i] = coordinates->getData(i).getRawPtr();
    vector<int> valueStrides(0);   // implies stride is one
    vector<const zscalar_t *> weightPtrs(nWeights);
    for (int i=0; i < nWeights; i++)
      weightPtrs[i] = weights[i].getRawPtr();
    vector<int> weightStrides(0);  // implies stride is one
    ia = rcp(new inputAdapter_t(localCount, globalIds,
      values, valueStrides, weightPtrs, weightStrides));
  }

  MEMORY_CHECK(doMemory && rank==0, "After creating input adapter");

  // Parameters

  Teuchos::ParameterList params;

  if (timingType != "no_timers") {
    params.set("timer_output_stream", "std::cout");
    params.set("timer_type", timingType);
  }

  if (doMemory) {
    params.set("memory_output_stream", "std::cout");
    params.set("memory_procs", memoryProcs);
  }

  params.set("debug_output_stream", "std::cerr");
  params.set("debug_procs", "0");

  if (debugLevel != "basic_status") {
    params.set("debug_level", debugLevel);
  }

  params.set("algorithm", "rcb");
  params.set("partitioning_objective", objective);

  double tolerance = 1.1;
  params.set("imbalance_tolerance", tolerance);

  if (numGlobalParts != nprocs)
    params.set("num_global_parts", numGlobalParts);

  if (rank==0) {
    cout << "Number of parts: " << numGlobalParts << endl;
  }

  // Create a problem, solve it, and display the quality.

  Zoltan2::PartitioningProblem<inputAdapter_t> problem(&(*ia), &params);

  problem.solve();

  comm->barrier();

  problem.printTimers();

  comm->barrier();

  if (rank == 0) {
    cout << "PASS" << endl;
  }

  return 0;
}
void EpetraCrsGraph::insertLocalIndices(
  int localRow, const ArrayView<const int> &indices)
{
  XPETRA_MONITOR("EpetraCrsGraph::insertLocalIndices");
  // there is no const in the Epetra interface :(
  int* indices_rawPtr = const_cast<int*>(indices.getRawPtr());
  XPETRA_ERR_CHECK(graph_->InsertMyIndices(localRow, indices.size(),
                                           indices_rawPtr));
}
template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node,
          class LocalMatOps>
void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::
Build(Level& currentLevel) const
{
  FactoryMonitor m(*this, "Matrix filtering", currentLevel);

  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
  if (currentLevel.Get<bool>("Filtering",
        currentLevel.GetFactoryManager()->GetFactory("Filtering").get()) == false) {
    GetOStream(Runtime0) << "Filtered matrix is not being constructed as no "
      "filtering is being done" << std::endl;
    Set(currentLevel, "A", A);
    return;
  }

  size_t blkSize = A->GetFixedBlockSize();

  const ParameterList& pL = GetParameterList();
  bool lumping = pL.get<bool>("lumping");
  if (lumping)
    GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;

  RCP<GraphBase> G = Get< RCP<GraphBase> >(currentLevel, "Graph");

  SC zero = Teuchos::ScalarTraits<SC>::zero();

  // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
  // if an entry of the left matrix is zero, it does not compute or store the
  // zero value.
  //
  // This trick allows us to bypass constructing a new matrix.  Instead, we
  // make a deep copy of the original one, and fill it in with zeros, which
  // are ignored during the prolongator smoothing.
  RCP<Matrix> filteredA = MatrixFactory::Build(A->getCrsGraph());

  filteredA->resumeFill();

  ArrayView<const LO> inds;
  ArrayView<const SC> valsA;
#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
  ArrayView<SC> vals;
#else
  Array<SC> vals;
#endif

  Array<char> filter(blkSize * G->GetImportMap()->getNodeNumElements(), 0);

  size_t numGRows = G->GetNodeNumVertices();
  for (size_t i = 0; i < numGRows; i++) {
    // Set up the filtering array
    ArrayView<const LO> indsG = G->getNeighborVertices(i);
    for (size_t j = 0; j < as<size_t>(indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 1;

    for (size_t k = 0; k < blkSize; k++) {
      LO row = i*blkSize + k;

      A->getLocalRowView(row, inds, valsA);

      size_t nnz = inds.size();
      if (nnz == 0)
        continue;

#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
      // Transform ArrayView<const SC> into ArrayView<SC>
      ArrayView<const SC> vals1;
      filteredA->getLocalRowView(row, inds, vals1);
      vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);

      memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC));
#else
      vals = Array<SC>(valsA);
#endif

      if (lumping == false) {
        for (size_t j = 0; j < nnz; j++)
          if (!filter[inds[j]])
            vals[j] = zero;
      }
      else {
        LO diagIndex = -1;
        SC diagExtra = zero;

        for (size_t j = 0; j < nnz; j++) {
          if (filter[inds[j]])
            continue;

          if (inds[j] == row) {
            // Remember the diagonal position
            diagIndex = j;
          }
          else {
            diagExtra += vals[j];
          }
          vals[j] = zero;
        }

        // Lump dropped entries
        // NOTE
        //  * Does it make sense to lump for elasticity?
        //  * Is it different for diffusion and elasticity?
        if (diagIndex != -1)
          vals[diagIndex] += diagExtra;
      }

#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
      // Because we used a column map in the construction of the matrix,
      // we can just use replaceLocalValues here instead of replaceGlobalValues
      filteredA->replaceLocalValues(row, inds, vals);
#endif
    }

    // Reset the filtering array
    for (size_t j = 0; j < as<size_t>(indsG.size()); j++)
      for (size_t k = 0; k < blkSize; k++)
        filter[indsG[j]*blkSize+k] = 0;
  }

  RCP<ParameterList> fillCompleteParams(new ParameterList);
  fillCompleteParams->set("No Nonlocal Changes", true);
  filteredA->fillComplete(fillCompleteParams);

  filteredA->SetFixedBlockSize(blkSize);

  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
    // Reuse the max eigenvalue from A.  It is unclear which eigenvalue is
    // best for the smoothing, but we may already have the D^{-1}A estimate
    // in A, so we may as well use it.
    // NOTE: ML does that too.
    filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
  }

  Set(currentLevel, "A", filteredA);
}