// Copies one row of the wrapped Epetra_RowMatrix, addressed by global row
// id, into the caller's buffers and reports the number of entries copied.
//
// row      global id of the requested row
// indices  receives the global column indices of the row's entries
// vals     receives the matching values
// nnz      set to the number of entries written
//
// Throws std::runtime_error if ExtractMyRowCopy reports a nonzero code.
void
  AbstractConcreteMatrixAdapter<
    Epetra_RowMatrix,
    DerivedMat>::getGlobalRowCopy_impl(global_ordinal_t row,
                                       const ArrayView<global_ordinal_t>& indices,
                                       const ArrayView<scalar_t>& vals,
                                       size_t& nnz) const
  {
    using Teuchos::as;

    const local_ordinal_t lclRow = this->row_map_->getLocalElement(row);

    // Only as many entries as fit in the smaller of the two output
    // buffers may be written.
    const int capacity = as<int>(std::min(indices.size(), vals.size()));

    int numCopied = 0;
    const int epetraErr =
      this->mat_->ExtractMyRowCopy(as<int>(lclRow),
                                   capacity,
                                   numCopied,
                                   vals.getRawPtr(),
                                   indices.getRawPtr());
    TEUCHOS_TEST_FOR_EXCEPTION( epetraErr != 0,
                        std::runtime_error,
                        "Epetra_RowMatrix object returned error code "
                        << epetraErr << " from ExtractMyRowCopy." );
    nnz = as<size_t>(numCopied);

    // The extracted column indices are local; map each one to its global
    // id in place through the column map.
    for( size_t k = 0; k != nnz; ++k ){
      const global_ordinal_t gblCol = this->col_map_->getGlobalElement(indices[k]);
      indices[k] = gblCol;
    }
  }
Ejemplo n.º 2
0
// Builds an Epetra_CrsMatrix from the Tpetra matrix M_, using the graph
// returned by getEpetraCrsGraph(). The result is stored in eM_ and returned.
//
// Throws std::runtime_error if M_ is null.
RCP<Epetra_CrsMatrix> UserInputForTests::getEpetraCrsMatrix()
{
  if (M_.is_null())
    throw std::runtime_error("could not read mtx file");
  RCP<Epetra_CrsGraph> egraph = getEpetraCrsGraph();
  eM_ = rcp(new Epetra_CrsMatrix(Copy, *egraph));

  size_t maxRow = M_->getNodeMaxNumRowEntries();
  int nrows = egraph->NumMyRows();
  const Epetra_BlockMap &rowMap = egraph->RowMap();
  const Epetra_BlockMap &colMap = egraph->ColMap();

  // Scratch buffer for one row's global column ids, sized for the longest row.
  Array<int> colGid(maxRow);

  for (int i=0; i < nrows; i++){
    ArrayView<const int> colLid;
    ArrayView<const scalar_t> nz;
    // Local row indices are zero-based regardless of the map's index base
    // (the base only applies to global ids), so the local index is used
    // as-is for both the Tpetra row view and the Epetra LID->GID lookup.
    M_->getLocalRowView(i, colLid, nz);
    size_t rowSize = colLid.size();
    int rowGid = rowMap.GID(i);
    for (size_t j=0; j < rowSize; j++){
      colGid[j] = colMap.GID(colLid[j]);
    }
    eM_->InsertGlobalValues(
      rowGid, rowSize, nz.getRawPtr(), colGid.getRawPtr());
  }
  eM_->FillComplete();
  return eM_;
}
  // Copies the entries of local row LocalRow into Indices / Values and sets
  // NumEntries to the number of entries in the row.
  //
  // The length handed to Epetra is the smaller of the two output views, so
  // a Values view shorter than Indices cannot be overrun; a row that does
  // not fit is reported through XPETRA_ERR_CHECK instead.
  void EpetraCrsMatrixT<EpetraGlobalOrdinal>::getLocalRowCopy(LocalOrdinal LocalRow, const ArrayView<LocalOrdinal> &Indices, const ArrayView<Scalar> &Values, size_t &NumEntries) const {
    XPETRA_MONITOR("EpetraCrsMatrixT::getLocalRowCopy");

    // ExtractMyRowCopy takes a single length covering both output arrays.
    const int capacity = static_cast<int>(Values.size() < Indices.size() ? Values.size() : Indices.size());

    int numEntries = -1;
    XPETRA_ERR_CHECK(mtx_->ExtractMyRowCopy(LocalRow, capacity, numEntries, Values.getRawPtr(), Indices.getRawPtr()));
    NumEntries = numEntries;
  }
Ejemplo n.º 4
0
  template <class T> inline
  void CUDANodeMemoryModel::copyFromBuffer(size_t size, const ArrayRCP<const T> &buffSrc, const ArrayView<T> &hostDest) {
    CHECK_COMPUTE_BUFFER(buffSrc);
    TEUCHOS_TEST_FOR_EXCEPTION( (size_t)buffSrc.size() < size, std::runtime_error,
      "CUDANodeMemoryModel::copyFromBuffer<" 
      << Teuchos::TypeNameTraits<T>::name () 
      << ">: invalid copy.  Device source buffer has size " << buffSrc.size () 
      << ", which is less than the requested copy size " << size << ".");
    TEUCHOS_TEST_FOR_EXCEPTION( (size_t)hostDest.size() < size, std::runtime_error,
      "CUDANodeMemoryModel::copyFromBuffer<" 
      << Teuchos::TypeNameTraits<T>::name () 
      << ">: invalid copy.  Host destination buffer has size " << hostDest.size () 
      << ", which is less than the requested copy size " << size << ".");
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_PROFILING
    ++numCopiesD2H_;
    bytesCopiedD2H_ += size*sizeof(T);
#endif
#ifdef HAVE_KOKKOSCLASSIC_CUDA_NODE_MEMORY_TRACE
    std::cerr << "copyFromBuffer<" << Teuchos::TypeNameTraits<T>::name() << "> of size " << sizeof(T) * size << std::endl;
#endif
    cudaError_t err = cudaMemcpy( hostDest.getRawPtr(), buffSrc.getRawPtr(), size*sizeof(T), cudaMemcpyDeviceToHost);
    TEUCHOS_TEST_FOR_EXCEPTION( cudaSuccess != err, std::runtime_error,
      "Kokkos::CUDANodeMemoryModel::copyFromBuffer<"
      << Teuchos::TypeNameTraits<T>::name () 
      << ">(): cudaMemcpy() returned error: " << cudaGetErrorString (err) 
      );
  }
Ejemplo n.º 5
0
// Sends the bytes of sendBuffer to process destRank with MPI_Rsend, using
// this communicator's tag_ and raw MPI communicator.
//
// sendBuffer  bytes to send
// destRank    rank of the receiving process
//
// Throws std::logic_error (TEUCHOS_DEBUG builds only) if destRank is outside
// [0, size_-1], and std::runtime_error if MPI_Rsend does not return
// MPI_SUCCESS.
void MpiComm<Ordinal>::readySend(
    const ArrayView<const char> &sendBuffer,
    const int destRank
) const
{
    TEUCHOS_COMM_TIME_MONITOR(
        "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::readySend(...)"
    );
#ifdef TEUCHOS_DEBUG
    TEST_FOR_EXCEPTION(
        ! ( 0 <= destRank && destRank < size_ ), std::logic_error
        ,"Error, destRank = " << destRank << " is not"
        " in the range [0,"<<size_-1<<"]!"
    );
#endif // TEUCHOS_DEBUG
#ifdef TEUCHOS_MPI_COMM_DUMP
    // NOTE(review): `bytes` is not declared in this function; this branch
    // looks like it would not compile when TEUCHOS_MPI_COMM_DUMP is
    // defined -- confirm against dumpBuffer's signature before enabling.
    if(show_dump) {
        dumpBuffer<Ordinal,char>(
            "Teuchos::MpiComm<Ordinal>::readySend(...)"
            ,"sendBuffer", bytes, sendBuffer
        );
    }
#endif // TEUCHOS_MPI_COMM_DUMP
    // The MPI C binding takes a non-const buffer pointer, hence the cast;
    // the data is only read.
    const int err = MPI_Rsend(
        const_cast<char*>(sendBuffer.getRawPtr()),sendBuffer.size(),MPI_CHAR,destRank,tag_,*rawMpiComm_
    );
    // The return code only carries information when the communicator's error
    // handler returns instead of aborting; surface it rather than drop it.
    TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error
        ,"Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::readySend(...): "
        "MPI_Rsend() failed with error code " << err << "."
    );
}
Ejemplo n.º 6
0
REFCOUNTPTR_INLINE
Teuchos::ArrayView<T2>
Teuchos::av_const_cast(const ArrayView<T1>& p1)
{
  // Build a view of the same length over the same storage, with the element
  // type changed from T1 to T2 through const_cast.  A null raw pointer in
  // p1 is passed through unchanged.
  return ArrayView<T2>(const_cast<T2*>(p1.getRawPtr()), p1.size());
}
Ejemplo n.º 7
0
    // Computes an AMD ordering of the local graph held in `model` and writes
    // the permutation into the storage owned by `solution`.
    //
    // Returns 0 when AMD reports AMD_OK or AMD_OK_BUT_JUMBLED, -1 otherwise.
    // Without HAVE_ZOLTAN2_AMD the call always throws std::runtime_error.
    int order(const RCP<OrderingSolution<typename Adapter::lno_t,
                                         typename Adapter::gno_t> > &solution)
    {
#ifndef HAVE_ZOLTAN2_AMD
  throw std::runtime_error(
        "BUILD ERROR: AMD requested but not compiled into Zoltan2.\n"
        "Please set CMake flag Zoltan2_ENABLE_AMD:BOOL=ON.");
#else
      typedef typename Adapter::lno_t lno_t;
      typedef typename Adapter::scalar_t scalar_t;

      const size_t numLocalVtx = model->getLocalNumVertices();

      // Fetch the adjacency structure; the weights are retrieved only
      // because getEdgeList requires the argument -- AMD ignores them.
      ArrayView<const gno_t> adjacency;
      ArrayView<const lno_t> rowOffsets;
      ArrayView<StridedData<lno_t, scalar_t> > unusedWeights;
      model->getEdgeList(adjacency, rowOffsets, unusedWeights);

      // Default AMD parameters; amd_control receives the same array.
      double Control[AMD_CONTROL];
      double Info[AMD_INFO];
      amd_defaults(Control);
      amd_control(Control);

      // AMD writes the permutation directly into the solution's array.
      lno_t *permutation = (lno_t *) (solution->getPermutationRCP().getRawPtr());

      AMDTraits<lno_t> amdDriver;
      const lno_t amdStatus = amdDriver.order(numLocalVtx, rowOffsets.getRawPtr(),
                                              adjacency.getRawPtr(), permutation,
                                              Control, Info);

      const bool amdSucceeded =
        (amdStatus == AMD_OK) || (amdStatus == AMD_OK_BUT_JUMBLED);

      solution->setHavePerm(true);
      return amdSucceeded ? 0 : -1;
#endif
    }
Ejemplo n.º 8
0
// Gathers one int from every process into allVals.
//
// allVals must have exactly getNProc() entries (checked with
// TEUCHOS_ASSERT_EQUALITY).  With HAVE_MPI the gather runs over
// MPI_COMM_WORLD; otherwise the local value is stored in allVals[0].
void GlobalMPISession::allGather(int localVal, const ArrayView<int> &allVals)
{
  justInTimeInitialize();
  TEUCHOS_ASSERT_EQUALITY(allVals.size(), getNProc());
#ifdef HAVE_MPI
  int *const gathered = allVals.getRawPtr();
  MPI_Allgather( &localVal, 1, MPI_INT, gathered, 1, MPI_INT, MPI_COMM_WORLD);
#else
  allVals[0] = localVal;
#endif
}
Ejemplo n.º 9
0
// Copies the elements of `array` into the storage this view refers to.
// Nothing is copied when `array` refers to exactly the same storage and
// length as this view.
void ArrayView<T>::assign(const ArrayView<const T>& array) const
{
  debug_assert_valid_ptr();
  debug_assert_not_null();

  const bool sameView =
    (this->getRawPtr() == array.getRawPtr()) && (this->size() == array.size());

  if (!sameView) {
    // The incoming range [0, array.size()) is validated against this view
    // by the debug assertion before any element is written.
    debug_assert_in_range(0,array.size());
    std::copy( array.begin(), array.end(), this->begin() );
  }
}
  // Replaces the values at the given local column indices of local row
  // localRow by calling ReplaceMyValues on the wrapped matrix.
  //
  // Throws std::runtime_error if fill is not active or if `values` and
  // `indices` differ in length.
  void EpetraCrsMatrixT<EpetraGlobalOrdinal>::replaceLocalValues(LocalOrdinal localRow, const ArrayView< const LocalOrdinal > &indices, const ArrayView< const Scalar > &values) {
    XPETRA_MONITOR("EpetraCrsMatrixT::replaceLocalValues");

    // Presumably read by the ..._CLASS_FUNC macros below to label their
    // messages; the variable name is kept exactly as it was.
    const std::string tfecfFuncName("replaceLocalValues");

    const bool fillInactive = ! isFillActive();
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(fillInactive, std::runtime_error,
                                          ": Fill must be active in order to call this method.  If you have already "
                                          "called fillComplete(), you need to call resumeFill() before you can "
                                          "replace values.");

    const bool lengthMismatch = (values.size() != indices.size());
    TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(lengthMismatch,
                                          std::runtime_error, ": values.size() must equal indices.size().");

    XPETRA_ERR_CHECK(mtx_->ReplaceMyValues(localRow, indices.size(), values.getRawPtr(), indices.getRawPtr()));
  }
Ejemplo n.º 11
0
REFCOUNTPTR_INLINE
Teuchos::ArrayView<T2>
Teuchos::av_reinterpret_cast(const ArrayView<T1>& p1)
{
  typedef typename ArrayView<T1>::size_type size_type;

  // The new view spans the same bytes, so its length is the byte count of
  // p1 divided (integer division) by the size of one T2.
  const int bytesPerSrcElem = sizeof(T1);
  const int bytesPerDstElem = sizeof(T2);
  const size_type numDstElems = (p1.size()*bytesPerSrcElem) / bytesPerDstElem;

  // A null raw pointer in p1 is passed through unchanged.
  return ArrayView<T2>(
    reinterpret_cast<T2*>(p1.getRawPtr()), numDstElems
#ifdef HAVE_TEUCHOS_ARRAY_BOUNDSCHECK
    ,arcp_reinterpret_cast<T2>(p1.access_private_arcp())
#endif
    );
}
Ejemplo n.º 12
0
 // Points *a at the data viewed by b, or sets it to NULL when b is empty.
 // No memory is allocated or copied; `env` is not used.
 static inline void ASSIGN_SCOTCH_NUM_ARRAY(
   SCOTCH_Num **a,
   ArrayView<const SCOTCH_Num> &b,
   const RCP<const Environment> &env)
 {
   if (b.size() <= 0) {
     // Note:  per the Scotch manual, if any rank passes a non-NULL array
     //        then every process must pass a non-NULL array.  In practice
     //        this has not been needed for the arrays used here, so NULL
     //        is used; a dummy allocation would have to be tracked so it
     //        could be freed later.  KDD 1/23/14
     *a = NULL;
     return;
   }

   // Scotch takes non-const pointers, so constness is cast away here.
   *a = const_cast<SCOTCH_Num *> (b.getRawPtr());
 }
Ejemplo n.º 13
0
// Posts a nonblocking receive (MPI_Irecv) of up to recvBuffer.size() bytes
// from process sourceRank, using this communicator's tag_ and raw MPI
// communicator, and returns a request wrapping the MPI_Request.
//
// recvBuffer  destination for the incoming bytes
// sourceRank  rank of the sending process (validated by assertRank in
//             TEUCHOS_DEBUG builds)
//
// Throws std::runtime_error if MPI_Irecv does not return MPI_SUCCESS.
RCP<CommRequest> MpiComm<Ordinal>::ireceive(
    const ArrayView<char> &recvBuffer,
    const int sourceRank
) const
{
    TEUCHOS_COMM_TIME_MONITOR(
        "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::ireceive(...)"
    );
#ifdef TEUCHOS_DEBUG
    assertRank(sourceRank, "sourceRank");
#endif // TEUCHOS_DEBUG
    MPI_Request rawMpiRequest = MPI_REQUEST_NULL;
    // recvBuffer already views non-const chars, so no cast is needed.
    const int err = MPI_Irecv(
        recvBuffer.getRawPtr(), recvBuffer.size(), MPI_CHAR, sourceRank,
        tag_, *rawMpiComm_, &rawMpiRequest );
    // The return code only carries information when the communicator's error
    // handler returns instead of aborting; surface it rather than hand back
    // a request for a receive that was never posted.
    TEUCHOS_TEST_FOR_EXCEPTION(
        err != MPI_SUCCESS, std::runtime_error,
        "Teuchos::MpiComm<"<<OrdinalTraits<Ordinal>::name()<<">::ireceive(...): "
        "MPI_Irecv() failed with error code " << err << "."
    );
    return mpiCommRequest(rawMpiRequest);
}
Ejemplo n.º 14
0
// Builds an Epetra_CrsGraph with the same row distribution and sparsity
// pattern as the graph of the Tpetra matrix M_. The result is stored in eG_
// and returned.
//
// Throws std::runtime_error if M_ is null.
RCP<Epetra_CrsGraph> UserInputForTests::getEpetraCrsGraph()
{
  if (M_.is_null())
    throw std::runtime_error("could not read mtx file");
  RCP<const tcrsGraph_t> tgraph = M_->getCrsGraph();
  RCP<const Tpetra::Map<lno_t, gno_t> > trowMap = tgraph->getRowMap();
  RCP<const Tpetra::Map<lno_t, gno_t> > tcolMap = tgraph->getColMap();

  int nElts = static_cast<int>(trowMap->getGlobalNumElements());
  int nMyElts = static_cast<int>(trowMap->getNodeNumElements());
  int base = trowMap->getIndexBase();
  ArrayView<const int> gids = trowMap->getNodeElementList();

  // Epetra row map with the same global ids and index base as the Tpetra one.
  Epetra_BlockMap erowMap(nElts, nMyElts,
    gids.getRawPtr(), 1, base, *ecomm_);

  // Local row indices are zero-based regardless of the map's index base
  // (the base only applies to global ids), so rows are addressed with the
  // plain loop index below.
  Array<int> rowSize(nMyElts);
  for (int i=0; i < nMyElts; i++){
    rowSize[i] = static_cast<int>(M_->getNumEntriesInLocalRow(i));
  }

  // Scratch buffer for one row's global column ids, sized for the longest row.
  size_t maxRow = M_->getNodeMaxNumRowEntries();
  Array<int> colGids(maxRow);
  ArrayView<const int> colLid;

  eG_ = rcp(new Epetra_CrsGraph(Copy, erowMap, 
    rowSize.getRawPtr(), true));

  for (int i=0; i < nMyElts; i++){
    tgraph->getLocalRowView(i, colLid);
    // Translate the row's local column indices to global ids.
    for (int j=0; j < colLid.size(); j++)
      colGids[j] = tcolMap->getGlobalElement(colLid[j]);
    eG_->InsertGlobalIndices(gids[i], rowSize[i], colGids.getRawPtr());
  }
  eG_->FillComplete();
  return eG_;
}
// Appends communication metrics for a partition to `metrics`.
//
// A part-to-part graph is assembled in a Tpetra::CrsMatrix from the local
// graph's edges, once with unit edge weights (weight_index == -1) and once
// per edge weight.  From each matrix the function derives, per locally owned
// matrix row, the cut weight, the number of neighboring parts ("messages"),
// the hop count reported by `machine` and the weighted hop count, then
// reduces the maximum and sum of each over `comm`.
//
// env        used for debug output, timers and the memory assertion
// comm       communicator for the reductions (and Zoltan_DD when size > 1)
// graph      source of the vertex list, edge list and edge weights
// parts      part of each local vertex, indexed by local vertex number
// numParts   number of parts; sizes the part map and the per-part arrays
// metrics    grown by 4 + 2 * (weights per edge) entries, which are filled
// globalSums not referenced in this function body
// machine    queried through getHopCount(part, part, hops)
void globalWeightedCutsMessagesHopsByPart(
    const RCP<const Environment> &env,
    const RCP<const Comm<int> > &comm,
    const RCP<const GraphModel<typename Adapter::base_adapter_t> > &graph,
    const ArrayView<const typename Adapter::part_t> &parts,
    typename Adapter::part_t &numParts,
    ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > > &metrics,
    ArrayRCP<typename Adapter::scalar_t> &globalSums,
    const RCP <const MachineRep> machine)
{
  env->debug(DETAILED_STATUS, "Entering globalWeightedCutsMessagesHopsByPart");
  //////////////////////////////////////////////////////////
  // Initialize return values

  typedef typename Adapter::lno_t t_lno_t;
  typedef typename Adapter::gno_t t_gno_t;
  typedef typename Adapter::scalar_t t_scalar_t;
  typedef typename Adapter::part_t part_t;
  typedef typename Adapter::node_t t_node_t;


  typedef typename Zoltan2::GraphModel<typename Adapter::base_adapter_t>::input_t t_input_t;

  t_lno_t localNumVertices = graph->getLocalNumVertices();
  // NOTE(review): globalNumVertices is not read again in this function.
  t_gno_t globalNumVertices = graph->getGlobalNumVertices();
  t_lno_t localNumEdges = graph->getLocalNumEdges();

  ArrayView<const t_gno_t> Ids;
  ArrayView<t_input_t> v_wghts;
  graph->getVertexList(Ids, v_wghts);

  typedef GraphMetrics<t_scalar_t> mv_t;

  //get the edge ids, and weights
  ArrayView<const t_gno_t> edgeIds;
  ArrayView<const t_lno_t> offsets;
  ArrayView<t_input_t> e_wgts;
  graph->getEdgeList(edgeIds, offsets, e_wgts);


  // NOTE(review): edge_weights is not read again in this function.
  std::vector <t_scalar_t> edge_weights;
  int numWeightPerEdge = graph->getNumWeightsPerEdge();

  int numMetrics = 4;                   // "edge cuts", messages, hops, weighted hops
  if (numWeightPerEdge) numMetrics += numWeightPerEdge * 2;   // "weight n", weighted hops per weight n

  // add some more metrics to the array
  typedef typename ArrayRCP<RCP<BaseClassMetrics<typename Adapter::scalar_t> > >::size_type array_size_type;
  metrics.resize( metrics.size() + numMetrics );

  // Populate only the newly appended tail of the array.
  for( array_size_type n = metrics.size() - numMetrics; n < metrics.size(); ++n ){
    mv_t * newMetric = new mv_t;                  // allocate the new memory
    env->localMemoryAssertion(__FILE__,__LINE__,1,newMetric);   // check errors
    metrics[n] = rcp( newMetric);         // create the new members
  }
  // `next` is the slot the following metric is written to; it starts at the
  // first appended entry and advances as metrics are named below.
  array_size_type next = metrics.size() - numMetrics; // MDM - this is most likely temporary to preserve the format here - we are now filling a larger array so we may not have started at 0

  // e_parts[j] will hold the part of the vertex at the far end of edge j.
  std::vector <part_t> e_parts (localNumEdges);
#ifdef HAVE_ZOLTAN2_MPI
  if (comm->getSize() > 1)
  {
    // Multiple processes: publish (vertex id -> part) for the local
    // vertices in a Zoltan distributed directory, then look up the part of
    // every edge neighbor.  Return codes of the Zoltan_DD calls are not
    // checked.
    Zoltan_DD_Struct *dd = NULL;

    MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
    int size_gnot = Zoltan2::TPL_Traits<ZOLTAN_ID_PTR, t_gno_t>::NUM_ID;

    int debug_level = 0;
    Zoltan_DD_Create(&dd, mpicomm,
        size_gnot, 0,
        sizeof(part_t), localNumVertices, debug_level);

    ZOLTAN_ID_PTR ddnotneeded = NULL;  // Local IDs not needed
    Zoltan_DD_Update(
        dd,
        (ZOLTAN_ID_PTR) Ids.getRawPtr(),
        ddnotneeded,
        (char *) &(parts[0]),
        NULL,
        int(localNumVertices));

    Zoltan_DD_Find(
        dd,
        (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
        ddnotneeded,
        (char *)&(e_parts[0]),
        NULL,
        localNumEdges,
        NULL
        );
    Zoltan_DD_Destroy(&dd);
  } else
#endif
  {

    std::map<t_gno_t,t_lno_t> global_id_to_local_index;

    //else everything is local.
    //we need a globalid to local index conversion.
    //this does not exists till this point, so we need to create one.
    for (t_lno_t i = 0; i < localNumVertices; ++i){
      //at the local index i, we have the global index Ids[i].
      //so write i, to Ids[i] index of the vector.
      global_id_to_local_index[Ids[i]] = i;
    }

    for (t_lno_t i = 0; i < localNumEdges; ++i){
      t_gno_t ei = edgeIds[i];
      //ei is the global index of the neighbor one.
      // operator[] would insert index 0 for an id missing from the map.
      part_t p = parts[global_id_to_local_index[ei]];
      e_parts[i] = p;
    }
  }

  RCP<const Teuchos::Comm<int> > tcomm = comm;

  env->timerStart(MACRO_TIMERS, "Communication Graph Create");
  {
    //get the vertices in each part in my part.
    // part_begins[p] is the head of a linked list of the local vertices in
    // part p; part_nexts[v] is the vertex after v in its list; -1 ends it.
    std::vector <t_lno_t> part_begins(numParts, -1);
    std::vector <t_lno_t> part_nexts(localNumVertices, -1);

    //cluster vertices according to their parts.
    //create local part graph.
    for (t_lno_t i = 0; i < localNumVertices; ++i){
      part_t ap = parts[i];
      part_nexts[i] = part_begins[ap];
      part_begins[ap] = i;
    }


    // weight_index == -1 is the unit-weight pass; 0..numWeightPerEdge-1
    // select the corresponding edge weight.
    for (int weight_index = -1; weight_index < numWeightPerEdge ; ++weight_index){

      //MD: these two should be part_t.
      //but we dont want to compile tpetra from the beginning.
      //This can be changed when directory is updated.
      typedef t_lno_t local_part_type;
      typedef t_gno_t global_part_type;

      typedef Tpetra::Map<local_part_type, global_part_type, t_node_t> map_t;
      Teuchos::RCP<const map_t> map = Teuchos::rcp (new map_t (numParts, 0, tcomm));

      typedef Tpetra::CrsMatrix<t_scalar_t, local_part_type, global_part_type, t_node_t> tcrsMatrix_t;
      Teuchos::RCP<tcrsMatrix_t> tMatrix(new tcrsMatrix_t (map, 0));


      // Scratch arrays reused for every part: the distinct neighbor parts
      // seen so far, the weight accumulated per neighbor part, and those
      // weights reordered to match part_neighbors.
      std::vector <global_part_type> part_neighbors (numParts);

      std::vector <t_scalar_t> part_neighbor_weights(numParts, 0);
      std::vector <t_scalar_t> part_neighbor_weights_ordered(numParts);

      //coarsen for all vertices in my part in order with parts.
      for (global_part_type i = 0; i < (global_part_type) numParts; ++i){
        part_t num_neighbor_parts = 0;
        t_lno_t v = part_begins[i];
        //get part i, and first vertex in this part v.
        while (v != -1){
          //now get the neightbors of v.
          for (t_lno_t j = offsets[v]; j < offsets[v+1]; ++j){
            //get the part of the second vertex.
            part_t ep = e_parts[j];

            t_scalar_t ew = 1;
            if (weight_index > -1){
              ew = e_wgts[weight_index][j];
            }
            //add it to my local part neighbors for part i.
            // An accumulated weight below 0.00001 is taken to mean part ep
            // has not been recorded yet for part i.
            if (part_neighbor_weights[ep] < 0.00001){
              part_neighbors[num_neighbor_parts++] = ep;
            }
            part_neighbor_weights[ep] += ew;
          }
          v = part_nexts[v];
        }

        //now get the part list.
        // Gather the accumulated weights in neighbor order and reset the
        // accumulator entries for the next part.
        for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
          part_t neighbor_part = part_neighbors[j];
          part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
          part_neighbor_weights[neighbor_part] = 0;
        }

        //insert it to tpetra crsmatrix.
        if (num_neighbor_parts > 0){
          Teuchos::ArrayView<const global_part_type> destinations(&(part_neighbors[0]), num_neighbor_parts);
          Teuchos::ArrayView<const t_scalar_t> vals(&(part_neighbor_weights_ordered[0]), num_neighbor_parts);
          tMatrix->insertGlobalValues (i,destinations, vals);
        }
      }
      tMatrix->fillComplete ();
      local_part_type num_local_parts = map->getNodeNumElements();

      Array<global_part_type> Indices;
      Array<t_scalar_t> Values;

      // Local maxima and totals over the matrix rows owned by this process.
      t_scalar_t max_edge_cut = 0;
      t_scalar_t total_edge_cut = 0;
      global_part_type max_message = 0;
      global_part_type total_message = 0;

      global_part_type total_hop_count = 0;
      t_scalar_t total_weighted_hop_count = 0;
      global_part_type max_hop_count = 0;
      t_scalar_t max_weighted_hop_count = 0;

      for (local_part_type i=0; i < num_local_parts; i++) {

        const global_part_type globalRow = map->getGlobalElement(i);
        size_t NumEntries = tMatrix->getNumEntriesInGlobalRow (globalRow);
        Indices.resize (NumEntries);
        Values.resize (NumEntries);
        tMatrix->getGlobalRowCopy (globalRow,Indices(),Values(),NumEntries);

        t_scalar_t part_edge_cut = 0;
        global_part_type part_messages = 0;

        for (size_t j=0; j < NumEntries; j++){
          // Diagonal entries (a part paired with itself) are skipped.
          if (Indices[j] != globalRow){
            part_edge_cut += Values[j];
            part_messages += 1;

            typename MachineRep::machine_pcoord_t hop_count = 0;
            machine->getHopCount(globalRow, Indices[j], hop_count);

            global_part_type hop_counts = hop_count;
            t_scalar_t weighted_hop_counts = hop_count * Values[j];

            total_hop_count += hop_counts;
            total_weighted_hop_count += weighted_hop_counts;

            // Note: the hop maxima are tracked per matrix entry, whereas
            // the cut and message maxima below are tracked per row.
            if (hop_counts > max_hop_count ){
              max_hop_count = hop_counts;
            }
            if (weighted_hop_counts > max_weighted_hop_count ){
              max_weighted_hop_count = weighted_hop_counts;
            }
          }
        }
        if (part_edge_cut > max_edge_cut){
          max_edge_cut = part_edge_cut;
        }
        total_edge_cut += part_edge_cut;

        if (part_messages > max_message){
          max_message = part_messages;
        }
        total_message += part_messages;

      }
      // Global counterparts, filled by the reductions below.
      t_scalar_t g_max_edge_cut = 0;
      t_scalar_t g_total_edge_cut = 0;
      global_part_type g_max_message = 0;
      global_part_type g_total_message = 0;



      global_part_type g_total_hop_count = 0;
      t_scalar_t g_total_weighted_hop_count = 0;
      global_part_type g_max_hop_count = 0;
      t_scalar_t g_max_weighted_hop_count = 0;

      try{

        Teuchos::reduceAll<int, t_scalar_t>(*comm,Teuchos::REDUCE_MAX,1,&max_edge_cut,&g_max_edge_cut);
        Teuchos::reduceAll<int, global_part_type>(*comm,Teuchos::REDUCE_MAX,1,&max_message,&g_max_message);

        Teuchos::reduceAll<int, global_part_type>(*comm,Teuchos::REDUCE_MAX,1,&max_hop_count,&g_max_hop_count);
        Teuchos::reduceAll<int, t_scalar_t>(*comm,Teuchos::REDUCE_MAX,1,&max_weighted_hop_count,&g_max_weighted_hop_count);

        Teuchos::reduceAll<int, t_scalar_t>(*comm,Teuchos::REDUCE_SUM,1,&total_edge_cut,&g_total_edge_cut);
        Teuchos::reduceAll<int, global_part_type>(*comm,Teuchos::REDUCE_SUM,1,&total_message,&g_total_message);

        Teuchos::reduceAll<int, global_part_type>(*comm,Teuchos::REDUCE_SUM,1,&total_hop_count,&g_total_hop_count);
        Teuchos::reduceAll<int, t_scalar_t>(*comm,Teuchos::REDUCE_SUM,1,&total_weighted_hop_count,&g_total_weighted_hop_count);

      }
      Z2_THROW_OUTSIDE_ERROR(*env);


      // Record the results.  The unit-weight pass writes four metrics (cuts,
      // messages, hops, weighted hops); each weighted pass writes two
      // (weighted cuts, weighted hops), matching numMetrics above.
      if (weight_index == -1){
        metrics[next]->setName("md edge cuts");
      }
      else {
        std::ostringstream oss;
        oss << "md weight " << weight_index;
        metrics[next]->setName( oss.str());
      }

      metrics[next]->setMetricValue("global maximum", g_max_edge_cut);
      metrics[next]->setMetricValue("global sum", g_total_edge_cut);
      next++;

      if (weight_index == -1){
        metrics[next]->setName("message");
        metrics[next]->setMetricValue("global maximum", g_max_message);
        metrics[next]->setMetricValue("global sum", g_total_message);
        next++;
      }


      if (weight_index == -1){
        metrics[next]->setName("hops");
        metrics[next]->setMetricValue("global maximum", g_max_hop_count);
        metrics[next]->setMetricValue("global sum", g_total_hop_count);
        next++;
      }

      // The metric name carries the weight index as a suffix, including -1
      // for the unit-weight pass.
      std::ostringstream oss;
      oss << "weighted hops" << weight_index;
      metrics[next]->setName( oss.str());
      metrics[next]->setMetricValue("global maximum", g_max_weighted_hop_count);
      metrics[next]->setMetricValue("global sum", g_total_weighted_hop_count);
      next++;

    }
  }
  env->timerStop(MACRO_TIMERS, "Communication Graph Create");

  env->debug(DETAILED_STATUS, "Exiting globalWeightedCutsMessagesHopsByPart");
}
Ejemplo n.º 16
0
  // Classifies the source Map's local GIDs relative to the target Map and
  // fills the corresponding fields of ExportData_:
  //   - numSameIDs_: length of the leading run of GIDs that is identical
  //     (same values, same order) in both Maps;
  //   - permuteToLIDs_ / permuteFromLIDs_: remaining source GIDs that are
  //     also local in the target Map;
  //   - exportLIDs_ and exportGIDs: remaining source GIDs that are not local
  //     in the target Map;
  //   - exportPIDs_: owning process of each export GID, computed only when
  //     the source Map is distributed.
  //
  // exportGIDs is an output argument; it is cleared first.  Export entries
  // whose GID has no owner in the target Map (process id -1) are removed.
  void
  Export<LocalOrdinal,GlobalOrdinal,Node>::
  setupSamePermuteExport (Teuchos::Array<GlobalOrdinal>& exportGIDs)
  {
    using Teuchos::arcp;
    using Teuchos::Array;
    using Teuchos::ArrayRCP;
    using Teuchos::ArrayView;
    using Teuchos::as;
    using Teuchos::null;
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    typedef typename ArrayView<const GO>::size_type size_type;
    const Map<LO,GO,Node>& source = * (getSourceMap ());
    const Map<LO,GO,Node>& target = * (getTargetMap ());
    ArrayView<const GO> sourceGIDs = source.getNodeElementList ();
    ArrayView<const GO> targetGIDs = target.getNodeElementList ();

    // Debug builds index through the ArrayView; other builds index through
    // raw pointers.
#ifdef HAVE_TPETRA_DEBUG
    ArrayView<const GO> rawSrcGids = sourceGIDs;
    ArrayView<const GO> rawTgtGids = targetGIDs;
#else
    const GO* const rawSrcGids = sourceGIDs.getRawPtr ();
    const GO* const rawTgtGids = targetGIDs.getRawPtr ();
#endif // HAVE_TPETRA_DEBUG
    const size_type numSrcGids = sourceGIDs.size ();
    const size_type numTgtGids = targetGIDs.size ();
    const size_type numGids = std::min (numSrcGids, numTgtGids);

    // Compute numSameIDs_: the number of initial GIDs that are the
    // same (and occur in the same order) in both Maps.  The point of
    // numSameIDs_ is for the common case of an Export where all the
    // overlapping GIDs are at the end of the source Map, but
    // otherwise the source and target Maps are the same.  This allows
    // a fast contiguous copy for the initial "same IDs."
    size_type numSameGids = 0;
    for ( ; numSameGids < numGids && rawSrcGids[numSameGids] == rawTgtGids[numSameGids]; ++numSameGids)
      {} // third clause of 'for' does everything
    ExportData_->numSameIDs_ = numSameGids;

    // Compute permuteToLIDs_, permuteFromLIDs_, exportGIDs, and
    // exportLIDs_.  The first two arrays are IDs to be permuted, and
    // the latter two arrays are IDs to sent out ("exported"), called
    // "export" IDs.
    //
    // IDs to permute are in both the source and target Maps, which
    // means we don't have to send or receive them, but we do have to
    // rearrange (permute) them in general.  IDs to send are in the
    // source Map, but not in the target Map.

    exportGIDs.resize (0);
    Array<LO>& permuteToLIDs = ExportData_->permuteToLIDs_;
    Array<LO>& permuteFromLIDs = ExportData_->permuteFromLIDs_;
    Array<LO>& exportLIDs = ExportData_->exportLIDs_;
    const LO LINVALID = Teuchos::OrdinalTraits<LO>::invalid ();
    const LO numSrcLids = as<LO> (numSrcGids);
    // Iterate over the source Map's LIDs, since we only need to do
    // GID -> LID lookups for the target Map.
    for (LO srcLid = numSameGids; srcLid < numSrcLids; ++srcLid) {
      const GO curSrcGid = rawSrcGids[srcLid];
      // getLocalElement() returns LINVALID if the GID isn't in the target Map.
      // This saves us a lookup (which isNodeGlobalElement() would do).
      const LO tgtLid = target.getLocalElement (curSrcGid);
      if (tgtLid != LINVALID) { // if target.isNodeGlobalElement (curSrcGid)
        permuteToLIDs.push_back (tgtLid);
        permuteFromLIDs.push_back (srcLid);
      } else {
        exportGIDs.push_back (curSrcGid);
        exportLIDs.push_back (srcLid);
      }
    }

    // exportLIDs_ is the list of this process' LIDs that it has to
    // send out.  Since this is an Export, and therefore the target
    // Map is nonoverlapping, we know that each export LID only needs
    // to be sent to one process.  However, the source Map may be
    // overlapping, so multiple processes might send to the same LID
    // on a receiving process.

    TPETRA_ABUSE_WARNING(
      getNumExportIDs() > 0 && ! source.isDistributed(),
      std::runtime_error,
      "::setupSamePermuteExport(): Source has export LIDs but Source is not "
      "distributed globally." << std::endl
      << "Exporting to a submap of the target map.");

    // Compute exportPIDs_ ("outgoing" process IDs).
    //
    // For each GID in exportGIDs (GIDs to which this process must
    // send), find its corresponding owning process (a.k.a. "image")
    // ID in the target Map.  Store these process IDs in
    // exportPIDs_.  These are the process IDs to which the Export
    // needs to send data.
    //
    // We only need to do this if the source Map is distributed;
    // otherwise, the Export doesn't have to perform any
    // communication.
    if (source.isDistributed ()) {
      ExportData_->exportPIDs_.resize(exportGIDs.size ());
      // This call will assign any GID in the target Map with no
      // corresponding process ID a fake process ID of -1.  We'll use
      // this below to remove exports for processses that don't exist.
      const LookupStatus lookup =
        target.getRemoteIndexList (exportGIDs(),
                                   ExportData_->exportPIDs_ ());
      TPETRA_ABUSE_WARNING( lookup == IDNotPresent, std::runtime_error,
        "::setupSamePermuteExport(): The source Map has GIDs not found "
        "in the target Map.");

      // Get rid of process IDs not in the target Map.  This prevents
      // exporting to GIDs which don't belong to any process in the
      // target Map.
      if (lookup == IDNotPresent) {
        // Count the entries marked with the fake process ID -1.
        // std::count is used instead of count_if + std::bind1st, since
        // std::bind1st was removed from the standard library in C++17.
        const size_type numInvalidExports =
          std::count (ExportData_->exportPIDs_().begin(),
                      ExportData_->exportPIDs_().end(),
                      -1);

        // count number of valid and total number of exports
        const size_type totalNumExports = ExportData_->exportPIDs_.size();
        if (numInvalidExports == totalNumExports) {
          // all exports are invalid; we have no exports; we can delete all exports
          exportGIDs.resize(0);
          ExportData_->exportLIDs_.resize(0);
          ExportData_->exportPIDs_.resize(0);
        }
        else {
          // some exports are valid; we need to keep the valid exports
          // pack and resize
          // Compact the three parallel arrays in place, keeping relative
          // order; numValidExports never runs ahead of e, so no entry is
          // overwritten before it has been read.
          size_type numValidExports = 0;
          for (size_type e = 0; e < totalNumExports; ++e) {
            if (ExportData_->exportPIDs_[e] != -1) {
              exportGIDs[numValidExports]               = exportGIDs[e];
              ExportData_->exportLIDs_[numValidExports] = ExportData_->exportLIDs_[e];
              ExportData_->exportPIDs_[numValidExports] = ExportData_->exportPIDs_[e];
              ++numValidExports;
            }
          }
          exportGIDs.resize (numValidExports);
          ExportData_->exportLIDs_.resize (numValidExports);
          ExportData_->exportPIDs_.resize (numValidExports);
        }
      }
    }
  } // setupSamePermuteExport()
Ejemplo n.º 17
0
// Filters a CSR-style neighbor list, dropping edges selected by the three
// remove* flags, and returns the number of edges kept.
//
// env                    used for input and memory assertions
// myRank                 rank compared against an edge's owner when
//                        removeOffProcessEdges is set
// removeSelfEdges        drop edges whose neighbor id equals the vertex id
// removeOffProcessEdges  drop edges whose owner differs from myRank
// removeOffGroupEdges    drop edges whose owner is negative
// gids                   vertex ids (needed only for removeSelfEdges)
// gidNbors               neighbor id of every edge
// procIds                owner of every edge's neighbor (needed only for the
//                        off-process / off-group filters; owner 0 is assumed
//                        when the view is empty)
// edgeWeights            one strided weight array per weight dimension
// offsets                offsets[i]..offsets[i+1] delimit vertex i's edges
// newGidNbors  (out)     kept neighbor ids
// newWeights   (out)     kept weights, one new[]-allocated array per weight
//                        dimension, or NULL when no new list was built
// newOffsets   (out)     offsets into newGidNbors
//
// When nothing is removed, the outputs are non-owning wrappers around the
// input views and newWeights stays NULL.  When everything is removed,
// newGidNbors is null, newOffsets is all zeros and newWeights stays NULL.
size_t removeUndesiredEdges(
  const RCP<const Environment> &env,
  int myRank,
  bool removeSelfEdges,
  bool removeOffProcessEdges,
  bool removeOffGroupEdges,
  ArrayView<const typename InputTraits<User>::zgid_t> &gids,
  ArrayView<const typename InputTraits<User>::zgid_t> &gidNbors,
  ArrayView<const int> &procIds,
  ArrayView<StridedData<typename InputTraits<User>::lno_t,
                        typename InputTraits<User>::scalar_t> > &edgeWeights,
  ArrayView<const typename InputTraits<User>::lno_t> &offsets,
  ArrayRCP<const typename InputTraits<User>::zgid_t> &newGidNbors, // out
  typename InputTraits<User>::scalar_t **&newWeights,             // out
  ArrayRCP<const typename InputTraits<User>::lno_t> &newOffsets)  // out
{
  typedef typename InputTraits<User>::zgid_t zgid_t;
  typedef typename InputTraits<User>::scalar_t scalar_t;
  typedef typename InputTraits<User>::lno_t lno_t;
  size_t numKeep = 0;

  // An empty offsets view describes zero vertices.  Guard the subtraction
  // so the unsigned vertex count cannot wrap around.
  size_t numVtx = 0;
  if (offsets.size() > 0)
    numVtx = offsets.size() - 1;
  size_t numNbors = gidNbors.size();

  env->localInputAssertion(__FILE__, __LINE__, "need more input",
    (!removeSelfEdges ||
      gids.size() >=
       static_cast<typename ArrayView<const zgid_t>::size_type>(numVtx))
      &&
    (!removeOffProcessEdges ||
      procIds.size() >=
       static_cast<typename ArrayView<const int>::size_type>(numNbors)) &&
    (!removeOffGroupEdges ||
      procIds.size() >=
       static_cast<typename ArrayView<const int>::size_type>(numNbors)),
    BASIC_ASSERTION);

  // initialize edge weight array

  newWeights = NULL;
  int eDim = edgeWeights.size();

  // count desired edges

  // offs is owned by offArray from here on, so it is released on every
  // return path that does not hand offArray to the caller.
  lno_t *offs = new lno_t [numVtx + 1];
  env->localMemoryAssertion(__FILE__, __LINE__, numVtx+1, offs);
  for (size_t i = 0; i < numVtx+1; i++) offs[i] = 0;
  ArrayRCP<const lno_t> offArray = arcp(offs, 0, numVtx+1, true);

  const lno_t *allOffs = offsets.getRawPtr();
  const zgid_t *allIds = gidNbors.getRawPtr();

  const zgid_t *vtx = NULL;
  const int *proc = NULL;

  if (gids.size() > 0)
    vtx = gids.getRawPtr();

  if (procIds.size() > 0)
    proc = procIds.getRawPtr();

  // First pass: offs[i+1] receives the number of edges kept for vertex i.
  for (size_t i=0; i < numVtx; i++){
    zgid_t vid = vtx ? vtx[i] : zgid_t(0);
    for (lno_t j=allOffs[i]; j < allOffs[i+1]; j++){
      int owner = proc ? proc[j] : 0;
      bool keep = (!removeSelfEdges || vid != allIds[j]) &&
               (!removeOffProcessEdges || owner == myRank) &&
               (!removeOffGroupEdges || owner >= 0);

      if (keep)
        offs[i+1]++;
    }
  }

  // from counters to offsets

  for (size_t i=1; i < numVtx; i++)
    offs[i+1] += offs[i];

  numKeep = offs[numVtx];

  // do we need a new neighbor list?

  if (numNbors == numKeep){
    // Nothing was removed: hand back wrappers around the caller's arrays.
    newGidNbors = Teuchos::arcpFromArrayView(gidNbors);
    newOffsets = Teuchos::arcpFromArrayView(offsets);
    return numNbors;
  }
  else if (numKeep == 0){
    newGidNbors = ArrayRCP<const zgid_t>(Teuchos::null);
    newOffsets = offArray;
    return 0;
  }

  // Build the subset neighbor lists (id, weight, and offset).
  // numKeep is strictly positive from here on.

  zgid_t *newGids = new zgid_t [numKeep];
  env->localMemoryAssertion(__FILE__, __LINE__, numKeep, newGids);

  newGidNbors = arcp(newGids, 0, numKeep, true);
  newOffsets = offArray;

  if (eDim > 0){
    newWeights = new scalar_t * [eDim];
    env->localMemoryAssertion(__FILE__, __LINE__, eDim, newWeights);

    for (int w=0; w < eDim; w++){
      newWeights[w] = new scalar_t [numKeep];
      env->localMemoryAssertion(__FILE__, __LINE__, numKeep, newWeights[w]);
    }
  }

  // Second pass: copy the kept ids and weights, using the same predicate as
  // the counting pass so the totals agree.
  size_t next = 0;
  for (size_t i=0; i < numVtx && next < numKeep; i++){
    zgid_t vid = vtx ? vtx[i] : zgid_t(0);
    for (lno_t j=allOffs[i]; j < allOffs[i+1]; j++){
      int owner = proc ? proc[j] : 0;
      bool keep = (!removeSelfEdges || vid != allIds[j]) &&
               (!removeOffProcessEdges || owner == myRank) &&
               (!removeOffGroupEdges || owner >= 0);

      if (keep){
        newGids[next] = allIds[j];
        for (int w=0; w < eDim; w++){
          newWeights[w][next] = edgeWeights[w][j];
        }
        next++;
        if (next == numKeep)
          break;

      }  // if (keep)
    }
  }

  return numKeep;
}
Ejemplo n.º 18
0
void testIdentifierModel(std::string fname, zgno_t xdim, zgno_t ydim, zgno_t zdim,
                         const RCP<const Comm<int> > &comm, bool consecutiveIds)
{
    int rank = comm->getRank();
    int fail = 0, gfail = 0;

    std::bitset<Zoltan2::NUM_MODEL_FLAGS> modelFlags = 0;
    if (consecutiveIds)
        modelFlags.set(Zoltan2::IDS_MUST_BE_GLOBALLY_CONSECUTIVE);

    RCP<const Zoltan2::Environment> env = rcp(new Zoltan2::Environment);

    //////////////////////////////////////////////////////////////
    // Use an Tpetra::CrsMatrix for the user data.
    //////////////////////////////////////////////////////////////
    typedef Tpetra::CrsMatrix<zscalar_t, zlno_t, zgno_t> tcrsMatrix_t;

    UserInputForTests *uinput;
    if (fname.size() > 0)
        uinput = new UserInputForTests(testDataFilePath, fname, comm, true);
    else
        uinput = new UserInputForTests(xdim,ydim,zdim,string(""),comm, true, true);

    RCP<tcrsMatrix_t > M = uinput->getUITpetraCrsMatrix();
    zlno_t nLocalIds = M->getNodeNumRows();
    zgno_t nGlobalIds =  M->getGlobalNumRows();

    ArrayView<const zgno_t> idList = M->getRowMap()->getNodeElementList();
    std::set<zgno_t> idSet(idList.begin(), idList.end());

    //////////////////////////////////////////////////////////////
    // Create an IdentifierModel with this input
    //////////////////////////////////////////////////////////////

    typedef Zoltan2::XpetraCrsMatrixAdapter<tcrsMatrix_t> adapter_t;
    typedef Zoltan2::MatrixAdapter<tcrsMatrix_t> base_adapter_t;
    typedef Zoltan2::StridedData<zlno_t, zscalar_t> input_t;

    RCP<const adapter_t> ia = Teuchos::rcp(new adapter_t(M));

    Zoltan2::IdentifierModel<base_adapter_t> *model = NULL;
    RCP<const base_adapter_t> base_ia =
        Teuchos::rcp_dynamic_cast<const base_adapter_t>(ia);

    try {
        model = new Zoltan2::IdentifierModel<base_adapter_t>(
            base_ia, env, comm, modelFlags);
    }
    catch (std::exception &e) {
        std::cerr << rank << ") " << e.what() << std::endl;
        fail = 1;
    }

    gfail = globalFail(comm, fail);

    if (gfail)
        printFailureCode(comm, fail);

    // Test the IdentifierModel interface

    if (model->getLocalNumIdentifiers() != size_t(nLocalIds)) {
        std::cerr << rank << ") getLocalNumIdentifiers "
                  << model->getLocalNumIdentifiers() << " "
                  << nLocalIds << std::endl;
        fail = 2;
    }

    if (!fail && model->getGlobalNumIdentifiers() != size_t(nGlobalIds)) {
        std::cerr << rank << ") getGlobalNumIdentifiers "
                  << model->getGlobalNumIdentifiers() << " "
                  << nGlobalIds << std::endl;
        fail = 3;
    }

    gfail = globalFail(comm, fail);

    if (gfail)
        printFailureCode(comm, fail);

    ArrayView<const zgno_t> gids;
    ArrayView<input_t> wgts;

    model->getIdentifierList(gids, wgts);

    if (!fail && gids.size() != nLocalIds) {
        std::cerr << rank << ") getIdentifierList IDs "
                  << gids.size() << " "
                  << nLocalIds << std::endl;
        fail = 5;
    }

    if (!fail && wgts.size() != 0) {
        std::cerr << rank << ") getIdentifierList Weights "
                  << wgts.size() << " "
                  << 0 << std::endl;
        fail = 6;
    }

    for (zlno_t i=0; !fail && i < nLocalIds; i++) {
        std::set<zgno_t>::iterator next = idSet.find(gids[i]);
        if (next == idSet.end()) {
            std::cerr << rank << ") getIdentifierList gid not found "
                      << gids[i] << std::endl;
            fail = 7;
        }
    }

    if (!fail && consecutiveIds) {
        bool inARow = Zoltan2::IdentifierTraits<zgno_t>::areConsecutive(
                          gids.getRawPtr(), nLocalIds);

        if (!inARow) {
            std::cerr << rank << ") getIdentifierList not consecutive " << std::endl;
            fail = 8;
        }
    }

    gfail = globalFail(comm, fail);

    if (gfail)
        printFailureCode(comm, fail);

    delete model;
    delete uinput;
}
Ejemplo n.º 19
0
void testCoordinateModel(std::string &fname, int nWeights,
  const RCP<const Comm<int> > &comm, bool consecutiveIds,
  bool nodeZeroHasAll, bool printInfo)
{
  int fail = 0, gfail = 0;

  if (printInfo){
    cout << "Test: " << fname << endl;
    cout << "Num Weights: " << nWeights;
    cout << " want consec ids: " << consecutiveIds;
    cout << " proc 0 has all: " << nodeZeroHasAll;
    cout << endl;
  }

  //////////////////////////////////////////////////////////////
  // Input data
  //////////////////////////////////////////////////////////////

  typedef Tpetra::MultiVector<scalar_t, lno_t, gno_t, node_t> mv_t;

  RCP<UserInputForTests> uinput;

  try{
    uinput = rcp(new UserInputForTests(testDataFilePath, fname, comm, true));
  }
  catch(std::exception &e){
    fail=1;
  }

  TEST_FAIL_AND_EXIT(*comm, !fail, "input constructor", 1);

  RCP<mv_t> coords;

  try{
    coords = uinput->getUICoordinates();
  }
  catch(std::exception &e){
    fail=2;
  }

  TEST_FAIL_AND_EXIT(*comm, !fail, "getting coordinates", 1);

  int coordDim = coords->getNumVectors();

  TEST_FAIL_AND_EXIT(*comm, coordDim <= 3, "dim 3 at most", 1);

  const scalar_t *x=NULL, *y=NULL, *z=NULL;

  x = coords->getData(0).getRawPtr();
  if (coordDim > 1){
    y = coords->getData(1).getRawPtr();
    if (coordDim > 2)
      z = coords->getData(2).getRawPtr();
  }

  // Are these coordinates correct

  int nLocalIds = coords->getLocalLength();
  ArrayView<const gno_t> idList = coords->getMap()->getNodeElementList();

  int nGlobalIds = 0;
  if (nodeZeroHasAll){
    if (comm->getRank() > 0){
      x = y = z = NULL;
      nLocalIds = 0;
    }
    else{
      nGlobalIds = nLocalIds;
    }
    Teuchos::broadcast<int, int>(*comm, 0, &nGlobalIds);
  }
  else{
    nGlobalIds = coords->getGlobalLength();
  }

  Array<ArrayRCP<const scalar_t> > coordWeights(nWeights);

  if (nLocalIds > 0){
    for (int wdim=0; wdim < nWeights; wdim++){
      scalar_t *w = new scalar_t [nLocalIds];
      for (int i=0; i < nLocalIds; i++){
        w[i] = ((i%2) + 1) + wdim;
      }
      coordWeights[wdim] = Teuchos::arcp(w, 0, nLocalIds);
    }
  }


  //////////////////////////////////////////////////////////////
  // Create a BasicVectorAdapter adapter object.
  //////////////////////////////////////////////////////////////

  typedef Zoltan2::BasicVectorAdapter<mv_t> ia_t;
  typedef Zoltan2::VectorAdapter<mv_t> base_ia_t;

  RCP<ia_t> ia;

  if (nWeights == 0){   // use the simpler constructor
    try{
      ia = rcp(new ia_t(nLocalIds, idList.getRawPtr(), x, y, z));
    }
    catch(std::exception &e){
      fail=3;
    }
  }
  else{
    std::vector<const scalar_t *> values, weights;
    std::vector<int> valueStrides, weightStrides;  // default is 1
    values.push_back(x);
    if (y) {
      values.push_back(y);
      if (z) 
        values.push_back(z);
    }
    for (int wdim=0; wdim < nWeights; wdim++){
      weights.push_back(coordWeights[wdim].getRawPtr());
    }

    try{
      ia = rcp(new ia_t(nLocalIds, idList.getRawPtr(),
               values, valueStrides, weights, weightStrides));
    }
    catch(std::exception &e){
      fail=4;
    }
  }

  RCP<base_ia_t> base_ia = Teuchos::rcp_implicit_cast<base_ia_t>(ia);

  TEST_FAIL_AND_EXIT(*comm, !fail, "making input adapter", 1);

  //////////////////////////////////////////////////////////////
  // Create an CoordinateModel with this input
  //////////////////////////////////////////////////////////////

  typedef Zoltan2::StridedData<lno_t, scalar_t> input_t;
  typedef std::bitset<Zoltan2::NUM_MODEL_FLAGS> modelFlags_t;
  typedef Zoltan2::CoordinateModel<base_ia_t> model_t;
  modelFlags_t modelFlags;

  if (consecutiveIds)
    modelFlags.set(Zoltan2::IDS_MUST_BE_GLOBALLY_CONSECUTIVE);

  RCP<const Zoltan2::Environment> env = rcp(new Zoltan2::Environment);
  RCP<model_t> model;
  

  try{
    model = rcp(new model_t(base_ia.getRawPtr(), env, comm, modelFlags));
  }
  catch (std::exception &e){
    fail = 5;
  }

  TEST_FAIL_AND_EXIT(*comm, !fail, "making model", 1);

  // Test the CoordinateModel interface

  if (model->getCoordinateDim() != coordDim)
    fail = 6;

  if (!fail && model->getLocalNumCoordinates() != size_t(nLocalIds))
    fail = 7;

  if (!fail && model->getGlobalNumCoordinates() != size_t(nGlobalIds))
    fail = 8;

  if (!fail && model->getNumWeightsPerCoordinate() !=  nWeights)
    fail = 9;

  gfail = globalFail(comm, fail);

  if (gfail)
    printFailureCode(comm, fail);
  
  ArrayView<const gno_t> gids;
  ArrayView<input_t> xyz;
  ArrayView<input_t> wgts;
  
  model->getCoordinates(gids, xyz, wgts);

  if (!fail && gids.size() != nLocalIds)
    fail = 10;

  for (int i=0; !fail && i < nLocalIds; i++){
    if (gids[i] != idList[i])
      fail = 11;
  }

  if (!fail && wgts.size() != nWeights)
    fail = 12;

  const scalar_t *vals[3] = {x, y, z};

  for (int dim=0; !fail && dim < coordDim; dim++){
    for (int i=0; !fail && i < nLocalIds; i++){
      if (xyz[dim][i] != vals[dim][i])
        fail = 13;
    }
  }

  for (int wdim=0; !fail && wdim < nWeights; wdim++){
    for (int i=0; !fail && i < nLocalIds; i++){
      if (wgts[wdim][i] != coordWeights[wdim][i])
        fail = 14;
    }
  }

  if (!fail && consecutiveIds){
    bool inARow = Zoltan2::IdentifierTraits<gno_t>::areConsecutive(
      gids.getRawPtr(), nLocalIds);

    if (!inARow)
      fail = 15;
  }

  gfail = globalFail(comm, fail);

  if (gfail)
    printFailureCode(comm, fail);
}
 // Inserts entries into local row `localRow` by forwarding to
 // mtx_->InsertMyValues with the raw value and column-index arrays; the
 // returned code is handed to XPETRA_ERR_CHECK.
 // The entry count passed on is vals.size(); cols is presumably expected to
 // hold at least that many indices (not checked here).
 void EpetraCrsMatrixT<EpetraGlobalOrdinal>::insertLocalValues(LocalOrdinal localRow, const ArrayView<const LocalOrdinal> &cols, const ArrayView<const Scalar> &vals) {
   XPETRA_MONITOR("EpetraCrsMatrixT::insertLocalValues");
   XPETRA_ERR_CHECK(mtx_->InsertMyValues(localRow, vals.size(), vals.getRawPtr(), cols.getRawPtr()));
 }
Ejemplo n.º 21
0
// Driver that partitions generated mesh coordinates with the "rcb" algorithm
// through Zoltan2::PartitioningProblem.
//
// Command-line options (registered below): size, testCuts, numParts,
// nWeights, objective, timers, debug, memoryOn/memoryOff, memoryProcs.
//
// Prints "PASS" or "FAIL" on rank 0.  Returns 0 on success and when only the
// help text was requested; returns 1 when the command line cannot be parsed.
int main(int argc, char *argv[])
{
  // MEMORY_CHECK(true, "Before initializing MPI");

  Teuchos::GlobalMPISession session(&argc, &argv, NULL);
  RCP<const Comm<int> > comm = Teuchos::DefaultComm<int>::getComm();
  int rank = comm->getRank();
  int nprocs = comm->getSize();

  MEMORY_CHECK(rank==0, "After initializing MPI");

  if (rank==0)
    cout << "Number of processes: " << nprocs << endl;

  // Default values
  double numGlobalCoords = 1000;
  int numTestCuts = 1;
  int nWeights = 0;
  string timingType("no_timers");
  string debugLevel("basic_status");
  string memoryOn("memoryOn");
  string memoryOff("memoryOff");
  string memoryProcs("0");
  bool doMemory=false;
  int numGlobalParts = nprocs;

  CommandLineProcessor commandLine(false, true);
  commandLine.setOption("size", &numGlobalCoords, 
    "Approximate number of global coordinates.");
  commandLine.setOption("testCuts", &numTestCuts, 
    "Number of test cuts to make when looking for bisector.");
  commandLine.setOption("numParts", &numGlobalParts, 
    "Number of parts (default is one per proc).");
  commandLine.setOption("nWeights", &nWeights, 
    "Number of weights per coordinate, zero implies uniform weights.");

  string balanceCount("balance_object_count");
  string balanceWeight("balance_object_weight");
  string mcnorm1("multicriteria_minimize_total_weight");
  string mcnorm2("multicriteria_balance_total_maximum");
  string mcnorm3("multicriteria_minimize_maximum_weight");

  string objective(balanceWeight);   // default

  // Help text for the "objective" option, one line per accepted value.
  string doc(balanceCount); doc.append(": ignore weights\n");

  doc.append(balanceWeight); doc.append(": balance on first weight\n");

  doc.append(mcnorm1);
  doc.append(": given multiple weights, balance their total.\n");

  doc.append(mcnorm3);
  doc.append(": given multiple weights, balance the maximum for each coordinate.\n");

  doc.append(mcnorm2);
  doc.append(": given multiple weights, balance the L2 norm of the weights.\n");

  commandLine.setOption("objective", &objective,  doc.c_str());

  commandLine.setOption("timers", &timingType,
    "no_timers, micro_timers, macro_timers, both_timers, test_timers");

  commandLine.setOption("debug", &debugLevel,
   "no_status, basic_status, detailed_status, verbose_detailed_status");

  commandLine.setOption(memoryOn.c_str(), memoryOff.c_str(), &doMemory,
    "do memory profiling");

  commandLine.setOption("memoryProcs", &memoryProcs,
   "list of processes that output memory usage");

  CommandLineProcessor::EParseCommandLineReturn rc = 
    commandLine.parse(argc, argv);

  if (rc != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL){
    // The exit status agrees with the printed verdict: a help request is
    // not an error, an unparsable command line is.
    if (rc == Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED){
      if (rank==0)
        cout << "PASS" << endl;
      return 0;
    }
    else{
      if (rank==0)
        cout << "FAIL" << endl;
      return 1;
    }
  }

  //MEMORY_CHECK(doMemory && rank==0, "After processing parameters");

  zgno_t globalSize = static_cast<zgno_t>(numGlobalCoords);

  RCP<tMVector_t> coordinates = getMeshCoordinates(comm, globalSize);
  size_t numLocalCoords = coordinates->getLocalLength();

#if 0
  comm->barrier();
  for (int p=0; p < nprocs; p++){
    if (p==rank){
      cout << "Rank " << rank << ", " << numLocalCoords << "coords" << endl;
      const zscalar_t *x = coordinates->getData(0).getRawPtr();
      const zscalar_t *y = coordinates->getData(1).getRawPtr();
      const zscalar_t *z = coordinates->getData(2).getRawPtr();
      for (zlno_t i=0; i < numLocalCoords; i++)
        cout << " " << x[i] << " " << y[i] << " " << z[i] << endl;
    }
    cout.flush();
    comm->barrier();
  }
#endif

  Array<ArrayRCP<zscalar_t> > weights(nWeights);

  if (nWeights > 0){
    // Cycle through the available weight types; each time the cycle wraps
    // around, the scale handed to makeWeights is increased by one.
    int wt = 0;
    zscalar_t scale = 1.0;
    for (int i=0; i < nWeights; i++){
      weights[i] = 
        makeWeights(comm, numLocalCoords, weightTypes(wt++), scale, rank);
      if (wt == numWeightTypes){
        wt = 0;
        scale++;
      }
    }
  }

  MEMORY_CHECK(doMemory && rank==0, "After creating input");

  // Create an input adapter.
  const RCP<const tMap_t> &coordmap = coordinates->getMap();
  ArrayView<const zgno_t> ids = coordmap->getNodeElementList();
  const zgno_t *globalIds = ids.getRawPtr();
  
  typedef Zoltan2::BasicVectorAdapter<tMVector_t> inputAdapter_t;
  RCP<inputAdapter_t> ia;
  
  // Both branches read coordinate vectors 0, 1 and 2.
  if (nWeights == 0){
    ia = rcp(new inputAdapter_t (numLocalCoords, globalIds, 
      coordinates->getData(0).getRawPtr(), coordinates->getData(1).getRawPtr(),
      coordinates->getData(2).getRawPtr(), 1,1,1));
  }
  else{
    vector<const zscalar_t *> values(3);
    for (int i=0; i < 3; i++)
      values[i] = coordinates->getData(i).getRawPtr();
    vector<int> valueStrides(0);  // implies stride is one
    vector<const zscalar_t *> weightPtrs(nWeights);
    for (int i=0; i < nWeights; i++)
      weightPtrs[i] = weights[i].getRawPtr();
    vector<int> weightStrides(0); // implies stride is one

    ia = rcp(new inputAdapter_t (numLocalCoords, globalIds, 
      values, valueStrides, weightPtrs, weightStrides));
  }

  MEMORY_CHECK(doMemory && rank==0, "After creating input adapter");

  // Parameters

  Teuchos::ParameterList params;

  if (timingType != "no_timers"){
    params.set("timer_output_stream" , "std::cout");
    params.set("timer_type" , timingType);
  }

  if (doMemory){
    params.set("memory_output_stream" , "std::cout");
    params.set("memory_procs" , memoryProcs);
  }

  params.set("debug_output_stream" , "std::cerr");
  params.set("debug_procs" , "0");

  // "debug_level" is only set when it differs from "basic_status".
  if (debugLevel != "basic_status"){
    params.set("debug_level" , debugLevel);
  }

  params.set("algorithm", "rcb");
  params.set("partitioning_objective", objective);
  double tolerance = 1.1;
  params.set("imbalance_tolerance", tolerance );

  // "num_global_parts" is only set when it differs from the process count.
  if (numGlobalParts != nprocs)
    params.set("num_global_parts" , numGlobalParts);

  if (rank==0){
    cout << "Number of parts: " << numGlobalParts << endl;
  }

  // Create a problem, solve it, and display the quality.

  Zoltan2::PartitioningProblem<inputAdapter_t> problem(&(*ia), &params);

  problem.solve();

  comm->barrier();

  problem.printTimers();

  comm->barrier();

  if (rank == 0){
    cout << "PASS" << endl;
  }

  return 0;
}
Ejemplo n.º 22
0
  // Inserts the given column indices into local row `localRow` by forwarding
  // to graph_->InsertMyIndices; the returned code is handed to
  // XPETRA_ERR_CHECK.  indices.size() is passed as the number of indices.
  void EpetraCrsGraph::insertLocalIndices(int localRow, const ArrayView<const int> &indices) { 
    XPETRA_MONITOR("EpetraCrsGraph::insertLocalIndices"); 

    // InsertMyIndices takes a non-const int*, so constness is cast away to
    // make the call.
    int* indices_rawPtr = const_cast<int*>(indices.getRawPtr()); // there is no const in the Epetra interface :(
    XPETRA_ERR_CHECK(graph_->InsertMyIndices(localRow, indices.size(), indices_rawPtr)); 
  }
  // Builds the filtered matrix for `currentLevel` and stores it as "A".
  //
  // When the level's "Filtering" flag is false the original matrix is passed
  // through unchanged.  Otherwise a matrix with the same graph as A is
  // created; for every row, each entry whose column is not a neighbor in the
  // level's "Graph" is set to zero.  With the "lumping" parameter enabled,
  // the dropped off-diagonal values of a row are added to that row's
  // diagonal entry.
  //
  // Parameters read: "lumping", "filtered matrix: reuse eigenvalue".
  void FilteredAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
    FactoryMonitor m(*this, "Matrix filtering", currentLevel);

    RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
    if (currentLevel.Get<bool>("Filtering", currentLevel.GetFactoryManager()->GetFactory("Filtering").get()) == false) {
      GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
      Set(currentLevel, "A", A);
      return;
    }
    size_t blkSize = A->GetFixedBlockSize();

    const ParameterList& pL = GetParameterList();
    bool lumping = pL.get<bool>("lumping");
    if (lumping)
      GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;

    RCP<GraphBase> G = Get< RCP<GraphBase> >(currentLevel, "Graph");

    SC zero = Teuchos::ScalarTraits<SC>::zero();

    // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
    // if an entry of the left matrix is zero, it does not compute or store the
    // zero value.
    //
    // This trick allows us to bypass constructing a new matrix. Instead, we
    // make a deep copy of the original one, and fill it in with zeros, which
    // are ignored during the prolongator smoothing.
    RCP<Matrix> filteredA = MatrixFactory::Build(A->getCrsGraph());

    filteredA->resumeFill();

    ArrayView<const LO> inds;
    ArrayView<const SC> valsA;
#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
    ArrayView<SC>       vals;
#else
    Array<SC>           vals;
#endif
    // filter[c] != 0 marks column c as kept for the graph row currently
    // being processed; one slot per column of the graph's import map,
    // expanded by the block size.
    Array<char> filter(blkSize * G->GetImportMap()->getNodeNumElements(), 0);

    size_t numGRows = G->GetNodeNumVertices();
    for (size_t i = 0; i < numGRows; i++) {
      // Set up filtering array
      ArrayView<const LO> indsG = G->getNeighborVertices(i);
      for (size_t j = 0; j < as<size_t>(indsG.size()); j++)
        for (size_t k = 0; k < blkSize; k++)
          filter[indsG[j]*blkSize+k] = 1;

      // Graph vertex i covers matrix rows i*blkSize .. i*blkSize+blkSize-1
      for (size_t k = 0; k < blkSize; k++) {
        LO row = i*blkSize + k;

        A->getLocalRowView(row, inds, valsA);

        size_t nnz = inds.size();
        if (nnz == 0)
          continue;

#ifdef ASSUME_DIRECT_ACCESS_TO_ROW
        // Transform ArrayView<const SC> into ArrayView<SC>
        ArrayView<const SC> vals1;
        filteredA->getLocalRowView(row, inds, vals1);
        vals = ArrayView<SC>(const_cast<SC*>(vals1.getRawPtr()), nnz);

        memcpy(vals.getRawPtr(), valsA.getRawPtr(), nnz*sizeof(SC));
#else
        vals = Array<SC>(valsA);
#endif

        if (lumping == false) {
          for (size_t j = 0; j < nnz; j++)
            if (!filter[inds[j]])
              vals[j] = zero;

        } else {
          LO diagIndex = -1;
          SC diagExtra = zero;

          for (size_t j = 0; j < nnz; j++) {
            const bool isDiag = (inds[j] == row);

            // Remember the diagonal position whether or not the diagonal is
            // kept by the filter.  Recording it only for a dropped diagonal
            // would discard the lumped sum whenever the diagonal is kept.
            if (isDiag)
              diagIndex = j;

            if (filter[inds[j]])
              continue;

            // Dropped entry: accumulate off-diagonal values for lumping.
            if (!isDiag)
              diagExtra += vals[j];

            vals[j] = zero;
          }

          // Lump dropped entries
          // NOTE
          //  * Does it make sense to lump for elasticity?
          //  * Is it different for diffusion and elasticity?
          if (diagIndex != -1)
            vals[diagIndex] += diagExtra;
        }

#ifndef ASSUME_DIRECT_ACCESS_TO_ROW
        // filteredA was built on A's graph, so the local column indices
        // obtained from A can be used directly with replaceLocalValues
        filteredA->replaceLocalValues(row, inds, vals);
#endif
      }

      // Reset filtering array
      for (size_t j = 0; j < as<size_t> (indsG.size()); j++)
        for (size_t k = 0; k < blkSize; k++)
          filter[indsG[j]*blkSize+k] = 0;
    }

    RCP<ParameterList> fillCompleteParams(new ParameterList);
    fillCompleteParams->set("No Nonlocal Changes", true);
    filteredA->fillComplete(fillCompleteParams);

    filteredA->SetFixedBlockSize(blkSize);

    if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
      // Reuse max eigenvalue from A
      // It is unclear what eigenvalue is the best for the smoothing, but we already may have
      // the D^{-1}A estimate in A, may as well use it.
      // NOTE: ML does that too
      filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
    }

    Set(currentLevel, "A", filteredA);
  }