template<typename T>
int fei::Vector_Impl<T>::putScalar(double scalar)
{
  if (output_level_ >= fei::BRIEF_LOGS && output_stream_ != NULL) {
    FEI_OSTREAM& os = *output_stream_;
    os << dbgprefix_<<"putScalar("<<scalar<<")"<<FEI_ENDL;
  }

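  //an FE-structured vector can only be reset to zero; putScalar with a
  //nonzero value is not supported in that case.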
  if (haveFEVector()) {
    if (scalar != 0.0) return(-1);
    CHK_ERR( snl_fei::FEVectorTraits<T>::reset(vector_) );
  }
  else {
    CHK_ERR( fei::VectorTraits<T>::setValues(vector_, firstLocalOffset(), scalar) );
  }
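  //also apply the scalar to our copies of remotely-owned data, so the
  //shared (overlapped) entries stay consistent with the owned entries.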
  for(unsigned p=0; p<remotelyOwned().size(); ++p) {
    fei::set_values(*(remotelyOwned()[p]), scalar);
  }
  return(0);
}
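
//gatherFromOverlap moves coefficient contributions that this processor
//holds for remotely-owned equations over to the owning processors.
//The exchange is:
// 1. build sendProcs (procs that own data we hold) and mirror it to get
//    recvProcs (procs holding data we own),
// 2. exchange message sizes,
// 3. exchange indices and coefficients,
// 4. store the received data into the underlying vector, summing if
//    'accumulate' is true.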
int fei::Vector_core::gatherFromOverlap(bool accumulate)
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  //first create the list of procs we'll be sending to.
  std::vector<int> sendProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if ((int)i == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    sendProcs.push_back((int)i);
  }

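  //mirrorProcs fills recvProcs with the transpose of sendProcs: the procs
  //that will be sending data to us.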
  std::vector<int> recvProcs;
  fei::mirrorProcs(comm_, sendProcs, recvProcs);

  //declare arrays to hold the indices and coefs we'll be receiving.
  std::vector<std::vector<int> > recv_ints(recvProcs.size());
  std::vector<std::vector<double> > recv_doubles(recvProcs.size());
  std::vector<int> recv_sizes(recvProcs.size());

  std::vector<MPI_Request> mpiReqs(recvProcs.size()*2);
  std::vector<MPI_Status> mpiStatuses(recvProcs.size()*2);
  int tag1 = 11111;
  int tag2 = 11112;

  //post the recvs for the sizes.
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    MPI_Irecv(&recv_sizes[i], 1, MPI_INT, proc,
              tag1, comm_, &mpiReqs[i]);
  }

  //send the sizes of data we'll be sending.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = remotelyOwned_[proc]->size();
    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }

  if (recvProcs.size() > 0) {
    MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
  }

  //now post the recvs for the data.
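  //tag1 is re-used below for the index messages; this is safe because the
  //Waitall above completed all size-recvs, and MPI's non-overtaking rule
  //ensures each sender's size message is matched before its index message.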
  unsigned offset = 0;
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = recv_sizes[i];
    std::vector<int>& recv_ints_i = recv_ints[i];
    std::vector<double>& recv_doubles_i = recv_doubles[i];
    recv_ints_i.resize(size);
    recv_doubles_i.resize(size);
    MPI_Irecv(&(recv_ints_i[0]), size, MPI_INT, proc,
              tag1, comm_, &mpiReqs[offset++]);
    MPI_Irecv(&(recv_doubles_i[0]), size, MPI_DOUBLE, proc,
              tag2, comm_, &mpiReqs[offset++]);
  }

  //now send the outgoing data.
  for(size_t i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = remotelyOwned_[proc]->size();
    int* indices = &(remotelyOwned_[proc]->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
    double* coefs = &(remotelyOwned_[proc]->coefs())[0];
    MPI_Send(coefs, size, MPI_DOUBLE, proc, tag2, comm_);

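    //zero our copy now that it has been shipped to the owning proc, so the
    //next assembly starts from a clean slate.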
    fei::set_values(*remotelyOwned_[proc], 0.0);
  }

  if (recvProcs.size() > 0) {
    MPI_Waitall(recvProcs.size()*2, &mpiReqs[0], &mpiStatuses[0]);
  }

  //now store the data we've received.
  for(size_t i=0; i<recvProcs.size(); ++i) {
    int num = recv_sizes[i];
    std::vector<int>& recv_ints_i = recv_ints[i];
    std::vector<double>& recv_doubles_i = recv_doubles[i];
    int err = giveToUnderlyingVector(num, &(recv_ints_i[0]),
                                     &(recv_doubles_i[0]), accumulate, 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::gatherFromOverlap ERROR storing recvd data" << FEI_ENDL;
      return(err);
    }
  }

#endif  //#ifndef FEI_SER

  return(0);
}
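
//scatterToOverlap is the reverse of gatherFromOverlap: it pulls current
//coefficient values for our shared (overlapped) equations from the owning
//processors into our remotelyOwned_ vectors.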
int fei::Vector_core::scatterToOverlap()
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  if (!overlapAlreadySet_) {
    setOverlap();
  }

  //...and now the overlap is whatever is in our remotelyOwned_ vectors.

  //first find out which procs we'll be receiving from.
  std::vector<int> recvProcs;
  for(unsigned i=0; i<remotelyOwned_.size(); ++i) {
    if ((int)i == fei::localProc(comm_)) continue;
    if (remotelyOwned_[i]->size() == 0) continue;

    recvProcs.push_back((int)i);
  }

  //find out the send-procs.
  std::vector<int> sendProcs;
  fei::mirrorProcs(comm_, recvProcs, sendProcs);

  //declare arrays to send from, and corresponding sizes
  std::vector<std::vector<int> > send_ints(sendProcs.size());
  std::vector<std::vector<double> > send_doubles(sendProcs.size());
  std::vector<int> send_sizes(sendProcs.size());

  std::vector<MPI_Request> mpiReqs(sendProcs.size()+recvProcs.size());
  std::vector<MPI_Status> mpiStatuses(sendProcs.size()+recvProcs.size());
  int tag1 = 11111;
  int tag2 = 11112;

  //first, the procs we're going to send to have to let us know how much
  //data we're supposed to send. So we receive sizes, and later indices,
  //from the "send"-procs.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    MPI_Irecv(&send_sizes[i], 1, MPI_INT, sendProcs[i],
              tag1, comm_, &mpiReqs[i]);
  }

  //now send the sizes of our remotely-owned data to the procs we'll be
  //receiving that data from. (The indices we want are sent in a later
  //loop, after these sizes have been exchanged.)
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];

    int size = remotelyOwned_[proc]->size();
    MPI_Send(&size, 1, MPI_INT, proc, tag1, comm_);
  }
 
  if (sendProcs.size() > 0) {
    MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
  }

  //now resize our send_ints and send_doubles arrays, and post the recvs
  //for indices that we're supposed to pack.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];
    int size = send_sizes[i];
    send_ints[i].resize(size);
    MPI_Irecv(&(send_ints[i][0]), size, MPI_INT, proc, tag1,
              comm_, &mpiReqs[i]);
    send_doubles[i].resize(size);
  }

  //now send the indices that we want to receive data for.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = remotelyOwned_[proc]->size();
    int* indices = &(remotelyOwned_[proc]->indices())[0];
    MPI_Send(indices, size, MPI_INT, proc, tag1, comm_);
  }

  if (sendProcs.size() > 0) {
    MPI_Waitall(sendProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
  }

  //now post our recvs.
  for(unsigned i=0; i<recvProcs.size(); ++i) {
    int proc = recvProcs[i];
    int size = remotelyOwned_[proc]->size();
    double* coefs = &(remotelyOwned_[proc]->coefs())[0];
    MPI_Irecv(coefs, size, MPI_DOUBLE, proc, tag2, comm_, &mpiReqs[i]);
  }

  //now pack and send the coefs that the other procs need from us.
  for(unsigned i=0; i<sendProcs.size(); ++i) {
    int proc = sendProcs[i];

    int num = send_sizes[i];
    int err = copyOutOfUnderlyingVector(num, &(send_ints[i][0]),
                                        &(send_doubles[i][0]), 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::scatterToOverlap ERROR getting data to send."<<FEI_ENDL;
      return(err);
    }

    MPI_Send(&(send_doubles[i][0]), num, MPI_DOUBLE, proc,
             tag2, comm_);
  }

  if (recvProcs.size() > 0) {
    MPI_Waitall(recvProcs.size(), &mpiReqs[0], &mpiStatuses[0]);
  }

#endif  //#ifndef FEI_SER

  return(0);
}
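
//Alternative implementation of gatherFromOverlap. This variant caches the
//send/recv proc lists (sendProcs_/recvProcs_) and packs each destination's
//indices and coefficients into a single char buffer, so one message per
//proc is exchanged instead of separate index and coefficient messages.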
int fei::Vector_core::gatherFromOverlap(bool accumulate)
{
  if (fei::numProcs(comm_) == 1 || haveFEVector()) {
    return(0);
  }

#ifndef FEI_SER
  std::vector<MPI_Request> mpiReqs;
  int tag1 = 11111;

  if (sendRecvProcsNeedUpdated_) {
    setCommSizes();
  }
  
  mpiReqs.resize(recvProcs_.size());

  //now post the recvs for the data.
  for(size_t i=0; i<recvProcs_.size(); ++i) {
    MPI_Irecv(&(recv_chars_[i][0]), recv_sizes_[i], MPI_CHAR, recvProcs_[i],
              tag1, comm_, &mpiReqs[i]);
  }

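  //serialize each remotely-owned vector's indices and coefs into
  //send_chars_, zeroing the remotely-owned data once it has been packed.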
  bool resize_buffer = false;
  bool zero_remotely_owned_after_packing = true;
  pack_send_buffers(sendProcs_, remotelyOwned_, send_chars_,
                    resize_buffer, zero_remotely_owned_after_packing);

  //now send the outgoing data.
  for(size_t i=0; i<sendProcs_.size(); ++i) {
    int proc = sendProcs_[i];

    int size = send_chars_[i].size();
    MPI_Send(&(send_chars_[i][0]), size, MPI_CHAR, proc, tag1, comm_);
  }

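  //drain the posted recvs one at a time; equivalent to a Waitall here.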
  int numRecvProcs = recvProcs_.size();
  for(size_t i=0; i<recvProcs_.size(); ++i) {
    int index;
    MPI_Status status;
    MPI_Waitany(numRecvProcs, &mpiReqs[0], &index, &status);
  }

  std::vector<int> indices;
  std::vector<double> coefs;
  //now store the data we've received.
  for(size_t i=0; i<recvProcs_.size(); ++i) {
    fei::impl_utils::unpack_indices_coefs(recv_chars_[i], indices, coefs);
    int num = indices.size();
    if (num == 0) continue;
    int err = giveToUnderlyingVector(num, &(indices[0]),
                                     &(coefs[0]), accumulate, 0);
    if (err != 0) {
      FEI_COUT << "fei::Vector_core::gatherFromOverlap ERROR storing recvd data" << FEI_ENDL;
      return(err);
    }
  }

#endif  //#ifndef FEI_SER

  return(0);
}
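
//Typical usage (a sketch, not taken from this file; assumes the standard
//fei::Vector interface and a factory-created vector):
//
//  fei::SharedPtr<fei::Vector> x = factory->createVector(vecSpace);
//  x->putScalar(0.0);          //zero owned and shared entries
//  //...assembly, possibly summing into remotely-owned equations...
//  x->gatherFromOverlap(true); //sum shared contributions to the owners
//  x->scatterToOverlap();      //refresh shared copies from the owners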