Example #1
#include <mpi.h>
#include <cstdlib>
#include <ctime>
#include <iostream>
using namespace std;

// COLS and ROWS are defined elsewhere in the original program; fixed values
// are assumed here so the listing is self-contained.
const int COLS = 4;
const int ROWS = 4;

int main(int argc, char* argv[]) {
	MPI::Init(argc, argv);

	int rank = MPI::COMM_WORLD.Get_rank();
	int size = MPI::COMM_WORLD.Get_size();
	if (size < 2) MPI::COMM_WORLD.Abort(1);
	if (size < 1+COLS+ROWS) MPI::COMM_WORLD.Abort(1);
	
	MPI::Group globalGroup = MPI::COMM_WORLD.Get_group();

	if (0 == rank) {
		int matrix[COLS][ROWS], xirtam[ROWS][COLS];
	
		srand(time(0));
		for (int i=0; i<COLS; i++)
			for (int j=0; j<ROWS; j++) {
				matrix[i][j] = 9 * (double)rand() / (double)RAND_MAX;
				xirtam[j][i] = matrix[i][j];
			}
		
		cout << "random matrix: " << endl;
		for (int i=0; i<COLS; i++) {
			for (int j=0; j<ROWS; j++)
				cout << matrix[i][j] << " ";
			cout << endl;
		}
	}
	else {	
		MPI::Group group;
		MPI::Intracomm comm;

		// ranksA and ranksB are declared elsewhere in the original program;
		// dynamically sized buffers are assumed here.
		int* ranksA = new int[size];
		int* ranksB = new int[size];
		int j=0, k=0;
		for (int i=1; i<size; i++)
			if (i % 2) ranksA[j++] = i;
			else ranksB[k++] = i;

		if (rank % 2)
			group = globalGroup.Incl(j, ranksA);	// j = number of odd ranks collected above
		else
			group = globalGroup.Incl(k, ranksB);	// k = number of even ranks collected above

		// Note: MPI_Comm_create is collective over MPI_COMM_WORLD, so a strictly
		// conforming program would have rank 0 call Create() as well (e.g. with
		// an empty group); this listing only calls it on the non-zero ranks.
		comm = MPI::COMM_WORLD.Create(group);
		int newRank = comm.Get_rank();

		// The original listing printed powerSum via pline() here; both are
		// defined elsewhere in that program.
		cout << rank << ", " << newRank << endl;
		fflush(stdout);

		group.Free();
		comm.Free();
		delete [] ranksA;
		delete [] ranksB;
	}

	MPI::Finalize();
	
	return 0;
}
Example #2

void LocalScalar3D<real>::Dump(BlockManager& blockManager, const int step, const char* label) {
	ImposeBoundaryCondition(blockManager);
	MPI::Intracomm comm = blockManager.getCommunicator();

	ostringstream ossFileNameTime;
	ossFileNameTime << "./BIN/";
	mkdir(ossFileNameTime.str().c_str(), 0755);

	for (int id = 0; id < blockManager.getNumBlock(); ++id) {
		BlockBase* block = blockManager.getBlock(id);

		::Vec3i size = block->getSize();
		Vec3d origin = block->getOrigin();
		Vec3d blockSize = block->getBlockSize();
		Vec3d cellSize = block->getCellSize();
		int level = block->getLevel();

		ostringstream ossFileName;
		ossFileName << "./BIN/";
		ossFileName << "dump-";
		ossFileName << label;
		ossFileName << "-";
		ossFileName.width(5);
		ossFileName.setf(ios::fixed);
		ossFileName.fill('0');
		ossFileName << comm.Get_rank();
		ossFileName << "-";
		ossFileName.width(5);
		ossFileName.setf(ios::fixed);
		ossFileName.fill('0');
		ossFileName << id;
		ossFileName << "-";
		ossFileName.width(10);
		ossFileName.setf(ios::fixed);
		ossFileName.fill('0');
		ossFileName << step;
		ossFileName << ".bin";

		int cx = size.x + 2*vc;
		int cy = size.y + 2*vc;
		int cz = size.z + 2*vc;
		int iNE = 1;

		real* pData = GetBlockData(block);

		ofstream ofs;
		ofs.open(ossFileName.str().c_str(), ios::out | ios::binary);
		ofs.write((char*)&size.x, sizeof(int));
		ofs.write((char*)&size.y, sizeof(int));
		ofs.write((char*)&size.z, sizeof(int));
		ofs.write((char*)&vc    , sizeof(int));
		ofs.write((char*)&iNE   , sizeof(int));
		ofs.write((char*)pData  , sizeof(real)*cx*cy*cz);
		ofs.close();
	}
}
Example #3
 /** \brief
  * In many situations each rank computes a number of local DOFs. All
  * ranks then want to know the number of global DOFs and the starting
  * displacement of the DOF numbering in each rank.
  *
  * \param[in]   mpiComm        The MPI communicator.
  * \param[in]   nRankDofs      The number of local DOFs.
  * \param[out]  rStartDofs     Displacement of the DOF numbering. On rank n
  *                             this is the sum of all local DOF numbers in
  *                             ranks 0 to n - 1.
  * \param[out]  nOverallDofs   Global sum of nRankDofs. Is equal on all
  *                             ranks.
  */
 inline void getDofNumbering(MPI::Intracomm& mpiComm,
                             int nRankDofs,
                             int& rStartDofs,
                             int& nOverallDofs)
 {
   rStartDofs = 0;
   nOverallDofs = 0;
   mpiComm.Scan(&nRankDofs, &rStartDofs, 1, MPI_INT, MPI_SUM);
   rStartDofs -= nRankDofs;
   mpiComm.Allreduce(&nRankDofs, &nOverallDofs, 1, MPI_INT, MPI_SUM);
 }
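A minimal usage sketch (a hypothetical driver, not part of the original source): assuming MPI is initialized, getDofNumbering from Example #3 is in scope, and every rank contributes nRankDofs = 10, rank r ends up with rStartDofs == 10*r and nOverallDofs == 10 * comm.Get_size().

 // Hypothetical driver for getDofNumbering(); requires an MPI build that
 // still ships the deprecated C++ bindings used throughout these examples.
 #include <mpi.h>
 #include <iostream>

 int main(int argc, char* argv[])
 {
   MPI::Init(argc, argv);
   MPI::Intracomm comm = MPI::COMM_WORLD;

   int nRankDofs = 10;      // each rank contributes 10 local DOFs
   int rStartDofs = 0;
   int nOverallDofs = 0;
   getDofNumbering(comm, nRankDofs, rStartDofs, nOverallDofs);

   // On rank r this prints: start = 10*r, overall = 10*size
   std::cout << "rank " << comm.Get_rank()
             << ": start = " << rStartDofs
             << ", overall = " << nOverallDofs << std::endl;

   MPI::Finalize();
   return 0;
 }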
Example #4
// not necessary to create a new comm object
MPI::Intracomm init_workers(const MPI::Intracomm &comm_world, int managerid) {
	// get old group
	MPI::Group world_group = comm_world.Get_group();
	// create new group from old group
	int worker_size = comm_world.Get_size() - 1;
	int *workers = new int[worker_size];
	for (int i = 0, id = 0; i < worker_size; ++i, ++id) {
		if (id == managerid) ++id;  // skip the manager id
		workers[i] = id;
	}
	MPI::Group worker_group = world_group.Incl(worker_size, workers);
	delete [] workers;
	return comm_world.Create(worker_group);
}
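A hedged sketch of a call site (hypothetical; managerid is assumed to be 0): because MPI_Comm_create is collective over comm_world, the manager rank must call init_workers too. It is not a member of worker_group, so Create hands it MPI::COMM_NULL, which it can test for before doing worker-only work.

	// Hypothetical call site: every rank calls init_workers, but only the
	// workers get a usable communicator back.
	MPI::Intracomm worker_comm = init_workers(MPI::COMM_WORLD, /*managerid=*/0);
	if (worker_comm != MPI::COMM_NULL) {
		worker_comm.Barrier();   // worker-only collective work goes here
		worker_comm.Free();
	}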
Example #5
   /*
   * Compute, store and return total number of atoms on all processors.
   */
   void AtomStorage::computeNAtomTotal(MPI::Intracomm& communicator)
   {
      // If nAtomTotal is already set, do nothing and return.
      // if (nAtomTotal_.isSet()) return;

      int nAtomLocal = nAtom();
      int nAtomTotal = 0;
      communicator.Reduce(&nAtomLocal, &nAtomTotal, 1, 
                          MPI::INT, MPI::SUM, 0);
      if (communicator.Get_rank() !=0) {
         nAtomTotal = 0;
      }
      nAtomTotal_.set(nAtomTotal);
   }
Example #6
    bool recvData(std::vector<double>& receivedData)
    {
        bool isDataReceived = false;
        if ( intraComm != MPI::COMM_NULL)
        {
            MPI::Status status;
            double buffer[100];
            intraComm.Recv(buffer, 100,
                           MPI::DOUBLE,
                           MPI::ANY_SOURCE,
                           /*tag*/ 100,
                           status);

            int count = status.Get_count(MPI::DOUBLE);
            receivedData = std::vector<double>(buffer, buffer+count);

            log.Info() << "RECV [ " << getRank()
                        << " <-- "
                        << status.Get_source()
                        << " ] data : "
                        << receivedData
                        << std::endl;
            isDataReceived = true;
        }else
        {
            log.Err() << "PID " << getProcessId()
                      << " failed to RECV"
                      << std::endl;
        }
        return isDataReceived;
    }
Example #7

 void MpiFileIo::setIoCommunicator(MPI::Intracomm& communicator)
 {
    communicatorPtr_ = &communicator; 
    if (communicator.Get_rank() == 0) {
       isIoProcessor_ = true;
    } else {
       isIoProcessor_ = false;
    }
 }
Example #8

   /*
   * Send a block (nonblocking)
   */
   void MemoryOArchive::iSend(MPI::Intracomm& comm, MPI::Request& req, int dest)
   {
      int  comm_size = comm.Get_size();
      int  myRank = comm.Get_rank();

      // Preconditions
      if (dest > comm_size - 1 || dest < 0) {
         UTIL_THROW("Destination rank out of bounds");
      }
      if (dest == myRank) {
         UTIL_THROW("Source and destination identical");
      }

      size_t  sendBytes = cursor_ - buffer_;
      size_t* sizePtr = (size_t*)buffer_;
      *sizePtr = sendBytes;
      req = comm.Isend(buffer_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
   }
Example #9
/*
* Receive a block.
*/
void PackedData::recv(MPI::Intracomm& comm, int source)
{
    MPI::Request request;
    int  myRank     = comm.Get_rank();
    int  comm_size  = comm.Get_size();

    // Preconditions
    if (source > comm_size - 1 || source < 0) {
        UTIL_THROW("Source rank out of bounds");
    }
    if (source == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    request = comm.Irecv(begin_, capacity_, MPI::UNSIGNED_CHAR, source, 5);
    request.Wait();
    cursor_ = begin_;

}
Example #10
   /*
   * Send and receive buffer.
   */
   void Buffer::sendRecv(MPI::Intracomm& comm, int source, int dest)
   {

      MPI::Request request[2];
      int  sendBytes = 0;
      int  myRank    = comm.Get_rank();
      int  comm_size = comm.Get_size();

      // Preconditions
      if (dest > comm_size - 1 || dest < 0) {
         UTIL_THROW("Destination rank out of bounds");
      }
      if (source > comm_size - 1 || source < 0) {
         UTIL_THROW("Source rank out of bounds");
      }
      if (dest == myRank) {
         UTIL_THROW("Destination and my rank are identical");
      }
      if (source == myRank) {
         UTIL_THROW("Source and my rank are identical");
      }

      // Start nonblocking receive.
      request[0] = comm.Irecv(recvBufferBegin_, bufferCapacity_ , 
                              MPI::CHAR, source, 5);

      // Start nonblocking send.
      sendBytes = sendPtr_ - sendBufferBegin_;
      request[1] = comm.Isend(sendBufferBegin_, sendBytes , MPI::CHAR, dest, 5);

      // Wait for completion of receive.
      request[0].Wait();
      recvPtr_ = recvBufferBegin_;

      // Wait for completion of send.
      request[1].Wait();

      // Update statistics.
      if (sendBytes > maxSendLocal_) {
         maxSendLocal_ = sendBytes;
      }
   }
Example #11
   /*
   * Receive a buffer.
   */
   void Buffer::recv(MPI::Intracomm& comm, int source)
   {
      MPI::Request request;
      int  myRank     = comm.Get_rank();
      int  comm_size  = comm.Get_size();

      // Preconditions
      if (source > comm_size - 1 || source < 0) {
         UTIL_THROW("Source rank out of bounds");
      }
      if (source == myRank) {
         UTIL_THROW("Source and destination identical");
      }

      request = comm.Irecv(recvBufferBegin_, bufferCapacity_, 
                           MPI::CHAR, source, 5);
      request.Wait();
      recvType_ = NONE;
      recvPtr_ = recvBufferBegin_;
   }
Example #12
/*
* Send a block.
*/
void PackedData::send(MPI::Intracomm& comm, int dest)
{
    MPI::Request request;
    int  sendBytes = 0;
    int  comm_size = comm.Get_size();
    int  myRank = comm.Get_rank();

    // Preconditions
    if (dest > comm_size - 1 || dest < 0) {
        UTIL_THROW("Destination rank out of bounds");
    }
    if (dest == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    sendBytes = cursor_ - begin_;
    request = comm.Isend(begin_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
    request.Wait();

}
Example #13
 int getRank() const
 {
     int rank = Network::INVALID_VALUE;
     if ( intraComm != MPI::COMM_NULL)
     {
         rank = intraComm.Get_rank();
     }else
     {
         log.Err() << "getRank() returns invalid\n";
     }
     return rank;
 }
Example #14
 int getTopology() const
 {
     int topology = Network::INVALID_VALUE;
     if ( intraComm != MPI::COMM_NULL)
     {
         topology = intraComm.Get_topology();
     }else
     {
         log.Err() << "getTopology() returns invalid\n";
     }
     return topology;
 }
Example #15
 int getSize() const
 {
     int size = Network::INVALID_VALUE;
     if ( intraComm != MPI::COMM_NULL)
     {
         size = intraComm.Get_size();
     }else
     {
         log.Err() << "getSize() returns invalid\n";
     }
     return size;
 }
Example #16

   /*
   * Reduce (add) distributions from multiple MPI processors.
   */
   void Distribution::reduce(MPI::Intracomm& communicator, int root)
   {
  
      long* totHistogram = new long[nBin_]; 
      communicator.Reduce(histogram_.cArray(), totHistogram, nBin_, MPI::LONG, MPI::SUM, root);
      if (communicator.Get_rank() == root) {
         for (int i=0; i < nBin_; ++i) {
            histogram_[i] = totHistogram[i];
         }
      } else { 
         for (int i=0; i < nBin_; ++i) {
            histogram_[i] = 0.0;
         }
      }
      delete [] totHistogram;

      long totSample; 
      communicator.Reduce(&nSample_, &totSample, 1, MPI::LONG, MPI::SUM, root);
      if (communicator.Get_rank() == root) {
         nSample_ = totSample;
      } else {
         nSample_ = 0;
      }

      long totReject; 
      communicator.Reduce(&nReject_, &totReject, 1, MPI::LONG, MPI::SUM, root);
      if (communicator.Get_rank() == root) {
         nReject_ = totReject;
      } else {
         nReject_ = 0;
      }

   }
Example #17
   /*
   * Broadcast a buffer.
   */
   void Buffer::bcast(MPI::Intracomm& comm, int source)
   {
      int comm_size = comm.Get_size();
      int myRank = comm.Get_rank();
      if (source > comm_size - 1 || source < 0) {
         UTIL_THROW("Source rank out of bounds");
      }

      int sendBytes;
      if (myRank == source) {
         sendBytes = sendPtr_ - sendBufferBegin_;
         comm.Bcast(&sendBytes, 1, MPI::INT, source);
         comm.Bcast(sendBufferBegin_, sendBytes, MPI::CHAR, source);
         sendPtr_ = sendBufferBegin_;
         sendType_ = NONE;
      } else {
         comm.Bcast(&sendBytes, 1, MPI::INT, source);
         comm.Bcast(recvBufferBegin_, sendBytes, MPI::CHAR, source);
         recvPtr_ = recvBufferBegin_;
         recvType_ = NONE;
      }
      if (sendBytes > maxSendLocal_) {
         maxSendLocal_ = sendBytes;
      }

   }
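The two-phase pattern above (broadcast the byte count first, then the payload) also works outside the Buffer class. A minimal standalone sketch; the helper name and buffer type are illustrative, not taken from the original source:

   // Hypothetical helper showing the size-then-payload broadcast used by
   // Buffer::bcast(): receivers learn the length before the data arrives.
   #include <mpi.h>
   #include <vector>

   void bcastBytes(MPI::Intracomm& comm, std::vector<char>& bytes, int root)
   {
      int n = static_cast<int>(bytes.size());   // meaningful on root only
      comm.Bcast(&n, 1, MPI::INT, root);        // phase 1: the length
      if (comm.Get_rank() != root) {
         bytes.resize(n);                       // receivers make room
      }
      if (n > 0) {
         comm.Bcast(&bytes[0], n, MPI::CHAR, root);   // phase 2: the payload
      }
   }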
Example #18
// Metropolis-Hastings population size resampling; not used anymore
void resample_popsizes_mh(ArgModel *model, const LocalTrees *trees,
                       bool sample_popsize_recomb, double heat) {
    list<PopsizeConfigParam> &l = model->popsize_config.params;
    double curr_like = sample_popsize_recomb ? calc_arg_prior(model, trees) :
        calc_arg_prior_recomb_integrate(model, trees, NULL, NULL, NULL);
#ifdef ARGWEAVER_MPI
    MPI::Intracomm *comm = model->mc3.group_comm;
    int rank = comm->Get_rank();
    comm->Reduce(rank == 0 ? MPI_IN_PLACE : &curr_like,
                 &curr_like, 1, MPI::DOUBLE, MPI_SUM, 0);
#endif
    for (int rep=0; rep < model->popsize_config.numsample; rep++) {
        int idx=0;
        for (list<PopsizeConfigParam>::iterator it = l.begin();
             it != l.end(); it++) {
            curr_like =
                resample_single_popsize_mh(model, trees, sample_popsize_recomb,
                                           heat, it, curr_like, idx++);
        }
    }

}
Example #19
/// Broadcast the octree information to the other ranks.
void BCMOctree::broadcast(MPI::Intracomm& comm)
{
  assert(comm.Get_rank() == 0);
  rootGrid->broadcast(comm);

  int numLeafNode = leafNodeArray.size();
  int ibuf[2];
  ibuf[0] = numLeafNode;
  ibuf[1] = ordering;
  comm.Bcast(ibuf, 2, MPI::INT, 0);

  size_t size = Pedigree::GetSerializeSize();
  unsigned char* buf = new unsigned char[size * numLeafNode];

  size_t ip = 0;
  for (int id = 0; id < rootGrid->getSize(); id++) {
    packPedigrees(rootNodes[id], ip, buf);
  }

  comm.Bcast(buf, size*numLeafNode, MPI::BYTE, 0);
  delete[] buf;
}
Example #20
   /*
   * Receive a block.
   */
   void MemoryIArchive::recv(MPI::Intracomm& comm, int source)
   {
      int  myRank     = comm.Get_rank();
      int  comm_size  = comm.Get_size();

      // Preconditions
      if (source > comm_size - 1 || source < 0) {
         UTIL_THROW("Source rank out of bounds");
      }
      if (source == myRank) {
         UTIL_THROW("Source and destination identical");
      }

      size_t recvCapacity = capacity_ + sizeof(size_t);
      comm.Recv(buffer_, recvCapacity, MPI::UNSIGNED_CHAR, source, 5);

      begin_ = buffer_ + sizeof(size_t);
      cursor_ = begin_;

      size_t* sizePtr = (size_t*) buffer_;
      size_t  size = *sizePtr;
      end_  = buffer_ + size;
   }
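MemoryOArchive::iSend (Example #8) and MemoryIArchive::recv form a matched pair: the first sizeof(size_t) bytes of the buffer carry the total message length, so the receiver can locate the end of the payload without a separate size message. A hypothetical sketch of the same framing with plain byte arrays (all names are illustrative):

   // Stamp the total length into the first sizeof(size_t) bytes, send, and
   // read it back on receipt. `used` counts header plus payload bytes.
   #include <mpi.h>
   #include <cstring>

   void sendFramed(MPI::Intracomm& comm, unsigned char* buf, size_t used, int dest)
   {
      std::memcpy(buf, &used, sizeof(size_t));   // stamp the header
      comm.Send(buf, static_cast<int>(used), MPI::UNSIGNED_CHAR, dest, 5);
   }

   size_t recvFramed(MPI::Intracomm& comm, unsigned char* buf, size_t capacity, int source)
   {
      comm.Recv(buf, static_cast<int>(capacity), MPI::UNSIGNED_CHAR, source, 5);
      size_t total;
      std::memcpy(&total, buf, sizeof(size_t));  // read the header back
      return total;                              // payload ends at buf + total
   }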
Example #21

template <class IT, class NT>
FullyDistSpVec<IT, IT> FullyDistSpVec<IT, NT>::sort()
{
	MPI::Intracomm World = commGrid->GetWorld();
	FullyDistSpVec<IT,IT> temp(commGrid);
	IT nnz = getlocnnz(); 
	pair<NT,IT> * vecpair = new pair<NT,IT>[nnz];
	int nprocs = World.Get_size();
	int rank = World.Get_rank();

	IT * dist = new IT[nprocs];
	dist[rank] = nnz;
	World.Allgather(MPI::IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>());
	IT sizeuntil = accumulate(dist, dist+rank, 0);
	for(IT i=0; i< nnz; ++i)
	{
		vecpair[i].first = num[i];	// we'll sort wrt numerical values
		vecpair[i].second = ind[i] + sizeuntil;	
	}
	SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);

	vector< IT > nind(nnz);
	vector< IT > nnum(nnz);
	for(IT i=0; i< nnz; ++i)
	{
		num[i] = vecpair[i].first;	// sorted range (change the object itself)
		nind[i] = ind[i];		// make sure the sparsity distribution is the same
		nnum[i] = vecpair[i].second;	// inverse permutation stored as numerical values
	}
	delete [] vecpair;
	delete [] dist;

	temp.NOT_FOUND = NOT_FOUND;
	temp.glen = glen;
	temp.ind = nind;
	temp.num = nnum;
	return temp;
}
Example #22
   /*
   * Send a buffer.
   */
   void Buffer::send(MPI::Intracomm& comm, int dest)
   {
      MPI::Request request;
      int  sendBytes = 0;
      int  comm_size = comm.Get_size();
      int  myRank = comm.Get_rank();

      // Preconditions
      if (dest > comm_size - 1 || dest < 0) {
         UTIL_THROW("Destination rank out of bounds");
      }
      if (dest == myRank) {
         UTIL_THROW("Source and destination identical");
      }

      sendBytes = sendPtr_ - sendBufferBegin_;
      request = comm.Isend(sendBufferBegin_, sendBytes, MPI::CHAR, dest, 5);
      request.Wait();

      // Update statistics.
      if (sendBytes > maxSendLocal_) {
         maxSendLocal_ = sendBytes;
      }
   }
Example #23
    bool sendDataTo(const std::vector<double>& inData, int dest)
    {
        bool isDataSent = false;
        if ( intraComm != MPI::COMM_NULL)
        {
            log.Info() << "SEND [ " << getRank()
                       << " --> "
                       << dest
                       << " ] data : "
                       << inData
                       << std::endl;

            intraComm.Send(   &(inData[0]),
                                inData.size(),
                                MPI::DOUBLE,
                                dest, /*tag*/ 100);
            isDataSent = true;
        }else
        {
            log.Err() << "PID " << getProcessId()
                      << " failed to SEND\n";
        }
        return isDataSent;
    }
Example #24
void transfH_MPI(mblock<double>** Ap, mblock<float>** A, unsigned* seq_part)
{
  unsigned rank  = COMM_AHMED.Get_rank();
  unsigned nproc = COMM_AHMED.Get_size();

  unsigned info[8];

  if (rank==0) {
    copyH(seq_part[1]-seq_part[0], Ap, A);

    for (unsigned j=1; j<nproc; ++j) {
      for (unsigned i=seq_part[j]; i<seq_part[j+1]; ++i) {

        COMM_AHMED.Recv(info, 8, MPI::UNSIGNED, j, 7);
        A[i] = new mblock<float>(info[0], info[1]);
        float* tmp = NULL;
        if (info[7]) {
          tmp = new float[info[7]];
          COMM_AHMED.Recv(tmp, info[7], MPI::FLOAT, j, 8);
        }
        A[i]->cpy_mbl(info+2, tmp);
        delete [] tmp;
      }
    }
  } else {
    for (unsigned i=seq_part[rank]; i<seq_part[rank+1]; ++i) {
      mblock<double>* p = Ap[i-seq_part[rank]];
      info[0] = p->getn1();
      info[1] = p->getn2();
      p->get_prop(info+2);
      info[7] = p->nvals();
      COMM_AHMED.Send(info, 8, MPI::UNSIGNED, 0, 7);

      if (info[7]) {
        float* tmp = new float[info[7]];
        for (unsigned k=0; k<info[7]; ++k) tmp[k] = (float) p->getdata()[k];  // k avoids shadowing the outer loop index i
        COMM_AHMED.Send(tmp, info[7], MPI::FLOAT, 0, 8);
        delete [] tmp;
      }
    }
  }
}
Example #25

template <class IT, class NT>
FullyDistVec<IT,NT> FullyDistSpVec<IT,NT>::operator() (const FullyDistVec<IT,IT> & ri) const
{
	MPI::Intracomm World = commGrid->GetWorld();
	// FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval, NT id);
	FullyDistVec<IT,NT> Indexed(ri.commGrid, ri.glen, zero, zero);
	int nprocs = World.Get_size();
	unordered_map<IT, IT> revr_map;	// inverted index that maps indices of *this to indices of output
	vector< vector<IT> > data_req(nprocs);
	IT locnnz = ri.LocArrSize();

	// ABAB: Input sanity check
	int local = 1;
	int whole = 1;
	for(IT i=0; i < locnnz; ++i)
	{
		if(ri.arr[i] >= glen || ri.arr[i] < 0)
		{
			local = 0;
		} 
	}
	World.Allreduce( &local, &whole, 1, MPI::INT, MPI::BAND);
	if(whole == 0)
	{
		throw outofrangeexception();
	}

	for(IT i=0; i < locnnz; ++i)
	{
		IT locind;
		int owner = Owner(ri.arr[i], locind);	// numerical values in ri are 0-based
		data_req[owner].push_back(locind);
		revr_map.insert(typename unordered_map<IT, IT>::value_type(locind, i));
	}
	IT * sendbuf = new IT[locnnz];
	int * sendcnt = new int[nprocs];
	int * sdispls = new int[nprocs];
	for(int i=0; i<nprocs; ++i)
		sendcnt[i] = data_req[i].size();

	int * rdispls = new int[nprocs];
	int * recvcnt = new int[nprocs];
	World.Alltoall(sendcnt, 1, MPI::INT, recvcnt, 1, MPI::INT);	// share the request counts 

	sdispls[0] = 0;
	rdispls[0] = 0;
	for(int i=0; i<nprocs-1; ++i)
	{
		sdispls[i+1] = sdispls[i] + sendcnt[i];
		rdispls[i+1] = rdispls[i] + recvcnt[i];
	}
	IT totrecv = accumulate(recvcnt,recvcnt+nprocs,0);
	IT * recvbuf = new IT[totrecv];

	for(int i=0; i<nprocs; ++i)
	{
		copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
		vector<IT>().swap(data_req[i]);
	}
	World.Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>());  // request data
		
	// We will return the requested data, 
	// our return can be at most as big as the request
	// and smaller if we are missing some elements 
	IT * indsback = new IT[totrecv];
	NT * databack = new NT[totrecv];		

	int * ddispls = new int[nprocs];
	copy(rdispls, rdispls+nprocs, ddispls);
	for(int i=0; i<nprocs; ++i)
	{
		// this is not the most efficient method because it scans ind vector nprocs = sqrt(p) times
		IT * it = set_intersection(recvbuf+rdispls[i], recvbuf+rdispls[i]+recvcnt[i], ind.begin(), ind.end(), indsback+rdispls[i]);
		recvcnt[i] = (it - (indsback+rdispls[i]));	// update with size of the intersection
	
		IT vi = 0;
		for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j)	// fetch the numerical values
		{
			// indsback is a subset of ind
			while(indsback[j] > ind[vi]) 
				++vi;
			databack[j] = num[vi++];
		}
	}
		
	DeleteAll(recvbuf, ddispls);
	NT * databuf = new NT[ri.LocArrSize()];

	World.Alltoall(recvcnt, 1, MPI::INT, sendcnt, 1, MPI::INT);	// share the response counts, overriding request counts 
	World.Alltoallv(indsback, recvcnt, rdispls, MPIType<IT>(), sendbuf, sendcnt, sdispls, MPIType<IT>());  // send indices
	World.Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>());  // send data
	DeleteAll(rdispls, recvcnt, indsback, databack);

	// Now create the output from databuf (holds numerical values) and sendbuf (holds indices)
	// arr is already resized during its construction
	for(int i=0; i<nprocs; ++i)
	{
		// data will come globally sorted from processors 
		// i.e. ind owned by proc_i is always smaller than 
		// ind owned by proc_j for j < i
		for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)	
		{
			typename unordered_map<IT,IT>::iterator it = revr_map.find(sendbuf[j]);
			Indexed.arr[it->second] = databuf[j];
			// cout << it->second << "(" << sendbuf[j] << "):" << databuf[j] << endl;
		}
	}
	DeleteAll(sdispls, sendcnt, sendbuf, databuf);
	return Indexed;
}
Example #26

  // currently only hacked for spheres, with radius and sd as two parameters
  bool HipGISAXS::fit_steepest_descent(real_t zcut,
          real_t radius_min, real_t radius_max, real_t radius_num,
          real_t sd_min, real_t sd_max, real_t sd_num,
          unsigned int dim, MPI::Intracomm& world_comm,
          int x_min, int x_max, int x_step) {
    int mpi_rank = world_comm.Get_rank();

    if(!init_steepest_fit(world_comm, zcut)) return false;

    int num_alphai = 0, num_phi = 0, num_tilt = 0;

    real_t alphai_min, alphai_max, alphai_step;
    HiGInput::instance().scattering_alphai(alphai_min, alphai_max, alphai_step);
    if(alphai_max < alphai_min) alphai_max = alphai_min;
    if(alphai_min == alphai_max || alphai_step == 0) num_alphai = 1;
    else num_alphai = (alphai_max - alphai_min) / alphai_step + 1;

    real_t phi_min, phi_max, phi_step;
    HiGInput::instance().scattering_inplanerot(phi_min, phi_max, phi_step);
    if(phi_step == 0) num_phi = 1;
    else num_phi = (phi_max - phi_min) / phi_step + 1;

    real_t tilt_min, tilt_max, tilt_step;
    HiGInput::instance().scattering_tilt(tilt_min, tilt_max, tilt_step);
    if(tilt_step == 0) num_tilt = 1;
    else num_tilt = (tilt_max - tilt_min) / tilt_step + 1;

    std::cout << "**                    Num alphai: " << num_alphai << std::endl
          << "**                       Num phi: " << num_phi << std::endl
          << "**                      Num tilt: " << num_tilt << std::endl;

    // prepare parameters

    std::vector<std::vector<real_t> > params;
    int num_params = 2;
    std::vector<real_t> temp;
    real_t deltap = 0.0;
    if(radius_num <= 1)
      temp.push_back(radius_min);
    else {
      deltap = fabs(radius_max - radius_min) / (radius_num - 1);
      for(int i = 0; i < radius_num; ++ i) {
        temp.push_back(radius_min + i * deltap);
      } // for
    } // if-else
    params.push_back(temp);
    temp.clear();
    if(sd_num <= 1)
      temp.push_back(sd_min);
    else {
      deltap = fabs(sd_max - sd_min) / (sd_num - 1);
      for(int i = 0; i < sd_num; ++ i) {
        temp.push_back(sd_min + i * deltap);
      } // for
    } // if-else
    params.push_back(temp);
    temp.clear();

    // this will work only on one shape and one structure

    const real_t err_threshold = 1e-8;
    const unsigned int max_iter = 200;

    std::vector<real_t> param_vals;
    //param_vals.push_back(16.0);
    //param_vals.push_back(6.0);
    param_vals.push_back(23.0);
    param_vals.push_back(2.0);
    std::vector<real_t> param_deltas;
    param_deltas.push_back(0.05);
    param_deltas.push_back(0.05);
    real_t gamma_const = 0.05;

    real_t qdeltay = QGrid::instance().delta_y();

    real_t alpha_i = alphai_min;
    // high level of parallelism here (alphai, phi, tilt) for dynamicity ...
    for(int i = 0; i < num_alphai; i ++, alpha_i += alphai_step) {
      real_t alphai = alpha_i * PI_ / 180;
      real_t phi = phi_min;
      for(int j = 0; j < num_phi; j ++, phi += phi_step) {
        real_t tilt = tilt_min;
        for(int k = 0; k < num_tilt; k ++, tilt += tilt_step) {

          std::cout << "-- Computing reference GISAXS "
                << i * num_phi * num_tilt + j * num_tilt + k + 1 << " / "
                << num_alphai * num_phi * num_tilt
                << " [alphai = " << alpha_i << ", phi = " << phi
                << ", tilt = " << tilt << "] ..." << std::endl;

          /* run the reference gisaxs simulation using input params */
          real_t* ref_data = NULL;
          if(!run_gisaxs(alpha_i, alphai, phi, tilt, ref_data, world_comm)) {
            if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl;
            return false;
          } // if

          if(dim != 1) {
            std::cerr << "uh-oh: only 1D is supported for now" << std::endl;
            return false;
          } // if

          real_t* ref_z_cut = new (std::nothrow) real_t[nqy_];
          for(unsigned int iy = 0; iy < nqy_; ++ iy) {
            // assuming nqz_ == 1 ...
            ref_z_cut[iy] = ref_data[nqx_ * iy + 0];
          } // for

          delete[] ref_data;

          // this will store z cut values for each iteration for plotting later
          real_t* z_cuts = new (std::nothrow) real_t[nqy_ * max_iter];
          real_t* temp_zcuts = new (std::nothrow) real_t[nqy_];

          // do some preprocessing
          // start the main loop, bound by max_iter and err_threshold
          //   compute gisaxs for current parameter values
          //   compute the neighbors parameter values
          //   for 12 combinations of current and neighbors, compute gisaxs and error
          //   compute the derivatives (gradient) and error stuff
          //   update parameter values
          // compute the error surface

          real_t err = 10.0;
          std::vector<real_t> param1_list;
          std::vector<real_t> param2_list;
          structure_iterator_t structure_iter = HiGInput::instance().structure_begin();
          Structure* structure = &((*structure_iter).second);
          Shape* shape = HiGInput::instance().shape(*structure);
          shape_param_iterator_t shape_param = (*shape).param_begin();
          real_t* data = NULL;
          std::vector<real_t> param_error_data;
          for(unsigned int iter = 0; iter < max_iter; ++ iter) {
            param1_list.clear();
            param1_list.push_back(param_vals[0] - 2 * param_deltas[0]);  // p1mm
            param1_list.push_back(param_vals[0] - param_deltas[0]);    // p1m
            param1_list.push_back(param_vals[0]);            // p1
            param1_list.push_back(param_vals[0] + param_deltas[0]);    // p1p
            param1_list.push_back(param_vals[0] + 2 * param_deltas[0]);  // p1pp
            param2_list.clear();
            param2_list.push_back(param_vals[1] - 2 * param_deltas[1]);  // p2mm
            param2_list.push_back(param_vals[1] - param_deltas[1]);    // p2m
            param2_list.push_back(param_vals[1]);            // p2
            param2_list.push_back(param_vals[1] + param_deltas[1]);    // p2p
            param2_list.push_back(param_vals[1] + 2 * param_deltas[1]);  // p2pp

            // current point
            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              z_cuts[iter * nqy_ + iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err22 = compute_cut_fit_error(z_cuts + iter * nqy_, ref_z_cut, qdeltay);

            // 12 neighbors

            (*shape_param).second.mean(param1_list[0]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err02 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err11 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err12 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[1]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err13 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[0]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err20 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err21 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err23 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[2]);
            (*shape_param).second.deviation(param2_list[4]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err24 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[1]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err31 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err32 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[3]);
            (*shape_param).second.deviation(param2_list[3]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err33 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            (*shape_param).second.mean(param1_list[4]);
            (*shape_param).second.deviation(param2_list[2]);
            if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
              if(mpi_rank == 0)
                std::cerr << "error: could not finish successfully" << std::endl;
              return false;
            } // if
            for(unsigned int iy = 0; iy < nqy_; ++ iy) {
              // assuming nqz_ == 1 ...
              temp_zcuts[iy] = data[nqx_ * iy];
            } // for
            delete[] data; data = NULL;
            real_t err42 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);

            // 22  0
            // 02  1mm
            // 11  1m2m
            // 12  1m
            // 13  1m2p
            // 20  2mm
            // 21  2m
            // 23  2p
            // 24  2pp
            // 31  1p2m
            // 32  1p
            // 33  1p2p
            // 42  1pp

            real_t derr1 = (err32 - err12) / (2 * param_deltas[0]);
            real_t derr2 = (err23 - err21) / (2 * param_deltas[1]);
            err = sqrt(derr1 * derr1 + derr2 * derr2);
            std::cout << "++ Iteration: " << iter << ", Error: " << err << std::endl;
            std::cout << "++ Parameter 1: " << param_vals[0]
                  << ", Parameter 2: " << param_vals[1] << std::endl;
            param_error_data.push_back(iter);
            param_error_data.push_back(param_vals[0]);
            param_error_data.push_back(param_vals[1]);
            param_error_data.push_back(err);
            if(err < err_threshold) break;

            real_t herr11 = (err42 + err02 - 2 * err22) /
                      (4 * param_deltas[0] * param_deltas[0]);
            real_t herr12 = (err33 - err13 - (err31 - err11)) /
                      (4 * param_deltas[0] * param_deltas[1]);
            real_t herr21 = (err33 - err13 - (err31 - err11)) /
                      (4 * param_deltas[0] * param_deltas[1]);
            real_t herr22 = (err24 + err20 - 2 * err22) /
                      (4 * param_deltas[1] * param_deltas[1]);
            real_t* herr = new (std::nothrow) real_t[2 * 2];
            herr[0] = herr11;
            herr[1] = herr12;
            herr[2] = herr21;
            herr[3] = herr22;
            real_t* herrinv;
            mldivide(2, herr, herrinv);

            param_vals[0] -= gamma_const * (herrinv[0] * derr1 + herrinv[1] * derr2);
            param_vals[1] -= gamma_const * (herrinv[2] * derr1 + herrinv[3] * derr2);

            delete[] herrinv;
            delete[] herr;
          } // for

          // compute the error surface
          std::vector<std::vector<real_t> >::iterator mean_iter = params.begin();
          std::vector<std::vector<real_t> >::iterator sd_iter = mean_iter + 1;
          std::vector<real_t> err_surface;
          for(std::vector<real_t>::iterator curr_mean = (*mean_iter).begin();
              curr_mean != (*mean_iter).end(); ++ curr_mean) {
            for(std::vector<real_t>::iterator curr_sd = (*sd_iter).begin();
                curr_sd != (*sd_iter).end(); ++ curr_sd) {
              (*shape_param).second.mean(*curr_mean);
              (*shape_param).second.deviation(*curr_sd);
              if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) {
                if(mpi_rank == 0)
                  std::cerr << "error: could not finish successfully" << std::endl;
                return false;
              } // if
              for(unsigned int iy = 0; iy < nqy_; ++ iy) {
                // assuming nqz_ == 1 ...
                temp_zcuts[iy] = data[nqx_ * iy];
              } // for
              delete[] data; data = NULL;
              real_t curr_err = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay);
              err_surface.push_back(*curr_mean);
              err_surface.push_back(*curr_sd);
              err_surface.push_back(curr_err);
            } // for
          } // for

          // write data to files
          // define output filename
          std::stringstream alphai_b, phi_b, tilt_b;
          std::string alphai_s, phi_s, tilt_s;
          alphai_b << alpha_i; alphai_s = alphai_b.str();
          phi_b << phi; phi_s = phi_b.str();
          tilt_b << tilt; tilt_s = tilt_b.str();
          std::string param_error_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/param_error_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          std::string z_cut_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/z_cut_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          std::string err_surf_file(HiGInput::instance().param_pathprefix() +
                        "/" + HiGInput::instance().runname() +
                        "/err_surf_ai=" + alphai_s + "_rot=" + phi_s +
                        "_tilt=" + tilt_s + ".dat");
          // write param_error_data
          std::ofstream param_error_f(param_error_file.c_str());
          for(std::vector<real_t>::iterator pei = param_error_data.begin();
              pei != param_error_data.end(); pei += 4) {
            param_error_f << *pei << "\t" << *(pei + 1) << "\t" << *(pei + 2) << "\t"
                    << *(pei + 3) << std::endl;
          } // for
          param_error_f.close();
          // write ref_z_cut and z_cuts
          std::ofstream zcut_f(z_cut_file.c_str());
          for(unsigned int yy = 0; yy < nqy_; ++ yy) {
            zcut_f << ref_z_cut[yy] << "\t";
          } // for
          zcut_f << std::endl;
          for(unsigned int i = 0; i < max_iter; ++ i) {
            for(unsigned int yy = 0; yy < nqy_; ++ yy) {
              zcut_f << z_cuts[i * nqy_ + yy] << "\t";
            } // for
            zcut_f << std::endl;
          } // for
          zcut_f.close();
          // write error surface
          std::ofstream err_surf_f(err_surf_file.c_str());
          for(std::vector<real_t>::iterator surfi = err_surface.begin();
              surfi != err_surface.end(); surfi += 3) {
            err_surf_f << *surfi << "\t" << *(surfi + 1) << "\t" << *(surfi + 2) << std::endl;
          } // for
          err_surf_f.close();

          (*shape_param).second.mean(22.0);
          (*shape_param).second.deviation(7.0);

          param_error_data.clear();
          delete[] temp_zcuts;
          delete[] z_cuts;
          delete[] ref_z_cut;

          std::cout << "parameter values: " << param_vals[0] << ", " << param_vals[1]
                << " [error: " << err << "]" << std::endl;

          // synchronize all procs after each run
          world_comm.Barrier();
        } // for tilt
      } // for phi
    } // for alphai

    return true;
  } // HipGISAXS::fit_steepest_descent()
Example #27
 void globalAdd(MPI::Intracomm& mpiComm, int& value)
 {
   int valCopy = value;
   mpiComm.Allreduce(&valCopy, &value, 1, MPI_INT, MPI_SUM);
 }
Example #28
 void globalAdd(MPI::Intracomm& mpiComm, double& value)
 {
   double valCopy = value;
   mpiComm.Allreduce(&valCopy, &value, 1, MPI_DOUBLE, MPI_SUM);
 }
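The temporary copy in both overloads can be avoided with MPI's in-place mode. A hedged alternative sketch, assuming MPI::IN_PLACE from the MPI-2 C++ bindings is available:

 void globalAddInPlace(MPI::Intracomm& mpiComm, int& value)
 {
   // MPI::IN_PLACE makes `value` serve as both send and receive buffer,
   // removing the need for the valCopy temporary.
   mpiComm.Allreduce(MPI::IN_PLACE, &value, 1, MPI_INT, MPI_SUM);
 }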
Example #29

void LocalScalar3D<real>::Load2(BlockManager& blockManager, const int step, const char* label) {
	MPI::Intracomm comm = blockManager.getCommunicator();

	ostringstream ossFileName;
	ossFileName << "./BIN/";
	ossFileName << "dump-";
	ossFileName << label;
	ossFileName << "-";
	ossFileName.width(5);
	ossFileName.setf(ios::fixed);
	ossFileName.fill('0');
	ossFileName << comm.Get_rank();
	ossFileName << "-";
	ossFileName.width(10);
	ossFileName.setf(ios::fixed);
	ossFileName.fill('0');
	ossFileName << step;
	ossFileName << ".bin";

	int nx = 0;
	int ny = 0;
	int nz = 0;
	int nv = 0;
	int ne = 0;
	int nb = 0;

	ifstream ifs;
	ifs.open(ossFileName.str().c_str(), ios::in | ios::binary);
	ifs.read((char*)&nx, sizeof(int));
	ifs.read((char*)&ny, sizeof(int));
	ifs.read((char*)&nz, sizeof(int));
	ifs.read((char*)&nv, sizeof(int));
	ifs.read((char*)&ne, sizeof(int));
	ifs.read((char*)&nb, sizeof(int));

	int cx = nx + 2*nv;
	int cy = ny + 2*nv;
	int cz = nz + 2*nv;

	BlockBase* block = blockManager.getBlock(0);
	::Vec3i size = block->getSize();

	if( nx == size.x && ny == size.y && nz == size.z && nv == vc && ne == 1 && nb == blockManager.getNumBlock() ) {
		for (int id = 0; id < blockManager.getNumBlock(); ++id) {
			block = blockManager.getBlock(id);

			real* pData = GetBlockData(block);

			ifs.read((char*)pData, sizeof(real)*cx*cy*cz);
		}
	} else if( 2*nx == size.x && 2*ny == size.y && 2*nz == size.z && nv == vc && ne == 1 && nb == blockManager.getNumBlock() ) {
		real *pDataS = new real [cx*cy*cz];
		for (int id = 0; id < blockManager.getNumBlock(); ++id) {
			block = blockManager.getBlock(id);

			real* pData = GetBlockData(block);

			ifs.read((char*)pDataS, sizeof(real)*cx*cy*cz);

			int sz[3] = {2*nx, 2*ny, 2*nz};
			sf3d_copy_x2_(
					(real*)pData,
					(real*)pDataS,
					(int*)sz,
					(int*)&vc);
		}
		delete [] pDataS;
	} else {
		Exit(0);
	}

	ifs.close();

	ImposeBoundaryCondition(blockManager);
}
Example #30

template <class IT, class NT>
ifstream& FullyDistSpVec<IT,NT>::ReadDistribute (ifstream& infile, int master)
{
	IT total_nnz;
	MPI::Intracomm World = commGrid->GetWorld();
	int neighs = World.Get_size();	// number of neighbors (including oneself)
	int buffperneigh = MEMORYINBYTES / (neighs * (sizeof(IT) + sizeof(NT)));

	int * displs = new int[neighs];
	for (int i=0; i<neighs; ++i)
		displs[i] = i*buffperneigh;

	int * curptrs = NULL; 
	int recvcount = 0;
	IT * inds = NULL; 
	NT * vals = NULL;
	int rank = World.Get_rank();	
	if(rank == master)	// 1 processor only
	{		
		inds = new IT [ buffperneigh * neighs ];
		vals = new NT [ buffperneigh * neighs ];
		curptrs = new int[neighs]; 
		fill_n(curptrs, neighs, 0);	// fill with zero
		if (infile.is_open())
		{
			infile.clear();
			infile.seekg(0);
			infile >> glen >> total_nnz;
			World.Bcast(&glen, 1, MPIType<IT>(), master);			
	
			IT tempind;
			NT tempval;
			double loadval;
			IT cnz = 0;
			while ( (!infile.eof()) && cnz < total_nnz)
			{
				infile >> tempind;
				//infile >> tempval;
				infile >> loadval;
				tempval = static_cast<NT>(loadval);
				tempind--;
				IT locind;
				int rec = Owner(tempind, locind);	// recipient (owner) processor
				inds[ rec * buffperneigh + curptrs[rec] ] = locind;
				vals[ rec * buffperneigh + curptrs[rec] ] = tempval;
				++ (curptrs[rec]);				

				if(curptrs[rec] == buffperneigh || (cnz == (total_nnz-1)) )		// one buffer is full, or file is done !
				{
					// first, send the receive counts ...
					World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master);

					// generate space for our own recv data ... (use arrays because vector<bool> is crippled, if NT=bool)
					IT * tempinds = new IT[recvcount];
					NT * tempvals = new NT[recvcount];
					
					// then, send all buffers to their recipients ...
					World.Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount,  MPIType<IT>(), master); 
					World.Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount,  MPIType<NT>(), master); 
		
					// now push what is ours to tuples
					for(IT i=0; i< recvcount; ++i)
					{					
						ind.push_back( tempinds[i] );	// already offset'd by the sender
						num.push_back( tempvals[i] );
					}

					// reset current pointers so that we can reuse {inds,vals} buffers
					fill_n(curptrs, neighs, 0);
					DeleteAll(tempinds, tempvals);
				}
				++ cnz;
			}
			assert (cnz == total_nnz);
		
			// Signal the end of file to other processors along the diagonal
			fill_n(curptrs, neighs, numeric_limits<int>::max());	
			World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master);
		}