int main(int argc, char* argv[]) {
    MPI::Init(argc, argv);
    rank = MPI::COMM_WORLD.Get_rank();
    size = MPI::COMM_WORLD.Get_size();
    if (size < 2) MPI::COMM_WORLD.Abort(1);
    if (size < 1 + COLS + ROWS) MPI::COMM_WORLD.Abort(1);

    MPI::Group globalGroup = MPI::COMM_WORLD.Get_group();

    if (0 == rank) {
        int matrix[COLS][ROWS], xirtam[ROWS][COLS];
        srand(time(0));
        for (int i = 0; i < COLS; i++)
            for (int j = 0; j < ROWS; j++) {
                matrix[i][j] = 9 * (double)rand() / (double)RAND_MAX;
                xirtam[j][i] = matrix[i][j];
            }
        cout << "random matrix: " << endl;
        for (int i = 0; i < COLS; i++) {
            for (int j = 0; j < ROWS; j++) cout << matrix[i][j] << " ";
            cout << endl;
        }
        // MPI_Comm_create is collective over MPI_COMM_WORLD, so rank 0 must
        // participate as well; it passes MPI::GROUP_EMPTY and receives
        // MPI::COMM_NULL.
        MPI::COMM_WORLD.Create(MPI::GROUP_EMPTY);
    } else {
        MPI::Group group;
        MPI::Intracomm comm;
        int j = 0, k = 0;
        for (int i = 1; i < size; i++)
            if (i % 2) ranksA[j++] = i;
            else       ranksB[k++] = i;
        // j and k hold the exact number of odd and even ranks collected above
        if (rank % 2) group = globalGroup.Incl(j, ranksA);
        else          group = globalGroup.Incl(k, ranksB);
        comm = MPI::COMM_WORLD.Create(group);
        int newRank = comm.Get_rank();
        pline();
        cout << rank << ", " << newRank << ", " << powerSum << endl;
        fflush(stdout);
        group.Free();
        comm.Free();
    }
    MPI::Finalize();
    return 0;
}
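// Hedged alternative sketch (not from the original program): the odd/even
// partition above can also be obtained with MPI::Intracomm::Split(), which
// avoids building explicit rank lists and is collective over all ranks of
// MPI_COMM_WORLD. Here color = rank % 2 selects the subgroup and key = rank
// keeps the original ordering; 'rank' is the world rank as in the snippet above.
MPI::Intracomm half = MPI::COMM_WORLD.Split(rank % 2, rank);
int newRank = half.Get_rank();   // rank within the odd or even subgroup
half.Free();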
void LocalScalar3D<real>::Dump(BlockManager& blockManager, const int step, const char* label) { ImposeBoundaryCondition(blockManager); MPI::Intracomm comm = blockManager.getCommunicator(); ostringstream ossFileNameTime; ossFileNameTime << "./BIN/"; mkdir(ossFileNameTime.str().c_str(), 0755); #ifdef _BLOCK_IS_LARGE_ #else #endif for (int id = 0; id < blockManager.getNumBlock(); ++id) { BlockBase* block = blockManager.getBlock(id); ::Vec3i size = block->getSize(); Vec3d origin = block->getOrigin(); Vec3d blockSize = block->getBlockSize(); Vec3d cellSize = block->getCellSize(); int level = block->getLevel(); ostringstream ossFileName; ossFileName << "./BIN/"; ossFileName << "dump-"; ossFileName << label; ossFileName << "-"; ossFileName.width(5); ossFileName.setf(ios::fixed); ossFileName.fill('0'); ossFileName << comm.Get_rank(); ossFileName << "-"; ossFileName.width(5); ossFileName.setf(ios::fixed); ossFileName.fill('0'); ossFileName << id; ossFileName << "-"; ossFileName.width(10); ossFileName.setf(ios::fixed); ossFileName.fill('0'); ossFileName << step; ossFileName << ".bin"; int cx = size.x + 2*vc; int cy = size.y + 2*vc; int cz = size.z + 2*vc; int iNE = 1; real* pData = GetBlockData(block); ofstream ofs; ofs.open(ossFileName.str().c_str(), ios::out | ios::binary); ofs.write((char*)&size.x, sizeof(int)); ofs.write((char*)&size.y, sizeof(int)); ofs.write((char*)&size.z, sizeof(int)); ofs.write((char*)&vc , sizeof(int)); ofs.write((char*)&iNE , sizeof(int)); ofs.write((char*)pData , sizeof(real)*cx*cy*cz); ofs.close(); } }
/** \brief
 * In many situations each rank computes a number of local DOFs. All
 * ranks then want to know the global number of DOFs and the starting
 * displacement of the DOF numbering on each rank.
 *
 * \param[in]  mpiComm      The MPI communicator.
 * \param[in]  nRankDofs    The number of local DOFs.
 * \param[out] rStartDofs   Displacement of the DOF numbering. On rank n
 *                          this is the sum of the local DOF counts on
 *                          ranks 0 to n - 1.
 * \param[out] nOverallDofs Global sum of nRankDofs. Equal on all ranks.
 */
inline void getDofNumbering(MPI::Intracomm& mpiComm,
                            int nRankDofs,
                            int& rStartDofs,
                            int& nOverallDofs)
{
    rStartDofs = 0;
    nOverallDofs = 0;
    // Inclusive prefix sum, then subtract the local contribution to get the
    // exclusive prefix sum (the first global DOF index owned by this rank).
    mpiComm.Scan(&nRankDofs, &rStartDofs, 1, MPI_INT, MPI_SUM);
    rStartDofs -= nRankDofs;
    mpiComm.Allreduce(&nRankDofs, &nOverallDofs, 1, MPI_INT, MPI_SUM);
}
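// Minimal usage sketch for getDofNumbering(), assuming the deprecated MPI-2
// C++ bindings used throughout these snippets are available; the variable
// names are illustrative, not taken from the original code.
#include <mpi.h>
#include <iostream>

int main(int argc, char* argv[]) {
    MPI::Init(argc, argv);
    MPI::Intracomm comm = MPI::COMM_WORLD;
    int nRankDofs = 10 + comm.Get_rank();   // e.g. rank r owns 10 + r DOFs
    int rStartDofs = 0, nOverallDofs = 0;
    getDofNumbering(comm, nRankDofs, rStartDofs, nOverallDofs);
    std::cout << "rank " << comm.Get_rank()
              << ": first global DOF = " << rStartDofs
              << ", total DOFs = " << nOverallDofs << std::endl;
    MPI::Finalize();
    return 0;
}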
// Build an intracommunicator that contains every rank of comm_world except
// the manager; on the manager itself the call returns MPI::COMM_NULL.
MPI::Intracomm init_workers(const MPI::Intracomm &comm_world, int managerid) {
    // get old group
    MPI::Group world_group = comm_world.Get_group();
    // create new group from old group
    int worker_size = comm_world.Get_size() - 1;
    int *workers = new int[worker_size];
    for (int i = 0, id = 0; i < worker_size; ++i, ++id) {
        if (id == managerid) ++id;   // skip the manager id
        workers[i] = id;
    }
    MPI::Group worker_group = world_group.Incl(worker_size, workers);
    delete [] workers;
    MPI::Intracomm worker_comm = comm_world.Create(worker_group);
    // the groups are no longer needed once the communicator exists
    worker_group.Free();
    world_group.Free();
    return worker_comm;
}
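// Hedged usage sketch: every rank of MPI_COMM_WORLD (manager included) must
// call init_workers() because MPI_Comm_create is collective; only the workers
// get a usable communicator back. Names below are illustrative.
const int managerid = 0;
MPI::Intracomm workers = init_workers(MPI::COMM_WORLD, managerid);
if (workers != MPI::COMM_NULL) {
    int wrank = workers.Get_rank();   // rank within the worker group
    // ... worker-only work and collectives on 'workers' ...
    workers.Free();
}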
/* * Compute, store and return total number of atoms on all processors. */ void AtomStorage::computeNAtomTotal(MPI::Intracomm& communicator) { // If nAtomTotal is already set, do nothing and return. // if (nAtomTotal_.isSet()) return; int nAtomLocal = nAtom(); int nAtomTotal = 0; communicator.Reduce(&nAtomLocal, &nAtomTotal, 1, MPI::INT, MPI::SUM, 0); if (communicator.Get_rank() !=0) { nAtomTotal = 0; } nAtomTotal_.set(nAtomTotal); }
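// Hedged note: the snippet above leaves the total only on rank 0 (the other
// ranks reset it to 0 so the Reduce result is never mistaken for a valid
// value). If every rank needed the total, the same pattern with Allreduce
// would work; a minimal sketch under that assumption:
int nAtomLocal = nAtom();
int nAtomTotal = 0;
communicator.Allreduce(&nAtomLocal, &nAtomTotal, 1, MPI::INT, MPI::SUM);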
bool recvData(std::vector<double>& receivedData) { bool isDataReceived = false; if ( intraComm != MPI::COMM_NULL) { MPI::Status status; double buffer[100]; intraComm.Recv(buffer, 100, MPI::DOUBLE, MPI::ANY_SOURCE, /*tag*/ 100, status); int count = status.Get_count(MPI::DOUBLE); receivedData = std::vector<double>(buffer, buffer+count); log.Info() << "RECV [ " << getRank() << " <-- " << status.Get_source() << " ] data : " << receivedData << std::endl; isDataReceived = true; }else { log.Err() << "PID " << getProcessId() << " failed to RECV" << std::endl; } return isDataReceived; }
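// Hedged usage sketch: the caller does not need to know the message length in
// advance; status.Get_count(MPI::DOUBLE) above sizes the output vector, but
// the message must fit in the fixed 100-double receive buffer.
std::vector<double> data;
if (recvData(data)) {
    // data.size() is the number of doubles actually received (at most 100)
}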
void MpiFileIo::setIoCommunicator(MPI::Intracomm& communicator) { communicatorPtr_ = &communicator; if (communicator.Get_rank() == 0) { isIoProcessor_ = true; } else { isIoProcessor_ = false; } }
/*
 * Send a block (nonblocking).
 */
void MemoryOArchive::iSend(MPI::Intracomm& comm, MPI::Request& req, int dest)
{
    int comm_size = comm.Get_size();
    int myRank = comm.Get_rank();

    // Preconditions
    if (dest > comm_size - 1 || dest < 0) {
        UTIL_THROW("Destination rank out of bounds");
    }
    if (dest == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    size_t sendBytes = cursor_ - buffer_;
    size_t* sizePtr = (size_t*)buffer_;
    *sizePtr = sendBytes;
    req = comm.Isend(buffer_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
}
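// Hedged usage sketch: the caller owns the returned request and must complete
// it (Wait or Test) before the archive's buffer may be modified or reused.
// 'archive' and 'dest' are illustrative names.
MPI::Request req;
archive.iSend(comm, req, dest);
// ... overlap independent computation here ...
req.Wait();   // safe to touch the archive's buffer again after this returns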
/*
 * Receive a block.
 */
void PackedData::recv(MPI::Intracomm& comm, int source)
{
    MPI::Request request;
    int myRank = comm.Get_rank();
    int comm_size = comm.Get_size();

    // Preconditions
    if (source > comm_size - 1 || source < 0) {
        UTIL_THROW("Source rank out of bounds");
    }
    if (source == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    request = comm.Irecv(begin_, capacity_, MPI::UNSIGNED_CHAR, source, 5);
    request.Wait();
    cursor_ = begin_;
}
/* * Send and receive buffer. */ void Buffer::sendRecv(MPI::Intracomm& comm, int source, int dest) { MPI::Request request[2]; int sendBytes = 0; int myRank = comm.Get_rank(); int comm_size = comm.Get_size(); // Preconditions if (dest > comm_size - 1 || dest < 0) { UTIL_THROW("Destination rank out of bounds"); } if (source > comm_size - 1 || source < 0) { UTIL_THROW("Source rank out of bounds"); } if (dest == myRank) { UTIL_THROW("Destination and my rank are identical"); } if (source == myRank) { UTIL_THROW("Source and my rank are identical"); } // Start nonblocking receive. request[0] = comm.Irecv(recvBufferBegin_, bufferCapacity_ , MPI::CHAR, source, 5); // Start nonblocking send. sendBytes = sendPtr_ - sendBufferBegin_; request[1] = comm.Isend(sendBufferBegin_, sendBytes , MPI::CHAR, dest, 5); // Wait for completion of receive. request[0].Wait(); recvPtr_ = recvBufferBegin_; // Wait for completion of send. request[1].Wait(); // Update statistics. if (sendBytes > maxSendLocal_) { maxSendLocal_ = sendBytes; } }
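// Hedged usage sketch: because the receive is posted before the send and both
// are nonblocking, sendRecv() works in a ring exchange where every rank sends
// "up" and receives from "down" at the same time (assumes the communicator
// has more than one rank; 'buffer' is an illustrative Buffer object).
int myRank = comm.Get_rank();
int nProc  = comm.Get_size();
int up   = (myRank + 1) % nProc;
int down = (myRank + nProc - 1) % nProc;
buffer.sendRecv(comm, down, up);   // receive from 'down', send to 'up'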
/*
 * Receive a buffer.
 */
void Buffer::recv(MPI::Intracomm& comm, int source)
{
    MPI::Request request;
    int myRank = comm.Get_rank();
    int comm_size = comm.Get_size();

    // Preconditions
    if (source > comm_size - 1 || source < 0) {
        UTIL_THROW("Source rank out of bounds");
    }
    if (source == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    request = comm.Irecv(recvBufferBegin_, bufferCapacity_, MPI::CHAR, source, 5);
    request.Wait();
    recvType_ = NONE;
    recvPtr_ = recvBufferBegin_;
}
/*
 * Send a block.
 */
void PackedData::send(MPI::Intracomm& comm, int dest)
{
    MPI::Request request;
    int sendBytes = 0;
    int comm_size = comm.Get_size();
    int myRank = comm.Get_rank();

    // Preconditions
    if (dest > comm_size - 1 || dest < 0) {
        UTIL_THROW("Destination rank out of bounds");
    }
    if (dest == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    sendBytes = cursor_ - begin_;
    request = comm.Isend(begin_, sendBytes, MPI::UNSIGNED_CHAR, dest, 5);
    request.Wait();
}
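// Hedged pairing sketch: PackedData::send() and PackedData::recv() use the
// same tag (5), so a one-way transfer from rank 0 to rank 1 looks like the
// following ('packed' is an illustrative PackedData object that rank 0 has
// already filled).
int myRank = comm.Get_rank();
if (myRank == 0) {
    packed.send(comm, 1);
} else if (myRank == 1) {
    packed.recv(comm, 0);
}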
int getRank() const { int rank = Network::INVALID_VALUE; if ( intraComm != MPI::COMM_NULL) { rank = intraComm.Get_rank(); }else { log.Err() << "getRank() returns invalid\n"; } return rank; }
int getTopology() const { int topology = Network::INVALID_VALUE; if ( intraComm != MPI::COMM_NULL) { topology = intraComm.Get_topology(); }else { log.Err() << "getTopology() returns invalid\n"; } return topology; }
int getSize() const { int size = Network::INVALID_VALUE; if ( intraComm != MPI::COMM_NULL) { size = intraComm.Get_size(); }else { log.Err() << "getSize() returns invalid\n"; } return size; }
/*
 * Reduce (add) distributions from multiple MPI processors.
 */
void Distribution::reduce(MPI::Intracomm& communicator, int root)
{
    long* totHistogram = new long[nBin_];
    communicator.Reduce(histogram_.cArray(), totHistogram, nBin_, MPI::LONG, MPI::SUM, root);
    if (communicator.Get_rank() == root) {
        for (int i = 0; i < nBin_; ++i) {
            histogram_[i] = totHistogram[i];
        }
    } else {
        for (int i = 0; i < nBin_; ++i) {
            histogram_[i] = 0;
        }
    }
    delete [] totHistogram;   // array form matches new long[nBin_]

    long totSample;
    communicator.Reduce(&nSample_, &totSample, 1, MPI::LONG, MPI::SUM, root);
    if (communicator.Get_rank() == root) {
        nSample_ = totSample;
    } else {
        nSample_ = 0;
    }

    long totReject;
    communicator.Reduce(&nReject_, &totReject, 1, MPI::LONG, MPI::SUM, root);
    if (communicator.Get_rank() == root) {
        nReject_ = totReject;
    } else {
        nReject_ = 0;
    }
}
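// Hedged usage sketch: after each processor has accumulated its own samples,
// the root gathers the combined histogram while the other ranks are zeroed,
// as implemented above ('distribution' is an illustrative object name).
int root = 0;
distribution.reduce(communicator, root);
if (communicator.Get_rank() == root) {
    // only the root now holds the summed histogram and counters
}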
/* * Broadcast a buffer. */ void Buffer::bcast(MPI::Intracomm& comm, int source) { int comm_size = comm.Get_size(); int myRank = comm.Get_rank(); if (source > comm_size - 1 || source < 0) { UTIL_THROW("Source rank out of bounds"); } int sendBytes; if (myRank == source) { sendBytes = sendPtr_ - sendBufferBegin_; comm.Bcast(&sendBytes, 1, MPI::INT, source); comm.Bcast(sendBufferBegin_, sendBytes, MPI::CHAR, source); sendPtr_ = sendBufferBegin_; sendType_ = NONE; } else { comm.Bcast(&sendBytes, 1, MPI::INT, source); comm.Bcast(recvBufferBegin_, sendBytes, MPI::CHAR, source); recvPtr_ = recvBufferBegin_; recvType_ = NONE; } if (sendBytes > maxSendLocal_) { maxSendLocal_ = sendBytes; } }
// Metropolis-Hastings population size resampling; not used anymore void resample_popsizes_mh(ArgModel *model, const LocalTrees *trees, bool sample_popsize_recomb, double heat) { list<PopsizeConfigParam> &l = model->popsize_config.params; double curr_like = sample_popsize_recomb ? calc_arg_prior(model, trees) : calc_arg_prior_recomb_integrate(model, trees, NULL, NULL, NULL); #ifdef ARGWEAVER_MPI MPI::Intracomm *comm = model->mc3.group_comm; int rank = comm->Get_rank(); comm->Reduce(rank == 0 ? MPI_IN_PLACE : &curr_like, &curr_like, 1, MPI::DOUBLE, MPI_SUM, 0); #endif for (int rep=0; rep < model->popsize_config.numsample; rep++) { int idx=0; for (list<PopsizeConfigParam>::iterator it = l.begin(); it != l.end(); it++) { curr_like = resample_single_popsize_mh(model, trees, sample_popsize_recomb, heat, it, curr_like, idx++); } } }
/// Broadcast the octree information to the other ranks.
void BCMOctree::broadcast(MPI::Intracomm& comm)
{
    assert(comm.Get_rank() == 0);
    rootGrid->broadcast(comm);

    int numLeafNode = leafNodeArray.size();
    int ibuf[2];
    ibuf[0] = numLeafNode;
    ibuf[1] = ordering;
    comm.Bcast(ibuf, 2, MPI::INT, 0);

    size_t size = Pedigree::GetSerializeSize();
    unsigned char* buf = new unsigned char[size * numLeafNode];
    size_t ip = 0;
    for (int id = 0; id < rootGrid->getSize(); id++) {
        packPedigrees(rootNodes[id], ip, buf);
    }
    comm.Bcast(buf, size * numLeafNode, MPI::BYTE, 0);
    delete[] buf;
}
/*
 * Receive a block.
 */
void MemoryIArchive::recv(MPI::Intracomm& comm, int source)
{
    int myRank = comm.Get_rank();
    int comm_size = comm.Get_size();

    // Preconditions
    if (source > comm_size - 1 || source < 0) {
        UTIL_THROW("Source rank out of bounds");
    }
    if (source == myRank) {
        UTIL_THROW("Source and destination identical");
    }

    size_t recvCapacity = capacity_ + sizeof(size_t);
    comm.Recv(buffer_, recvCapacity, MPI::UNSIGNED_CHAR, source, 5);
    // the leading sizeof(size_t) bytes hold the total message size written by the sender
    begin_ = buffer_ + sizeof(size_t);
    cursor_ = begin_;
    size_t* sizePtr = (size_t*) buffer_;
    size_t size = *sizePtr;
    end_ = buffer_ + size;
}
FullyDistSpVec<IT, IT> FullyDistSpVec<IT, NT>::sort() { MPI::Intracomm World = commGrid->GetWorld(); FullyDistSpVec<IT,IT> temp(commGrid); IT nnz = getlocnnz(); pair<NT,IT> * vecpair = new pair<NT,IT>[nnz]; int nprocs = World.Get_size(); int rank = World.Get_rank(); IT * dist = new IT[nprocs]; dist[rank] = nnz; World.Allgather(MPI::IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>()); IT sizeuntil = accumulate(dist, dist+rank, 0); for(IT i=0; i< nnz; ++i) { vecpair[i].first = num[i]; // we'll sort wrt numerical values vecpair[i].second = ind[i] + sizeuntil; } SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World); vector< IT > nind(nnz); vector< IT > nnum(nnz); for(IT i=0; i< nnz; ++i) { num[i] = vecpair[i].first; // sorted range (change the object itself) nind[i] = ind[i]; // make sure the sparsity distribution is the same nnum[i] = vecpair[i].second; // inverse permutation stored as numerical values } delete [] vecpair; delete [] dist; temp.NOT_FOUND = NOT_FOUND; temp.glen = glen; temp.ind = nind; temp.num = nnum; return temp; }
/* * Send a buffer. */ void Buffer::send(MPI::Intracomm& comm, int dest) { MPI::Request request; int sendBytes = 0; int comm_size = comm.Get_size(); int myRank = comm.Get_rank(); // Preconditions if (dest > comm_size - 1 || dest < 0) { UTIL_THROW("Destination rank out of bounds"); } if (dest == myRank) { UTIL_THROW("Source and destination identical"); } sendBytes = sendPtr_ - sendBufferBegin_; request = comm.Isend(sendBufferBegin_, sendBytes, MPI::CHAR, dest, 5); request.Wait(); // Update statistics. if (sendBytes > maxSendLocal_) { maxSendLocal_ = sendBytes; } }
bool sendDataTo(const std::vector<double>& inData, int dest) { bool isDataSent = false; if ( intraComm != MPI::COMM_NULL) { log.Info() << "SEND [ " << getRank() << " --> " << dest << " ] data : " << inData << std::endl; intraComm.Send( &(inData[0]), inData.size(), MPI::DOUBLE, dest, /*tag*/ 100); isDataSent = true; }else { log.Err() << "PID " << getProcessId() << " failed to SEND\n"; } return isDataSent; }
void transfH_MPI(mblock<double>** Ap, mblock<float>** A, unsigned* seq_part) { unsigned rank = COMM_AHMED.Get_rank(), nproc = COMM_AHMED.Get_size(); unsigned info[8]; if (rank==0) { copyH(seq_part[1]-seq_part[0], Ap, A); for (unsigned j=1; j<nproc; ++j) { for (unsigned i=seq_part[j]; i<seq_part[j+1]; ++i) { COMM_AHMED.Recv(info, 8, MPI::UNSIGNED, j, 7); A[i] = new mblock<float>(info[0], info[1]); float* tmp = NULL; if (info[7]) { tmp = new float[info[7]]; COMM_AHMED.Recv(tmp, info[7], MPI::FLOAT, j, 8); } A[i]->cpy_mbl(info+2, tmp); delete [] tmp; } } } else { for (unsigned i=seq_part[rank]; i<seq_part[rank+1]; ++i) { mblock<double>* p = Ap[i-seq_part[rank]]; info[0] = p->getn1(); info[1] = p->getn2(); p->get_prop(info+2); info[7] = p->nvals(); COMM_AHMED.Send(info, 8, MPI::UNSIGNED, 0, 7); if (info[7]) { float* tmp = new float[info[7]]; for (unsigned i=0; i<info[7]; ++i) tmp[i] = (float) p->getdata()[i]; COMM_AHMED.Send(tmp, info[7], MPI::FLOAT, 0, 8); delete [] tmp; } } } }
FullyDistVec<IT,NT> FullyDistSpVec<IT,NT>::operator() (const FullyDistVec<IT,IT> & ri) const { MPI::Intracomm World = commGrid->GetWorld(); // FullyDistVec ( shared_ptr<CommGrid> grid, IT globallen, NT initval, NT id); FullyDistVec<IT,NT> Indexed(ri.commGrid, ri.glen, zero, zero); int nprocs = World.Get_size(); unordered_map<IT, IT> revr_map; // inverted index that maps indices of *this to indices of output vector< vector<IT> > data_req(nprocs); IT locnnz = ri.LocArrSize(); // ABAB: Input sanity check int local = 1; int whole = 1; for(IT i=0; i < locnnz; ++i) { if(ri.arr[i] >= glen || ri.arr[i] < 0) { local = 0; } } World.Allreduce( &local, &whole, 1, MPI::INT, MPI::BAND); if(whole == 0) { throw outofrangeexception(); } for(IT i=0; i < locnnz; ++i) { IT locind; int owner = Owner(ri.arr[i], locind); // numerical values in ri are 0-based data_req[owner].push_back(locind); revr_map.insert(typename unordered_map<IT, IT>::value_type(locind, i)); } IT * sendbuf = new IT[locnnz]; int * sendcnt = new int[nprocs]; int * sdispls = new int[nprocs]; for(int i=0; i<nprocs; ++i) sendcnt[i] = data_req[i].size(); int * rdispls = new int[nprocs]; int * recvcnt = new int[nprocs]; World.Alltoall(sendcnt, 1, MPI::INT, recvcnt, 1, MPI::INT); // share the request counts sdispls[0] = 0; rdispls[0] = 0; for(int i=0; i<nprocs-1; ++i) { sdispls[i+1] = sdispls[i] + sendcnt[i]; rdispls[i+1] = rdispls[i] + recvcnt[i]; } IT totrecv = accumulate(recvcnt,recvcnt+nprocs,0); IT * recvbuf = new IT[totrecv]; for(int i=0; i<nprocs; ++i) { copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]); vector<IT>().swap(data_req[i]); } World.Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>()); // request data // We will return the requested data, // our return can be at most as big as the request // and smaller if we are missing some elements IT * indsback = new IT[totrecv]; NT * databack = new NT[totrecv]; int * ddispls = new int[nprocs]; copy(rdispls, rdispls+nprocs, ddispls); for(int i=0; i<nprocs; ++i) { // this is not the most efficient method because it scans ind vector nprocs = sqrt(p) times IT * it = set_intersection(recvbuf+rdispls[i], recvbuf+rdispls[i]+recvcnt[i], ind.begin(), ind.end(), indsback+rdispls[i]); recvcnt[i] = (it - (indsback+rdispls[i])); // update with size of the intersection IT vi = 0; for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values { // indsback is a subset of ind while(indsback[j] > ind[vi]) ++vi; databack[j] = num[vi++]; } } DeleteAll(recvbuf, ddispls); NT * databuf = new NT[ri.LocArrSize()]; World.Alltoall(recvcnt, 1, MPI::INT, sendcnt, 1, MPI::INT); // share the response counts, overriding request counts World.Alltoallv(indsback, recvcnt, rdispls, MPIType<IT>(), sendbuf, sendcnt, sdispls, MPIType<IT>()); // send indices World.Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>()); // send data DeleteAll(rdispls, recvcnt, indsback, databack); // Now create the output from databuf (holds numerical values) and sendbuf (holds indices) // arr is already resized during its construction for(int i=0; i<nprocs; ++i) { // data will come globally sorted from processors // i.e. 
// ind owned by proc_i is always smaller than ind owned by proc_j whenever i < j
for(int j = sdispls[i]; j < sdispls[i] + sendcnt[i]; ++j) {
    typename unordered_map<IT,IT>::iterator it = revr_map.find(sendbuf[j]);
    Indexed.arr[it->second] = databuf[j];
    // cout << it->second << "(" << sendbuf[j] << "):" << databuf[j] << endl;
}
}
DeleteAll(sdispls, sendcnt, sendbuf, databuf);
return Indexed;
}
// currently only hacked for spheres, with radius and sd as two parameters bool HipGISAXS::fit_steepest_descent(real_t zcut, real_t radius_min, real_t radius_max, real_t radius_num, real_t sd_min, real_t sd_max, real_t sd_num, unsigned int dim, MPI::Intracomm& world_comm, int x_min, int x_max, int x_step) { int mpi_rank = world_comm.Get_rank(); if(!init_steepest_fit(world_comm, zcut)) return false; int num_alphai = 0, num_phi = 0, num_tilt = 0;; real_t alphai_min, alphai_max, alphai_step; HiGInput::instance().scattering_alphai(alphai_min, alphai_max, alphai_step); if(alphai_max < alphai_min) alphai_max = alphai_min; if(alphai_min == alphai_max || alphai_step == 0) num_alphai = 1; else num_alphai = (alphai_max - alphai_min) / alphai_step + 1; real_t phi_min, phi_max, phi_step; HiGInput::instance().scattering_inplanerot(phi_min, phi_max, phi_step); if(phi_step == 0) num_phi = 1; else num_phi = (phi_max - phi_min) / phi_step + 1; real_t tilt_min, tilt_max, tilt_step; HiGInput::instance().scattering_tilt(tilt_min, tilt_max, tilt_step); if(tilt_step == 0) num_tilt = 1; else num_tilt = (tilt_max - tilt_min) / tilt_step + 1; std::cout << "** Num alphai: " << num_alphai << std::endl << "** Num phi: " << num_phi << std::endl << "** Num tilt: " << num_tilt << std::endl; // prepare parameters std::vector<std::vector<real_t> > params; int num_params = 2; std::vector<real_t> temp; real_t deltap = 0.0; if(radius_num <= 1) temp.push_back(radius_min); else { deltap = fabs(radius_max - radius_min) / (radius_num - 1); for(int i = 0; i < radius_num; ++ i) { temp.push_back(radius_min + i * deltap); } // for } // if-else params.push_back(temp); temp.clear(); if(sd_num <= 1) temp.push_back(sd_min); else { deltap = fabs(sd_max - sd_min) / (sd_num - 1); for(int i = 0; i < sd_num; ++ i) { temp.push_back(sd_min + i * deltap); } // for } // if-else params.push_back(temp); temp.clear(); // this will work only on one shape and one structure const real_t err_threshold = 1e-8; const unsigned int max_iter = 200; std::vector<real_t> param_vals; //param_vals.push_back(16.0); //param_vals.push_back(6.0); param_vals.push_back(23.0); param_vals.push_back(2.0); std::vector<real_t> param_deltas; param_deltas.push_back(0.05); param_deltas.push_back(0.05); real_t gamma_const = 0.05; real_t qdeltay = QGrid::instance().delta_y(); real_t alpha_i = alphai_min; // high level of parallelism here (alphai, phi, tilt) for dynamicity ... for(int i = 0; i < num_alphai; i ++, alpha_i += alphai_step) { real_t alphai = alpha_i * PI_ / 180; real_t phi = phi_min; for(int j = 0; j < num_phi; j ++, phi += phi_step) { real_t tilt = tilt_min; for(int k = 0; k < num_tilt; k ++, tilt += tilt_step) { std::cout << "-- Computing reference GISAXS " << i * num_phi * num_tilt + j * num_tilt + k + 1 << " / " << num_alphai * num_phi * num_tilt << " [alphai = " << alpha_i << ", phi = " << phi << ", tilt = " << tilt << "] ..." << std::endl; /* run the reference gisaxs simulation using input params */ real_t* ref_data = NULL; if(!run_gisaxs(alpha_i, alphai, phi, tilt, ref_data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if if(dim != 1) { std::cerr << "uh-oh: only 1D is supported for now" << std::endl; return false; } // if real_t* ref_z_cut = new (std::nothrow) real_t[nqy_]; for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... 
ref_z_cut[iy] = ref_data[nqx_ * iy + 0]; } // for delete[] ref_data; // this will store z cut values for each iteration for plotting later real_t* z_cuts = new (std::nothrow) real_t[nqy_ * max_iter]; real_t* temp_zcuts = new (std::nothrow) real_t[nqy_]; // do some preprocessing // start the main loop, bound by max_iter and err_threshold // compute gisaxs for current parameter values // compute the neighbors parameter values // for 12 combinations of current and neighbors, compute gisaxs and error // compute the derivatives (gradient) and error stuff // update parameter values // compute the error surface real_t err = 10.0; std::vector<real_t> param1_list; std::vector<real_t> param2_list; structure_iterator_t structure_iter = HiGInput::instance().structure_begin(); Structure* structure = &((*structure_iter).second); Shape* shape = HiGInput::instance().shape(*structure); shape_param_iterator_t shape_param = (*shape).param_begin(); real_t* data = NULL; std::vector<real_t> param_error_data; for(unsigned int iter = 0; iter < max_iter; ++ iter) { param1_list.clear(); param1_list.push_back(param_vals[0] - 2 * param_deltas[0]); // p1mm param1_list.push_back(param_vals[0] - param_deltas[0]); // p1m param1_list.push_back(param_vals[0]); // p1 param1_list.push_back(param_vals[0] + param_deltas[0]); // p1p param1_list.push_back(param_vals[0] + 2 * param_deltas[0]); // p1pp param2_list.clear(); param2_list.push_back(param_vals[1] - 2 * param_deltas[1]); // p2mm param2_list.push_back(param_vals[1] - param_deltas[1]); // p2m param2_list.push_back(param_vals[1]); // p2 param2_list.push_back(param_vals[1] + param_deltas[1]); // p2p param2_list.push_back(param_vals[1] + 2 * param_deltas[1]); // p2pp // current point (*shape_param).second.mean(param1_list[2]); (*shape_param).second.deviation(param2_list[2]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... z_cuts[iter * nqy_ + iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err22 = compute_cut_fit_error(z_cuts + iter * nqy_, ref_z_cut, qdeltay); // 12 neighbors (*shape_param).second.mean(param1_list[0]); (*shape_param).second.deviation(param2_list[2]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err02 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[1]); (*shape_param).second.deviation(param2_list[1]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err11 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[1]); (*shape_param).second.deviation(param2_list[2]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... 
temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err12 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[1]); (*shape_param).second.deviation(param2_list[3]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err13 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[2]); (*shape_param).second.deviation(param2_list[0]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err20 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[2]); (*shape_param).second.deviation(param2_list[1]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err21 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[2]); (*shape_param).second.deviation(param2_list[3]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err23 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[2]); (*shape_param).second.deviation(param2_list[4]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err24 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[3]); (*shape_param).second.deviation(param2_list[1]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err31 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[3]); (*shape_param).second.deviation(param2_list[2]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... 
temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err32 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[3]); (*shape_param).second.deviation(param2_list[3]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err33 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); (*shape_param).second.mean(param1_list[4]); (*shape_param).second.deviation(param2_list[2]); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t err42 = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); // 22 0 // 02 1mm // 11 1m2m // 12 1m // 13 1m2p // 20 2mm // 21 2m // 23 2p // 24 2pp // 31 1p2m // 32 1p // 33 1p2p // 42 1pp real_t derr1 = (err32 - err12) / (2 * param_deltas[0]); real_t derr2 = (err23 - err21) / (2 * param_deltas[1]); err = sqrt(derr1 * derr1 + derr2 * derr2); std::cout << "++ Iteration: " << iter << ", Error: " << err << std::endl; std::cout << "++ Parameter 1: " << param_vals[0] << ", Parameter 2: " << param_vals[1] << std::endl; param_error_data.push_back(iter); param_error_data.push_back(param_vals[0]); param_error_data.push_back(param_vals[1]); param_error_data.push_back(err); if(err < err_threshold) break; real_t herr11 = (err42 + err02 - 2 * err22) / (4 * param_deltas[0] * param_deltas[0]); real_t herr12 = (err33 - err13 - (err31 - err11)) / (4 * param_deltas[0] * param_deltas[1]); real_t herr21 = (err33 - err13 - (err31 - err11)) / (4 * param_deltas[0] * param_deltas[1]); real_t herr22 = (err24 + err20 - 2 * err22) / (4 * param_deltas[1] * param_deltas[1]); real_t* herr = new (std::nothrow) real_t[2 * 2]; herr[0] = herr11; herr[1] = herr12; herr[2] = herr21; herr[3] = herr22; real_t* herrinv; mldivide(2, herr, herrinv); param_vals[0] -= gamma_const * (herrinv[0] * derr1 + herrinv[1] * derr2); param_vals[1] -= gamma_const * (herrinv[2] * derr1 + herrinv[3] * derr2); delete[] herrinv; delete[] herr; } // for // compute the error surface std::vector<std::vector<real_t> >::iterator mean_iter = params.begin(); std::vector<std::vector<real_t> >::iterator sd_iter = mean_iter + 1; std::vector<real_t> err_surface; for(std::vector<real_t>::iterator curr_mean = (*mean_iter).begin(); curr_mean != (*mean_iter).end(); ++ curr_mean) { for(std::vector<real_t>::iterator curr_sd = (*sd_iter).begin(); curr_sd != (*sd_iter).end(); ++ curr_sd) { (*shape_param).second.mean(*curr_mean); (*shape_param).second.deviation(*curr_sd); if(!run_gisaxs(alpha_i, alphai, phi, tilt, data, world_comm)) { if(mpi_rank == 0) std::cerr << "error: could not finish successfully" << std::endl; return false; } // if for(unsigned int iy = 0; iy < nqy_; ++ iy) { // assuming nqz_ == 1 ... 
temp_zcuts[iy] = data[nqx_ * iy]; } // for delete[] data; data = NULL; real_t curr_err = compute_cut_fit_error(temp_zcuts, ref_z_cut, qdeltay); err_surface.push_back(*curr_mean); err_surface.push_back(*curr_sd); err_surface.push_back(curr_err); } // for } // for // write data to files // define output filename std::stringstream alphai_b, phi_b, tilt_b; std::string alphai_s, phi_s, tilt_s; alphai_b << alpha_i; alphai_s = alphai_b.str(); phi_b << phi; phi_s = phi_b.str(); tilt_b << tilt; tilt_s = tilt_b.str(); std::string param_error_file(HiGInput::instance().param_pathprefix() + "/" + HiGInput::instance().runname() + "/param_error_ai=" + alphai_s + "_rot=" + phi_s + "_tilt=" + tilt_s + ".dat"); std::string z_cut_file(HiGInput::instance().param_pathprefix() + "/" + HiGInput::instance().runname() + "/z_cut_ai=" + alphai_s + "_rot=" + phi_s + "_tilt=" + tilt_s + ".dat"); std::string err_surf_file(HiGInput::instance().param_pathprefix() + "/" + HiGInput::instance().runname() + "/err_surf_ai=" + alphai_s + "_rot=" + phi_s + "_tilt=" + tilt_s + ".dat"); // write param_error_data std::ofstream param_error_f(param_error_file.c_str()); for(std::vector<real_t>::iterator pei = param_error_data.begin(); pei != param_error_data.end(); pei += 4) { param_error_f << *pei << "\t" << *(pei + 1) << "\t" << *(pei + 2) << "\t" << *(pei + 3) << std::endl; } // for param_error_f.close(); // write ref_z_cut and z_cuts std::ofstream zcut_f(z_cut_file.c_str()); for(unsigned int yy = 0; yy < nqy_; ++ yy) { zcut_f << ref_z_cut[yy] << "\t"; } // for zcut_f << std::endl; for(unsigned int i = 0; i < max_iter; ++ i) { for(unsigned int yy = 0; yy < nqy_; ++ yy) { zcut_f << z_cuts[i * nqy_ + yy] << "\t"; } // for zcut_f << std::endl; } // for zcut_f.close(); // write error surface std::ofstream err_surf_f(err_surf_file.c_str()); for(std::vector<real_t>::iterator surfi = err_surface.begin(); surfi != err_surface.end(); surfi += 3) { err_surf_f << *surfi << "\t" << *(surfi + 1) << "\t" << *(surfi + 2) << std::endl; } // for err_surf_f.close(); (*shape_param).second.mean(22.0); (*shape_param).second.deviation(7.0); param_error_data.clear(); delete[] temp_zcuts; delete[] z_cuts; delete[] ref_z_cut; std::cout << "parameter values: " << param_vals[0] << ", " << param_vals[1] << " [error: " << err << "]" << std::endl; // synchronize all procs after each run world_comm.Barrier(); } // for tilt } // for phi } // for alphai return true; } // HipGISAXS::fit_all_gisaxs()
void globalAdd(MPI::Intracomm& mpiComm, int& value) { int valCopy = value; mpiComm.Allreduce(&valCopy, &value, 1, MPI_INT, MPI_SUM); }
void globalAdd(MPI::Intracomm& mpiComm, double& value) { double valCopy = value; mpiComm.Allreduce(&valCopy, &value, 1, MPI_DOUBLE, MPI_SUM); }
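// Hedged note: the temporary copy in the two overloads above can be avoided
// with MPI_IN_PLACE; a minimal sketch of the equivalent in-place form, shown
// for the int overload (same semantics, one fewer temporary):
void globalAddInPlace(MPI::Intracomm& mpiComm, int& value)
{
    mpiComm.Allreduce(MPI_IN_PLACE, &value, 1, MPI_INT, MPI_SUM);
}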
void LocalScalar3D<real>::Load2(BlockManager& blockManager, const int step, const char* label) { MPI::Intracomm comm = blockManager.getCommunicator(); ostringstream ossFileName; ossFileName << "./BIN/"; ossFileName << "dump-"; ossFileName << label; ossFileName << "-"; ossFileName.width(5); ossFileName.setf(ios::fixed); ossFileName.fill('0'); ossFileName << comm.Get_rank(); ossFileName << "-"; ossFileName.width(10); ossFileName.setf(ios::fixed); ossFileName.fill('0'); ossFileName << step; ossFileName << ".bin"; int nx = 0; int ny = 0; int nz = 0; int nv = 0; int ne = 0; int nb = 0; ifstream ifs; ifs.open(ossFileName.str().c_str(), ios::in | ios::binary); ifs.read((char*)&nx, sizeof(int)); ifs.read((char*)&ny, sizeof(int)); ifs.read((char*)&nz, sizeof(int)); ifs.read((char*)&nv, sizeof(int)); ifs.read((char*)&ne, sizeof(int)); ifs.read((char*)&nb, sizeof(int)); int cx = nx + 2*nv; int cy = ny + 2*nv; int cz = nz + 2*nv; BlockBase* block = blockManager.getBlock(0); ::Vec3i size = block->getSize(); if( nx == size.x && ny == size.y && nz == size.z && nv == vc && ne == 1 && nb == blockManager.getNumBlock() ) { for (int id = 0; id < blockManager.getNumBlock(); ++id) { block = blockManager.getBlock(id); real* pData = GetBlockData(block); ifs.read((char*)pData, sizeof(real)*cx*cy*cz); } } else if( 2*nx == size.x && 2*ny == size.y && 2*nz == size.z && nv == vc && ne == 1 && nb == blockManager.getNumBlock() ) { real *pDataS = new real [cx*cy*cz]; for (int id = 0; id < blockManager.getNumBlock(); ++id) { block = blockManager.getBlock(id); real* pData = GetBlockData(block); ifs.read((char*)pDataS, sizeof(real)*cx*cy*cz); int sz[3] = {2*nx, 2*ny, 2*nz}; sf3d_copy_x2_( (real*)pData, (real*)pDataS, (int*)sz, (int*)&vc); } delete [] pDataS; } else { Exit(0); } ifs.close(); ImposeBoundaryCondition(blockManager); }
ifstream& FullyDistSpVec<IT,NT>::ReadDistribute (ifstream& infile, int master) { IT total_nnz; MPI::Intracomm World = commGrid->GetWorld(); int neighs = World.Get_size(); // number of neighbors (including oneself) int buffperneigh = MEMORYINBYTES / (neighs * (sizeof(IT) + sizeof(NT))); int * displs = new int[neighs]; for (int i=0; i<neighs; ++i) displs[i] = i*buffperneigh; int * curptrs = NULL; int recvcount = 0; IT * inds = NULL; NT * vals = NULL; int rank = World.Get_rank(); if(rank == master) // 1 processor only { inds = new IT [ buffperneigh * neighs ]; vals = new NT [ buffperneigh * neighs ]; curptrs = new int[neighs]; fill_n(curptrs, neighs, 0); // fill with zero if (infile.is_open()) { infile.clear(); infile.seekg(0); infile >> glen >> total_nnz; World.Bcast(&glen, 1, MPIType<IT>(), master); IT tempind; NT tempval; double loadval; IT cnz = 0; while ( (!infile.eof()) && cnz < total_nnz) { infile >> tempind; //infile >> tempval; infile >> loadval; tempval = static_cast<NT>(loadval); tempind--; IT locind; int rec = Owner(tempind, locind); // recipient (owner) processor inds[ rec * buffperneigh + curptrs[rec] ] = locind; vals[ rec * buffperneigh + curptrs[rec] ] = tempval; ++ (curptrs[rec]); if(curptrs[rec] == buffperneigh || (cnz == (total_nnz-1)) ) // one buffer is full, or file is done ! { // first, send the receive counts ... World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master); // generate space for own recv data ... (use arrays because vector<bool> is cripled, if NT=bool) IT * tempinds = new IT[recvcount]; NT * tempvals = new NT[recvcount]; // then, send all buffers that to their recipients ... World.Scatterv(inds, curptrs, displs, MPIType<IT>(), tempinds, recvcount, MPIType<IT>(), master); World.Scatterv(vals, curptrs, displs, MPIType<NT>(), tempvals, recvcount, MPIType<NT>(), master); // now push what is ours to tuples for(IT i=0; i< recvcount; ++i) { ind.push_back( tempinds[i] ); // already offset'd by the sender num.push_back( tempvals[i] ); } // reset current pointers so that we can reuse {inds,vals} buffers fill_n(curptrs, neighs, 0); DeleteAll(tempinds, tempvals); } ++ cnz; } assert (cnz == total_nnz); // Signal the end of file to other processors along the diagonal fill_n(curptrs, neighs, numeric_limits<int>::max()); World.Scatter(curptrs, 1, MPI::INT, &recvcount, 1, MPI::INT, master); }