void GridTable::constructor(int rank,Parameters*parameters){ m_parameters=parameters; m_kmerAcademy.constructor(rank,m_parameters); m_size=0; uint64_t buckets=m_parameters->getNumberOfBuckets(); int bucketsPerGroup=m_parameters->getNumberOfBucketsPerGroup(); double loadFactorThreshold=m_parameters->getLoadFactorThreshold(); cout<<"[GridTable] buckets="<<buckets<<" bucketsPerGroup="<<bucketsPerGroup; cout<<" loadFactorThreshold="<<loadFactorThreshold<<endl; m_hashTable.constructor(buckets,"RAY_MALLOC_TYPE_GRID_TABLE", m_parameters->showMemoryAllocations(),m_parameters->getRank(), bucketsPerGroup,loadFactorThreshold ); if(m_parameters->hasOption("-hash-table-verbosity")) m_hashTable.toggleVerbosity(); m_inserted=false; if(m_parameters->showMemoryUsage()){ showMemoryUsage(rank); } m_findOperations=0; m_verbose=false; }
void SchreyerFrame::show(int len) const { std::cout << "#levels=" << mFrame.mLevels.size() << " currentLevel=" << currentLevel() << std::endl; for (int i=0; i<mFrame.mLevels.size(); i++) { auto& myframe = level(i); auto& myorder = schreyerOrder(i); if (myframe.size() == 0) continue; std::cout << "--- level " << i << " ------" << std::endl; for (int j=0; j<myframe.size(); j++) { std::cout << " " << j << " " << myframe[j].mDegree << " (" << myframe[j].mBegin << "," << myframe[j].mEnd << ") " << std::flush; std::cout << "(size:" << myframe[j].mSyzygy.len << ") ["; monoid().showAlpha(myorder.mTotalMonom[j]); std::cout << " " << myorder.mTieBreaker[j] << "] "; if (len == 0 or myframe[j].mSyzygy.len == 0) monoid().showAlpha(myframe[j].mMonom); else display_poly(stdout, ring(), myframe[j].mSyzygy); std::cout << std::endl; } } showMemoryUsage(); }
void GridTable::constructor(int rank,Parameters*parameters){ m_parameters=parameters; m_kmerAcademy.constructor(rank,m_parameters); m_size=0; m_hashTable.constructor(RAY_MALLOC_TYPE_GRID_TABLE, m_parameters->showMemoryAllocations(),m_parameters->getRank()); m_inserted=false; if(m_parameters->showMemoryUsage()){ showMemoryUsage(rank); } }
void SeedingData::computeSeeds(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=30000; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ uint64_t workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed()); int nucleotides=seed.size()+(m_wordSize)-1; // only consider the long ones. if(nucleotides>=m_parameters->getMinimumContigLength()){ Kmer firstVertex=seed[0]; Kmer lastVertex=seed[seed.size()-1]; Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); if(firstVertex<firstReverse){ printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } m_SEEDING_seeds.push_back(seed); } } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. 
if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif Vertex*node=m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); int coverage=node->getCoverage(&vertexKey); int minimum=5; if(coverage<minimum){ m_completedJobs++; }else{ m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i); m_activeWorkers.insert(m_SEEDING_i); } int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. 
}else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); fflush(stdout); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); fflush(stdout); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); fflush(stdout); m_virtualCommunicator->printStatistics(); Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort); } }
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif m_virtualCommunicator->resetCounters(); } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ WorkerHandle workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); GraphPath*seed=m_aliveWorkers[workerId].getSeed(); int nucleotides=getNumberOfNucleotides(seed->size(),m_wordSize); if(seed->size() > 0 && m_debugSeeds){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } #ifdef ASSERT assert(nucleotides==0 || nucleotides>=m_wordSize); #endif SeedWorker*worker=&(m_aliveWorkers[workerId]); if(worker->isHeadADeadEnd() && 
worker->isTailADeadEnd()){ m_skippedObjectsWithTwoDeadEnds++; }else if(worker->isHeadADeadEnd()){ m_skippedObjectsWithDeadEndForHead++; }else if(worker->isTailADeadEnd()){ m_skippedObjectsWithDeadEndForTail++; }else if(worker->isBubbleWeakComponent()){ m_skippedObjectsWithBubbleWeakComponent++; // only consider the long ones. }else if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif #ifdef ASSERT assert(seed->size()>0); #endif Kmer firstVertex; seed->at(0,&firstVertex); Kmer lastVertex; seed->at(seed->size()-1,&lastVertex); Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); int minimumNucleotidesForVerbosity=1024; bool verbose=nucleotides>=minimumNucleotidesForVerbosity; if(m_debugSeeds){ verbose=true; } if(firstVertex<firstReverse){ if(verbose){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed->size()); } if(m_parameters->showMemoryUsage() && verbose){ showMemoryUsage(m_rank); } GraphPath*theSeed=seed; theSeed->computePeakCoverage(); CoverageDepth peakCoverage=theSeed->getPeakCoverage(); if(verbose) cout<<"Got a seed, peak coverage: "<<peakCoverage; /* ignore the seed if it has too much coverage. */ if(peakCoverage >= m_minimumSeedCoverageDepth && peakCoverage <= m_parameters->getMaximumSeedCoverage()){ if(verbose) cout<<", adding seed."<<endl; m_SEEDING_seeds.push_back(*theSeed); m_eligiblePaths++; }else{ if(verbose) cout<<", ignoring seed."<<endl; m_skippedNotEnoughCoverage++; } }else{ m_skippedNotMine++; } }else{ m_skippedTooShort++; } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. 
if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i, RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT, RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE ); if(m_debugSeeds) m_aliveWorkers[m_SEEDING_i].enableDebugMode(); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. 
}else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); m_virtualCommunicator->printStatistics(); cout<<"Rank "<<m_rank<<" runtime statistics for seeding algorithm: "<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for head: "<<m_skippedObjectsWithDeadEndForHead<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for tail: "<<m_skippedObjectsWithDeadEndForTail<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of two dead ends: "<<m_skippedObjectsWithTwoDeadEnds<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of bubble weak component: "<<m_skippedObjectsWithBubbleWeakComponent<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of short length: "<<m_skippedTooShort<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of bad ownership: "<<m_skippedNotMine<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of low coverage: "<<m_skippedNotEnoughCoverage<<endl; cout<<"Rank "<<m_rank<<" Eligible paths: "<<m_eligiblePaths<<endl; #ifdef ASSERT assert(m_eligiblePaths==(int)m_SEEDING_seeds.size()); #endif (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(), m_SEEDING_seeds.end(),myComparator_sort); } }
// Compute the non-minimal maps of the resolution.
//
// The frame is computed first, then the syzygies up to a top slanted degree
// and up to length mMaxLength.  The top slanted degree is mHiSlantedDegree,
// lowered to the first entry of stop.degree_limit when stop.stop_after_degree
// is set and that limit is smaller.
//
// When M2_gbTrace >= 1, the computation status, the memory usage and the
// accumulated timers are written to standard output.
//
// stop: stopping conditions; only stop_after_degree and degree_limit are
//       read here.
void SchreyerFrame::start_computation(StopConditions& stop)
{
  computeFrame();
  if (M2_gbTrace >= 1)
    {
      std::cout << "computation status after computing frame: " << std::endl;
      mComputationStatus.output();
    }

  // mHiSlantedDegree is read only after computeFrame() has run
  int top_slanted_degree = mHiSlantedDegree;
  if (stop.stop_after_degree and mHiSlantedDegree > stop.degree_limit->array[0])
    top_slanted_degree = stop.degree_limit->array[0];

  computeSyzygies(top_slanted_degree, mMaxLength);

  if (M2_gbTrace >= 1)
    {
      showMemoryUsage();
      std::cout << "total time for make matrix: " << timeMakeMatrix << std::endl;
      std::cout << "total time for sort matrix: " << timeSortMatrix << std::endl;
      std::cout << "total time for reorder matrix: " << timeReorderMatrix << std::endl;
      std::cout << "total time for gauss matrix: " << timeGaussMatrix << std::endl;
      std::cout << "total time for clear matrix: " << timeClearMatrix << std::endl;
      std::cout << "total time for reset hash table: " << timeResetHashTable << std::endl;
      std::cout << "total time for computing ranks: " << timeComputeRanks << std::endl;
    }
}
// Compute and return the minimal Betti numbers in a window of degrees and
// lengths.
//
// stop_after_degree:   when true, the highest slanted degree considered is
//                      top_slanted_degree, clamped to the interval
//                      [mLoSlantedDegree, mHiSlantedDegree]; when false it
//                      is mHiSlantedDegree.
// top_slanted_degree:  requested highest slanted degree (see above).
// length_limit:        requested highest level.  Values >= maxLevel() are
//                      not supported: a warning is printed and
//                      maxLevel()-1 is used instead.
//
// Ranks are computed for every slanted degree below the top one at levels
// 1..length_limit+1 (level length_limit+1 is needed to obtain the minimal
// Betti numbers at level length_limit), and for the top degree at levels
// 1..length_limit only.
//
// Returns a copy of mBettiMinimal resized to the window
// (mLoSlantedDegree, top degree, length_limit).
BettiDisplay SchreyerFrame::minimalBettiNumbers(
                                                bool stop_after_degree,
                                                int top_slanted_degree,
                                                int length_limit
                                                )
{
  // the frame must exist before mHiSlantedDegree and maxLevel() are read
  computeFrame();

  // highest slanted degree of the window
  const int top_degree = stop_after_degree
    ? std::max(mLoSlantedDegree, std::min(top_slanted_degree, mHiSlantedDegree))
    : mHiSlantedDegree;

  // The frame cannot be extended here: fall back to the longest length
  // that the frame supports.
  if (length_limit >= maxLevel())
    {
      std::cout << "WARNING: cannot extend resolution length" << std::endl;
      length_limit = maxLevel()-1;
    }

  // all rows strictly below the top degree, one level further than requested
  for (int deg = mLoSlantedDegree; deg < top_degree; deg++)
    {
      for (int lev = 1; lev <= length_limit+1; lev++)
        {
          computeRank(deg, lev);
        }
    }

  // the top row stops at the requested length
  for (int lev = 1; lev <= length_limit; lev++)
    {
      computeRank(top_degree, lev);
    }

  if (M2_gbTrace >= 1)
    {
      showMemoryUsage();
      std::cout << "total setPoly: " << poly_constructor::ncalls << std::endl;
      std::cout << "total setPolyFromArray: " << poly_constructor::ncalls_fromarray << std::endl;
      std::cout << "total ~poly: " << poly::npoly_destructor << std::endl;

      std::cout << "total time for make matrix: " << timeMakeMatrix << std::endl;
      std::cout << "total time for sort matrix: " << timeSortMatrix << std::endl;
      std::cout << "total time for reorder matrix: " << timeReorderMatrix << std::endl;
      std::cout << "total time for gauss matrix: " << timeGaussMatrix << std::endl;
      std::cout << "total time for clear matrix: " << timeClearMatrix << std::endl;
      std::cout << "total time for reset hash table: " << timeResetHashTable << std::endl;
      std::cout << "total time for computing ranks: " << timeComputeRanks << std::endl;
    }

  // work on a copy so that mBettiMinimal keeps its full extent
  BettiDisplay B(mBettiMinimal);
  B.resize(mLoSlantedDegree, top_degree, length_limit);
  return B;
}
void SeedingData::computeSeeds(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ uint64_t workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed()); int nucleotides=seed.size()+(m_wordSize)-1; if(seed.size() > 0 && m_parameters->debugSeeds()){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } // only consider the long ones. 
if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif Kmer firstVertex=seed[0]; Kmer lastVertex=seed[seed.size()-1]; Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); if(firstVertex<firstReverse){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed.size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } m_SEEDING_seeds.push_back(seed); } } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. 
}else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); fflush(stdout); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); fflush(stdout); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); fflush(stdout); m_virtualCommunicator->printStatistics(); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort); /** write seeds for debugging purposes */ if(m_parameters->hasOption("-write-seeds")){ ostringstream fileName; fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta"; ofstream f(fileName.str().c_str()); for(int i=0;i<(int)m_SEEDING_seeds.size();i++){ uint64_t id=getPathUniqueId(m_parameters->getRank(),i); f<<">RaySeed-"<<id<<endl; f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()), m_parameters->getColumns()); } f.close(); } } }
void SequencesIndexer::attachReads(ArrayOfReads*m_myReads, RingAllocator*m_outboxAllocator, StaticVector*m_outbox, int*m_mode, int m_wordSize, int m_size, int m_rank ){ if(!m_initiatedIterator){ m_theSequenceId=0; m_activeWorkerIterator.constructor(&m_activeWorkers); m_initiatedIterator=true; m_maximumAliveWorkers=30000; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } if(m_activeWorkerIterator.hasNext()){ uint64_t workerId=m_activeWorkerIterator.next()->getKey(); #ifdef ASSERT assert(m_aliveWorkers.find(workerId,false)!=NULL); assert(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers.find(workerId,false)->getValue()->work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers.find(workerId,false)->getValue()->isDone()){ m_workersDone.push_back(workerId); } }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. 
if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.size()==0){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_theSequenceId<(int)m_myReads->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_theSequenceId%10000==0){ printf("Rank %i is selecting optimal read markers [%i/%i]\n",m_rank,m_theSequenceId+1,(int)m_myReads->size()); fflush(stdout); if(m_parameters->showMemoryUsage()) showMemoryUsage(m_rank); } #ifdef ASSERT if(m_theSequenceId==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif char sequence[4000]; #ifdef ASSERT assert(m_theSequenceId<(int)m_myReads->size()); #endif m_myReads->at(m_theSequenceId)->getSeq(sequence,m_parameters->getColorSpaceMode(),false); bool flag; m_aliveWorkers.insert(m_theSequenceId,&m_workAllocator,&flag)->getValue()->constructor(m_theSequenceId,sequence,m_parameters,m_outboxAllocator,m_virtualCommunicator, m_theSequenceId,m_myReads,&m_workAllocator); m_activeWorkers.insert(m_theSequenceId,&m_workAllocator,&flag); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_theSequenceId++; }else{ m_virtualCommunicator->forceFlush(); } } m_activeWorkerIterator.constructor(&m_activeWorkers); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_myReads->size()==m_completedJobs){ printf("Rank %i is selecting optimal read markers [%i/%i] (completed)\n",m_rank,(int)m_myReads->size(),(int)m_myReads->size()); fflush(stdout); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); fflush(stdout); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank); m_outbox->push_back(aMessage); m_virtualCommunicator->printStatistics(); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT 
assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif int freed=m_workAllocator.getNumberOfChunks()*m_workAllocator.getChunkSize(); m_workAllocator.clear(); if(m_parameters->showMemoryUsage()){ cout<<"Rank "<<m_parameters->getRank()<<": Freeing unused assembler memory: "<<freed/1024<<" KiB freed"<<endl; showMemoryUsage(m_rank); } } }
void SequencesIndexer::call_RAY_SLAVE_MODE_INDEX_SEQUENCES(){ if(!m_initiatedIterator){ m_theSequenceId=0; m_activeWorkerIterator.constructor(&m_activeWorkers); m_initiatedIterator=true; m_maximumAliveWorkers=32768; m_virtualCommunicator->resetCounters(); } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("OptimalMarkers") && m_parameters->hasCheckpoint("ReadOffsets")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint OptimalMarkers exists, not selecting markers."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank); m_outbox->push_back(&aMessage); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } if(m_activeWorkerIterator.hasNext()){ WorkerHandle workerId=m_activeWorkerIterator.next()->getKey(); #ifdef ASSERT assert(m_aliveWorkers.find(workerId,false)!=NULL); assert(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers.find(workerId,false)->getValue()->isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers.find(workerId,false)->getValue()->work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers.find(workerId,false)->getValue()->isDone()){ m_workersDone.push_back(workerId); } }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. 
if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.size()==0){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_theSequenceId<(int)m_myReads->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_theSequenceId%100000==0){ printf("Rank %i is selecting optimal read markers [%i/%i]\n",m_rank,m_theSequenceId+1,(int)m_myReads->size()); m_derivative.addX(m_theSequenceId); m_derivative.printStatus(SLAVE_MODES[RAY_SLAVE_MODE_INDEX_SEQUENCES],RAY_SLAVE_MODE_INDEX_SEQUENCES); m_derivative.printEstimatedTime(m_myReads->size()); if(m_parameters->showMemoryUsage()) showMemoryUsage(m_rank); } #ifdef ASSERT if(m_theSequenceId==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } assert(m_theSequenceId<(int)m_myReads->size()); #endif bool flag; m_aliveWorkers.insert(m_theSequenceId,&m_workAllocator,&flag)->getValue()->constructor(m_theSequenceId,m_parameters,m_outboxAllocator,m_virtualCommunicator, m_theSequenceId,m_myReads,&m_workAllocator,&m_readMarkerFile,&m_forwardStatistics, &m_reverseStatistics, RAY_MPI_TAG_ATTACH_SEQUENCE, RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE ); m_activeWorkers.insert(m_theSequenceId,&m_workAllocator,&flag); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_theSequenceId++; }else{ m_virtualCommunicator->forceFlush(); } } m_activeWorkerIterator.constructor(&m_activeWorkers); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_myReads->size()==m_completedJobs){ printf("Rank %i is selecting optimal read markers [%i/%i] (completed)\n",m_rank,(int)m_myReads->size(),(int)m_myReads->size()); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_MASTER_IS_DONE_ATTACHING_READS_REPLY,m_rank); 
m_outbox->push_back(&aMessage); m_derivative.writeFile(&cout); m_virtualCommunicator->printStatistics(); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif int freed=m_workAllocator.getNumberOfChunks()*m_workAllocator.getChunkSize(); m_workAllocator.clear(); if(m_parameters->showMemoryUsage()){ cout<<"Rank "<<m_parameters->getRank()<<": Freeing unused assembler memory: "<<freed/1024<<" KiB freed"<<endl; showMemoryUsage(m_rank); } if(m_parameters->hasOption("-write-read-markers")){ m_readMarkerFile.close(); } if(m_parameters->hasOption("-write-marker-summary")){ ostringstream file1; file1<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".ForwardMarkerSummary.txt"; string fileName1=file1.str(); ofstream f1(fileName1.c_str()); for(map<int,map<int,int> >::iterator i=m_forwardStatistics.begin();i!=m_forwardStatistics.end();i++){ int offset=i->first; for(map<int,int>::iterator j=i->second.begin();j!=i->second.end();j++){ int coverage=j->first; int count=j->second; f1<<offset<<" "<<coverage<<" "<<count<<endl; } } f1.close(); ostringstream file2; file2<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".ReverseMarkerSummary.txt"; string fileName2=file2.str(); ofstream f2(fileName2.c_str()); for(map<int,map<int,int> >::iterator i=m_reverseStatistics.begin();i!=m_reverseStatistics.end();i++){ int offset=i->first; for(map<int,int>::iterator j=i->second.begin();j!=i->second.end();j++){ int coverage=j->first; int count=j->second; f2<<offset<<" "<<coverage<<" "<<count<<endl; } } f2.close(); } m_forwardStatistics.clear(); m_reverseStatistics.clear(); } }