void SeedingData::call_RAY_SLAVE_MODE_SEND_SEED_LENGTHS(){ if(!m_initialized){ for(int i=0;i<(int)m_SEEDING_seeds.size();i++){ int length=getNumberOfNucleotides(m_SEEDING_seeds[i].size(), m_parameters->getWordSize()); m_slaveSeedLengths[length]++; } m_iterator=m_slaveSeedLengths.begin(); m_initialized=true; m_communicatorWasTriggered=false; m_virtualCommunicator->resetCounters(); } if(m_inbox->size()==1&&(*m_inbox)[0]->getTag()==RAY_MPI_TAG_SEND_SEED_LENGTHS_REPLY) m_communicatorWasTriggered=false; if(m_communicatorWasTriggered) return; if(m_iterator==m_slaveSeedLengths.end()){ Message aMessage(NULL,0,MASTER_RANK, RAY_MPI_TAG_IS_DONE_SENDING_SEED_LENGTHS,getRank()); m_outbox->push_back(aMessage); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; return; } MessageUnit*messageBuffer=(MessageUnit*)m_outboxAllocator->allocate(MAXIMUM_MESSAGE_SIZE_IN_BYTES); int maximumPairs=MAXIMUM_MESSAGE_SIZE_IN_BYTES/sizeof(MessageUnit)/2; int i=0; while(i<maximumPairs && m_iterator!=m_slaveSeedLengths.end()){ int length=m_iterator->first; int count=m_iterator->second; messageBuffer[2*i]=length; messageBuffer[2*i+1]=count; i++; m_iterator++; } Message aMessage(messageBuffer,2*i,MASTER_RANK, RAY_MPI_TAG_SEND_SEED_LENGTHS,getRank()); m_outbox->push_back(aMessage); }
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif m_virtualCommunicator->resetCounters(); } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ WorkerHandle workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); GraphPath*seed=m_aliveWorkers[workerId].getSeed(); int nucleotides=getNumberOfNucleotides(seed->size(),m_wordSize); if(seed->size() > 0 && m_debugSeeds){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } #ifdef ASSERT assert(nucleotides==0 || nucleotides>=m_wordSize); #endif SeedWorker*worker=&(m_aliveWorkers[workerId]); if(worker->isHeadADeadEnd() && worker->isTailADeadEnd()){ m_skippedObjectsWithTwoDeadEnds++; }else if(worker->isHeadADeadEnd()){ m_skippedObjectsWithDeadEndForHead++; }else if(worker->isTailADeadEnd()){ m_skippedObjectsWithDeadEndForTail++; }else if(worker->isBubbleWeakComponent()){ m_skippedObjectsWithBubbleWeakComponent++; // only consider the long ones. }else if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif #ifdef ASSERT assert(seed->size()>0); #endif Kmer firstVertex; seed->at(0,&firstVertex); Kmer lastVertex; seed->at(seed->size()-1,&lastVertex); Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); int minimumNucleotidesForVerbosity=1024; bool verbose=nucleotides>=minimumNucleotidesForVerbosity; if(m_debugSeeds){ verbose=true; } if(firstVertex<firstReverse){ if(verbose){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed->size()); } if(m_parameters->showMemoryUsage() && verbose){ showMemoryUsage(m_rank); } GraphPath*theSeed=seed; theSeed->computePeakCoverage(); CoverageDepth peakCoverage=theSeed->getPeakCoverage(); if(verbose) cout<<"Got a seed, peak coverage: "<<peakCoverage; /* ignore the seed if it has too much coverage. */ if(peakCoverage >= m_minimumSeedCoverageDepth && peakCoverage <= m_parameters->getMaximumSeedCoverage()){ if(verbose) cout<<", adding seed."<<endl; m_SEEDING_seeds.push_back(*theSeed); m_eligiblePaths++; }else{ if(verbose) cout<<", ignoring seed."<<endl; m_skippedNotEnoughCoverage++; } }else{ m_skippedNotMine++; } }else{ m_skippedTooShort++; } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i, RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT, RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE ); if(m_debugSeeds) m_aliveWorkers[m_SEEDING_i].enableDebugMode(); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. }else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); m_virtualCommunicator->printStatistics(); cout<<"Rank "<<m_rank<<" runtime statistics for seeding algorithm: "<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for head: "<<m_skippedObjectsWithDeadEndForHead<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of dead end for tail: "<<m_skippedObjectsWithDeadEndForTail<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of two dead ends: "<<m_skippedObjectsWithTwoDeadEnds<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of bubble weak component: "<<m_skippedObjectsWithBubbleWeakComponent<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of short length: "<<m_skippedTooShort<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of bad ownership: "<<m_skippedNotMine<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of low coverage: "<<m_skippedNotEnoughCoverage<<endl; cout<<"Rank "<<m_rank<<" Eligible paths: "<<m_eligiblePaths<<endl; #ifdef ASSERT assert(m_eligiblePaths==(int)m_SEEDING_seeds.size()); #endif (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(), m_SEEDING_seeds.end(),myComparator_sort); } }
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif m_virtualCommunicator->resetCounters(); } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ WorkerHandle workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed()); vector<int>*coverageValues=m_aliveWorkers[workerId].getCoverageVector(); #ifdef ASSERT assert(seed.size() == coverageValues->size()); #endif int nucleotides=getNumberOfNucleotides(seed.size(),m_wordSize); if(seed.size() > 0 && m_parameters->debugSeeds()){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } #ifdef ASSERT assert(nucleotides==0 || nucleotides>=m_wordSize); #endif // only consider the long ones. if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif #ifdef ASSERT assert(seed.size()>0); #endif Kmer firstVertex=seed[0]; Kmer lastVertex=seed[seed.size()-1]; Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); if(firstVertex<firstReverse){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed.size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } AssemblySeed theSeed; for(int i=0;i<(int)seed.size();i++){ theSeed.push_back(&(seed[i])); theSeed.addCoverageValue(coverageValues->at(i)); } theSeed.computePeakCoverage(); CoverageDepth peakCoverage=theSeed.getPeakCoverage(); cout<<"Got a seed, peak coverage: "<<peakCoverage; /* ignore the seed if it has too much coverage. */ if(peakCoverage <= m_parameters->getMaximumSeedCoverage()){ cout<<", adding seed."<<endl; m_SEEDING_seeds.push_back(theSeed); }else{ cout<<", ignoring seed."<<endl; } } } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i, RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT, RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE ); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. }else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); fflush(stdout); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); fflush(stdout); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); fflush(stdout); m_virtualCommunicator->printStatistics(); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(), m_SEEDING_seeds.end(),myComparator_sort); /** write seeds for debugging purposes */ if(m_parameters->hasOption("-write-seeds")){ ostringstream fileName; fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta"; ofstream f(fileName.str().c_str()); for(int i=0;i<(int)m_SEEDING_seeds.size();i++){ PathHandle id=getPathUniqueId(m_parameters->getRank(),i); f<<">RaySeed-"<<id<<endl; f<<addLineBreaks(convertToString(m_SEEDING_seeds[i].getVertices(), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()), m_parameters->getColumns()); } f.close(); } } }