/**
 * Extension entry point: answers the request named by `function` by writing
 * a NUL-terminated string into `output`.
 *
 * - "version" yields ACE_FULL_VERSION_STR.
 * - anything else yields addLineBreaks(splitString(function)).
 *
 * @param output      destination buffer supplied by the caller
 * @param outputSize  capacity of `output` in bytes, terminator included
 * @param function    request string
 *
 * The result is truncated to fit and is always NUL-terminated. Nothing is
 * written when `output` is null or `outputSize` is not positive.
 */
void __stdcall RVExtension(char *output, int outputSize, const char *function) {
    // Without at least one byte there is no room even for the terminator.
    if (!output || outputSize <= 0) {
        return;
    }

    // A missing request cannot be compared or split; answer with an empty string.
    if (!function) {
        output[0] = '\0';
        return;
    }

    // Copy at most outputSize-1 characters so the final byte is always free
    // for the terminator; strncpy alone does not terminate on truncation.
    if (!strcmp(function, "version")) {
        strncpy(output, ACE_FULL_VERSION_STR, outputSize - 1);
    } else {
        strncpy(output, addLineBreaks(splitString(function)).c_str(), outputSize - 1);
    }
    output[outputSize - 1] = '\0';
}
void SeedingData::computeSeeds(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ uint64_t workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed()); int nucleotides=seed.size()+(m_wordSize)-1; if(seed.size() > 0 && m_parameters->debugSeeds()){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } // only consider the long ones. 
if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif Kmer firstVertex=seed[0]; Kmer lastVertex=seed[seed.size()-1]; Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); if(firstVertex<firstReverse){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed.size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } m_SEEDING_seeds.push_back(seed); } } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); fflush(stdout); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. 
}else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); fflush(stdout); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); fflush(stdout); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); fflush(stdout); m_virtualCommunicator->printStatistics(); (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort); /** write seeds for debugging purposes */ if(m_parameters->hasOption("-write-seeds")){ ostringstream fileName; fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta"; ofstream f(fileName.str().c_str()); for(int i=0;i<(int)m_SEEDING_seeds.size();i++){ uint64_t id=getPathUniqueId(m_parameters->getRank(),i); f<<">RaySeed-"<<id<<endl; f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()), m_parameters->getColumns()); } f.close(); } } }
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){ if(!m_initiatedIterator){ m_last=time(NULL); m_SEEDING_i=0; m_activeWorkerIterator=m_activeWorkers.begin(); m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters); m_initiatedIterator=true; m_maximumAliveWorkers=32768; #ifdef ASSERT m_splayTreeIterator.hasNext(); #endif m_virtualCommunicator->resetCounters(); } if(!m_checkedCheckpoint){ if(m_parameters->hasCheckpoint("Seeds")){ cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl; (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); loadCheckpoint(); return; } m_checkedCheckpoint=true; } m_virtualCommunicator->processInbox(&m_activeWorkersToRestore); if(!m_virtualCommunicator->isReady()){ return; } // flush all mode is necessary to empty buffers and // restart things from scratch.. // 1. iterate on active workers if(m_activeWorkerIterator!=m_activeWorkers.end()){ WorkerHandle workerId=*m_activeWorkerIterator; #ifdef ASSERT assert(m_aliveWorkers.count(workerId)>0); assert(!m_aliveWorkers[workerId].isDone()); #endif m_virtualCommunicator->resetLocalPushedMessageStatus(); //force the worker to work until he finishes or pushes something on the stack while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){ m_aliveWorkers[workerId].work(); } if(m_virtualCommunicator->getLocalPushedMessageStatus()){ m_waitingWorkers.push_back(workerId); } if(m_aliveWorkers[workerId].isDone()){ m_workersDone.push_back(workerId); GraphPath*seed=m_aliveWorkers[workerId].getSeed(); int nucleotides=getNumberOfNucleotides(seed->size(),m_wordSize); if(seed->size() > 0 && m_debugSeeds){ cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl; } #ifdef ASSERT assert(nucleotides==0 || nucleotides>=m_wordSize); #endif bool skipBecauseOfDeadEnd=m_aliveWorkers[workerId].hasDeadEnd(); 
if(skipBecauseOfDeadEnd){ m_skippedObjectsWithDeadEnd++; // only consider the long ones. }else if(nucleotides>=m_parameters->getMinimumContigLength()){ #ifdef SHOW_DISCOVERIES printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size()); #endif #ifdef ASSERT assert(seed->size()>0); #endif Kmer firstVertex; seed->at(0,&firstVertex); Kmer lastVertex; seed->at(seed->size()-1,&lastVertex); Kmer firstReverse=m_parameters->_complementVertex(&lastVertex); int minimumNucleotidesForVerbosity=1024; bool verbose=nucleotides>=minimumNucleotidesForVerbosity; if(m_debugSeeds){ verbose=true; } if(firstVertex<firstReverse){ if(verbose){ printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed->size()); } if(m_parameters->showMemoryUsage() && verbose){ showMemoryUsage(m_rank); } GraphPath*theSeed=seed; theSeed->computePeakCoverage(); CoverageDepth peakCoverage=theSeed->getPeakCoverage(); if(verbose) cout<<"Got a seed, peak coverage: "<<peakCoverage; /* ignore the seed if it has too much coverage. */ if(peakCoverage >= m_minimumSeedCoverageDepth && peakCoverage <= m_parameters->getMaximumSeedCoverage()){ if(verbose) cout<<", adding seed."<<endl; m_SEEDING_seeds.push_back(*theSeed); m_eligiblePaths++; }else{ if(verbose) cout<<", ignoring seed."<<endl; m_skippedNotEnoughCoverage++; } }else{ m_skippedNotMine++; } }else{ m_skippedTooShort++; } } m_activeWorkerIterator++; }else{ updateStates(); // add one worker to active workers // reason is that those already in the pool don't communicate anymore -- // as for they need responses. 
if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){ // there is at least one worker to start // AND // the number of alive workers is below the maximum if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){ if(m_SEEDING_i % 100000 ==0){ printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size()); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } } #ifdef ASSERT if(m_SEEDING_i==0){ assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0); } #endif m_splayTreeIterator.next(); Kmer vertexKey=*(m_splayTreeIterator.getKey()); m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i, RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT, RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE ); if(m_debugSeeds) m_aliveWorkers[m_SEEDING_i].enableDebugMode(); m_activeWorkers.insert(m_SEEDING_i); int population=m_aliveWorkers.size(); if(population>m_maximumWorkers){ m_maximumWorkers=population; } m_SEEDING_i++; // skip the reverse complement as we don't really need it anyway. 
}else{ m_virtualCommunicator->forceFlush(); } } // brace yourself for the next round m_activeWorkerIterator=m_activeWorkers.begin(); } #ifdef ASSERT assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers); #endif if((int)m_subgraph->size()==m_completedJobs){ printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size()); printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size()); printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers); m_virtualCommunicator->printStatistics(); cout<<"Rank "<<m_rank<<" runtime statistics for seeding algorithm: "<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of dead ends: "<<m_skippedObjectsWithDeadEnd<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of short length: "<<m_skippedTooShort<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of bad ownership: "<<m_skippedNotMine<<endl; cout<<"Rank "<<m_rank<<" Skipped paths because of low coverage: "<<m_skippedNotEnoughCoverage<<endl; cout<<"Rank "<<m_rank<<" Eligible paths: "<<m_eligiblePaths<<endl; #ifdef ASSERT assert(m_eligiblePaths==(int)m_SEEDING_seeds.size()); #endif (*m_mode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank()); m_outbox->push_back(&aMessage); if(m_parameters->showMemoryUsage()){ showMemoryUsage(m_rank); } #ifdef ASSERT assert(m_aliveWorkers.size()==0); assert(m_activeWorkers.size()==0); #endif // sort the seeds by length std::sort(m_SEEDING_seeds.begin(), m_SEEDING_seeds.end(),myComparator_sort); /** write seeds for debugging purposes */ if(m_parameters->hasOption("-write-seeds")){ ostringstream fileName; fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta"; ofstream f(fileName.str().c_str()); for(int i=0;i<(int)m_SEEDING_seeds.size();i++){ PathHandle id=getPathUniqueId(m_parameters->getRank(),i); f<<">RaySeed-"<<id<<endl; 
f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()), m_parameters->getColumns()); } f.close(); } } }