Example #1
0
/**
 * Extension entry point: writes a NUL-terminated reply for `function` into `output`.
 *
 * @param output      Caller-provided buffer that receives the reply.
 * @param outputSize  Capacity of `output` in bytes, terminator included.
 * @param function    Request string. "version" yields ACE_FULL_VERSION_STR; any other
 *                    value is passed through splitString() and then addLineBreaks().
 *
 * The reply is truncated to fit the buffer. Nothing is written when `output` is null
 * or `outputSize` is not positive; a null `function` yields an empty reply.
 */
void __stdcall RVExtension(char *output, int outputSize, const char *function) {
    // Without a usable buffer there is nowhere to put even the terminator.
    if (!output || outputSize <= 0) {
        return;
    }
    // strcmp() on a null pointer is undefined; answer with an empty string instead.
    if (!function) {
        output[0] = '\0';
        return;
    }

    // Copy at most outputSize - 1 bytes so the last byte stays free for the terminator.
    if (!strcmp(function, "version")) {
        strncpy(output, ACE_FULL_VERSION_STR, outputSize - 1);
    } else {
        strncpy(output, addLineBreaks(splitString(function)).c_str(), outputSize - 1);
    }

    // strncpy() does not terminate when the source fills the whole count, so
    // terminate explicitly on every path (the version branch used to skip this).
    output[outputSize - 1] = '\0';
}
Example #2
0
void SeedingData::computeSeeds(){
	if(!m_initiatedIterator){
		m_last=time(NULL);

		m_SEEDING_i=0;

		m_activeWorkerIterator=m_activeWorkers.begin();
		m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=32768;

		#ifdef ASSERT
		m_splayTreeIterator.hasNext();
		#endif
	}

	if(!m_checkedCheckpoint){
		if(m_parameters->hasCheckpoint("Seeds")){
			cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl;
			(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
			m_outbox->push_back(aMessage);

			loadCheckpoint();

			return;
		}
		m_checkedCheckpoint=true;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	if(!m_virtualCommunicator->isReady()){
		return;
	}

	// flush all mode is necessary to empty buffers and 
	// restart things from scratch..

	// 1. iterate on active workers
	if(m_activeWorkerIterator!=m_activeWorkers.end()){
		uint64_t workerId=*m_activeWorkerIterator;
		#ifdef ASSERT
		assert(m_aliveWorkers.count(workerId)>0);
		assert(!m_aliveWorkers[workerId].isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers[workerId].work();
		}

		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers[workerId].isDone()){
			m_workersDone.push_back(workerId);
			vector<Kmer> seed=*(m_aliveWorkers[workerId].getSeed());

			int nucleotides=seed.size()+(m_wordSize)-1;

			if(seed.size() > 0 && m_parameters->debugSeeds()){
				cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl;
			}

			// only consider the long ones.
			if(nucleotides>=m_parameters->getMinimumContigLength()){
				#ifdef SHOW_DISCOVERIES
				printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed.size());
				#endif
				
				Kmer firstVertex=seed[0];
				Kmer lastVertex=seed[seed.size()-1];
				Kmer firstReverse=m_parameters->_complementVertex(&lastVertex);

				if(firstVertex<firstReverse){
					printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed.size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
					m_SEEDING_seeds.push_back(seed);
				}
			}
		}
		m_activeWorkerIterator++;
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore -- 
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_SEEDING_i<(uint64_t)m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				if(m_SEEDING_i % 100000 ==0){
					printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size());
					fflush(stdout);

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
				}
				#ifdef ASSERT
				if(m_SEEDING_i==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif

				m_splayTreeIterator.next();
				Kmer vertexKey=*(m_splayTreeIterator.getKey());

				m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i);
				m_activeWorkers.insert(m_SEEDING_i);

				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_SEEDING_i++;

				// skip the reverse complement as we don't really need it anyway.
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// brace yourself for the next round
		m_activeWorkerIterator=m_activeWorkers.begin();
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	if((int)m_subgraph->size()==m_completedJobs){
		printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size());
		fflush(stdout);
		printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size());
		fflush(stdout);
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		fflush(stdout);
		m_virtualCommunicator->printStatistics();

		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
		m_outbox->push_back(aMessage);

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// sort the seeds by length
		std::sort(m_SEEDING_seeds.begin(),m_SEEDING_seeds.end(),myComparator_sort);

		/** write seeds for debugging purposes */
		if(m_parameters->hasOption("-write-seeds")){
			ostringstream fileName;
			fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta";
			ofstream f(fileName.str().c_str());
			for(int i=0;i<(int)m_SEEDING_seeds.size();i++){
				uint64_t id=getPathUniqueId(m_parameters->getRank(),i);
				f<<">RaySeed-"<<id<<endl;

				f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]),
					m_parameters->getWordSize(),m_parameters->getColorSpaceMode()),
					m_parameters->getColumns());
			}
			f.close();
		}

	}
}
Example #3
0
/**
 * Handler for the RAY_SLAVE_MODE_START_SEEDING slave mode: runs one step of the
 * seed computation for this rank.
 *
 * Each call does, in order:
 *  - on the first call, initializes the iteration state and resets the virtual
 *    communicator counters;
 *  - until the check has passed once, looks for a "Seeds" checkpoint and, if present,
 *    loads it, notifies the master and returns without computing anything;
 *  - processes the virtual communicator inbox and returns early if it is not ready;
 *  - either advances the worker currently pointed to by m_activeWorkerIterator and
 *    classifies its finished seed (dead end, too short, not owned, coverage out of
 *    range, or stored), or, when the iterator reached the end, calls updateStates()
 *    and possibly creates one new worker for the next vertex;
 *  - once m_completedJobs equals the subgraph size, prints runtime statistics,
 *    switches the slave mode to RAY_SLAVE_MODE_DO_NOTHING, notifies the master,
 *    sorts the seeds and optionally writes them to a FASTA file.
 *
 * NOTE(review): presumably invoked repeatedly by the slave-mode dispatcher until the
 * mode changes -- confirm against the caller.
 */
void SeedingData::call_RAY_SLAVE_MODE_START_SEEDING(){
	// one-time initialization of the iteration state
	if(!m_initiatedIterator){
		m_last=time(NULL);

		m_SEEDING_i=0;

		m_activeWorkerIterator=m_activeWorkers.begin();
		m_splayTreeIterator.constructor(m_subgraph,m_wordSize,m_parameters);
		m_initiatedIterator=true;
		m_maximumAliveWorkers=32768;

		#ifdef ASSERT
		m_splayTreeIterator.hasNext();
		#endif


		m_virtualCommunicator->resetCounters();
	}

	// if a checkpoint exists, load it instead of computing the seeds
	if(!m_checkedCheckpoint){
		if(m_parameters->hasCheckpoint("Seeds")){
			cout<<"Rank "<<m_parameters->getRank()<<": checkpoint Seeds exists, not computing seeds."<<endl;
			(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
			m_outbox->push_back(&aMessage);

			loadCheckpoint();

			return;
		}
		m_checkedCheckpoint=true;
	}

	m_virtualCommunicator->processInbox(&m_activeWorkersToRestore);

	if(!m_virtualCommunicator->isReady()){
		return;
	}

	// flush all mode is necessary to empty buffers and
	// restart things from scratch..

	// 1. iterate on active workers
	if(m_activeWorkerIterator!=m_activeWorkers.end()){
		WorkerHandle workerId=*m_activeWorkerIterator;
		#ifdef ASSERT
		assert(m_aliveWorkers.count(workerId)>0);
		assert(!m_aliveWorkers[workerId].isDone());
		#endif
		m_virtualCommunicator->resetLocalPushedMessageStatus();

		//force the worker to work until he finishes or pushes something on the stack
		while(!m_aliveWorkers[workerId].isDone()&&!m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_aliveWorkers[workerId].work();
		}

		if(m_virtualCommunicator->getLocalPushedMessageStatus()){
			m_waitingWorkers.push_back(workerId);
		}
		if(m_aliveWorkers[workerId].isDone()){
			m_workersDone.push_back(workerId);
			GraphPath*seed=m_aliveWorkers[workerId].getSeed();

			int nucleotides=getNumberOfNucleotides(seed->size(),m_wordSize);

			if(seed->size() > 0 && m_debugSeeds){
				cout<<"Raw seed length: "<<nucleotides<<" nucleotides"<<endl;
			}

			#ifdef ASSERT
			assert(nucleotides==0 || nucleotides>=m_wordSize);
			#endif

			bool skipBecauseOfDeadEnd=m_aliveWorkers[workerId].hasDeadEnd();

			// every finished worker increments exactly one of the counters below
			if(skipBecauseOfDeadEnd){

				m_skippedObjectsWithDeadEnd++;

			// only consider the long ones.
			}else if(nucleotides>=m_parameters->getMinimumContigLength()){
				#ifdef SHOW_DISCOVERIES
				// seed is a pointer here, so it must be dereferenced with ->
				printf("Rank %i discovered a seed with %i vertices\n",m_rank,(int)seed->size());
				#endif

				#ifdef ASSERT
				assert(seed->size()>0);
				#endif

				Kmer firstVertex;
				seed->at(0,&firstVertex);
				Kmer lastVertex;
				seed->at(seed->size()-1,&lastVertex);
				Kmer firstReverse=m_parameters->_complementVertex(&lastVertex);

				// only report long seeds, unless seed debugging is enabled
				int minimumNucleotidesForVerbosity=1024;

				bool verbose=nucleotides>=minimumNucleotidesForVerbosity;

				if(m_debugSeeds){
					verbose=true;
				}

				// the seed is kept only when its first vertex compares lower than the
				// complement of its last vertex; otherwise it is counted as "not mine"
				if(firstVertex<firstReverse){

					if(verbose){
						printf("Rank %i stored a seed with %i vertices\n",m_rank,(int)seed->size());
					}

					if(m_parameters->showMemoryUsage() && verbose){
						showMemoryUsage(m_rank);
					}

					GraphPath*theSeed=seed;

					theSeed->computePeakCoverage();

					CoverageDepth peakCoverage=theSeed->getPeakCoverage();

					if(verbose)
						cout<<"Got a seed, peak coverage: "<<peakCoverage;

					/* ignore the seed if its peak coverage is outside the accepted range. */
					if(peakCoverage >= m_minimumSeedCoverageDepth
						&& peakCoverage <= m_parameters->getMaximumSeedCoverage()){

						if(verbose)
							cout<<", adding seed."<<endl;

						m_SEEDING_seeds.push_back(*theSeed);

						m_eligiblePaths++;
					}else{

						if(verbose)
							cout<<", ignoring seed."<<endl;

						m_skippedNotEnoughCoverage++;
					}
				}else{
					m_skippedNotMine++;
				}
			}else{
				m_skippedTooShort++;
			}
		}
		m_activeWorkerIterator++;
	}else{
		updateStates();

		//  add one worker to active workers
		//  reason is that those already in the pool don't communicate anymore --
		//  as for they need responses.
		if(!m_virtualCommunicator->getGlobalPushedMessageStatus()&&m_activeWorkers.empty()){
			// there is at least one worker to start
			// AND
			// the number of alive workers is below the maximum
			if(m_SEEDING_i<m_subgraph->size()&&(int)m_aliveWorkers.size()<m_maximumAliveWorkers){
				// progress report every 100000 vertices
				if(m_SEEDING_i % 100000 ==0){
					printf("Rank %i is creating seeds [%i/%i]\n",getRank(),(int)m_SEEDING_i+1,(int)m_subgraph->size());

					if(m_parameters->showMemoryUsage()){
						showMemoryUsage(m_rank);
					}
				}

				#ifdef ASSERT
				if(m_SEEDING_i==0){
					assert(m_completedJobs==0&&m_activeWorkers.size()==0&&m_aliveWorkers.size()==0);
				}
				#endif

				m_splayTreeIterator.next();
				Kmer vertexKey=*(m_splayTreeIterator.getKey());

				// the worker is keyed by the index of the vertex it starts from
				m_aliveWorkers[m_SEEDING_i].constructor(&vertexKey,m_parameters,m_outboxAllocator,m_virtualCommunicator,m_SEEDING_i,
					RAY_MPI_TAG_GET_VERTEX_EDGES_COMPACT,
					RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE
				);
				if(m_debugSeeds)
					m_aliveWorkers[m_SEEDING_i].enableDebugMode();

				m_activeWorkers.insert(m_SEEDING_i);

				// track the peak number of simultaneously alive workers
				int population=m_aliveWorkers.size();
				if(population>m_maximumWorkers){
					m_maximumWorkers=population;
				}

				m_SEEDING_i++;

				// skip the reverse complement as we don't really need it anyway.
			}else{
				m_virtualCommunicator->forceFlush();
			}
		}

		// brace yourself for the next round
		m_activeWorkerIterator=m_activeWorkers.begin();
	}

	#ifdef ASSERT
	assert((int)m_aliveWorkers.size()<=m_maximumAliveWorkers);
	#endif

	// every vertex of the subgraph has been processed: report and finish
	if((int)m_subgraph->size()==m_completedJobs){

		printf("Rank %i has %i seeds\n",m_rank,(int)m_SEEDING_seeds.size());
		printf("Rank %i is creating seeds [%i/%i] (completed)\n",getRank(),(int)m_SEEDING_i,(int)m_subgraph->size());
		printf("Rank %i: peak number of workers: %i, maximum: %i\n",m_rank,m_maximumWorkers,m_maximumAliveWorkers);
		m_virtualCommunicator->printStatistics();

		cout<<"Rank "<<m_rank<<" runtime statistics for seeding algorithm: "<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of dead ends: "<<m_skippedObjectsWithDeadEnd<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of short length: "<<m_skippedTooShort<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of bad ownership: "<<m_skippedNotMine<<endl;
		cout<<"Rank "<<m_rank<<" Skipped paths because of low coverage: "<<m_skippedNotEnoughCoverage<<endl;
		cout<<"Rank "<<m_rank<<" Eligible paths: "<<m_eligiblePaths<<endl;

		#ifdef ASSERT
		assert(m_eligiblePaths==(int)m_SEEDING_seeds.size());
		#endif

		(*m_mode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_SEEDING_IS_OVER,getRank());
		m_outbox->push_back(&aMessage);

		if(m_parameters->showMemoryUsage()){
			showMemoryUsage(m_rank);
		}

		#ifdef ASSERT
		assert(m_aliveWorkers.size()==0);
		assert(m_activeWorkers.size()==0);
		#endif

		// sort the seeds by length
		std::sort(m_SEEDING_seeds.begin(),
			m_SEEDING_seeds.end(),myComparator_sort);

		/** write seeds for debugging purposes */
		if(m_parameters->hasOption("-write-seeds")){
			ostringstream fileName;
			fileName<<m_parameters->getPrefix()<<"Rank"<<m_parameters->getRank()<<".RaySeeds.fasta";
			ofstream f(fileName.str().c_str());

			for(int i=0;i<(int)m_SEEDING_seeds.size();i++){
				PathHandle id=getPathUniqueId(m_parameters->getRank(),i);
				f<<">RaySeed-"<<id<<endl;

				f<<addLineBreaks(convertToString(&(m_SEEDING_seeds[i]),
					m_parameters->getWordSize(),m_parameters->getColorSpaceMode()),
					m_parameters->getColumns());
			}
			f.close();
		}

	}
}