示例#1
0
void SpuriousSeedAnnihilator::call_RAY_SLAVE_MODE_CLEAN_SEEDS(){

	if((!m_debugCode && m_hasCheckpointFilesForSeeds) || m_skip){

		m_core->getSwitchMan()->closeSlaveModeLocally(m_core->getOutbox(),m_core->getRank());
		return;
	}

// Trace was here -> PASS

	#ifdef ASSERT
	assert(m_parameters != NULL);
	assert(m_subgraph != NULL);
	#endif

	// clear graph
	GridTableIterator iterator;
	iterator.constructor(m_subgraph, m_parameters->getWordSize(), m_parameters);

	#ifdef ASSERT
	LargeCount cleared=0;
	#endif

// Trace was here -> PASS

	while(iterator.hasNext()){
		iterator.next();
		Kmer key=*(iterator.getKey());
		m_subgraph->clearDirections(&key);

		#ifdef ASSERT
		cleared++;

		Vertex*node=m_subgraph->find(&key);
		assert(node->getFirstDirection() == NULL);

		#endif
	}

// Trace was here -> PASS

	#ifdef ASSERT
	assert(cleared == m_subgraph->size());
	#endif

// Trace was here -> PASS

	int bytes=m_directionsAllocator->getChunkSize() * m_directionsAllocator->getNumberOfChunks();

	m_directionsAllocator->clear();

	cout<<"Rank "<<m_rank<<" freed "<<bytes/1024<<" KiB from the path memory pool"<<endl;

// Trace was here -> <s>FAIL</s>  PASS

/*
 * Tell another rank that we are done with this.
 */
	m_core->getSwitchMan()->closeSlaveModeLocally(m_core->getOutbox(),m_core->getRank());
}
示例#2
0
/**
 * here we extract the phylogeny colors
 */
void PhylogenyViewer::extractColorsForPhylogeny(){

	GridTableIterator iterator;
	iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);

	//* only fetch half of the iterated things because we just need one k-mer
	// for any pair of reverse-complement k-mers 
	
	int parity=0;

	while(iterator.hasNext()){

		Vertex*node=iterator.next();
		Kmer key=*(iterator.getKey());

		#ifdef ASSERT
		assert(parity==0 || parity==1);
		#endif

		if(parity==0){
			parity=1;
		}else if(parity==1){
			parity=0;

			continue; // we only need data with parity=0
		}

		VirtualKmerColorHandle color=node->getVirtualColor();
		set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color);

		for(set<PhysicalKmerColor>::iterator j=physicalColors->begin();
			j!=physicalColors->end();j++){

			PhysicalKmerColor physicalColor=*j;
	
			PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER;
		
			if(nameSpace==COLOR_NAMESPACE_PHYLOGENY){
				PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER;

				m_colorsForPhylogeny.insert(colorForPhylogeny);

				#ifdef DEBUG_PHYLOGENY
				cout<<"[phylogeny] colorForPhylogeny= "<<colorForPhylogeny<<endl;
				#endif
			}
		}
	}
		
	cout<<"Rank "<<m_rank<<" has exactly "<<m_colorsForPhylogeny.size()<<" k-mer physical colors for the phylogeny."<<endl;
	cout<<endl;

	m_extractedColorsForPhylogeny=true;

	m_loadedTaxonsForPhylogeny=false;

	m_totalNumberOfKmerObservations=m_searcher->getTotalNumberOfKmerObservations();
}
示例#3
0
void GeneOntology::fetchRelevantColors(){

	GridTableIterator iterator;
	iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);

	//* only fetch half of the iterated things because we just need one k-mer
	// for any pair of reverse-complement k-mers 
	
	int parity=0;

	while(iterator.hasNext()){

		Vertex*node=iterator.next();
		Kmer key=*(iterator.getKey());

		#ifdef ASSERT
		assert(parity==0 || parity==1);
		#endif

		if(parity==0){
			parity=1;
		}else if(parity==1){
			parity=0;

			continue; // we only need data with parity=0
		}

		VirtualKmerColorHandle color=node->getVirtualColor();
		set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color);

		for(set<PhysicalKmerColor>::iterator j=physicalColors->begin();
			j!=physicalColors->end();j++){

			PhysicalKmerColor physicalColor=*j;
	
			PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER;
		
			if(nameSpace==COLOR_NAMESPACE_EMBL_CDS){
				PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER;

				m_colorsForOntology.insert(colorForPhylogeny);

			}
		}
	}
		
	cout<<"Rank "<<m_rank<<" has exactly "<<m_colorsForOntology.size()<<" k-mer physical colors related to EMBL CDS objects."<<endl;
	cout<<endl;


	m_listedRelevantColors=true;

	m_loadedAnnotations=false;

}
示例#4
0
void GeneOntology::countOntologyTermsInGraph(){
	
	m_kmerObservationsWithGeneOntologies=0;

	cout<<"Rank "<<m_rank<<": counting ontology terms in the graph..."<<endl;

	#ifdef ASSERT
	assert(m_ontologyTermFrequencies.size()==0);
	#endif

	GridTableIterator iterator;
	iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);

	//* only fetch half of the iterated things because we just need one k-mer
	// for any pair of reverse-complement k-mers 
	
	int parity=0;

	while(iterator.hasNext()){

		Vertex*node=iterator.next();
		Kmer key=*(iterator.getKey());

		#ifdef ASSERT
		assert(parity==0 || parity==1);
		#endif

		if(parity==0){
			parity=1;
		}else if(parity==1){
			parity=0;

			continue; // we only need data with parity=0
		}

		VirtualKmerColorHandle color=node->getVirtualColor();
		set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color);

		int kmerCoverage=node->getCoverage(&key);

		// this is the set of gene ontology terms that 
		// the current k-mer contributes to
		set<GeneOntologyIdentifier> ontologyTerms;

		for(set<PhysicalKmerColor>::iterator j=physicalColors->begin();
			j!=physicalColors->end();j++){

			PhysicalKmerColor physicalColor=*j;
	
			PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER;
		
			if(nameSpace==COLOR_NAMESPACE_EMBL_CDS){

				PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER;
	
				/* the color is in the graph, but no annotations exist... */
				if(m_annotations.count(colorForPhylogeny)==0){
					continue;
				}

				vector<GeneOntologyIdentifier>*terms=NULL;
				terms=&(m_annotations[colorForPhylogeny]);

				int numberOfTerms=terms->size();

				for(int i=0;i<numberOfTerms;i++){

					GeneOntologyIdentifier term=terms->at(i);

					ontologyTerms.insert(term);
				}
			}
		}

		// here, we have a list of gene ontology terms
		// update each of them. 

		int quantity=1;

		for(set<GeneOntologyIdentifier>::iterator i=ontologyTerms.begin();i!=ontologyTerms.end();i++){
			
			GeneOntologyIdentifier term=*i;

			GeneOntologyIdentifier realTerm=dereferenceTerm(term);

			#ifdef BUG_DETERMINISM
			if(term==49){
				cout<<"[BUG_DETERMINISM] viaCounter: incrementOntologyTermFrequency "<<term<<" "<<kmerCoverage<<" "<<quantity<<endl;
			}
			#endif

			incrementOntologyTermFrequency(realTerm,kmerCoverage, quantity);
		}

		// update the total
		if(!ontologyTerms.empty()){
			m_kmerObservationsWithGeneOntologies+=kmerCoverage;
		}
	}

	m_ontologyTermFrequencies_iterator1=m_ontologyTermFrequencies.begin();

	if(m_ontologyTermFrequencies_iterator1!=m_ontologyTermFrequencies.end()){
		m_ontologyTermFrequencies_iterator2=m_ontologyTermFrequencies_iterator1->second.begin();
	}

	m_countOntologyTermsInGraph=true;

	cout<<"Rank "<<m_rank<<": "<<m_ontologyTermFrequencies.size();
	cout<<" have some biological signal"<<endl;
	cout<<"Number of dereferenced alternate handles: "<<m_dereferences<<endl;
	cout<<"Number of k-mer observations with gene ontology terms: ";
	cout<<m_kmerObservationsWithGeneOntologies<<endl;
}
示例#5
0
void CoverageGatherer::writeKmers(){
	#ifdef CONFIG_ASSERT
	LargeCount n=0;
	#endif

	if(m_subgraph->size()==0){
		(*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING;
		Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END,
			m_parameters->getRank());
		m_outbox->push_back(&aMessage);
		return;
	}
	GridTableIterator iterator;
	iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);
	FILE* kmerFile=NULL;
	ostringstream buffer;
	ostringstream name;
	name<<m_parameters->getPrefix()<<"/kmers.txt";
	if(m_parameters->getRank()==0)
		kmerFile=fopen(name.str().c_str(),"w"); // create empty file
	else
		kmerFile=fopen(name.str().c_str(),"a"); // append to file

	if(m_parameters->getRank()==MASTER_RANK)
		writeHeader(kmerFile);

	while(iterator.hasNext()){
		Vertex*node=iterator.next();
		Kmer key=*(iterator.getKey());
		CoverageDepth coverage=node->getCoverage(&key);
		m_distributionOfCoverage[coverage]++;
		#ifdef CONFIG_ASSERT
		n++;
		#endif
		string kmerSequence=key.idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
		vector<Kmer> parents=node->getIngoingEdges(&key,m_parameters->getWordSize());
		vector<Kmer> children=node->getOutgoingEdges(&key,m_parameters->getWordSize());

		//fprintf(kmerFile,"%s;%i;",kmerSequence.c_str(),coverage);
		buffer << kmerSequence << ";" << coverage << ";";
		for(int i=0;i<(int)parents.size();i++){
			string printableVersion=parents[i].idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
			if(i!=0)
				buffer << " ";
				//fprintf(kmerFile," ");

			//fprintf(kmerFile,"%c",printableVersion[0]);
			buffer << printableVersion[0];
		}
		//fprintf(kmerFile,";");
		buffer << ";";
		for(int i=0;i<(int)children.size();i++){
			string printableVersion=children[i].idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
			if(i!=0)
				buffer << " ";
				//fprintf(kmerFile," ");

			//fprintf(kmerFile,"%c",printableVersion[m_parameters->getWordSize()-1]);
			buffer << printableVersion[m_parameters->getWordSize()-1];
		}
		//fprintf("\n");
		buffer << endl;
		flushFileOperationBuffer_FILE(false, &buffer, kmerFile, CONFIG_FILE_IO_BUFFER_SIZE);
	}
	flushFileOperationBuffer_FILE(true, &buffer, kmerFile, CONFIG_FILE_IO_BUFFER_SIZE);
	fclose(kmerFile);

	#ifdef CONFIG_ASSERT
	if(n!=m_subgraph->size()){
		cout<<"n="<<n<<" size="<<m_subgraph->size()<<endl;
	}
	assert(n==m_subgraph->size());
	#endif
	m_waiting=false;
	m_coverageIterator=m_distributionOfCoverage.begin();
}
示例#6
0
/**
 * Slave-mode handler that sends the local coverage distribution to the
 * master rank. Each call performs one of three steps:
 *
 *  1. m_distributionOfCoverage is empty: build it by walking the subgraph
 *     (or, if the subgraph itself is empty, send RAY_MPI_TAG_COVERAGE_END
 *     and switch to RAY_SLAVE_MODE_DO_NOTHING).
 *  2. A batch was sent and m_waiting is set: clear m_waiting once the first
 *     inbox message carries RAY_MPI_TAG_COVERAGE_DATA_REPLY.
 *  3. Otherwise: pack the next (coverage, number of vertices) pairs into a
 *     RAY_MPI_TAG_COVERAGE_DATA message, or, when nothing is left, clear
 *     the distribution, send RAY_MPI_TAG_COVERAGE_END and switch to
 *     RAY_SLAVE_MODE_DO_NOTHING.
 */
void CoverageGatherer::call_RAY_SLAVE_MODE_SEND_DISTRIBUTION(){

	if(m_distributionOfCoverage.size()==0){
		#ifdef CONFIG_ASSERT
		LargeCount n=0;
		#endif

		if(m_subgraph->size()==0){
			(*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END,
				m_parameters->getRank());
			m_outbox->push_back(&aMessage);
			return;
		}
		GridTableIterator iterator;
		iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);
		while(iterator.hasNext()){
			Vertex*node=iterator.next();
			Kmer key=*(iterator.getKey());
			CoverageDepth coverage=node->getCoverage(&key);
			m_distributionOfCoverage[coverage]++;

			#ifdef CONFIG_ASSERT
			n++;
			#endif
		}

		#ifdef CONFIG_ASSERT
		if(n!=m_subgraph->size()){
			cout<<"Expected (from iterator)="<<n<<" Actual (->size())="<<m_subgraph->size()<<endl;
		}
		assert(n==m_subgraph->size());
		#endif
		m_waiting=false;
		m_coverageIterator=m_distributionOfCoverage.begin();
	}else if(m_waiting){
		if((*m_inbox).size()>0&&(*m_inbox)[0]->getTag()==RAY_MPI_TAG_COVERAGE_DATA_REPLY){
			m_waiting=false;
		}
	}else{
		MessageUnit*messageContent=(MessageUnit*)m_outboxAllocator->allocate(MAXIMUM_MESSAGE_SIZE_IN_BYTES);
		int count=0;
		int maximumElements=MAXIMUM_MESSAGE_SIZE_IN_BYTES/sizeof(MessageUnit);

		// Every entry occupies two consecutive slots, so there must be room
		// for both; checking only the first slot would write one element
		// past the buffer if the capacity were ever odd.
		while(count+1<maximumElements && m_coverageIterator!=m_distributionOfCoverage.end()){
			CoverageDepth coverage=m_coverageIterator->first;
			LargeCount numberOfVertices=m_coverageIterator->second;
			messageContent[count]=coverage;
			messageContent[count+1]=numberOfVertices;
			count+=2;
			m_coverageIterator++;
		}

		if(count!=0){
			Message aMessage(messageContent,count,MASTER_RANK,RAY_MPI_TAG_COVERAGE_DATA,
				m_parameters->getRank());

			m_outbox->push_back(&aMessage);

			// Hold further batches until the reply arrives.
			m_waiting=true;
		}else{
			// Everything was sent: reset the distribution and finish.
			m_distributionOfCoverage.clear();
			(*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING;
			Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END,
				m_parameters->getRank());
			m_outbox->push_back(&aMessage);
		}
	}
}
示例#7
0
void PhylogenyViewer::gatherKmerObservations(){

	/* set to true to use only assembled kmers */
	bool useOnlyAssembledKmer=false;

	GridTableIterator iterator;
	iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters);

	//* only fetch half of the iterated things because we just need one k-mer
	// for any pair of reverse-complement k-mers 
	
	int parity=0;

	map<CoverageDepth,LargeCount> frequencies;

	while(iterator.hasNext()){

		#ifdef ASSERT
		assert(parity==0 || parity==1);
		#endif

		Vertex*node=iterator.next();
		Kmer key=*(iterator.getKey());
		
		if(parity==0){
			parity=1;
		}else if(parity==1){
			parity=0;

			continue; // we only need data with parity=0
		}

		// check for assembly paths

		/* here, we just want to find a path with
		* a good progression */

		Direction*a=node->m_directions;
		bool nicelyAssembled=false;

		while(a!=NULL){
			int progression=a->getProgression();

			if(progression>= CONFIG_NICELY_ASSEMBLED_KMER_POSITION){
				nicelyAssembled=true;
			}

			a=a->getNext();
		}

		if(useOnlyAssembledKmer && !nicelyAssembled){
			continue; // the k-mer is not nicely assembled...
		}

		#ifdef ASSERT
		assert(nicelyAssembled || !useOnlyAssembledKmer);
		#endif

		// at this point, we have a nicely assembled k-mer
		
		int kmerCoverage=node->getCoverage(&key);

		VirtualKmerColorHandle color=node->getVirtualColor();
		set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color);

		vector<TaxonIdentifier> taxons;

		// get a list of taxons associated with this kmer
		for(set<PhysicalKmerColor>::iterator j=physicalColors->begin();
			j!=physicalColors->end();j++){

			PhysicalKmerColor physicalColor=*j;
	
			PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER;
		
			if(nameSpace==COLOR_NAMESPACE_PHYLOGENY){
				PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER;

				#ifdef ASSERT
				if(m_colorsForPhylogeny.count(colorForPhylogeny)==0){
					//cout<<"Error: color "<<colorForPhylogeny<<" should be in m_colorsForPhylogeny which contains "<<m_colorsForPhylogeny.size()<<endl;
				}
				#endif

				//assert(m_colorsForPhylogeny.count(colorForPhylogeny)>0);

				// this means that this genome is not in the taxonomy tree
				if(m_genomeToTaxon.count(colorForPhylogeny)==0){

					if(m_warnings.count(colorForPhylogeny)==0){
						cout<<"Warning, color "<<colorForPhylogeny<<" is not stored, "<<m_genomeToTaxon.size()<<" available. This means that you provided a genome sequence that is not classified in the taxonomy."<<endl;

						#ifdef VERBOSE
						for(map<GenomeIdentifier,TaxonIdentifier>::iterator i=m_genomeToTaxon.begin();i!=m_genomeToTaxon.end();i++){
							cout<<" "<<i->first<<"->"<<i->second;
						}
						cout<<endl;
						#endif
					}

					m_warnings.insert(colorForPhylogeny);

					continue;
				}

				#ifdef ASSERT
				assert(m_genomeToTaxon.count(colorForPhylogeny)>0);
				#endif

				TaxonIdentifier taxon=m_genomeToTaxon[colorForPhylogeny];

				taxons.push_back(taxon);
			}
		}

		classifySignal(&taxons,kmerCoverage,node,&key);

		int count=taxons.size();

		frequencies[count]++;
	}
	
/*
 *
 * TODO: move this in colored operation files
 *
	cout<<endl;
	cout<<"Taxon frequencies (only one DNA strand selected)"<<endl;
	cout<<"Count	Frequency"<<endl;
	for(map<CoverageDepth,LargeCount>::iterator i=frequencies.begin();i!=frequencies.end();i++){
		cout<<""<<i->first<<"	"<<i->second<<endl;
	}

	cout<<"Taxon observations"<<endl;
*/

	//showObservations(&cout);

	m_gatheredObservations=true;

	m_syncedTree=false;
	m_unknownSent=false;
	m_messageReceived=true;
	m_messageSent=false;

	m_countIterator=m_taxonObservations.begin();
}