void SpuriousSeedAnnihilator::call_RAY_SLAVE_MODE_CLEAN_SEEDS(){ if((!m_debugCode && m_hasCheckpointFilesForSeeds) || m_skip){ m_core->getSwitchMan()->closeSlaveModeLocally(m_core->getOutbox(),m_core->getRank()); return; } // Trace was here -> PASS #ifdef ASSERT assert(m_parameters != NULL); assert(m_subgraph != NULL); #endif // clear graph GridTableIterator iterator; iterator.constructor(m_subgraph, m_parameters->getWordSize(), m_parameters); #ifdef ASSERT LargeCount cleared=0; #endif // Trace was here -> PASS while(iterator.hasNext()){ iterator.next(); Kmer key=*(iterator.getKey()); m_subgraph->clearDirections(&key); #ifdef ASSERT cleared++; Vertex*node=m_subgraph->find(&key); assert(node->getFirstDirection() == NULL); #endif } // Trace was here -> PASS #ifdef ASSERT assert(cleared == m_subgraph->size()); #endif // Trace was here -> PASS int bytes=m_directionsAllocator->getChunkSize() * m_directionsAllocator->getNumberOfChunks(); m_directionsAllocator->clear(); cout<<"Rank "<<m_rank<<" freed "<<bytes/1024<<" KiB from the path memory pool"<<endl; // Trace was here -> <s>FAIL</s> PASS /* * Tell another rank that we are done with this. */ m_core->getSwitchMan()->closeSlaveModeLocally(m_core->getOutbox(),m_core->getRank()); }
/** * here we extract the phylogeny colors */ void PhylogenyViewer::extractColorsForPhylogeny(){ GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); //* only fetch half of the iterated things because we just need one k-mer // for any pair of reverse-complement k-mers int parity=0; while(iterator.hasNext()){ Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); #ifdef ASSERT assert(parity==0 || parity==1); #endif if(parity==0){ parity=1; }else if(parity==1){ parity=0; continue; // we only need data with parity=0 } VirtualKmerColorHandle color=node->getVirtualColor(); set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color); for(set<PhysicalKmerColor>::iterator j=physicalColors->begin(); j!=physicalColors->end();j++){ PhysicalKmerColor physicalColor=*j; PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER; if(nameSpace==COLOR_NAMESPACE_PHYLOGENY){ PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER; m_colorsForPhylogeny.insert(colorForPhylogeny); #ifdef DEBUG_PHYLOGENY cout<<"[phylogeny] colorForPhylogeny= "<<colorForPhylogeny<<endl; #endif } } } cout<<"Rank "<<m_rank<<" has exactly "<<m_colorsForPhylogeny.size()<<" k-mer physical colors for the phylogeny."<<endl; cout<<endl; m_extractedColorsForPhylogeny=true; m_loadedTaxonsForPhylogeny=false; m_totalNumberOfKmerObservations=m_searcher->getTotalNumberOfKmerObservations(); }
void GeneOntology::fetchRelevantColors(){ GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); //* only fetch half of the iterated things because we just need one k-mer // for any pair of reverse-complement k-mers int parity=0; while(iterator.hasNext()){ Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); #ifdef ASSERT assert(parity==0 || parity==1); #endif if(parity==0){ parity=1; }else if(parity==1){ parity=0; continue; // we only need data with parity=0 } VirtualKmerColorHandle color=node->getVirtualColor(); set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color); for(set<PhysicalKmerColor>::iterator j=physicalColors->begin(); j!=physicalColors->end();j++){ PhysicalKmerColor physicalColor=*j; PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER; if(nameSpace==COLOR_NAMESPACE_EMBL_CDS){ PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER; m_colorsForOntology.insert(colorForPhylogeny); } } } cout<<"Rank "<<m_rank<<" has exactly "<<m_colorsForOntology.size()<<" k-mer physical colors related to EMBL CDS objects."<<endl; cout<<endl; m_listedRelevantColors=true; m_loadedAnnotations=false; }
void GeneOntology::countOntologyTermsInGraph(){ m_kmerObservationsWithGeneOntologies=0; cout<<"Rank "<<m_rank<<": counting ontology terms in the graph..."<<endl; #ifdef ASSERT assert(m_ontologyTermFrequencies.size()==0); #endif GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); //* only fetch half of the iterated things because we just need one k-mer // for any pair of reverse-complement k-mers int parity=0; while(iterator.hasNext()){ Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); #ifdef ASSERT assert(parity==0 || parity==1); #endif if(parity==0){ parity=1; }else if(parity==1){ parity=0; continue; // we only need data with parity=0 } VirtualKmerColorHandle color=node->getVirtualColor(); set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color); int kmerCoverage=node->getCoverage(&key); // this is the set of gene ontology terms that // the current k-mer contributes to set<GeneOntologyIdentifier> ontologyTerms; for(set<PhysicalKmerColor>::iterator j=physicalColors->begin(); j!=physicalColors->end();j++){ PhysicalKmerColor physicalColor=*j; PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER; if(nameSpace==COLOR_NAMESPACE_EMBL_CDS){ PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER; /* the color is in the graph, but no annotations exist... */ if(m_annotations.count(colorForPhylogeny)==0){ continue; } vector<GeneOntologyIdentifier>*terms=NULL; terms=&(m_annotations[colorForPhylogeny]); int numberOfTerms=terms->size(); for(int i=0;i<numberOfTerms;i++){ GeneOntologyIdentifier term=terms->at(i); ontologyTerms.insert(term); } } } // here, we have a list of gene ontology terms // update each of them. int quantity=1; for(set<GeneOntologyIdentifier>::iterator i=ontologyTerms.begin();i!=ontologyTerms.end();i++){ GeneOntologyIdentifier term=*i; GeneOntologyIdentifier realTerm=dereferenceTerm(term); #ifdef BUG_DETERMINISM if(term==49){ cout<<"[BUG_DETERMINISM] viaCounter: incrementOntologyTermFrequency "<<term<<" "<<kmerCoverage<<" "<<quantity<<endl; } #endif incrementOntologyTermFrequency(realTerm,kmerCoverage, quantity); } // update the total if(!ontologyTerms.empty()){ m_kmerObservationsWithGeneOntologies+=kmerCoverage; } } m_ontologyTermFrequencies_iterator1=m_ontologyTermFrequencies.begin(); if(m_ontologyTermFrequencies_iterator1!=m_ontologyTermFrequencies.end()){ m_ontologyTermFrequencies_iterator2=m_ontologyTermFrequencies_iterator1->second.begin(); } m_countOntologyTermsInGraph=true; cout<<"Rank "<<m_rank<<": "<<m_ontologyTermFrequencies.size(); cout<<" have some biological signal"<<endl; cout<<"Number of dereferenced alternate handles: "<<m_dereferences<<endl; cout<<"Number of k-mer observations with gene ontology terms: "; cout<<m_kmerObservationsWithGeneOntologies<<endl; }
void CoverageGatherer::writeKmers(){ #ifdef CONFIG_ASSERT LargeCount n=0; #endif if(m_subgraph->size()==0){ (*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END, m_parameters->getRank()); m_outbox->push_back(&aMessage); return; } GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); FILE* kmerFile=NULL; ostringstream buffer; ostringstream name; name<<m_parameters->getPrefix()<<"/kmers.txt"; if(m_parameters->getRank()==0) kmerFile=fopen(name.str().c_str(),"w"); // create empty file else kmerFile=fopen(name.str().c_str(),"a"); // append to file if(m_parameters->getRank()==MASTER_RANK) writeHeader(kmerFile); while(iterator.hasNext()){ Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); CoverageDepth coverage=node->getCoverage(&key); m_distributionOfCoverage[coverage]++; #ifdef CONFIG_ASSERT n++; #endif string kmerSequence=key.idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); vector<Kmer> parents=node->getIngoingEdges(&key,m_parameters->getWordSize()); vector<Kmer> children=node->getOutgoingEdges(&key,m_parameters->getWordSize()); //fprintf(kmerFile,"%s;%i;",kmerSequence.c_str(),coverage); buffer << kmerSequence << ";" << coverage << ";"; for(int i=0;i<(int)parents.size();i++){ string printableVersion=parents[i].idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); if(i!=0) buffer << " "; //fprintf(kmerFile," "); //fprintf(kmerFile,"%c",printableVersion[0]); buffer << printableVersion[0]; } //fprintf(kmerFile,";"); buffer << ";"; for(int i=0;i<(int)children.size();i++){ string printableVersion=children[i].idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); if(i!=0) buffer << " "; //fprintf(kmerFile," "); //fprintf(kmerFile,"%c",printableVersion[m_parameters->getWordSize()-1]); buffer << printableVersion[m_parameters->getWordSize()-1]; } //fprintf("\n"); buffer << endl; flushFileOperationBuffer_FILE(false, &buffer, kmerFile, CONFIG_FILE_IO_BUFFER_SIZE); } flushFileOperationBuffer_FILE(true, &buffer, kmerFile, CONFIG_FILE_IO_BUFFER_SIZE); fclose(kmerFile); #ifdef CONFIG_ASSERT if(n!=m_subgraph->size()){ cout<<"n="<<n<<" size="<<m_subgraph->size()<<endl; } assert(n==m_subgraph->size()); #endif m_waiting=false; m_coverageIterator=m_distributionOfCoverage.begin(); }
void CoverageGatherer::call_RAY_SLAVE_MODE_SEND_DISTRIBUTION(){ if(m_distributionOfCoverage.size()==0){ #ifdef CONFIG_ASSERT LargeCount n=0; #endif if(m_subgraph->size()==0){ (*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END, m_parameters->getRank()); m_outbox->push_back(&aMessage); return; } GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); while(iterator.hasNext()){ Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); CoverageDepth coverage=node->getCoverage(&key); m_distributionOfCoverage[coverage]++; #ifdef CONFIG_ASSERT n++; #endif } #ifdef CONFIG_ASSERT if(n!=m_subgraph->size()){ cout<<"Expected (from iterator)="<<n<<" Actual (->size())="<<m_subgraph->size()<<endl; } assert(n==m_subgraph->size()); #endif m_waiting=false; m_coverageIterator=m_distributionOfCoverage.begin(); }else if(m_waiting){ if((*m_inbox).size()>0&&(*m_inbox)[0]->getTag()==RAY_MPI_TAG_COVERAGE_DATA_REPLY){ m_waiting=false; } }else{ MessageUnit*messageContent=(MessageUnit*)m_outboxAllocator->allocate(MAXIMUM_MESSAGE_SIZE_IN_BYTES); int count=0; int maximumElements=MAXIMUM_MESSAGE_SIZE_IN_BYTES/sizeof(MessageUnit); while(count<maximumElements && m_coverageIterator!=m_distributionOfCoverage.end()){ CoverageDepth coverage=m_coverageIterator->first; LargeCount numberOfVertices=m_coverageIterator->second; messageContent[count]=coverage; messageContent[count+1]=numberOfVertices; count+=2; m_coverageIterator++; } if(count!=0){ Message aMessage(messageContent,count,MASTER_RANK,RAY_MPI_TAG_COVERAGE_DATA, m_parameters->getRank()); m_outbox->push_back(&aMessage); m_waiting=true; }else{ m_distributionOfCoverage.clear(); (*m_slaveMode)=RAY_SLAVE_MODE_DO_NOTHING; Message aMessage(NULL,0,MASTER_RANK,RAY_MPI_TAG_COVERAGE_END, m_parameters->getRank()); m_outbox->push_back(&aMessage); } } }
void PhylogenyViewer::gatherKmerObservations(){ /* set to true to use only assembled kmers */ bool useOnlyAssembledKmer=false; GridTableIterator iterator; iterator.constructor(m_subgraph,m_parameters->getWordSize(),m_parameters); //* only fetch half of the iterated things because we just need one k-mer // for any pair of reverse-complement k-mers int parity=0; map<CoverageDepth,LargeCount> frequencies; while(iterator.hasNext()){ #ifdef ASSERT assert(parity==0 || parity==1); #endif Vertex*node=iterator.next(); Kmer key=*(iterator.getKey()); if(parity==0){ parity=1; }else if(parity==1){ parity=0; continue; // we only need data with parity=0 } // check for assembly paths /* here, we just want to find a path with * a good progression */ Direction*a=node->m_directions; bool nicelyAssembled=false; while(a!=NULL){ int progression=a->getProgression(); if(progression>= CONFIG_NICELY_ASSEMBLED_KMER_POSITION){ nicelyAssembled=true; } a=a->getNext(); } if(useOnlyAssembledKmer && !nicelyAssembled){ continue; // the k-mer is not nicely assembled... } #ifdef ASSERT assert(nicelyAssembled || !useOnlyAssembledKmer); #endif // at this point, we have a nicely assembled k-mer int kmerCoverage=node->getCoverage(&key); VirtualKmerColorHandle color=node->getVirtualColor(); set<PhysicalKmerColor>*physicalColors=m_colorSet->getPhysicalColors(color); vector<TaxonIdentifier> taxons; // get a list of taxons associated with this kmer for(set<PhysicalKmerColor>::iterator j=physicalColors->begin(); j!=physicalColors->end();j++){ PhysicalKmerColor physicalColor=*j; PhysicalKmerColor nameSpace=physicalColor/COLOR_NAMESPACE_MULTIPLIER; if(nameSpace==COLOR_NAMESPACE_PHYLOGENY){ PhysicalKmerColor colorForPhylogeny=physicalColor % COLOR_NAMESPACE_MULTIPLIER; #ifdef ASSERT if(m_colorsForPhylogeny.count(colorForPhylogeny)==0){ //cout<<"Error: color "<<colorForPhylogeny<<" should be in m_colorsForPhylogeny which contains "<<m_colorsForPhylogeny.size()<<endl; } #endif //assert(m_colorsForPhylogeny.count(colorForPhylogeny)>0); // this means that this genome is not in the taxonomy tree if(m_genomeToTaxon.count(colorForPhylogeny)==0){ if(m_warnings.count(colorForPhylogeny)==0){ cout<<"Warning, color "<<colorForPhylogeny<<" is not stored, "<<m_genomeToTaxon.size()<<" available. This means that you provided a genome sequence that is not classified in the taxonomy."<<endl; #ifdef VERBOSE for(map<GenomeIdentifier,TaxonIdentifier>::iterator i=m_genomeToTaxon.begin();i!=m_genomeToTaxon.end();i++){ cout<<" "<<i->first<<"->"<<i->second; } cout<<endl; #endif } m_warnings.insert(colorForPhylogeny); continue; } #ifdef ASSERT assert(m_genomeToTaxon.count(colorForPhylogeny)>0); #endif TaxonIdentifier taxon=m_genomeToTaxon[colorForPhylogeny]; taxons.push_back(taxon); } } classifySignal(&taxons,kmerCoverage,node,&key); int count=taxons.size(); frequencies[count]++; } /* * * TODO: move this in colored operation files * cout<<endl; cout<<"Taxon frequencies (only one DNA strand selected)"<<endl; cout<<"Count Frequency"<<endl; for(map<CoverageDepth,LargeCount>::iterator i=frequencies.begin();i!=frequencies.end();i++){ cout<<""<<i->first<<" "<<i->second<<endl; } cout<<"Taxon observations"<<endl; */ //showObservations(&cout); m_gatheredObservations=true; m_syncedTree=false; m_unknownSent=false; m_messageReceived=true; m_messageSent=false; m_countIterator=m_taxonObservations.begin(); }