void GraphPath::push_back(const Kmer*a){ #ifdef ASSERT assert(m_kmerLength!=0); #endif if(!canBeAdded(a)){ if(!m_errorRaised){ cout<<"Error: can not add "<<a->idToWord(m_kmerLength,false)<<endl; cout<<"last objects:"<<endl; int count=16; int iterator=size()-count; while(iterator<size()){ Kmer theObject; at(iterator,&theObject); cout<<" ["<<iterator<<"] ------> "<<theObject.idToWord(m_kmerLength,false)<<endl; iterator++; } m_errorRaised=true; } return; } #ifdef CONFIG_PATH_STORAGE_DEFAULT m_vertices.push_back(*a); #elif defined(CONFIG_PATH_STORAGE_BLOCK) writeObjectInBlock(a); #endif }
void BubbleTool::printStuff(Kmer root,vector<vector<Kmer> >*trees, map<Kmer,int>*coverages){ int m_wordSize=m_parameters->getWordSize(); cout<<"Trees="<<trees->size()<<endl; cout<<"root="<<root.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<endl; cout<<"digraph{"<<endl; map<Kmer,set<Kmer> > printedEdges; for(map<Kmer ,int>::iterator i=coverages->begin();i!=coverages->end();i++){ Kmer kmer=i->first; cout<<kmer.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<" [label=\""<<kmer.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<" "<<i->second<<"\"]"<<endl; } for(int j=0;j<(int)trees->size();j++){ for(int i=0;i<(int)trees->at(j).size();i+=2){ Kmer a=trees->at(j).at(i+0); #ifdef ASSERT assert(i+1<(int)trees->at(j).size()); #endif Kmer b=trees->at(j).at(i+1); if(printedEdges.count(a)>0 && printedEdges[a].count(b)>0){ continue; } cout<<a.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<" -> "<<b.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<endl; printedEdges[a].insert(b); } } cout<<"}"<<endl; }
void GraphPath::writeObjectInBlock(const Kmer*a){ #ifdef ASSERT assert(m_kmerLength!=0); #endif #ifdef CHECK_BUG_142 string copyA="AGGAAGAACCTGCTGAGGAACAAGAAGGTCAACTGCCTGGACTGTAATACC"; string copyB=a->idToWord(m_kmerLength,false); if(copyA==copyB) cout<<"[GraphPath::writeObjectInBlock] returns "<<copyB<<endl; #endif if(m_size==0){ #ifdef ASSERT assert(m_blocks.size()==0); #endif addBlock(); string sequence=a->idToWord(m_kmerLength,false); for(int blockPosition=0;blockPosition<m_kmerLength;blockPosition++){ writeSymbolInBlock(blockPosition,sequence[blockPosition]); } }else{ #ifdef ASSERT assert(m_size>=1); assert(a!=NULL); assert(m_kmerLength!=0); #endif char lastSymbol=a->getLastSymbol(m_kmerLength,false); int usedSymbols=size()+m_kmerLength-1; #ifdef ASSERT assert(usedSymbols>=m_kmerLength); assert(m_blocks.size()>=1); #endif int allocatedSymbols=m_blocks.size()*getBlockSize(); #ifdef ASSERT assert(allocatedSymbols>=getBlockSize()); #endif if(usedSymbols+1>allocatedSymbols){ addBlock(); allocatedSymbols=m_blocks.size()*getBlockSize(); } #ifdef ASSERT assert(usedSymbols+1<=allocatedSymbols); assert(allocatedSymbols>=getBlockSize()); #endif int position=usedSymbols; #ifdef ASSERT assert(position<allocatedSymbols); #endif writeSymbolInBlock(position,lastSymbol); } m_size++; #ifdef ASSERT Kmer addedObject; at(size()-1,&addedObject); if((*a)!=addedObject){ cout<<"Error: expected: "<<a->idToWord(m_kmerLength,false)<<endl; cout<<"actual: "<<addedObject.idToWord(m_kmerLength,false)<<" at position "<<size()-1<<endl; cout<<"kmerLength: "<<m_kmerLength<<" blockSize: "<<getBlockSize()<<endl; int i=size()-1; int j=0; cout<<"dump:"<<endl; while(i-j>=0 && j<10){ Kmer theObject; at(i-j,&theObject); cout<<" ["<<i-j<<"] ------> "<<theObject.idToWord(m_kmerLength,false)<<endl; j++; } } assert((*a)==addedObject); #endif }
/* * do a depth first search with max depth of maxDepth; */ void DepthFirstSearchData::depthFirstSearch(Kmer root,Kmer a,int maxDepth, bool*edgesRequested,bool*vertexCoverageRequested,bool*vertexCoverageReceived, RingAllocator*outboxAllocator,int size,int theRank,StaticVector*outbox, int*receivedVertexCoverage,vector<Kmer>*receivedOutgoingEdges, int minimumCoverage,bool*edgesReceived,int wordSize,Parameters*parameters) { if(!m_doChoice_tips_dfs_initiated) { m_depthFirstSearchVisitedVertices.clear(); m_depthFirstSearchVisitedVertices_vector.clear(); // add an arc m_depthFirstSearchVisitedVertices_vector.push_back(root); m_depthFirstSearchVisitedVertices_vector.push_back(a); m_depthFirstSearchVisitedVertices_depths.clear(); while(m_depthFirstSearchVerticesToVisit.size()>0) { m_depthFirstSearchVerticesToVisit.pop(); } while(m_depthFirstSearchDepths.size()>0) { m_depthFirstSearchDepths.pop(); } m_maxDepthReached=false; m_depthFirstSearchVerticesToVisit.push(a); m_depthFirstSearchVisitedVertices.insert(a); m_depthFirstSearchDepths.push(0); m_depthFirstSearch_maxDepth=0; m_doChoice_tips_dfs_initiated=true; m_doChoice_tips_dfs_done=false; m_coverages.clear(); (*edgesRequested)=false; (*vertexCoverageRequested)=false; #ifdef SHOW_MINI_GRAPH cout<<"<MiniGraph>"<<endl; cout<<root->idToWord(wordSize,parameters->getColorSpaceMode())<<" -> "<<a->idToWord(wordSize,parameters->getColorSpaceMode())<<endl; #endif } if(m_depthFirstSearchVerticesToVisit.size()>0) { Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top(); if(!(*vertexCoverageRequested)) { (*vertexCoverageRequested)=true; (*vertexCoverageReceived)=false; MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit)); int j=0; vertexToVisit.pack(message,&j); int dest=parameters->vertexRank(&vertexToVisit); Message aMessage(message,j,dest,RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE,theRank); (*outbox).push_back(&aMessage); } else if((*vertexCoverageReceived)) { if(!(*edgesRequested)) { m_coverages[vertexToVisit]=(*receivedVertexCoverage); m_depthFirstSearchVisitedVertices.insert(vertexToVisit); int theDepth=m_depthFirstSearchDepths.top(); if(theDepth> m_depthFirstSearch_maxDepth) { m_depthFirstSearch_maxDepth=theDepth; } // visit the vertex, and ask next edges. MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(1*sizeof(MessageUnit)); int bufferPosition=0; vertexToVisit.pack(message,&bufferPosition); int destination=parameters->vertexRank(&vertexToVisit); Message aMessage(message,bufferPosition,destination,RAY_MPI_TAG_REQUEST_VERTEX_OUTGOING_EDGES,theRank); (*outbox).push_back(&aMessage); (*edgesRequested)=true; (*edgesReceived)=false; } else if((*edgesReceived)) { Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top(); int theDepth=m_depthFirstSearchDepths.top(); #ifdef ASSERT assert(theDepth>=0); assert(theDepth<=maxDepth); #endif int newDepth=theDepth+1; m_depthFirstSearchVerticesToVisit.pop(); m_depthFirstSearchDepths.pop(); for(int i=0; i<(int)(*receivedOutgoingEdges).size(); i++) { Kmer nextVertex=(*receivedOutgoingEdges)[i]; if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) { continue; } if(newDepth>maxDepth) { m_maxDepthReached=true; continue; } if(m_depthFirstSearchVisitedVertices.size()<MAX_VERTICES_TO_VISIT) { // add an arc m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit); m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex); // add the depth for the vertex m_depthFirstSearchVisitedVertices_depths.push_back(newDepth); // stacks m_depthFirstSearchVerticesToVisit.push(nextVertex); m_depthFirstSearchDepths.push(newDepth); } #ifdef SHOW_MINI_GRAPH cout<<vertexToVisit->idToWord(wordSize,parameters->getColorSpaceMode())<<" -> "<<nextVertex->idToWord(wordSize,parameters->getColorSpaceMode())<<endl; #endif } (*edgesRequested)=false; (*vertexCoverageRequested)=false; } } } else { m_doChoice_tips_dfs_done=true; #ifdef SHOW_MINI_GRAPH cout<<"</MiniGraph>"<<endl; #endif } }
void DepthFirstSearchData::depthFirstSearchBidirectional(Kmer a,int maxDepth, bool*edgesRequested,bool*vertexCoverageRequested,bool*vertexCoverageReceived, RingAllocator*outboxAllocator,int size,int theRank,StaticVector*outbox, int*receivedVertexCoverage,SeedingData*seedingData, int minimumCoverage,bool*edgesReceived,Parameters*parameters) { #ifdef ASSERT int wordSize=parameters->getWordSize(); #endif if(!m_doChoice_tips_dfs_initiated) { m_outgoingEdges.clear(); m_ingoingEdges.clear(); m_depthFirstSearchVisitedVertices.clear(); m_depthFirstSearchVisitedVertices_vector.clear(); m_depthFirstSearchVisitedVertices_depths.clear(); while(m_depthFirstSearchVerticesToVisit.size()>0) { m_depthFirstSearchVerticesToVisit.pop(); } while(m_depthFirstSearchDepths.size()>0) { m_depthFirstSearchDepths.pop(); } m_maxDepthReached=false; m_depthFirstSearchVerticesToVisit.push(a); m_depthFirstSearchDepths.push(0); m_depthFirstSearch_maxDepth=0; m_doChoice_tips_dfs_initiated=true; m_doChoice_tips_dfs_done=false; m_coverages.clear(); (*edgesRequested)=false; (*vertexCoverageRequested)=false; } if(m_depthFirstSearchVerticesToVisit.size()>0) { Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top(); if(!(*vertexCoverageRequested)) { if(m_depthFirstSearchVisitedVertices.count(vertexToVisit)>0) { m_depthFirstSearchVerticesToVisit.pop(); m_depthFirstSearchDepths.pop(); return; } (*vertexCoverageRequested)=true; (*vertexCoverageReceived)=false; MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit)); int bufferPosition=0; vertexToVisit.pack(message,&bufferPosition); int dest=parameters->vertexRank(&vertexToVisit); Message aMessage(message,bufferPosition,dest,RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE,theRank); (*outbox).push_back(&aMessage); } else if((*vertexCoverageReceived)) { if(!(*edgesRequested)) { m_coverages[vertexToVisit]=(*receivedVertexCoverage); #ifdef ASSERT if(m_depthFirstSearchVisitedVertices.count(vertexToVisit)>0) { cout<<"Already visited: "<<vertexToVisit.idToWord(wordSize,parameters->getColorSpaceMode())<<" root is "<<a.idToWord(wordSize,parameters->getColorSpaceMode())<<endl; } assert(m_depthFirstSearchVisitedVertices.count(vertexToVisit)==0); assert(*receivedVertexCoverage>0); #endif if((*receivedVertexCoverage)>0) { m_depthFirstSearchVisitedVertices.insert(vertexToVisit); } else { #ifdef ASSERT assert(false); #endif // don't visit it. m_depthFirstSearchVerticesToVisit.pop(); m_depthFirstSearchDepths.pop(); (*edgesRequested)=false; (*vertexCoverageRequested)=false; return; } int theDepth=m_depthFirstSearchDepths.top(); if(theDepth> m_depthFirstSearch_maxDepth) { m_depthFirstSearch_maxDepth=theDepth; } // visit the vertex, and ask next edges. MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(1*sizeof(MessageUnit)); int bufferPosition=0; vertexToVisit.pack(message,&bufferPosition); int destination=parameters->vertexRank(&vertexToVisit); Message aMessage(message,bufferPosition,destination,RAY_MPI_TAG_REQUEST_VERTEX_EDGES,theRank); (*outbox).push_back(&aMessage); (*edgesRequested)=true; (*edgesReceived)=false; } else if((*edgesReceived)) { Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top(); int theDepth=m_depthFirstSearchDepths.top(); #ifdef ASSERT assert(theDepth>=0); assert(theDepth<=maxDepth); #endif int newDepth=theDepth+1; m_depthFirstSearchVerticesToVisit.pop(); m_depthFirstSearchDepths.pop(); // the first 4 elements are padding // the 5th is the number of outgoing edges // following are the outgoing edges // following is the number of ingoing edges // following are the ingoing edges. vector<Kmer > outgoingEdges=seedingData->m_SEEDING_receivedOutgoingEdges; for(int i=0; i<(int)outgoingEdges.size(); i++) { Kmer nextVertex=outgoingEdges[i]; if(m_depthFirstSearchVisitedVertices.size()>=MAX_VERTICES_TO_VISIT) { continue; } if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) { continue; } if(newDepth>maxDepth) { m_maxDepthReached=true; continue; } m_depthFirstSearchVerticesToVisit.push(nextVertex); m_depthFirstSearchDepths.push(newDepth); m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit); m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex); m_depthFirstSearchVisitedVertices_depths.push_back(newDepth); } #ifdef ASSERT if(m_outgoingEdges.count(vertexToVisit)>0) { cout<<vertexToVisit.idToWord(wordSize,parameters->getColorSpaceMode())<<" is already in the data structure "<<m_outgoingEdges[vertexToVisit].size()<<" v. "<<outgoingEdges.size()<<endl; } assert(m_outgoingEdges.count(vertexToVisit)==0); #endif m_outgoingEdges[vertexToVisit]=outgoingEdges; vector<Kmer> ingoingEdges=seedingData->m_SEEDING_receivedIngoingEdges; for(int i=0; i<(int)ingoingEdges.size(); i++) { Kmer nextVertex=ingoingEdges[i]; if(m_depthFirstSearchVisitedVertices.size()>=MAX_VERTICES_TO_VISIT) { continue; } if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) { continue; } if(newDepth>maxDepth) { m_maxDepthReached=true; continue; } m_depthFirstSearchVerticesToVisit.push(nextVertex); m_depthFirstSearchDepths.push(newDepth); // reverse the order. m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex); m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit); m_depthFirstSearchVisitedVertices_depths.push_back(newDepth); } (*edgesRequested)=false; (*vertexCoverageRequested)=false; #ifdef ASSERT assert(m_ingoingEdges.count(vertexToVisit)==0); #endif m_ingoingEdges[vertexToVisit]=ingoingEdges; } } } else { m_doChoice_tips_dfs_done=true; #ifdef SHOW_MINI_GRAPH cout<<"</MiniGraph>"<<endl; #endif } }
bool BubbleTool::isGenuineBubble(Kmer root,vector<vector<Kmer > >*trees, map<Kmer ,int>*coverages,int repeatCoverage){ #ifdef NO_BUBBLES return false; #endif if((*coverages)[root]>= repeatCoverage){ return false; } int m_wordSize=m_parameters->getWordSize(); #ifdef ASSERT for(int i=0;i<(int)trees->size();i++){ for(int j=0;j<(int)trees->at(i).size();j+=2){ Kmer a=trees->at(i).at(j+0); Kmer b=trees->at(i).at(j+1); string as=a.idToWord(m_wordSize,m_parameters->getColorSpaceMode()); string bs=b.idToWord(m_wordSize,m_parameters->getColorSpaceMode()); assert(as.substr(1,m_wordSize-1)==bs.substr(0,m_wordSize-1)); } } #endif if(m_parameters->debugBubbles()){ printStuff(root,trees,coverages); } if(trees->size()!=2){ return false;// we don'T support that right now ! triploid stuff are awesome. } // given the word size // check that they join. // // substitution SNP is d=0 // del is 1, 2, or 3 map<Kmer ,int> coveringNumber; Kmer target; bool foundTarget=false; for(int j=0;j<(int)trees->size();j++){ for(int i=0;i<(int)trees->at(j).size();i+=2){ Kmer a=trees->at(j).at(i+1); #ifdef ASSERT if(coverages->count(a)==0){ cout<<a.idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode())<<" has no coverage."<<endl; } assert(coverages->count(a)>0); #endif coveringNumber[a]++; if(!foundTarget && coveringNumber[a]==2){ foundTarget=true; target=a; break; } } } if(!foundTarget){ if(m_parameters->debugBubbles()){ cout<<"Target not found."<<endl; } return false; } if((*coverages)[target]>= repeatCoverage){ return false; } #ifdef ASSERT assert(coverages->count(root)>0); assert(coverages->count(target)>0); #endif #ifdef ASSERT int rootCoverage=(*coverages)[root]; int targetCoverage=(*coverages)[target]; assert(rootCoverage>0); assert(targetCoverage>0); #endif vector<map<Kmer ,Kmer > > parents; for(int j=0;j<(int)trees->size();j++){ map<Kmer ,Kmer > aVector; parents.push_back(aVector); for(int i=0;i<(int)trees->at(j).size();i+=2){ Kmer a=trees->at(j).at(i+0); Kmer b=trees->at(j).at(i+1); parents[j][b]=a; } } vector<vector<int> > observedValues; /* * * BUBBLE is below * * * ---- * -------* --------* * \ / * ---- * --------* ------ * * */ // accumulate observed values // and stop when encountering for(int j=0;j<(int)trees->size();j++){ vector<int> aVector; observedValues.push_back(aVector); set<Kmer > visited; Kmer startingPoint=trees->at(j).at(0); Kmer current=target; while(current!=startingPoint){ if(visited.count(current)>0){ return false; } visited.insert(current); Kmer theParent=parents[j][current]; int coverageValue=(*coverages)[theParent]; observedValues[j].push_back(coverageValue); current=theParent; } } if(m_parameters->debugBubbles()){ cout<<"O1="<<observedValues[0].size()<<" O2="<<observedValues[1].size()<<endl; } int sum1=0; for(int i=0;i<(int)observedValues[0].size();i++){ sum1+=observedValues[0][i]; } if(m_parameters->debugBubbles()){ cout<<"O1Values= "; for(int i=0;i<(int)observedValues[0].size();i++){ cout<<observedValues[0][i]<<" "; } cout<<endl; } int sum2=0; for(int i=0;i<(int)observedValues[1].size();i++){ sum2+=observedValues[1][i]; } if(m_parameters->debugBubbles()){ cout<<"O2Values= "; for(int i=0;i<(int)observedValues[1].size();i++){ cout<<observedValues[1][i]<<" "; } cout<<endl; } if((int)observedValues[0].size()<2*m_parameters->getWordSize() && (int)observedValues[1].size()<2*m_parameters->getWordSize()){ if(sum1>sum2){ m_choice=trees->at(0).at(1); }else if(sum2>sum1){ m_choice=trees->at(1).at(1); // this will not happen often }else if(sum1==sum2){ // take the shortest, if any if(observedValues[0].size()<observedValues[1].size()){ m_choice=trees->at(0).at(1); }else if(observedValues[1].size()<observedValues[0].size()){ m_choice=trees->at(1).at(1); // same length and same sum, won't happen very often anyway }else{ m_choice=trees->at(0).at(1); } } if(m_parameters->debugBubbles()){ cout<<"This is a genuine bubble"<<endl; cout<<"root="<<root.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<" target="<<target.idToWord(m_wordSize,m_parameters->getColorSpaceMode())<<endl; } return true; } if(m_parameters->debugBubbles()){ cout<<"False at last"<<endl; } return false; }