Example #1
0
/*
 * Append an object at the end of the path.
 *
 * The object is stored only when canBeAdded() accepts it. When it is
 * rejected, nothing is stored; the first rejection (m_errorRaised still
 * false) also prints the rejected object and up to the 16 last objects of
 * the path on standard output, then sets m_errorRaised so that the report
 * is not repeated.
 *
 * a: the object to append.
 */
void GraphPath::push_back(const Kmer*a){

#ifdef ASSERT
	assert(m_kmerLength!=0);
#endif

	if(!canBeAdded(a)){
		if(!m_errorRaised){
			cout<<"Error: can not add "<<a->idToWord(m_kmerLength,false)<<endl;
			cout<<"last objects:"<<endl;
			int count=16;
			int iterator=size()-count;

			// a path with fewer than count objects must be dumped from its
			// first object, not from a negative position
			if(iterator<0){
				iterator=0;
			}

			while(iterator<size()){
				Kmer theObject;
				at(iterator,&theObject);

				cout<<" ["<<iterator<<"] ------> "<<theObject.idToWord(m_kmerLength,false)<<endl;

				iterator++;
			}

			m_errorRaised=true;
		}

		return;
	}

	// the storage back-end is selected at compile time
#ifdef CONFIG_PATH_STORAGE_DEFAULT
	m_vertices.push_back(*a);
#elif defined(CONFIG_PATH_STORAGE_BLOCK)

	writeObjectInBlock(a);
#endif
}
Example #2
0
/*
 * Print the search trees of a bubble on standard output, using the
 * "digraph{ ... }" notation.
 *
 * Every entry of coverages becomes a node labelled with its sequence and its
 * coverage. Each tree is read as a flat list of (source, destination) pairs,
 * and an arc is printed only the first time it is met.
 *
 * root: the vertex from which the trees were explored.
 * trees: the trees, each one a flat list of arcs.
 * coverages: the coverage of each vertex.
 */
void BubbleTool::printStuff(Kmer root,vector<vector<Kmer> >*trees,
map<Kmer,int>*coverages){
	int wordLength=m_parameters->getWordSize();

	cout<<"Trees="<<trees->size()<<endl;
	cout<<"root="<<root.idToWord(wordLength,m_parameters->getColorSpaceMode())<<endl;
	cout<<"digraph{"<<endl;

	// arcs already written, grouped by source vertex
	map<Kmer,set<Kmer> > seenArcs;

	// one labelled node per vertex with a known coverage
	map<Kmer,int>::iterator entry=coverages->begin();
	while(entry!=coverages->end()){
		Kmer vertex=entry->first;
		string word=vertex.idToWord(wordLength,m_parameters->getColorSpaceMode());

		cout<<word<<" [label=\""<<word<<" "<<entry->second<<"\"]"<<endl;

		entry++;
	}

	// one arc per distinct (source, destination) pair
	for(vector<vector<Kmer> >::iterator tree=trees->begin();tree!=trees->end();++tree){
		int length=(int)tree->size();

		for(int position=0;position<length;position+=2){
			Kmer source=tree->at(position);
			#ifdef ASSERT
			assert(position+1<length);
			#endif
			Kmer destination=tree->at(position+1);

			// insert() tells whether the arc was already recorded
			bool isNewArc=seenArcs[source].insert(destination).second;

			if(isNewArc){
				string sourceWord=source.idToWord(wordLength,m_parameters->getColorSpaceMode());
				string destinationWord=destination.idToWord(wordLength,m_parameters->getColorSpaceMode());

				cout<<sourceWord<<" -> "<<destinationWord<<endl;
			}
		}
	}

	cout<<"}"<<endl;
}
Example #3
0
void GraphPath::writeObjectInBlock(const Kmer*a){

	#ifdef ASSERT
	assert(m_kmerLength!=0);
	#endif

#ifdef CHECK_BUG_142
	string copyA="AGGAAGAACCTGCTGAGGAACAAGAAGGTCAACTGCCTGGACTGTAATACC";
	string copyB=a->idToWord(m_kmerLength,false);
	if(copyA==copyB)
		cout<<"[GraphPath::writeObjectInBlock] returns "<<copyB<<endl;
#endif

	if(m_size==0){
		#ifdef ASSERT
		assert(m_blocks.size()==0);
		#endif

		addBlock();
		string sequence=a->idToWord(m_kmerLength,false);

		for(int blockPosition=0;blockPosition<m_kmerLength;blockPosition++){
			writeSymbolInBlock(blockPosition,sequence[blockPosition]);
		}
	}else{
		#ifdef ASSERT
		assert(m_size>=1);
		assert(a!=NULL);
		assert(m_kmerLength!=0);
		#endif

		char lastSymbol=a->getLastSymbol(m_kmerLength,false);
		int usedSymbols=size()+m_kmerLength-1;

		#ifdef ASSERT
		assert(usedSymbols>=m_kmerLength);
		assert(m_blocks.size()>=1);
		#endif

		int allocatedSymbols=m_blocks.size()*getBlockSize();

		#ifdef ASSERT
		assert(allocatedSymbols>=getBlockSize());
		#endif

		if(usedSymbols+1>allocatedSymbols){
			addBlock();
			allocatedSymbols=m_blocks.size()*getBlockSize();
		}

		#ifdef ASSERT
		assert(usedSymbols+1<=allocatedSymbols);
		assert(allocatedSymbols>=getBlockSize());
		#endif

		int position=usedSymbols;

		#ifdef ASSERT
		assert(position<allocatedSymbols);
		#endif

		writeSymbolInBlock(position,lastSymbol);
	}

	m_size++;

#ifdef ASSERT
	Kmer addedObject;
	at(size()-1,&addedObject);

	if((*a)!=addedObject){
		cout<<"Error: expected: "<<a->idToWord(m_kmerLength,false)<<endl;
		cout<<"actual: "<<addedObject.idToWord(m_kmerLength,false)<<" at position "<<size()-1<<endl;
		cout<<"kmerLength: "<<m_kmerLength<<" blockSize: "<<getBlockSize()<<endl;
		int i=size()-1;
		int j=0;
		cout<<"dump:"<<endl;
		while(i-j>=0 && j<10){
			Kmer theObject;
			at(i-j,&theObject);

			cout<<" ["<<i-j<<"] ------> "<<theObject.idToWord(m_kmerLength,false)<<endl;

			j++;
		}
	}

	assert((*a)==addedObject);
#endif
}
Example #4
0
/*
 * do a depth first search with max depth of maxDepth;
 *
 * The search does not run to completion in one call. Each call performs at
 * most one step, and the state is kept in the members of this object and in
 * the flags provided by the caller:
 *
 *  - on the first call (m_doChoice_tips_dfs_initiated is false) the state is
 *    reset, the arc root -> a is recorded and a is pushed at depth 0;
 *  - while vertices remain on the stack, a call either queues a coverage
 *    request for the vertex on top, queues an outgoing edges request for it
 *    once *vertexCoverageReceived is set, or, once *edgesReceived is set,
 *    pops it and pushes its unvisited children;
 *  - when the stack is empty, m_doChoice_tips_dfs_done is set.
 *
 * Children deeper than maxDepth are not pushed and set m_maxDepthReached.
 * Children are not pushed anymore once MAX_VERTICES_TO_VISIT vertices have
 * been visited.
 *
 * root: origin of the first arc; it is recorded but not visited.
 * a: the vertex where the search starts.
 * maxDepth: the deepest depth a pushed vertex can have.
 * edgesRequested, edgesReceived: state of the outgoing edges request.
 * vertexCoverageRequested, vertexCoverageReceived: state of the coverage request.
 * outboxAllocator: provides the buffers of the queued messages.
 * theRank: the source rank of the queued messages.
 * outbox: where the messages are queued.
 * receivedVertexCoverage: coverage received for the vertex on top of the stack.
 * receivedOutgoingEdges: children received for the vertex on top of the stack.
 * wordSize: only used by the SHOW_MINI_GRAPH output.
 * parameters: gives the rank owning a vertex.
 * size, minimumCoverage: not used by this method.
 */
void DepthFirstSearchData::depthFirstSearch(Kmer root,Kmer a,int maxDepth,
        bool*edgesRequested,bool*vertexCoverageRequested,bool*vertexCoverageReceived,
        RingAllocator*outboxAllocator,int size,int theRank,StaticVector*outbox,
        int*receivedVertexCoverage,vector<Kmer>*receivedOutgoingEdges,
        int minimumCoverage,bool*edgesReceived,int wordSize,Parameters*parameters) {
    if(!m_doChoice_tips_dfs_initiated) {
        m_depthFirstSearchVisitedVertices.clear();
        m_depthFirstSearchVisitedVertices_vector.clear();

        // add an arc
        m_depthFirstSearchVisitedVertices_vector.push_back(root);
        m_depthFirstSearchVisitedVertices_vector.push_back(a);

        m_depthFirstSearchVisitedVertices_depths.clear();
        while(m_depthFirstSearchVerticesToVisit.size()>0) {
            m_depthFirstSearchVerticesToVisit.pop();
        }
        while(m_depthFirstSearchDepths.size()>0) {
            m_depthFirstSearchDepths.pop();
        }
        m_maxDepthReached=false;
        m_depthFirstSearchVerticesToVisit.push(a);
        m_depthFirstSearchVisitedVertices.insert(a);
        m_depthFirstSearchDepths.push(0);
        m_depthFirstSearch_maxDepth=0;
        m_doChoice_tips_dfs_initiated=true;
        m_doChoice_tips_dfs_done=false;
        m_coverages.clear();
        (*edgesRequested)=false;
        (*vertexCoverageRequested)=false;
#ifdef SHOW_MINI_GRAPH
        cout<<"<MiniGraph>"<<endl;
        cout<<root.idToWord(wordSize,parameters->getColorSpaceMode())<<" -> "<<a.idToWord(wordSize,parameters->getColorSpaceMode())<<endl;
#endif
    }
    if(m_depthFirstSearchVerticesToVisit.size()>0) {
        // the vertex stays on the stack until its edges are received
        Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top();
        if(!(*vertexCoverageRequested)) {
            // step 1: ask the coverage of the vertex
            (*vertexCoverageRequested)=true;
            (*vertexCoverageReceived)=false;

            MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit));
            int j=0;
            vertexToVisit.pack(message,&j);
            int dest=parameters->vertexRank(&vertexToVisit);

            Message aMessage(message,j,dest,RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE,theRank);
            (*outbox).push_back(&aMessage);
        } else if((*vertexCoverageReceived)) {
            if(!(*edgesRequested)) {
                // step 2: store the coverage and ask the outgoing edges
                m_coverages[vertexToVisit]=(*receivedVertexCoverage);
                m_depthFirstSearchVisitedVertices.insert(vertexToVisit);
                int theDepth=m_depthFirstSearchDepths.top();

                if(theDepth> m_depthFirstSearch_maxDepth) {
                    m_depthFirstSearch_maxDepth=theDepth;
                }

                // visit the vertex, and ask next edges.
                // a whole Kmer is packed in the buffer, so reserve the same
                // number of units as for the coverage request.
                MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit));
                int bufferPosition=0;
                vertexToVisit.pack(message,&bufferPosition);
                int destination=parameters->vertexRank(&vertexToVisit);
                Message aMessage(message,bufferPosition,destination,RAY_MPI_TAG_REQUEST_VERTEX_OUTGOING_EDGES,theRank);
                (*outbox).push_back(&aMessage);
                (*edgesRequested)=true;
                (*edgesReceived)=false;
            } else if((*edgesReceived)) {
                // step 3: replace the vertex by its children on the stack
                int theDepth=m_depthFirstSearchDepths.top();
#ifdef ASSERT
                assert(theDepth>=0);
                assert(theDepth<=maxDepth);
#endif
                int newDepth=theDepth+1;

                m_depthFirstSearchVerticesToVisit.pop();
                m_depthFirstSearchDepths.pop();

                for(int i=0; i<(int)(*receivedOutgoingEdges).size(); i++) {
                    Kmer nextVertex=(*receivedOutgoingEdges)[i];
                    if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) {
                        continue;
                    }
                    if(newDepth>maxDepth) {
                        m_maxDepthReached=true;
                        continue;
                    }

                    if(m_depthFirstSearchVisitedVertices.size()<MAX_VERTICES_TO_VISIT) {
                        // add an arc
                        m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit);
                        m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex);

                        // add the depth for the vertex
                        m_depthFirstSearchVisitedVertices_depths.push_back(newDepth);

                        // stacks
                        m_depthFirstSearchVerticesToVisit.push(nextVertex);
                        m_depthFirstSearchDepths.push(newDepth);
                    }


#ifdef SHOW_MINI_GRAPH
                    cout<<vertexToVisit.idToWord(wordSize,parameters->getColorSpaceMode())<<" -> "<<nextVertex.idToWord(wordSize,parameters->getColorSpaceMode())<<endl;
#endif
                }

                // the next call starts over with the new top of the stack
                (*edgesRequested)=false;
                (*vertexCoverageRequested)=false;
            }
        }
    } else {
        m_doChoice_tips_dfs_done=true;
#ifdef SHOW_MINI_GRAPH
        cout<<"</MiniGraph>"<<endl;
#endif
    }
}
Example #5
0
/*
 * Do a depth first search with a maximum depth of maxDepth that follows
 * both the outgoing and the ingoing edges of each vertex.
 *
 * The search does not run to completion in one call. Each call performs at
 * most one step, and the state is kept in the members of this object and in
 * the flags provided by the caller:
 *
 *  - on the first call (m_doChoice_tips_dfs_initiated is false) the state is
 *    reset and a is pushed at depth 0;
 *  - while vertices remain on the stack, a call either drops the vertex on
 *    top when it was already visited, queues a coverage request for it,
 *    queues an edges request for it once *vertexCoverageReceived is set, or,
 *    once *edgesReceived is set, pops it and pushes its unvisited neighbours;
 *  - when the stack is empty, m_doChoice_tips_dfs_done is set.
 *
 * The edges received for each visited vertex are kept in m_outgoingEdges and
 * m_ingoingEdges. Neighbours deeper than maxDepth are not pushed and set
 * m_maxDepthReached. Neighbours are not pushed anymore once
 * MAX_VERTICES_TO_VISIT vertices have been visited.
 *
 * a: the vertex where the search starts.
 * maxDepth: the deepest depth a pushed vertex can have.
 * edgesRequested, edgesReceived: state of the edges request.
 * vertexCoverageRequested, vertexCoverageReceived: state of the coverage request.
 * outboxAllocator: provides the buffers of the queued messages.
 * theRank: the source rank of the queued messages.
 * outbox: where the messages are queued.
 * receivedVertexCoverage: coverage received for the vertex on top of the stack.
 * seedingData: holds the edges received for the vertex on top of the stack.
 * parameters: gives the rank owning a vertex.
 * size, minimumCoverage: not used by this method.
 */
void DepthFirstSearchData::depthFirstSearchBidirectional(Kmer a,int maxDepth,
        bool*edgesRequested,bool*vertexCoverageRequested,bool*vertexCoverageReceived,
        RingAllocator*outboxAllocator,int size,int theRank,StaticVector*outbox,
        int*receivedVertexCoverage,SeedingData*seedingData,
        int minimumCoverage,bool*edgesReceived,Parameters*parameters) {

#ifdef ASSERT
    int wordSize=parameters->getWordSize();
#endif

    if(!m_doChoice_tips_dfs_initiated) {
        m_outgoingEdges.clear();
        m_ingoingEdges.clear();

        m_depthFirstSearchVisitedVertices.clear();
        m_depthFirstSearchVisitedVertices_vector.clear();
        m_depthFirstSearchVisitedVertices_depths.clear();
        while(m_depthFirstSearchVerticesToVisit.size()>0) {
            m_depthFirstSearchVerticesToVisit.pop();
        }
        while(m_depthFirstSearchDepths.size()>0) {
            m_depthFirstSearchDepths.pop();
        }
        m_maxDepthReached=false;
        m_depthFirstSearchVerticesToVisit.push(a);
        m_depthFirstSearchDepths.push(0);
        m_depthFirstSearch_maxDepth=0;
        m_doChoice_tips_dfs_initiated=true;
        m_doChoice_tips_dfs_done=false;
        m_coverages.clear();
        (*edgesRequested)=false;
        (*vertexCoverageRequested)=false;
    }
    if(m_depthFirstSearchVerticesToVisit.size()>0) {
        // the vertex stays on the stack until its edges are received
        Kmer vertexToVisit=m_depthFirstSearchVerticesToVisit.top();

        if(!(*vertexCoverageRequested)) {

            // a vertex can be pushed more than once before it is visited;
            // the extra copies are dropped here.
            if(m_depthFirstSearchVisitedVertices.count(vertexToVisit)>0) {
                m_depthFirstSearchVerticesToVisit.pop();
                m_depthFirstSearchDepths.pop();
                return;
            }

            // step 1: ask the coverage of the vertex
            (*vertexCoverageRequested)=true;
            (*vertexCoverageReceived)=false;

            MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit));
            int bufferPosition=0;
            vertexToVisit.pack(message,&bufferPosition);
            int dest=parameters->vertexRank(&vertexToVisit);
            Message aMessage(message,bufferPosition,dest,RAY_MPI_TAG_REQUEST_VERTEX_COVERAGE,theRank);
            (*outbox).push_back(&aMessage);
        } else if((*vertexCoverageReceived)) {
            if(!(*edgesRequested)) {
                // step 2: store the coverage and ask the edges
                m_coverages[vertexToVisit]=(*receivedVertexCoverage);

#ifdef ASSERT
                if(m_depthFirstSearchVisitedVertices.count(vertexToVisit)>0) {
                    cout<<"Already visited: "<<vertexToVisit.idToWord(wordSize,parameters->getColorSpaceMode())<<" root is "<<a.idToWord(wordSize,parameters->getColorSpaceMode())<<endl;
                }
                assert(m_depthFirstSearchVisitedVertices.count(vertexToVisit)==0);
                assert(*receivedVertexCoverage>0);
#endif

                if((*receivedVertexCoverage)>0) {
                    m_depthFirstSearchVisitedVertices.insert(vertexToVisit);
                } else {
#ifdef ASSERT
                    assert(false);
#endif
                    // don't visit it.
                    m_depthFirstSearchVerticesToVisit.pop();
                    m_depthFirstSearchDepths.pop();
                    (*edgesRequested)=false;
                    (*vertexCoverageRequested)=false;
                    return;
                }
                int theDepth=m_depthFirstSearchDepths.top();

                if(theDepth> m_depthFirstSearch_maxDepth) {
                    m_depthFirstSearch_maxDepth=theDepth;
                }

                // visit the vertex, and ask next edges.
                // a whole Kmer is packed in the buffer, so reserve the same
                // number of units as for the coverage request.
                MessageUnit*message=(MessageUnit*)(*outboxAllocator).allocate(KMER_U64_ARRAY_SIZE*sizeof(MessageUnit));
                int bufferPosition=0;
                vertexToVisit.pack(message,&bufferPosition);
                int destination=parameters->vertexRank(&vertexToVisit);
                Message aMessage(message,bufferPosition,destination,RAY_MPI_TAG_REQUEST_VERTEX_EDGES,theRank);

                (*outbox).push_back(&aMessage);
                (*edgesRequested)=true;
                (*edgesReceived)=false;
            } else if((*edgesReceived)) {
                // step 3: replace the vertex by its neighbours on the stack
                int theDepth=m_depthFirstSearchDepths.top();

#ifdef ASSERT

                assert(theDepth>=0);
                assert(theDepth<=maxDepth);
#endif

                int newDepth=theDepth+1;

                m_depthFirstSearchVerticesToVisit.pop();
                m_depthFirstSearchDepths.pop();

                // the received edges are read from seedingData:
                // the outgoing edges first, then the ingoing edges.

                vector<Kmer > outgoingEdges=seedingData->m_SEEDING_receivedOutgoingEdges;

                for(int i=0; i<(int)outgoingEdges.size(); i++) {
                    Kmer nextVertex=outgoingEdges[i];

                    if(m_depthFirstSearchVisitedVertices.size()>=MAX_VERTICES_TO_VISIT) {
                        continue;
                    }
                    if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) {
                        continue;
                    }
                    if(newDepth>maxDepth) {
                        m_maxDepthReached=true;
                        continue;
                    }
                    m_depthFirstSearchVerticesToVisit.push(nextVertex);
                    m_depthFirstSearchDepths.push(newDepth);

                    // add an arc: vertexToVisit -> nextVertex
                    m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit);
                    m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex);
                    m_depthFirstSearchVisitedVertices_depths.push_back(newDepth);
                }

#ifdef ASSERT
                if(m_outgoingEdges.count(vertexToVisit)>0) {
                    cout<<vertexToVisit.idToWord(wordSize,parameters->getColorSpaceMode())<<" is already in the data structure "<<m_outgoingEdges[vertexToVisit].size()<<" v. "<<outgoingEdges.size()<<endl;
                }
                assert(m_outgoingEdges.count(vertexToVisit)==0);
#endif

                m_outgoingEdges[vertexToVisit]=outgoingEdges;

                vector<Kmer> ingoingEdges=seedingData->m_SEEDING_receivedIngoingEdges;

                for(int i=0; i<(int)ingoingEdges.size(); i++) {
                    Kmer nextVertex=ingoingEdges[i];

                    if(m_depthFirstSearchVisitedVertices.size()>=MAX_VERTICES_TO_VISIT) {
                        continue;
                    }
                    if(m_depthFirstSearchVisitedVertices.count(nextVertex)>0) {
                        continue;
                    }
                    if(newDepth>maxDepth) {
                        m_maxDepthReached=true;
                        continue;
                    }
                    m_depthFirstSearchVerticesToVisit.push(nextVertex);
                    m_depthFirstSearchDepths.push(newDepth);

                    // reverse the order.
                    m_depthFirstSearchVisitedVertices_vector.push_back(nextVertex);
                    m_depthFirstSearchVisitedVertices_vector.push_back(vertexToVisit);
                    m_depthFirstSearchVisitedVertices_depths.push_back(newDepth);
                }

                // the next call starts over with the new top of the stack
                (*edgesRequested)=false;
                (*vertexCoverageRequested)=false;

#ifdef ASSERT
                assert(m_ingoingEdges.count(vertexToVisit)==0);

#endif

                m_ingoingEdges[vertexToVisit]=ingoingEdges;
            }
        }
    } else {
        m_doChoice_tips_dfs_done=true;
#ifdef SHOW_MINI_GRAPH
        cout<<"</MiniGraph>"<<endl;
#endif
    }
}
Example #6
0
/*
 * Tell whether two search trees starting at root form a bubble, and when
 * they do, store in m_choice the branch to follow.
 *
 * Each tree is a flat list of arcs: the elements at even positions are the
 * sources and the elements that follow them are the destinations.
 *
 * The answer is false when:
 *  - NO_BUBBLES is defined;
 *  - the coverage of root or of the target is at least repeatCoverage;
 *  - there are not exactly 2 trees;
 *  - no destination is met twice (no target);
 *  - a tree is empty, or the target can not be walked back to the first
 *    vertex of a tree through the recorded parents;
 *  - one of the two walks has 2*wordSize coverage values or more.
 *
 * Otherwise m_choice receives the first destination of the tree whose walk
 * has the largest sum of coverage values; on a tie the shortest walk wins,
 * then the first tree.
 *
 * root: the vertex where both trees start.
 * trees: the trees, each one a flat list of arcs.
 * coverages: the coverage of each vertex; operator[] is used on it, so a
 *            vertex without an entry gets one.
 * repeatCoverage: coverage from which a vertex is considered repeated.
 */
bool BubbleTool::isGenuineBubble(Kmer root,vector<vector<Kmer > >*trees,
map<Kmer ,int>*coverages,int repeatCoverage){
	#ifdef NO_BUBBLES
	return false;
	#endif

	if((*coverages)[root]>= repeatCoverage){
		return false;
	}

	int wordSize=m_parameters->getWordSize();
	#ifdef ASSERT
	// the two vertices of an arc must overlap on wordSize-1 symbols
	for(int i=0;i<(int)trees->size();i++){
		for(int j=0;j<(int)trees->at(i).size();j+=2){
			Kmer a=trees->at(i).at(j+0);
			Kmer b=trees->at(i).at(j+1);
			string as=a.idToWord(wordSize,m_parameters->getColorSpaceMode());
			string bs=b.idToWord(wordSize,m_parameters->getColorSpaceMode());
			assert(as.substr(1,wordSize-1)==bs.substr(0,wordSize-1));
		}
	}
	#endif

	if(m_parameters->debugBubbles()){
		printStuff(root,trees,coverages);
	}

	if(trees->size()!=2){
		return false;// we don't support that right now ! triploid stuff are awesome.
	}

	// given the word size
	// check that they join.
	//
	// substitution SNP is d=0
	// del is 1, 2, or 3

	// number of times each vertex was met as a destination
	map<Kmer ,int> coveringNumber;

	// the target is the first destination that is met twice
	Kmer target;
	bool foundTarget=false;
	for(int j=0;j<(int)trees->size();j++){
		for(int i=0;i<(int)trees->at(j).size();i+=2){
			Kmer a=trees->at(j).at(i+1);
			#ifdef ASSERT
			if(coverages->count(a)==0){
				cout<<a.idToWord(m_parameters->getWordSize(),m_parameters->getColorSpaceMode())<<" has no coverage."<<endl;
			}
			assert(coverages->count(a)>0);
			#endif

			coveringNumber[a]++;
			if(!foundTarget && coveringNumber[a]==2){
				foundTarget=true;
				target=a;
				break;
			}
		}
	}

	if(!foundTarget){
		if(m_parameters->debugBubbles()){
			cout<<"Target not found."<<endl;
		}
		return false;
	}

	if((*coverages)[target]>= repeatCoverage){
		return false;
	}

	#ifdef ASSERT
	assert(coverages->count(root)>0);
	assert(coverages->count(target)>0);
	#endif
	#ifdef ASSERT
	int rootCoverage=(*coverages)[root];
	int targetCoverage=(*coverages)[target];
	assert(rootCoverage>0);
	assert(targetCoverage>0);
	#endif

	// for each tree, the parent of every destination
	vector<map<Kmer ,Kmer > > parents;

	for(int j=0;j<(int)trees->size();j++){
		map<Kmer ,Kmer > aVector;
		parents.push_back(aVector);
		for(int i=0;i<(int)trees->at(j).size();i+=2){
			Kmer a=trees->at(j).at(i+0);
			Kmer b=trees->at(j).at(i+1);
			parents[j][b]=a;
		}
	}

	vector<vector<int> > observedValues;
	/*
 *
 *  BUBBLE is below
 *
 *    *  ----  * -------*  --------*
 *      \                          /
 *        ---- * --------* ------ *
 *
 */
	// accumulate observed values
	// and stop when encountering the first vertex of the tree
	for(int j=0;j<(int)trees->size();j++){
		vector<int> aVector;
		observedValues.push_back(aVector);
		set<Kmer > visited;

		// an empty tree has no starting point and can not reach the target
		if(trees->at(j).empty()){
			return false;
		}

		Kmer startingPoint=trees->at(j).at(0);
		Kmer current=target;

		while(current!=startingPoint){
			// a cycle in the parents means that this is not a bubble
			if(visited.count(current)>0){
				return false;
			}
			visited.insert(current);

			// the target may be absent from this tree; operator[] would
			// then invent a parent and add an entry for it in coverages.
			map<Kmer ,Kmer >::iterator parentEntry=parents[j].find(current);
			if(parentEntry==parents[j].end()){
				return false;
			}

			Kmer theParent=parentEntry->second;
			int coverageValue=(*coverages)[theParent];

			observedValues[j].push_back(coverageValue);
			current=theParent;
		}
	}

	if(m_parameters->debugBubbles()){
		cout<<"O1="<<observedValues[0].size()<<" O2="<<observedValues[1].size()<<endl;
	}

	int sum1=0;
	for(int i=0;i<(int)observedValues[0].size();i++){
		sum1+=observedValues[0][i];
	}

	if(m_parameters->debugBubbles()){
		cout<<"O1Values= ";
		for(int i=0;i<(int)observedValues[0].size();i++){
			cout<<observedValues[0][i]<<" ";
		}
		cout<<endl;
	}

	int sum2=0;
	for(int i=0;i<(int)observedValues[1].size();i++){
		sum2+=observedValues[1][i];
	}
	
	if(m_parameters->debugBubbles()){
		cout<<"O2Values= ";
		for(int i=0;i<(int)observedValues[1].size();i++){
			cout<<observedValues[1][i]<<" ";
		}
		cout<<endl;
	}

	// both branches must be short enough
	if((int)observedValues[0].size()<2*m_parameters->getWordSize()
	&& (int)observedValues[1].size()<2*m_parameters->getWordSize()){
		if(sum1>sum2){
			m_choice=trees->at(0).at(1);
		}else if(sum2>sum1){
			m_choice=trees->at(1).at(1);

		// this will not happen often
		}else if(sum1==sum2){
			// take the shortest, if any
			if(observedValues[0].size()<observedValues[1].size()){
				m_choice=trees->at(0).at(1);
			}else if(observedValues[1].size()<observedValues[0].size()){
				m_choice=trees->at(1).at(1);
			// same length and same sum, won't happen very often anyway
			}else{
				m_choice=trees->at(0).at(1);
			}
		}
		
		if(m_parameters->debugBubbles()){
			cout<<"This is a genuine bubble"<<endl;
			cout<<"root="<<root.idToWord(wordSize,m_parameters->getColorSpaceMode())<<" target="<<target.idToWord(wordSize,m_parameters->getColorSpaceMode())<<endl;
		}

		return true;
	}

	if(m_parameters->debugBubbles()){
		cout<<"False at last"<<endl;
	}

	return false;
}