Example #1
0
/**
 * work method.
 *
 * \author Sébastien Boisvert
 *
 * Code reviews
 *
 * 2011-09-02 -- Code review by Élénie Godzaridis (found bug with worker states)
 *
 */
void FusionWorker::work(){
/*
  used tags:

TODO: does the code pay attention when the coverage indicates a repeated k-mer ? repeats slow things down...

	RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE
	RAY_MPI_TAG_ASK_VERTEX_PATH
	RAY_MPI_TAG_GET_PATH_LENGTH
*/

	if(m_isDone)
		return;

	if(m_position < (int) m_path->size()){

		/* get the number of paths */
		if(!m_requestedNumberOfPaths){
/*
			if(m_position % 1000 == 0){
				cout<<"FusionWorker "<<m_workerIdentifier<<" position: ["<<m_position<<"/"<<m_path->size()<<endl;
			}
*/
			Kmer kmer;
			m_path->at(m_position,&kmer);

			if(m_reverseStrand)
				kmer=kmer.complementVertex(m_parameters->getWordSize(),m_parameters->getColorSpaceMode());

			Rank destination=kmer.vertexRank(m_parameters->getSize(),
				m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
			int elementsPerQuery=m_virtualCommunicator->getElementsPerQuery(RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE);
			MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(elementsPerQuery);
			int outputPosition=0;
			kmer.pack(message,&outputPosition);
			Message aMessage(message,elementsPerQuery,destination,
				RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE,m_parameters->getRank());
			m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage);

			m_requestedNumberOfPaths=true;
			m_receivedNumberOfPaths=false;

			if(m_parameters->hasOption("-debug-fusions2")){
				cout<<"worker "<<m_workerIdentifier<<" send RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE"<<endl;
			}

		/* receive the number of paths */
		}else if(m_requestedNumberOfPaths && !m_receivedNumberOfPaths && m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){
			vector<MessageUnit> response;
			m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response);
			m_numberOfPaths=response[0];
		
			if(m_parameters->hasOption("-debug-fusions2"))
				cout<<"worker "<<m_workerIdentifier<<" Got "<<m_numberOfPaths<<endl;

			m_receivedNumberOfPaths=true;

			m_pathIndex=0;
			m_requestedPath=false;

			/* 2^5 */
			int maximumNumberOfPathsToProcess=32;

			/* don't process repeated stuff */
			if(m_numberOfPaths> maximumNumberOfPathsToProcess)
				m_numberOfPaths=0;

		}else if(m_receivedNumberOfPaths && m_pathIndex < m_numberOfPaths){
			/* request a path */
			if(!m_requestedPath){
				Kmer kmer;
				m_path->at(m_position,&kmer);

				if(m_reverseStrand)
					kmer=kmer.complementVertex(m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
	
				Rank destination=kmer.vertexRank(m_parameters->getSize(),
					m_parameters->getWordSize(),m_parameters->getColorSpaceMode());
				int elementsPerQuery=m_virtualCommunicator->getElementsPerQuery(RAY_MPI_TAG_ASK_VERTEX_PATH);
				MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(elementsPerQuery);
				int outputPosition=0;
				kmer.pack(message,&outputPosition);
				message[outputPosition++]=m_pathIndex;

				Message aMessage(message,elementsPerQuery,destination,
					RAY_MPI_TAG_ASK_VERTEX_PATH,m_parameters->getRank());
				m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage);

				if(m_parameters->hasOption("-debug-fusions2")){
					cout<<"worker "<<m_workerIdentifier<<" send RAY_MPI_TAG_ASK_VERTEX_PATH "<<m_pathIndex<<endl;
				}

				m_requestedPath=true;
				m_receivedPath=false;
			/* receive the path */
			}else if(!m_receivedPath && m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){
				vector<MessageUnit> response;
				m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response);
				int bufferPosition=0;

				/* skip the k-mer because we don't need it */
				bufferPosition+=KMER_U64_ARRAY_SIZE;
				PathHandle otherPathIdentifier=response[bufferPosition++];
				//int progression=response[bufferPosition++];

				if(m_parameters->hasOption("-debug-fusions2"))
					cout<<"worker "<<m_workerIdentifier<<" receive RAY_MPI_TAG_ASK_VERTEX_PATH_REPLY"<<endl;

				if(otherPathIdentifier != m_identifier){
					m_hits[otherPathIdentifier]++;
				}
				m_receivedPath=true;

				m_pathIndex++;
				m_requestedPath=false;
			}
		/* received all paths, can do the next one */
		}else if(m_receivedNumberOfPaths && m_pathIndex == m_numberOfPaths){
			m_position++;
			m_requestedNumberOfPaths=false;
			m_receivedNumberOfPaths=false;


			if(m_parameters->hasOption("-debug-fusions2"))
				cout<<"worker "<<m_workerIdentifier<<" Next position is "<<m_position<<endl;
		}
	/* gather hit information */
	}else if(!m_gatheredHits){
		if(!m_initializedGathering){
			for(map<PathHandle,int>::iterator i=m_hits.begin();i!=m_hits.end();i++){
				m_hitNames.push_back(i->first);
			}
			m_initializedGathering=true;
			m_hitIterator=0;
			m_requestedHitLength=false;
		}else if(m_hitIterator < (int) m_hitNames.size()){
			/* ask the hit length */
			if(!m_requestedHitLength){
				MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(1);

				PathHandle hitName=m_hitNames[m_hitIterator];
				Rank destination=getRankFromPathUniqueId(hitName);

				message[0]=hitName;

				Message aMessage(message,1,destination,
					RAY_MPI_TAG_GET_PATH_LENGTH,m_parameters->getRank());
				m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage);
				m_requestedHitLength=true;

			/* receive the hit length */
			}else if(m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){
				vector<MessageUnit> response;
				m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response);
				int length=response[0];

				if(m_parameters->hasOption("-debug-fusions2"))
					cout<<"received length, value= "<<length<<endl;
				m_hitLengths.push_back(length);

				m_hitIterator++;
				m_requestedHitLength=false;
			}
		}else{
			m_gatheredHits=true;
		}
	}else{
		/* at this point, we have:
 * 			m_hits
 * 			m_hitNames
 * 			m_hitLengths
 */
		#ifdef ASSERT
		assert(m_hits.size()==m_hitLengths.size());
		assert(m_hitLengths.size()==m_hitNames.size());
		assert(m_hitIterator == (int)m_hitLengths.size());
		#endif

		if(m_parameters->hasOption("-debug-fusions")){
			cout<<"FusionWorker worker "<<m_workerIdentifier<<" path "<<m_identifier<<" strand= "<<m_reverseStrand<<" is Done, analyzed "<<m_position<<" position length is "<<m_path->size()<<endl;
			cout<<"FusionWorker worker "<<m_hits.size()<<" hits "<<endl;
		}

		for(int i=0;i<(int)m_hitNames.size();i++){
			PathHandle hit=m_hitNames[i];
			int hitLength=m_hitLengths[i];
			int selfLength=m_path->size();
			int matches=m_hits[hit];

			#ifdef ASSERT
			assert(hit != m_identifier);
			#endif

			double ratio=(matches+0.0)/selfLength;

			if(m_parameters->hasOption("-debug-fusions")){
				cout<<"FusionWorker path "<<hit<<"	matches= "<<matches<<"	length= "<<hitLength<<endl;
			}

			if(ratio < 0.7)
				continue;

/*
 * We want to make sure that no sequence path is lost.
 * They should be merged since it is not totally included.
 */
			int biologicalObjectsWithoutMatch=selfLength-matches;

			int maximumAllowedLost=1024;

			if(biologicalObjectsWithoutMatch>maximumAllowedLost)
				continue;

			/* the other is longer anyway */
			if(hitLength > selfLength){
				m_eliminated=true;
			}

			/* the longer is "greater" but of equal length */
			if(hitLength == selfLength && hit > m_identifier){
				m_eliminated=true;
			}

		}
		m_isDone=true;
	}
}
Example #2
0
/*
 * Get the Directions taken by a vertex.
 *
 * This is a step function: it does not block, and is meant to be called
 * again with the same vertex until m_Machine_getPaths_DONE is true.
 *
 * Contract:
 *  - m_Machine_getPaths_INITIALIZED must be set to false before any calls;
 *    also, you must set m_Machine_getPaths_DONE to false.
 *  - the first call only resets the internal state (it clears
 *    m_Machine_getPaths_result and the request flag) and returns.
 *  - when done, m_Machine_getPaths_DONE is true and the result is in
 *    m_Machine_getPaths_result (a vector<Direction>).
 *
 * Results are cached per vertex in m_cacheForRepeatedVertices as a linked
 * list of Direction allocated from m_cacheAllocator. The list is built by
 * prepending, so a cache hit yields the Directions in the reverse of the
 * order in which they were stored.
 *
 * \param vertex the k-mer whose Directions are wanted
 */
void FusionData::getPaths(Kmer vertex){
	/* first call: reset the state machine and do nothing else */
	if(!m_Machine_getPaths_INITIALIZED){
		m_Machine_getPaths_INITIALIZED=true;
		m_FUSION_paths_requested=false;
		m_Machine_getPaths_DONE=false;
		m_Machine_getPaths_result.clear();
		return;
	}

	/* look the vertex up once and reuse the node, instead of searching the cache twice */
	SplayNode<Kmer ,Direction*>*cachedNode=m_cacheForRepeatedVertices.find(vertex,false);

	if(cachedNode!=NULL){
		/* cache hit: copy the cached list into the result, no message needed */
		Direction**cachedList=cachedNode->getValue();
		#ifdef ASSERT
		assert(cachedList!=NULL);
		#endif
		for(Direction*direction=*cachedList;direction!=NULL;direction=direction->getNext()){
			m_Machine_getPaths_result.push_back(*direction);
		}
		m_Machine_getPaths_DONE=true;
	}else if(!m_FUSION_paths_requested){
		/*
		 * cache miss: ask the rank given by _vertexRank() for the paths.
		 * The message is the packed vertex followed by one element set to 0.
		 *
		 * NOTE(review): the buffer is sized for 2 elements, which is enough
		 * only if pack() writes a single element -- confirm for builds where
		 * a Kmer spans more than one word.
		 */
		uint64_t*message=(uint64_t*)m_outboxAllocator->allocate(2*sizeof(uint64_t));
		int bufferPosition=0;
		vertex.pack(message,&bufferPosition);
		message[bufferPosition++]=0;
		Message aMessage(message,bufferPosition,
			m_parameters->_vertexRank(&vertex),RAY_MPI_TAG_ASK_VERTEX_PATHS,getRank());
		m_outbox->push_back(aMessage);
		m_FUSION_paths_requested=true;
		m_FUSION_paths_received=false;
		m_FUSION_receivedPaths.clear();
	}else if(m_FUSION_paths_received){
		/*
		 * NOTE(review): m_FUSION_paths_received is not set in this method, and
		 * m_Machine_getPaths_result is not filled here either; presumably the
		 * code handling the reply does both -- confirm.
		 */
		#ifdef ASSERT
		for(int i=0;i<(int)m_FUSION_receivedPaths.size();i++){
			assert(getRankFromPathUniqueId(m_FUSION_receivedPaths[i].getWave())<m_size);
		}
		#endif

		/*
		 * save the result in the cache.
		 * cachedNode is NULL in this branch, so the vertex is not cached yet.
		 */
		bool inserted;
		SplayNode<Kmer ,Direction*>*node=m_cacheForRepeatedVertices.insert(vertex,&m_cacheAllocator,&inserted);

		/* each new element is linked in front of the previous ones */
		Direction*listHead=NULL;
		for(int i=0;i<(int)m_Machine_getPaths_result.size();i++){
			Direction*newDirection=(Direction*)m_cacheAllocator.allocate(sizeof(Direction));
			*newDirection=m_Machine_getPaths_result[i];
			newDirection->setNext(listHead);
			listHead=newDirection;
		}

		/* an empty result is cached too, as a NULL list */
		Direction**ddirect=node->getValue();
		*ddirect=listHead;

		#ifdef ASSERT
		if(m_Machine_getPaths_result.size()==0){
			assert(*(m_cacheForRepeatedVertices.find(vertex,false)->getValue())==NULL);
		}
		#endif

		m_Machine_getPaths_DONE=true;
	}
}