/** * work method. * * \author Sébastien Boisvert * * Code reviews * * 2011-09-02 -- Code review by Élénie Godzaridis (found bug with worker states) * */ void FusionWorker::work(){ /* used tags: TODO: does the code pay attention when the coverage indicates a repeated k-mer ? repeats slow things down... RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE RAY_MPI_TAG_ASK_VERTEX_PATH RAY_MPI_TAG_GET_PATH_LENGTH */ if(m_isDone) return; if(m_position < (int) m_path->size()){ /* get the number of paths */ if(!m_requestedNumberOfPaths){ /* if(m_position % 1000 == 0){ cout<<"FusionWorker "<<m_workerIdentifier<<" position: ["<<m_position<<"/"<<m_path->size()<<endl; } */ Kmer kmer; m_path->at(m_position,&kmer); if(m_reverseStrand) kmer=kmer.complementVertex(m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); Rank destination=kmer.vertexRank(m_parameters->getSize(), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); int elementsPerQuery=m_virtualCommunicator->getElementsPerQuery(RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE); MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(elementsPerQuery); int outputPosition=0; kmer.pack(message,&outputPosition); Message aMessage(message,elementsPerQuery,destination, RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE,m_parameters->getRank()); m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage); m_requestedNumberOfPaths=true; m_receivedNumberOfPaths=false; if(m_parameters->hasOption("-debug-fusions2")){ cout<<"worker "<<m_workerIdentifier<<" send RAY_MPI_TAG_ASK_VERTEX_PATHS_SIZE"<<endl; } /* receive the number of paths */ }else if(m_requestedNumberOfPaths && !m_receivedNumberOfPaths && m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){ vector<MessageUnit> response; m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response); m_numberOfPaths=response[0]; if(m_parameters->hasOption("-debug-fusions2")) cout<<"worker "<<m_workerIdentifier<<" Got "<<m_numberOfPaths<<endl; m_receivedNumberOfPaths=true; m_pathIndex=0; m_requestedPath=false; /* 2^5 */ int maximumNumberOfPathsToProcess=32; /* don't process repeated stuff */ if(m_numberOfPaths> maximumNumberOfPathsToProcess) m_numberOfPaths=0; }else if(m_receivedNumberOfPaths && m_pathIndex < m_numberOfPaths){ /* request a path */ if(!m_requestedPath){ Kmer kmer; m_path->at(m_position,&kmer); if(m_reverseStrand) kmer=kmer.complementVertex(m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); Rank destination=kmer.vertexRank(m_parameters->getSize(), m_parameters->getWordSize(),m_parameters->getColorSpaceMode()); int elementsPerQuery=m_virtualCommunicator->getElementsPerQuery(RAY_MPI_TAG_ASK_VERTEX_PATH); MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(elementsPerQuery); int outputPosition=0; kmer.pack(message,&outputPosition); message[outputPosition++]=m_pathIndex; Message aMessage(message,elementsPerQuery,destination, RAY_MPI_TAG_ASK_VERTEX_PATH,m_parameters->getRank()); m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage); if(m_parameters->hasOption("-debug-fusions2")){ cout<<"worker "<<m_workerIdentifier<<" send RAY_MPI_TAG_ASK_VERTEX_PATH "<<m_pathIndex<<endl; } m_requestedPath=true; m_receivedPath=false; /* receive the path */ }else if(!m_receivedPath && m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){ vector<MessageUnit> response; m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response); int bufferPosition=0; /* skip the k-mer because we don't need it */ bufferPosition+=KMER_U64_ARRAY_SIZE; PathHandle otherPathIdentifier=response[bufferPosition++]; //int progression=response[bufferPosition++]; if(m_parameters->hasOption("-debug-fusions2")) cout<<"worker "<<m_workerIdentifier<<" receive RAY_MPI_TAG_ASK_VERTEX_PATH_REPLY"<<endl; if(otherPathIdentifier != m_identifier){ m_hits[otherPathIdentifier]++; } m_receivedPath=true; m_pathIndex++; m_requestedPath=false; } /* received all paths, can do the next one */ }else if(m_receivedNumberOfPaths && m_pathIndex == m_numberOfPaths){ m_position++; m_requestedNumberOfPaths=false; m_receivedNumberOfPaths=false; if(m_parameters->hasOption("-debug-fusions2")) cout<<"worker "<<m_workerIdentifier<<" Next position is "<<m_position<<endl; } /* gather hit information */ }else if(!m_gatheredHits){ if(!m_initializedGathering){ for(map<PathHandle,int>::iterator i=m_hits.begin();i!=m_hits.end();i++){ m_hitNames.push_back(i->first); } m_initializedGathering=true; m_hitIterator=0; m_requestedHitLength=false; }else if(m_hitIterator < (int) m_hitNames.size()){ /* ask the hit length */ if(!m_requestedHitLength){ MessageUnit*message=(MessageUnit*)m_outboxAllocator->allocate(1); PathHandle hitName=m_hitNames[m_hitIterator]; Rank destination=getRankFromPathUniqueId(hitName); message[0]=hitName; Message aMessage(message,1,destination, RAY_MPI_TAG_GET_PATH_LENGTH,m_parameters->getRank()); m_virtualCommunicator->pushMessage(m_workerIdentifier,&aMessage); m_requestedHitLength=true; /* receive the hit length */ }else if(m_virtualCommunicator->isMessageProcessed(m_workerIdentifier)){ vector<MessageUnit> response; m_virtualCommunicator->getMessageResponseElements(m_workerIdentifier,&response); int length=response[0]; if(m_parameters->hasOption("-debug-fusions2")) cout<<"received length, value= "<<length<<endl; m_hitLengths.push_back(length); m_hitIterator++; m_requestedHitLength=false; } }else{ m_gatheredHits=true; } }else{ /* at this point, we have: * m_hits * m_hitNames * m_hitLengths */ #ifdef ASSERT assert(m_hits.size()==m_hitLengths.size()); assert(m_hitLengths.size()==m_hitNames.size()); assert(m_hitIterator == (int)m_hitLengths.size()); #endif if(m_parameters->hasOption("-debug-fusions")){ cout<<"FusionWorker worker "<<m_workerIdentifier<<" path "<<m_identifier<<" strand= "<<m_reverseStrand<<" is Done, analyzed "<<m_position<<" position length is "<<m_path->size()<<endl; cout<<"FusionWorker worker "<<m_hits.size()<<" hits "<<endl; } for(int i=0;i<(int)m_hitNames.size();i++){ PathHandle hit=m_hitNames[i]; int hitLength=m_hitLengths[i]; int selfLength=m_path->size(); int matches=m_hits[hit]; #ifdef ASSERT assert(hit != m_identifier); #endif double ratio=(matches+0.0)/selfLength; if(m_parameters->hasOption("-debug-fusions")){ cout<<"FusionWorker path "<<hit<<" matches= "<<matches<<" length= "<<hitLength<<endl; } if(ratio < 0.7) continue; /* * We want to make sure that no sequence path is lost. * They should be merged since it is not totally included. */ int biologicalObjectsWithoutMatch=selfLength-matches; int maximumAllowedLost=1024; if(biologicalObjectsWithoutMatch>maximumAllowedLost) continue; /* the other is longer anyway */ if(hitLength > selfLength){ m_eliminated=true; } /* the longer is "greater" but of equal length */ if(hitLength == selfLength && hit > m_identifier){ m_eliminated=true; } } m_isDone=true; } }
/* * get the Directions taken by a vertex. * * m_Machine_getPaths_INITIALIZED must be set to false before any calls. * also, you must set m_Machine_getPaths_DONE to false; * * when done, m_Machine_getPaths_DONE is true * and * the result is in m_Machine_getPaths_result (a vector<Direction>) */ void FusionData::getPaths(Kmer vertex){ if(!m_Machine_getPaths_INITIALIZED){ m_Machine_getPaths_INITIALIZED=true; m_FUSION_paths_requested=false; m_Machine_getPaths_DONE=false; m_Machine_getPaths_result.clear(); return; } if(m_cacheForRepeatedVertices.find(vertex,false)!=NULL){ SplayNode<Kmer ,Direction*>*node=m_cacheForRepeatedVertices.find(vertex,false); #ifdef ASSERT assert(node!=NULL); #endif Direction**ddirect=node->getValue(); #ifdef ASSERT assert(ddirect!=NULL); #endif Direction*d=*ddirect; while(d!=NULL){ m_Machine_getPaths_result.push_back(*d); d=d->getNext(); } m_Machine_getPaths_DONE=true; }else if(!m_FUSION_paths_requested){ uint64_t*message=(uint64_t*)m_outboxAllocator->allocate(2*sizeof(uint64_t)); int bufferPosition=0; vertex.pack(message,&bufferPosition); message[bufferPosition++]=0; Message aMessage(message,bufferPosition, m_parameters->_vertexRank(&vertex),RAY_MPI_TAG_ASK_VERTEX_PATHS,getRank()); m_outbox->push_back(aMessage); m_FUSION_paths_requested=true; m_FUSION_paths_received=false; m_FUSION_receivedPaths.clear(); }else if(m_FUSION_paths_received){ #ifdef ASSERT for(int i=0;i<(int)m_FUSION_receivedPaths.size();i++){ assert(getRankFromPathUniqueId(m_FUSION_receivedPaths[i].getWave())<m_size); } #endif // save the result in the cache. #ifdef ASSERT assert(m_cacheForRepeatedVertices.find(vertex,false)==NULL); #endif bool inserted; SplayNode<Kmer ,Direction*>*node=m_cacheForRepeatedVertices.insert(vertex,&m_cacheAllocator,&inserted); int i=0; Direction*theDirection=NULL; while(i<(int)m_Machine_getPaths_result.size()){ Direction*newDirection=(Direction*)m_cacheAllocator.allocate(sizeof(Direction)*1); *newDirection=m_Machine_getPaths_result[i]; newDirection->setNext(theDirection); theDirection=newDirection; i++; } Direction**ddirect=node->getValue(); *ddirect=theDirection; #ifdef ASSERT if(m_Machine_getPaths_result.size()==0){ assert(*(m_cacheForRepeatedVertices.find(vertex,false)->getValue())==NULL); } #endif m_Machine_getPaths_DONE=true; } }