/** Test-and-set helper for the trailing kmer of a node string.
 *
 * Encodes the kmer of 'node' that starts at offset
 * node.size() - modelMinusOne.getKmerSize() (i.e. the last kmer of the size
 * handled by modelMinusOne) and checks whether its value is already held in
 * 'kmers_set'. When it is not, the value is recorded in the set.
 *
 * \param[in]     node      : node given as an ASCII string
 * \param[in,out] kmers_set : values seen so far; gains one entry when the value is new
 * \return true if the value was already in the set, false if it has just been added
 *
 * NOTE(review): the start offset is computed with an unsigned subtraction, so
 * 'node' is presumably never shorter than modelMinusOne.getKmerSize() -- confirm
 * against callers.
 */
bool IterativeExtensions<span, Node, Edge, Graph>::compare_and_mark_last_k_minus_one_mer (const string& node, set<kmer_type>& kmers_set)
{
    /** We encode the kmer located at the very end of the node string. */
    KmerModel trailingKmer = modelMinusOne.codeSeed (
        node.c_str(),
        Data::ASCII,
        node.size() - modelMinusOne.getKmerSize()
    );

    kmer_type trailingValue = trailingKmer.value();

    /** A single insertion attempt tells us both things at once: the set is left
     *  untouched when the value is already there, and 'second' reports whether
     *  a new entry was actually created. */
    const bool newlyInserted = kmers_set.insert (trailingValue).second;

    return !newlyInserted;
}
void IterativeExtensions<span, Node, Edge, GraphDataVariant>::construct_linear_seqs ( const string& L, const string& R, IBank* outputBank, bool swf ) { /** Shortcuts. */ size_t sizeKmer = graph.getKmerSize(); /** We first reset the terminator. */ terminator.reset(); // distinct extensions may share kmers, however, a unique extension doesn't. DEBUG ((cout << "[IterativeExtensions::construct_linear_seqs] output=" << outputBank->getId() << " L=" << L << " search=" << searchMode << " swf=" << swf << endl )); /** We get a token on the bank. */ LOCAL (outputBank); /** We create a Traversal instance. */ TraversalTemplate<Node,Edge,GraphDataVariant>* traversal = TraversalTemplate<Node,Edge,GraphDataVariant>::create (traversalKind, graph, terminator, max_depth, 500, 20); LOCAL (traversal); long long nbNodes = 0; long long totalnt = 0; /** We need a container that holds NodeDepth objects during the extension. */ vector <NodeDepth<Node> > kmers_to_traverse; /** We get the first kmer of the L string. */ KmerModel leftKmer = model.codeSeed (L.c_str(), Data::ASCII, 0); /** We put this kmer into the vector of kmers to be processed. */ NodeDepth<Node> ksd (typename Node::Value(leftKmer.value()), leftKmer.which() ? STRAND_FORWARD : STRAND_REVCOMP, 0); kmers_to_traverse.push_back (ksd); DEBUG ((cout << "---> kmer=" << leftKmer.value() << " strand=" << (leftKmer.which() ? "FW" : "RC") << endl)); #ifndef DONTMARK set<kmer_type> already_extended_from; // compare_and_mark_last_k_minus_one_mer(L, already_extended_from); // mark first kmer to never extend from it again, // L will be marked at first iteration below #endif /** We will need a Path object and a Sequence object during the extension. 
*/ Path_t<Node> rightTraversal; Node endNode; Sequence seq (Data::ASCII); /**************************************************************/ /** MAIN LOOP ON THE REMAINING KMERS */ /**************************************************************/ while (kmers_to_traverse.size() > 0) // min_depth is max_gap_length here { VERBOSE (("IterativeExtensions::construct_linear_seqs MAIN LOOP %ld\n", kmers_to_traverse.size())); if (searchMode == SearchMode_Depth) { ksd = kmers_to_traverse.back(); kmers_to_traverse.pop_back(); } else if (searchMode == SearchMode_Breadth) { ksd = kmers_to_traverse.front(); kmers_to_traverse.erase (kmers_to_traverse.begin()); } /** We compute the extension on the right. */ int len_right = traversal->traverse (ksd.node, endNode, DIR_OUTCOMING, rightTraversal); DEBUG ((cout << "------> kmer=" << std::hex << ksd.node.kmer.get<kmer_type>() << std::dec << " strand=" << toString(ksd.node.strand) << " depth=" << ksd.depth << " len_right=" << len_right << endl )); /** We build the sequence to be inserted in the output bank. */ buildSequence (ksd.node, rightTraversal, nbNodes, ksd.depth, seq); /** We insert the sequence into the output bank. */ outputBank->insert (seq); /** We update statistics. */ int node_len = len_right + sizeKmer; nbNodes += 1; totalnt += node_len; // if we only want 1 extension, stop now if (when_to_stop_extending == ExtendStopMode_after_first_contig) { INFO (("Stopping because we want only 1 extension\n")); break; } if (swf) { char* found = strstr (seq.getDataBuffer(), R.c_str()); if (found != NULL && ksd.depth > (int)sizeKmer) { INFO (("swf STOP \n")); break; } } if (nbNodes > max_nodes) //GR stop when too complex huum when to stop ? { INFO (("... XXX Stopped extending node %s because %lld nodes reached. Was at depth %d.\n", seq.toString().c_str(), nbNodes, ksd.depth )); break; } // if max depth reached, don't extend that one if (ksd.depth + node_len > max_depth) { INFO (("... 
XXX max depth reached for node %s (depth + node length %i %i = %i) \n", seq.toString().c_str(), ksd.depth,node_len,ksd.depth + node_len )); continue; } #ifndef DONTMARK // make sure this is the only time we see this (k-1)-overlap bool already_seen = compare_and_mark_last_k_minus_one_mer (graph.toString(ksd.node), already_extended_from); if (already_seen) { INFO (("... XXX not extending node %s becaues last k-1-mer was already seen\n", seq.toString().c_str())); continue; } #endif // continue extending from immediately overlapping kmers // there may be just one 1 possibility (there was in-branching) /** We get the successors of the node. */ typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Node> successors = graph.successors (endNode); /** We iterate the successors. */ for (size_t i=0; i<successors.size(); i++) { kmers_to_traverse.push_back ( NodeDepth<Node> (successors[i].kmer, successors[i].strand, ksd.depth + len_right +1) ); // ou plutot depth + len_right +1 (+1 = la nt ajoutee ici) (et pas node_len) ? } INFO (("... number of extensions: %d\n", successors.size())); } /* end of while (kmers_to_traverse.size() > 0) */ /** We have to flush the output bank. */ outputBank->flush(); }