示例#1
0
/** Test-and-set on the trailing kmer of a node sequence.
 *
 * The kmer is read from the end of \a node, using the kmer size of modelMinusOne
 * (presumably k-1, going by the member's name -- confirm against its declaration).
 *
 * \param[in]     node      : ASCII sequence of the node; must be at least as long as
 *                            modelMinusOne's kmer size, otherwise the offset computation wraps.
 * \param[in,out] kmers_set : set of trailing kmers recorded so far; the kmer of \a node
 *                            is added to it when it was not present yet.
 * \return true if the trailing kmer was already in \a kmers_set, false if it has just been added.
 */
bool IterativeExtensions<span, Node, Edge, Graph>::compare_and_mark_last_k_minus_one_mer (const string& node, set<kmer_type>& kmers_set)
{
    /** Offset of the last window of modelMinusOne's size inside the node sequence. */
    const size_t tailOffset = node.size() - modelMinusOne.getKmerSize();

    /** We encode that trailing window as a kmer. */
    KmerModel tailKmer = modelMinusOne.codeSeed (node.c_str(), Data::ASCII, tailOffset);
    kmer_type tailValue = tailKmer.value();

    /** insert() leaves the set untouched when the value is already stored and reports, through
     *  the 'second' member of its result, whether an insertion actually happened. */
    const bool newlyInserted = kmers_set.insert (tailValue).second;

    return !newlyInserted;
}
/** Extend the graph to the right of a seed string and write every traversed piece to a bank.
 *
 * Starting from the first kmer of \a L, the method repeatedly:
 *   1. takes a pending node (from the back of the work list in depth mode, from the front
 *      in breadth mode),
 *   2. traverses the graph from that node in the outgoing direction,
 *   3. builds a sequence from the traversal and inserts it into \a outputBank,
 *   4. queues the successors of the traversal end node for later extension.
 *
 * The loop ends when the work list is empty or when one of the stop conditions triggers
 * (single-contig mode, \a R found while \a swf is set, more than max_nodes sequences produced).
 * The output bank is flushed before returning, whatever the exit path of the loop.
 *
 * \param[in] L          : seed string; its first kmer (offset 0) is the starting node.
 * \param[in] R          : string searched for in each produced sequence when \a swf is true.
 * \param[in] outputBank : bank receiving the produced sequences.
 * \param[in] swf        : when true, stop as soon as \a R occurs in a produced sequence whose
 *                         starting depth is greater than the kmer size.
 */
void IterativeExtensions<span, Node, Edge, GraphDataVariant>::construct_linear_seqs (
    const string& L,
    const string& R,
    IBank*        outputBank,
    bool          swf
)
{
    /** Shortcuts. */
    size_t sizeKmer = graph.getKmerSize();

    /** We first reset the terminator. */
    terminator.reset(); // distinct extensions may share kmers, however, a unique extension doesn't.

    DEBUG ((cout << "[IterativeExtensions::construct_linear_seqs]  output=" << outputBank->getId() << "  L=" << L
        << "  search=" << searchMode << " swf=" << swf << endl
    ));

    /** We get a token on the bank. */
    LOCAL (outputBank);

    /** We create a Traversal instance. The trailing numeric arguments (500, 20) are forwarded
     *  as is to the factory; their meaning is defined by TraversalTemplate::create. */
    TraversalTemplate<Node,Edge,GraphDataVariant>* traversal = TraversalTemplate<Node,Edge,GraphDataVariant>::create (traversalKind, graph, terminator, max_depth, 500, 20);
    LOCAL (traversal);

    /** Number of sequences inserted so far into the output bank. */
    long long nbNodes = 0;

    /** We need a container that holds NodeDepth objects during the extension. */
    vector <NodeDepth<Node> > kmers_to_traverse;

    /** We get the first kmer of the L string. */
    KmerModel leftKmer = model.codeSeed (L.c_str(), Data::ASCII, 0);

    /** We put this kmer into the vector of kmers to be processed (depth 0). */
    NodeDepth<Node> ksd (typename Node::Value(leftKmer.value()), leftKmer.which() ? STRAND_FORWARD : STRAND_REVCOMP, 0);
    kmers_to_traverse.push_back (ksd);

    DEBUG ((cout << "---> kmer=" <<  leftKmer.value() << " strand=" << (leftKmer.which() ? "FW" : "RC") << endl));

#ifndef DONTMARK
    /** Trailing kmers we have already extended from (see compare_and_mark_last_k_minus_one_mer). */
    set<kmer_type> already_extended_from;
    //   compare_and_mark_last_k_minus_one_mer(L, already_extended_from); // mark first kmer to never extend from it again, // L will be marked at first iteration below
#endif

    /** We will need a Path object and a Sequence object during the extension. */
    Path_t<Node> rightTraversal;
    Node endNode;
    Sequence seq (Data::ASCII);

    /**************************************************************/
    /**          MAIN LOOP ON THE REMAINING KMERS                 */
    /**************************************************************/
    while (kmers_to_traverse.size() > 0) // min_depth is max_gap_length here
    {
        /** The size is cast explicitly so that the argument matches the %ld conversion. */
        VERBOSE (("IterativeExtensions::construct_linear_seqs  MAIN LOOP %ld\n", static_cast<long>(kmers_to_traverse.size())));

        /** We pick the next node to extend: last queued one (depth first) or first queued one (breadth first).
         *  NOTE(review): with any other searchMode value nothing is removed from the work list and the
         *  previous 'ksd' is reused -- confirm that only these two modes can reach this point. */
        if (searchMode == SearchMode_Depth)
        {
            ksd = kmers_to_traverse.back();
            kmers_to_traverse.pop_back();
        }
        else if (searchMode == SearchMode_Breadth)
        {
            ksd = kmers_to_traverse.front();
            kmers_to_traverse.erase (kmers_to_traverse.begin());
        }

        /** We compute the extension on the right. */
        int len_right = traversal->traverse (ksd.node, endNode, DIR_OUTCOMING, rightTraversal);

        DEBUG ((cout << "------> kmer=" << std::hex << ksd.node.kmer.get<kmer_type>() << std::dec
            << "  strand=" << toString(ksd.node.strand) << "  depth=" << ksd.depth
            << "  len_right=" << len_right << endl
        ));

        /** We build the sequence to be inserted in the output bank. */
        buildSequence (ksd.node, rightTraversal, nbNodes, ksd.depth, seq);

        /** We insert the sequence into the output bank. */
        outputBank->insert (seq);

        /** We update statistics: the piece spans the starting kmer plus the traversed nucleotides. */
        int node_len = len_right + sizeKmer;
        nbNodes += 1;

        // if we only want 1 extension, stop now
        if (when_to_stop_extending == ExtendStopMode_after_first_contig)
        {
            INFO (("Stopping because we want only 1 extension\n"));
            break;
        }

        if (swf)
        {
            /** We stop when R occurs in the sequence just built, unless that sequence starts too close to the seed. */
            char* found = strstr (seq.getDataBuffer(), R.c_str());
            if (found != NULL  &&  ksd.depth > (int)sizeKmer)
            {
                INFO (("swf STOP \n"));
                break;
            }
        }

        if (nbNodes > max_nodes) //GR stop when too complex  huum when to stop ?
        {
            INFO (("... XXX Stopped extending node %s because %lld nodes reached. Was at depth %d.\n",
                seq.toString().c_str(), nbNodes, ksd.depth
            ));
            break;
        }


        // if max depth reached, don't extend that one
        if (ksd.depth + node_len > max_depth)
        {
            INFO (("... XXX max depth reached for node %s (depth + node length %i %i = %i) \n",
                seq.toString().c_str(), ksd.depth,node_len,ksd.depth + node_len
            ));
            continue;
        }

#ifndef DONTMARK
        // make sure this is the only time we see this (k-1)-overlap
        bool already_seen = compare_and_mark_last_k_minus_one_mer (graph.toString(ksd.node), already_extended_from);
        if (already_seen)
        {
            INFO (("... XXX not extending node %s becaues last k-1-mer was already seen\n", seq.toString().c_str()));
            continue;
        }
#endif

        // continue extending from immediately overlapping kmers
        // there may be just one 1 possibility (there was in-branching)

        /** We get the successors of the node. */
        typename GraphTemplate<Node,Edge,GraphDataVariant>::template Vector<Node> successors = graph.successors (endNode);

        /** We iterate the successors. */
        for (size_t i=0; i<successors.size(); i++)
        {
            kmers_to_traverse.push_back ( NodeDepth<Node> (successors[i].kmer, successors[i].strand, ksd.depth + len_right +1) );
            // or rather depth + len_right +1 (+1 = the nucleotide added here) (and not node_len)  ?
        }

        /** The count is cast explicitly so that the argument matches the %d conversion. */
        INFO (("... number of extensions: %d\n", static_cast<int>(successors.size())));

    }   /* end of while (kmers_to_traverse.size() > 0) */

    /** We have to flush the output bank. */
    outputBank->flush();
}