Esempio n. 1
0
// Moves hash_graph from its current k-mer size to new_kmer_size.
//
// Steps, in order:
//   1. Insert the k-mers of old_contigs (uncounted) into hash_graph, add all
//      edges, assemble contigs at the old k-mer size, then clear hash_graph.
//   2. Build a temporary graph at the old k-mer size holding the k-mers of
//      the first and last min(new_kmer_size, length) bases of every
//      assembled contig.
//   3. Switch hash_graph to new_kmer_size and call InsertIterativeKmers for
//      every flagged read, every flagged long read and every reference
//      contig, passing the temporary graph along.
//   4. Release the temporary graph, call RefreshVertices(min_support),
//      re-insert old_contigs as uncounted k-mers, clear counts and finish
//      with InsertExistKmers.
//
// Parameters:
//   assembly_info  source of reads, long_reads, their flag vectors and
//                  ref_contigs; also handed to InsertExistKmers.
//   new_kmer_size  k-mer size hash_graph is set to.
//                  NOTE(review): presumably larger than the current size;
//                  a smaller value is not guarded against here - confirm.
//   min_support    forwarded unchanged to HashGraph::RefreshVertices.
//   hash_graph     graph that is rebuilt in place.
//   old_contigs    contigs inserted into hash_graph before assembling and
//                  again after the rebuild.
void IterateHashGraph(AssemblyInfo &assembly_info, int new_kmer_size, int min_support, HashGraph &hash_graph, deque<Sequence> &old_contigs)
{
    // Read the old size first: set_kmer_size() further down replaces it.
    const int old_kmer_size = hash_graph.kmer_size();
    const int kmer_growth = new_kmer_size - old_kmer_size;
    const int64_t num_old_contigs = static_cast<int64_t>(old_contigs.size());

    // --- 1. Assemble contigs at the old k-mer size -------------------------
#pragma omp parallel for schedule(static, 1)
    for (int64_t idx = 0; idx < num_old_contigs; ++idx)
    {
        hash_graph.InsertUncountKmers(old_contigs[idx]);
    }
    hash_graph.AddAllEdges();

    deque<Sequence> contigs;
    hash_graph.Assemble(contigs);
    hash_graph.clear();

    // --- 2. Temporary old-size graph built from the contig ends ------------
    // Reserve hint: two windows of new_kmer_size bases contain
    // 2 * (kmer_growth + 1) k-mers of the old size; a contig too short for
    // that contributes all of its old-size k-mers (or none at all).
    const int kmers_in_both_ends = 2*kmer_growth + 2;
    uint64_t expected_kmers = 0;
    for (deque<Sequence>::iterator it = contigs.begin(); it != contigs.end(); ++it)
    {
        const int contig_len = static_cast<int>(it->size());
        if (contig_len - old_kmer_size + 1 >= kmers_in_both_ends)
            expected_kmers += kmers_in_both_ends;
        else if (contig_len >= old_kmer_size)
            expected_kmers += it->size() - old_kmer_size + 1;
    }

    HashGraph old_hash_graph(old_kmer_size);
    old_hash_graph.reserve(expected_kmers);

    const int64_t num_contigs = static_cast<int64_t>(contigs.size());
#pragma omp parallel for schedule(static, 1)
    for (int64_t idx = 0; idx < num_contigs; ++idx)
    {
        Sequence &contig = contigs[idx];
        const int contig_len = static_cast<int>(contig.size());
        const int window_len = min(new_kmer_size, contig_len);
        const int tail_begin = max(0, contig_len - new_kmer_size);

        Sequence window;

        // Leading window of the contig.
        window.Assign(contig, 0, window_len);
        old_hash_graph.InsertKmers(window);

        // Trailing window; identical to the leading one for short contigs.
        window.Assign(contig, tail_begin, window_len);
        old_hash_graph.InsertKmers(window);
    }

    // --- 3. Refill hash_graph at the new k-mer size ------------------------
    hash_graph.set_kmer_size(new_kmer_size);

    deque<ShortSequence> &reads = assembly_info.reads;
    vector<bool> &read_flags = assembly_info.read_flags;
    const int64_t num_reads = static_cast<int64_t>(reads.size());
#pragma omp parallel for
    for (int64_t idx = 0; idx < num_reads; ++idx)
    {
        // Only reads whose flag is set take part.
        if (read_flags[idx])
        {
            Sequence read(reads[idx]);
            InsertIterativeKmers(old_hash_graph, read, hash_graph);
        }
    }

    deque<Sequence> &long_reads = assembly_info.long_reads;
    vector<bool> &long_read_flags = assembly_info.long_read_flags;
    const int64_t num_long_reads = static_cast<int64_t>(long_reads.size());
#pragma omp parallel for schedule(static, 1)
    for (int64_t idx = 0; idx < num_long_reads; ++idx)
    {
        if (long_read_flags[idx])
        {
            InsertIterativeKmers(old_hash_graph, long_reads[idx], hash_graph);
        }
    }

    const int64_t num_ref_contigs = static_cast<int64_t>(assembly_info.ref_contigs.size());
#pragma omp parallel for schedule(static, 1)
    for (int64_t idx = 0; idx < num_ref_contigs; ++idx)
    {
        InsertIterativeKmers(old_hash_graph, assembly_info.ref_contigs[idx], hash_graph);
    }

    // --- 4. Drop the temporary graph and finalize --------------------------
    // Swapping with a default-constructed temporary hands the old graph's
    // contents to an object that is destroyed at the end of the statement.
    // NOTE(review): presumably done to return memory that clear() keeps.
    old_hash_graph.clear();
    HashGraph().swap(old_hash_graph);

    hash_graph.RefreshVertices(min_support);

#pragma omp parallel for schedule(static, 1)
    for (int64_t idx = 0; idx < num_old_contigs; ++idx)
    {
        hash_graph.InsertUncountKmers(old_contigs[idx]);
    }
    hash_graph.ClearCount();

    InsertExistKmers(assembly_info, hash_graph);
}