Example #1
0
// Flags the k-mers of short "tangle" contigs as dead and refreshes the graph.
//
// The graph is first assembled into contigs. Every contig for which
// IsTangle() holds and whose Size() is below kmerLength + minLength - 1 has
// the dead flag set on each of its k-mer nodes that GetNode() can find.
// Refresh() is called afterwards (presumably it drops the flagged nodes --
// confirm against Refresh()).
//
// minLength: contigs with Size() >= kmerLength + minLength - 1 are left alone.
// Returns the number of contigs whose k-mers were flagged.
int64 HashGraph::Trim(int minLength)
{
    vector<Contig> contigs;
    Assemble(contigs);

    int total = 0;
#pragma omp parallel for
    for (int i = 0; i < (int)contigs.size(); ++i)
    {
        if (contigs[i].IsTangle() && contigs[i].Size() < kmerLength + minLength - 1)
        {
            // Prime the rolling k-mer with the first kmerLength-1 bases.
            Kmer kmer;
            for (int j = 0; j+1 < kmerLength; ++j)
                kmer.AddRight(contigs[i][j]);

            // Slide over the contig; after each AddRight the window holds the
            // k-mer ending at position j.
            for (int j = kmerLength-1; j < contigs[i].Size(); ++j)
            {
                kmer.AddRight(contigs[i][j]);
                KmerNode *node = GetNode(kmer);
                if (node != NULL)
                    node->SetDeadFlag();
            }

            // The counter is shared by all threads of the parallel loop.
#pragma omp atomic
            ++total;
        }
    }

    Refresh();

    // total is an int while the format asks for a long long; cast so the
    // argument matches the %lld conversion (LogMessage is assumed to be
    // printf-style, as its call shape suggests).
    LogMessage("trim %lld dead ends\n", (long long)total);

    return total;
}
Example #2
0
// Flags the k-mers of every contig whose Coverage() is below c as dead, then
// calls Refresh().
//
// c: coverage threshold; contigs with Coverage() < c are selected.
// Returns the number of contigs that were selected.
int64 HashGraph::RemoveLowCoverageContigs(double c)
{
    vector<Contig> assembled;
    Assemble(assembled);

    const int numContigs = (int)assembled.size();
    int removed = 0;
#pragma omp parallel for
    for (int idx = 0; idx < numContigs; ++idx)
    {
        Contig &contig = assembled[idx];

        // Contigs at or above the threshold are kept untouched.
        if (!(contig.Coverage() < c))
            continue;

        // Load the first kmerLength-1 bases into the rolling window.
        Kmer window;
        for (int pos = 0; pos+1 < kmerLength; ++pos)
            window.AddRight(contig[pos]);

        // Each further base completes one k-mer of the contig.
        for (int pos = kmerLength-1; pos < contig.Size(); ++pos)
        {
            window.AddRight(contig[pos]);
            KmerNode *hit = GetNode(window);
            if (hit != NULL)
                hit->SetDeadFlag();
        }

        // Shared counter, updated from several threads.
#pragma omp atomic
        ++removed;
    }

    Refresh();

    return removed;
}
Example #3
0
void HashGraph::InsertSequence(const Sequence &seq, uint64 prefix, uint64 mask)
{
    if (seq.Size() < kmerLength)
        return;

    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        Kmer key = kmer;
        Kmer rev_comp = kmer;
        rev_comp.ReverseComplement();
        if (rev_comp < kmer)
            key = rev_comp;

        if ((key.Hash() & mask) == prefix)
        {
            KmerNodeAdapter adp(InsertKmer(kmer), kmer);
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }
}
Example #4
0
bool HashGraph::AddEdgesFromSequence(const Sequence &seq)
{
    if (seq.Size() < kmerLength)
        return false;

    bool flag = false;
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);

        KmerNodeAdapter adp = GetNodeAdapter(kmer);
        if (!adp.IsNull())
        {
            flag = true;
            adp.Increase();
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }

    return flag;
}
Example #5
0
// Reports whether every k-mer of seq has a node in the graph.
//
// Returns false as soon as GetNode() fails for one k-mer of seq, true
// otherwise. A sequence shorter than kmerLength contains no k-mer and is
// therefore reported as valid.
bool HashGraph::IsValid(const Sequence &seq)
{
    // Without this guard the priming loop below would index past the end of a
    // sequence shorter than kmerLength-1. Returning true matches what the
    // walk yields when there is no k-mer to look up.
    if (seq.Size() < kmerLength)
        return true;

    // Prime the rolling k-mer with the first kmerLength-1 bases.
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);

    // Each further base completes one k-mer; all of them must be present.
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        if (GetNode(kmer) == NULL)
            return false;
    }

    return true;
}
Example #6
0
void HashGraph::AddInternalKmers(const Sequence &seq, int minCount)
{
    if (seq.Size() <= kmerLength)
        return;

    vector<int> v;
    int count = 0;
    int sum = 0;
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);

        KmerNode *node = GetNode(kmer);
        if (node != NULL && node->Count() >= (unsigned)minCount)
        {
            sum += node->Count();
            ++count;
            v.push_back(i);
        }
    }

    if (count > max(seq.Size() - kmerLength*2 + 1, (seq.Size() - kmerLength + 1)/2))
    {
        Kmer kmer;
        for (int i = 0; i < kmerLength-1; ++i)
            kmer.AddRight(seq[i]);
        for (int i = kmerLength-1; i < seq.Size(); ++i)
        {
            kmer.AddRight(seq[i]);

            if (v.front() <= i && i <= v.back() && GetNode(kmer) == NULL)
            {
                KmerNodeAdapter adp(InsertKmer(kmer), kmer);
                if (i >= (int)kmerLength)
                {
                    adp.AddInEdge(3 - seq[i-kmerLength]);
                }

                if (i+1 < seq.Size())
                {
                    adp.AddOutEdge(seq[i+1]);
                }
            }
        }
    }
}
Example #7
0
bool HashGraph::Check()
{
    for (int64 i = 0; i < (int64)table_size; ++i)
    {
        HashNode *node = table[i];
        while (node != NULL)
        {
            KmerNodeAdapter adapter(node);
            Kmer kmer = adapter.GetNode()->GetKmer();
            for (int strand = 0; strand < 2; ++strand)
            {
                unsigned edges = adapter.OutEdges();

                for (int x = 0; x < 4; ++x)
                {
                    if (edges & (1 << x))
                    {
                        Kmer next = kmer;
                        next.AddRight(x);
                        KmerNode *q = GetNode(next);
                        if (q == NULL)
                        {
                            cout << "null fail" << endl;
                            return false;
                        }

                        if (q->IsDead())
                        {
                            cout << "deadend fail" << endl;
                            return false;
                        }

                        KmerNodeAdapter adp(q, next);

                        if (((1 << (3 - kmer.GetBase(0))) & adp.InEdges()) == 0)
                        {
                            cout << (int)kmer.GetBase(0) << " " << (int)adp.InEdges() << endl;
                            cout << "no in edge fail" << endl;
                            return false;
                        }
                    }
                }

                kmer.ReverseComplement();
                adapter.ReverseComplement();
            }

            node = node->next;
        }
    }

    return true;
}
Example #8
0
void HashGraph::RefreshEdges()
{
    num_edges = 0;
#pragma omp parallel for
    for (int64 i = 0; i < (int64)table_size; ++i)
    {
        for (HashNode *node = table[i]; node; node = node->next)
        {
            KmerNodeAdapter curr(node);
            for (int strand = 0; strand < 2; ++strand)
            {
                Kmer kmer;
                curr.GetKmer(kmer);
                unsigned edges = curr.OutEdges();
                for (int x = 0; x < 4; ++x)
                {
                    if (edges & (1 << x))
                    {
                        Kmer next = kmer;
                        next.AddRight(x);
                        if (GetNode(next) == NULL)
                            curr.RemoveOutEdge(x);
                        else
                        {
#pragma omp atomic
                            ++num_edges;
                        }
                    }
                }

                curr.ReverseComplement();
            }

            if (node->kmer.IsPalindrome())
            {
                unsigned edges = node->InEdges() | node->OutEdges();
                node->SetInEdges(edges);
                node->SetOutEdges(edges);
            }
        }
    }

    num_edges >>= 1;
}