Exemple #1
0
void HashGraph::InsertSequence(const Sequence &seq, uint64 prefix, uint64 mask)
{
    if (seq.Size() < kmerLength)
        return;

    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        Kmer key = kmer;
        Kmer rev_comp = kmer;
        rev_comp.ReverseComplement();
        if (rev_comp < kmer)
            key = rev_comp;

        if ((key.Hash() & mask) == prefix)
        {
            KmerNodeAdapter adp(InsertKmer(kmer), kmer);
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }
}
Exemple #2
0
bool HashGraph::AddEdgesFromSequence(const Sequence &seq)
{
    if (seq.Size() < kmerLength)
        return false;

    bool flag = false;
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);

        KmerNodeAdapter adp = GetNodeAdapter(kmer);
        if (!adp.IsNull())
        {
            flag = true;
            adp.Increase();
            if (i >= (int)kmerLength)
            {
                adp.AddInEdge(3 - seq[i-kmerLength]);
            }

            if (i+1 < seq.Size())
            {
                adp.AddOutEdge(seq[i+1]);
            }
        }
    }

    return flag;
}
Exemple #3
0
// Reports whether every k-mer of `seq` is present in the graph.
//
// Returns false as soon as GetNode() yields NULL for some k-mer of `seq`,
// true otherwise. A sequence shorter than kmerLength contains no k-mer and is
// therefore reported as valid; this matches the result the unguarded loops
// produced before, but without indexing past the end of a short sequence
// while priming the window.
bool HashGraph::IsValid(const Sequence &seq)
{
    // Same length guard as the other sequence-walking methods of HashGraph:
    // the priming loop below reads seq[0 .. kmerLength-2] unconditionally.
    if (seq.Size() < kmerLength)
        return true;

    // Prime the rolling window with the first kmerLength-1 bases.
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);

    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        if (GetNode(kmer) == NULL)
            return false;
    }

    return true;
}
Exemple #4
0
void HashGraph::AddInternalKmers(const Sequence &seq, int minCount)
{
    if (seq.Size() <= kmerLength)
        return;

    vector<int> v;
    int count = 0;
    int sum = 0;
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);
    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);

        KmerNode *node = GetNode(kmer);
        if (node != NULL && node->Count() >= (unsigned)minCount)
        {
            sum += node->Count();
            ++count;
            v.push_back(i);
        }
    }

    if (count > max(seq.Size() - kmerLength*2 + 1, (seq.Size() - kmerLength + 1)/2))
    {
        Kmer kmer;
        for (int i = 0; i < kmerLength-1; ++i)
            kmer.AddRight(seq[i]);
        for (int i = kmerLength-1; i < seq.Size(); ++i)
        {
            kmer.AddRight(seq[i]);

            if (v.front() <= i && i <= v.back() && GetNode(kmer) == NULL)
            {
                KmerNodeAdapter adp(InsertKmer(kmer), kmer);
                if (i >= (int)kmerLength)
                {
                    adp.AddInEdge(3 - seq[i-kmerLength]);
                }

                if (i+1 < seq.Size())
                {
                    adp.AddOutEdge(seq[i+1]);
                }
            }
        }
    }
}
// Reads up to `expected_num` sequences from read_reader into `reads`.
//
// `reads` is resized to expected_num up front and shrunk to the number of
// sequences actually read if the reader runs out early. Each sequence is
// trimmed by option.trim and then reduced to its longest run that contains no
// 'N' (the earliest such run on ties). A slot is first inactivated; it is
// overwritten with the encoded sequence only when seq.IsChar() holds for the
// reduced sequence, otherwise it stays inactivated.
//
// Prints the number of stored reads to stdout as "reads <n>".
void AbstractAssembler::ReadBunchOfReads(vector<Read> &reads, int64 expected_num)
{
    reads.resize(expected_num);

    Sequence seq;
    string comment;
    int accepted = 0;

    for (int64 index = 0; index < (int64)reads.size(); ++index)
    {
        if (!read_reader->Read(seq, comment))
        {
            // Input exhausted: drop the slots that were never filled.
            reads.resize(index);
            break;
        }

        reads[index].Inactivate();
        seq.Trim(option.trim);

        // Find the longest N-free run [best_begin, best_end). The end of the
        // sequence is treated like an 'N' so the final run is closed by the
        // same code path.
        int best_begin = 0;
        int best_end = 0;
        int run_begin = 0;
        for (int pos = 0; pos <= seq.Size(); ++pos)
        {
            if (pos < seq.Size() && seq[pos] != 'N')
                continue;

            if (pos - run_begin > best_end - best_begin)
            {
                best_begin = run_begin;
                best_end = pos;
            }
            run_begin = pos + 1;
        }

        Sequence longest_run;
        seq.GetSubSequence(longest_run, best_begin, best_end - best_begin);
        seq = longest_run;

//        seq.TrimError();
//        seq.ReverseComplement();
//        seq.TrimError();
//        seq.ReverseComplement();

        if (seq.IsChar())
        {
            ++accepted;

            seq.Encode();
            reads[index] = seq;
        }
    }

    printf("reads %d\n", accepted);
}
Exemple #6
0
// normReads: copies reads from a FASTA file to a new FASTA file, optionally
// normalising their length.
//
// usage: normReads fa-file norm-fa-file [--length l] [--mate]
//
// With length == 0 every read is copied unchanged. Otherwise reads shorter
// than `length` are dropped and the remaining ones are resized to `length`.
// With --mate the input is consumed two reads at a time and a pair is written
// only if both reads qualify; a trailing read without a partner is dropped.
//
// Exits with status 1 after printing the usage text when fewer than two
// positional arguments remain or the first one is --help / -h.
int main(int argc, char *argv[])
{
    AddParameter("length", &length, INTEGER);
    AddParameter("mate", &mate, SIMPLE);

    ProcessParameters(argc, argv);

    if (argc < 3
        || strcmp(argv[1], "--help") == 0
        || strcmp(argv[1], "-h") == 0)
    {
        fprintf(stderr, "usage: normReads fa-file norm-fa-file\n");
        fprintf(stderr, "       [--length l] [--mate]\n");
        exit(1);
    }

    FastAReader reader(argv[1]);
    FastAWriter writer(argv[2]);

    Sequence seq;
    Sequence seq2;
    string comment;
    string comment2;

    if (mate)
    {
        // Stop as soon as either read of a pair cannot be read.
        while (reader.Read(seq, comment) && reader.Read(seq2, comment2))
        {
            if (length != 0)
            {
                // Both mates must be long enough, or the pair is dropped.
                if (seq.Size() < length || seq2.Size() < length)
                    continue;

                seq.Resize(length);
                seq2.Resize(length);
            }

            writer.Write(seq, comment);
            writer.Write(seq2, comment2);
        }
    }
    else
    {
        while (reader.Read(seq, comment))
        {
            if (length != 0)
            {
                if (seq.Size() < length)
                    continue;

                seq.Resize(length);
            }

            writer.Write(seq, comment);
        }
    }

    return 0;
}