void HashGraph::InsertSequence(const Sequence &seq, uint64 prefix, uint64 mask) { if (seq.Size() < kmerLength) return; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); Kmer key = kmer; Kmer rev_comp = kmer; rev_comp.ReverseComplement(); if (rev_comp < kmer) key = rev_comp; if ((key.Hash() & mask) == prefix) { KmerNodeAdapter adp(InsertKmer(kmer), kmer); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } }
bool HashGraph::AddEdgesFromSequence(const Sequence &seq) { if (seq.Size() < kmerLength) return false; bool flag = false; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); KmerNodeAdapter adp = GetNodeAdapter(kmer); if (!adp.IsNull()) { flag = true; adp.Increase(); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } return flag; }
// Reports whether every k-mer of `seq` has a node in the graph.
//
// Returns false as soon as GetNode() yields NULL for some k-mer, true
// otherwise. A sequence shorter than kmerLength contains no k-mer at all and
// is reported as valid (vacuously true); this matches what the loops below
// already produce for a sequence of exactly kmerLength-1 bases.
bool HashGraph::IsValid(const Sequence &seq)
{
    // Without this guard the priming loop would index seq[0..kmerLength-2]
    // even when the sequence is shorter than that, i.e. past its end.
    if (seq.Size() < kmerLength)
        return true;

    // Prime the sliding window with the first kmerLength-1 bases.
    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);

    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        if (GetNode(kmer) == NULL)
            return false;
    }

    return true;
}
void HashGraph::AddInternalKmers(const Sequence &seq, int minCount) { if (seq.Size() <= kmerLength) return; vector<int> v; int count = 0; int sum = 0; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); KmerNode *node = GetNode(kmer); if (node != NULL && node->Count() >= (unsigned)minCount) { sum += node->Count(); ++count; v.push_back(i); } } if (count > max(seq.Size() - kmerLength*2 + 1, (seq.Size() - kmerLength + 1)/2)) { Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); if (v.front() <= i && i <= v.back() && GetNode(kmer) == NULL) { KmerNodeAdapter adp(InsertKmer(kmer), kmer); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } } }
void AbstractAssembler::ReadBunchOfReads(vector<Read> &reads, int64 expected_num) { reads.resize(expected_num); Sequence seq; string comment; int count = 0; for (int64 i = 0; i < (int64)reads.size(); ++i) { if (!read_reader->Read(seq, comment)) { reads.resize(i); break; } reads[i].Inactivate(); seq.Trim(option.trim); int from = 0; int to = 0; int last = 0; for (int current = 0; current < seq.Size(); ++current) { if (seq[current] == 'N') { if (current - last > to - from) { from = last; to = current; } last = current + 1; } } if (seq.Size() - last > to - from) { from = last; to = seq.Size(); } Sequence valid_seq; seq.GetSubSequence(valid_seq, from, to - from); seq = valid_seq; // seq.TrimError(); // seq.ReverseComplement(); // seq.TrimError(); // seq.ReverseComplement(); if (!seq.IsChar()) continue; ++count; seq.Encode(); reads[i] = seq; } printf("reads %d\n", count); }
int main(int argc, char *argv[]) { AddParameter("length", &length, INTEGER); AddParameter("mate", &mate, SIMPLE); ProcessParameters(argc, argv); if (argc < 3 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) { fprintf(stderr, "usage: normReads fa-file norm-fa-file\n"); fprintf(stderr, " [--length l] [--mate]\n"); exit(1); } FastAReader reader(argv[1]); FastAWriter writer(argv[2]); Sequence seq; Sequence seq2; unsigned index = 0; string comment; string comment2; if (mate) { while (reader.Read(seq, comment)) { if (!reader.Read(seq2, comment2)) break; if (length == 0) { writer.Write(seq, comment); writer.Write(seq2, comment2); index += 2; } else { if (seq.Size() >= length && seq2.Size() >= length) { seq.Resize(length); seq2.Resize(length); writer.Write(seq, comment); writer.Write(seq2, comment2); index += 2; } } } } else { while (reader.Read(seq, comment)) { if (length == 0) { writer.Write(seq, comment); ++index; } else { if (seq.Size() >= length) { seq.Resize(length); writer.Write(seq, comment); ++index; } } } } // fclose(freadFile); // fclose(ftableFile); return 0; }