// Flags the k-mers of short tangle contigs as dead, then refreshes the graph.
//
// The graph is first assembled into contigs. Every contig for which
// IsTangle() holds and whose size is below kmerLength + minLength - 1 has the
// dead flag set on each of its k-mer nodes that GetNode() can find.
// Refresh() is called once after all contigs have been examined.
//
// minLength: length threshold used in the size test above.
// Returns the number of contigs that satisfied the test.
int64 HashGraph::Trim(int minLength)
{
    vector<Contig> contigs;
    Assemble(contigs);

    // 64-bit counter so it agrees with both the return type and the %lld
    // conversion in the log call below.
    int64 total = 0;
#pragma omp parallel for
    for (int i = 0; i < (int)contigs.size(); ++i)
    {
        Contig &contig = contigs[i];
        if (contig.IsTangle() && contig.Size() < kmerLength + minLength - 1)
        {
            // Feed the first kmerLength-1 bases; each AddRight in the second
            // loop then yields the k-mer ending at position j.
            Kmer kmer;
            for (int j = 0; j+1 < kmerLength; ++j)
                kmer.AddRight(contig[j]);

            for (int j = kmerLength-1; j < contig.Size(); ++j)
            {
                kmer.AddRight(contig[j]);
                KmerNode *node = GetNode(kmer);
                if (node != NULL)
                    node->SetDeadFlag();
            }

            // The counter is shared between threads.
#pragma omp atomic
            ++total;
        }
    }

    Refresh();
    // Cast explicitly: a varargs argument must have exactly the type that
    // %lld expects (long long), whatever int64 is a typedef of.
    LogMessage("trim %lld dead ends\n", static_cast<long long>(total));
    return total;
}
// Flags every k-mer of each contig whose Coverage() is below c as dead, then
// calls Refresh().
//
// c: coverage threshold; contigs with Coverage() < c are selected.
// Returns the number of selected contigs.
int64 HashGraph::RemoveLowCoverageContigs(double c)
{
    vector<Contig> assembled;
    Assemble(assembled);

    int removed = 0;
#pragma omp parallel for
    for (int idx = 0; idx < (int)assembled.size(); ++idx)
    {
        Contig &contig = assembled[idx];
        // Written as !(x < c) so the selection is exactly the original
        // "Coverage() < c" test, including for non-finite values.
        if (!(contig.Coverage() < c))
            continue;

        // Load the leading kmerLength-1 bases into the window.
        Kmer window;
        for (int pos = 0; pos+1 < kmerLength; ++pos)
            window.AddRight(contig[pos]);

        // Slide across the contig, flagging each k-mer present in the table.
        for (int pos = kmerLength-1; pos < contig.Size(); ++pos)
        {
            window.AddRight(contig[pos]);
            if (KmerNode *hit = GetNode(window))
                hit->SetDeadFlag();
        }

#pragma omp atomic
        ++removed;
    }

    Refresh();
    return removed;
}
void HashGraph::InsertSequence(const Sequence &seq, uint64 prefix, uint64 mask) { if (seq.Size() < kmerLength) return; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); Kmer key = kmer; Kmer rev_comp = kmer; rev_comp.ReverseComplement(); if (rev_comp < kmer) key = rev_comp; if ((key.Hash() & mask) == prefix) { KmerNodeAdapter adp(InsertKmer(kmer), kmer); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } }
bool HashGraph::AddEdgesFromSequence(const Sequence &seq) { if (seq.Size() < kmerLength) return false; bool flag = false; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); KmerNodeAdapter adp = GetNodeAdapter(kmer); if (!adp.IsNull()) { flag = true; adp.Increase(); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } return flag; }
// Reports whether every k-mer of seq has a node in the graph.
//
// Returns false as soon as GetNode() yields NULL for some k-mer of seq, and
// true otherwise. A sequence shorter than kmerLength contains no k-mer and
// therefore yields true.
bool HashGraph::IsValid(const Sequence &seq)
{
    // Same guard as the other sequence-walking members: without it the
    // priming loop below would index past the end of a short sequence.
    // Returning true keeps the result the k-mer loop gave for zero iterations.
    if (seq.Size() < kmerLength)
        return true;

    Kmer kmer;
    for (int i = 0; i < kmerLength-1; ++i)
        kmer.AddRight(seq[i]);

    for (int i = kmerLength-1; i < seq.Size(); ++i)
    {
        kmer.AddRight(seq[i]);
        if (GetNode(kmer) == NULL)
            return false;
    }

    return true;
}
void HashGraph::AddInternalKmers(const Sequence &seq, int minCount) { if (seq.Size() <= kmerLength) return; vector<int> v; int count = 0; int sum = 0; Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); KmerNode *node = GetNode(kmer); if (node != NULL && node->Count() >= (unsigned)minCount) { sum += node->Count(); ++count; v.push_back(i); } } if (count > max(seq.Size() - kmerLength*2 + 1, (seq.Size() - kmerLength + 1)/2)) { Kmer kmer; for (int i = 0; i < kmerLength-1; ++i) kmer.AddRight(seq[i]); for (int i = kmerLength-1; i < seq.Size(); ++i) { kmer.AddRight(seq[i]); if (v.front() <= i && i <= v.back() && GetNode(kmer) == NULL) { KmerNodeAdapter adp(InsertKmer(kmer), kmer); if (i >= (int)kmerLength) { adp.AddInEdge(3 - seq[i-kmerLength]); } if (i+1 < seq.Size()) { adp.AddOutEdge(seq[i+1]); } } } } }
bool HashGraph::Check() { for (int64 i = 0; i < (int64)table_size; ++i) { HashNode *node = table[i]; while (node != NULL) { KmerNodeAdapter adapter(node); Kmer kmer = adapter.GetNode()->GetKmer(); for (int strand = 0; strand < 2; ++strand) { unsigned edges = adapter.OutEdges(); for (int x = 0; x < 4; ++x) { if (edges & (1 << x)) { Kmer next = kmer; next.AddRight(x); KmerNode *q = GetNode(next); if (q == NULL) { cout << "null fail" << endl; return false; } if (q->IsDead()) { cout << "deadend fail" << endl; return false; } KmerNodeAdapter adp(q, next); if (((1 << (3 - kmer.GetBase(0))) & adp.InEdges()) == 0) { cout << (int)kmer.GetBase(0) << " " << (int)adp.InEdges() << endl; cout << "no in edge fail" << endl; return false; } } } kmer.ReverseComplement(); adapter.ReverseComplement(); } node = node->next; } } return true; }
void HashGraph::RefreshEdges() { num_edges = 0; #pragma omp parallel for for (int64 i = 0; i < (int64)table_size; ++i) { for (HashNode *node = table[i]; node; node = node->next) { KmerNodeAdapter curr(node); for (int strand = 0; strand < 2; ++strand) { Kmer kmer; curr.GetKmer(kmer); unsigned edges = curr.OutEdges(); for (int x = 0; x < 4; ++x) { if (edges & (1 << x)) { Kmer next = kmer; next.AddRight(x); if (GetNode(next) == NULL) curr.RemoveOutEdge(x); else { #pragma omp atomic ++num_edges; } } } curr.ReverseComplement(); } if (node->kmer.IsPalindrome()) { unsigned edges = node->InEdges() | node->OutEdges(); node->SetInEdges(edges); node->SetOutEdges(edges); } } } num_edges >>= 1; }