/**
 * Erase from sequence container `c` every element for which predicate `p`
 * returns true, using the erase-remove idiom.
 *
 * The two trailing parameters are unnamed and unused in the body; presumably
 * they are tags used only for overload selection.
 *
 * NOTE(review): the original carried a disabled (`#if 0`) unconditional
 * variant of the same statement; only the guarded form was ever compiled.
 * The reason for guarding against an empty container is not visible here.
 */
void erase_if_dispatch(Sequence& c, Predicate p, sequence_tag, IteratorStability)
{
    // Nothing to do for an empty container.
    if (c.empty())
        return;

    // remove_if moves the kept elements to the front and returns the new
    // logical end; erase then drops the leftover tail.
    c.erase(
        std::remove_if(c.begin(), c.end(), p),
        c.end());
}
/**
 * Walk `_sequence` once, calling `_update_action` on each element and acting
 * on the returned value:
 *  - update_action::remove erases the element and continues from the
 *    iterator returned by erase();
 *  - update_action::keep advances to the next element.
 *
 * Any other returned value matches no case, so the iterator is left where it
 * is and the same element is visited again.
 */
void
sequence_iteration(
	Sequence &_sequence,
	UpdateAction const &_update_action
)
{
	auto cur = _sequence.begin();

	// end() is re-read on every pass because elements may have been erased.
	while(cur != _sequence.end())
	{
		switch(_update_action(*cur))
		{
		case fcppt::algorithm::update_action::keep:
			++cur;
			break;
		case fcppt::algorithm::update_action::remove:
			// Continue from the element following the erased one.
			cur = _sequence.erase(cur);
			break;
		}
	}
}
/** Append the sequence of contig v to seq. */ static void mergeContigs(const Graph& g, const Contigs& contigs, vertex_descriptor u, vertex_descriptor v, Sequence& seq, const ContigPath& path) { int d = get(edge_bundle, g, u, v).distance; assert(d < 0); unsigned overlap = -d; const Sequence& s = sequence(contigs, v); assert(s.length() > overlap); Sequence ao; Sequence bo(s, 0, overlap); Sequence o; do { assert(seq.length() > overlap); ao = seq.substr(seq.length() - overlap); o = createConsensus(ao, bo); if (!o.empty()) { seq.resize(seq.length() - overlap); seq += o; seq += Sequence(s, overlap); return; } } while (chomp(seq, 'n')); // Try an overlap alignment. if (opt::verbose > 2) cerr << '\n'; vector<overlap_align> overlaps; alignOverlap(ao, bo, 0, overlaps, false, opt::verbose > 2); bool good = false; if (!overlaps.empty()) { assert(overlaps.size() == 1); const overlap_align& o = overlaps.front(); unsigned matches = o.overlap_match; const string& consensus = o.overlap_str; float identity = (float)matches / consensus.size(); good = matches >= opt::minOverlap && identity >= opt::minIdentity; if (opt::verbose > 2) cerr << matches << " / " << consensus.size() << " = " << identity << (matches < opt::minOverlap ? " (too few)" : identity < opt::minIdentity ? " (too low)" : " (good)") << '\n'; } if (good) { assert(overlaps.size() == 1); const overlap_align& o = overlaps.front(); seq.erase(seq.length() - overlap + o.overlap_t_pos); seq += o.overlap_str; seq += Sequence(s, o.overlap_h_pos + 1); } else { cerr << "warning: the head of " << get(vertex_name, g, v) << " does not match the tail of the previous contig\n" << ao << '\n' << bo << '\n' << path << endl; seq += 'n'; seq += s; } }
/**
 * Erase from sequence container `c` every element that compares equal to
 * `x`, using the erase-remove idiom.
 *
 * The trailing parameter is unnamed and unused in the body; presumably it is
 * a tag used only for overload selection.
 *
 * NOTE(review): `x` is taken by reference. If a caller passes a reference to
 * an element of `c` itself, std::remove may overwrite that element while it
 * is still being compared against -- confirm no caller does this.
 */
void erase_dispatch(Sequence& c, const T& x, sequence_tag)
{
    // remove moves the kept elements to the front and returns the new
    // logical end; erase then drops the leftover tail.
    c.erase(
        std::remove(c.begin(), c.end(), x),
        c.end());
}
/** Read a single record. */ Sequence FastaReader::read(string& id, string& comment, char& anchor, string& q) { next_record: id.clear(); comment.clear(); anchor = 0; q.clear(); // Discard comments. while (peek() == '#') ignoreLines(1); signed char recordType = peek(); Sequence s; unsigned qualityOffset = 0; if (eof() || recordType == EOF || ftell(m_in) >= m_end) { string header; getline(header); return s; } else if (recordType == '>' || recordType == '@') { // Read the header. string header; getline(header); istringstream headerStream(header); headerStream >> recordType >> id >> ws; std::getline(headerStream, comment); // Ignore SAM headers. if (id.length() == 2 && isupper(id[0]) && isupper(id[1]) && comment.length() > 2 && comment[2] == ':') goto next_record; // Casava FASTQ format if (comment.size() > 3 && comment[1] == ':' && comment[3] == ':') { // read, chastity, flags, index: 1:Y:0:AAAAAA if (opt::chastityFilter && comment[2] == 'Y') { m_unchaste++; if (recordType == '@') { ignoreLines(3); } else { while (peek() != '>' && peek() != '#' && ignoreLines(1)) ; } goto next_record; } if (id.size() > 2 && id.rbegin()[1] != '/') { // Add the read number to the ID. id += '/'; id += comment[0]; } } getline(s); if (recordType == '>') { // Read a multi-line FASTA record. string line; while (peek() != '>' && peek() != '#' && getline(line)) s += line; if (eof()) clear(); } if (recordType == '@') { char c = peek(); if (c != '+') { die() << s << '\n' << header << '\n'; string line; getline(line); die() << "expected `+' and saw "; if (eof()) cerr << "end-of-file\n"; else cerr << "`" << c << "' near\n" << c << line << "\n"; exit(EXIT_FAILURE); } ignoreLines(1); getline(q); } else q.clear(); if (s.empty()) { die() << "sequence with ID `" << id << "' is empty\n"; exit(EXIT_FAILURE); } if (s.length() < opt::minLength) { goto next_record; } bool colourSpace = isColourSpace(s); if (colourSpace && !isdigit(s[0])) { // The first character is the primer base. 
The second // character is the dibase read of the primer and the // first base of the sample, which is not part of the // assembly. assert(s.length() > 2); anchor = colourToNucleotideSpace(s[0], s[1]); s.erase(0, 2); q.erase(0, 1); } if (!q.empty()) checkSeqQual(s, q); if (opt::trimMasked && !colourSpace) { // Removed masked (lower case) sequence at the beginning // and end of the read. size_t trimFront = 0; while (trimFront <= s.length() && islower(s[trimFront])) trimFront++; size_t trimBack = s.length(); while (trimBack > 0 && islower(s[trimBack - 1])) trimBack--; s.erase(trimBack); s.erase(0, trimFront); if (!q.empty()) { q.erase(trimBack); q.erase(0, trimFront); } } if (flagFoldCase()) transform(s.begin(), s.end(), s.begin(), ::toupper); qualityOffset = 33; } else {