Exemple #1
0
  // Erases from the sequence `c` every element for which the predicate `p`
  // returns true, using std::remove_if followed by a range erase.
  // The two trailing unnamed parameters are used only for overload selection.
  void erase_if_dispatch(Sequence& c, Predicate p,
                         sequence_tag, IteratorStability)
  {
    // An empty sequence is left completely untouched: neither remove_if
    // nor erase is called on it.
    if (c.empty())
      return;

    c.erase(std::remove_if(c.begin(), c.end(), p), c.end());
  }
Exemple #2
0
// Walks _sequence from begin to end, calling _update_action on each element.
// When the action yields update_action::remove the element is erased and
// iteration continues from the iterator returned by erase; when it yields
// update_action::keep the iterator is simply advanced.
// Note: the switch has no default case, so any other value leaves the
// iterator where it is.
void
sequence_iteration(
	Sequence &_sequence,
	UpdateAction const &_update_action
)
{
	auto pos = _sequence.begin();

	// end() is re-queried on every pass because erase may change it.
	while(pos != _sequence.end())
	{
		switch(_update_action(*pos))
		{
		case fcppt::algorithm::update_action::remove:
			pos = _sequence.erase(pos);
			break;
		case fcppt::algorithm::update_action::keep:
			++pos;
			break;
		}
	}
}
Exemple #3
0
/** Append the sequence of contig v to seq. */
static void mergeContigs(const Graph& g, const Contigs& contigs,
		vertex_descriptor u, vertex_descriptor v,
		Sequence& seq, const ContigPath& path)
{
	int d = get(edge_bundle, g, u, v).distance;
	assert(d < 0);
	unsigned overlap = -d;
	const Sequence& s = sequence(contigs, v);
	assert(s.length() > overlap);
	Sequence ao;
	Sequence bo(s, 0, overlap);
	Sequence o;
	do {
		assert(seq.length() > overlap);
		ao = seq.substr(seq.length() - overlap);
		o = createConsensus(ao, bo);
		if (!o.empty()) {
			seq.resize(seq.length() - overlap);
			seq += o;
			seq += Sequence(s, overlap);
			return;
		}
	} while (chomp(seq, 'n'));

	// Try an overlap alignment.
	if (opt::verbose > 2)
		cerr << '\n';
	vector<overlap_align> overlaps;
	alignOverlap(ao, bo, 0, overlaps, false, opt::verbose > 2);
	bool good = false;
	if (!overlaps.empty()) {
		assert(overlaps.size() == 1);
		const overlap_align& o = overlaps.front();
		unsigned matches = o.overlap_match;
		const string& consensus = o.overlap_str;
		float identity = (float)matches / consensus.size();
		good = matches >= opt::minOverlap
			&& identity >= opt::minIdentity;
		if (opt::verbose > 2)
			cerr << matches << " / " << consensus.size()
				<< " = " << identity
				<< (matches < opt::minOverlap ? " (too few)"
						: identity < opt::minIdentity ? " (too low)"
						: " (good)") << '\n';
	}
	if (good) {
		assert(overlaps.size() == 1);
		const overlap_align& o = overlaps.front();
		seq.erase(seq.length() - overlap + o.overlap_t_pos);
		seq += o.overlap_str;
		seq += Sequence(s, o.overlap_h_pos + 1);
	} else {
		cerr << "warning: the head of " << get(vertex_name, g, v)
			<< " does not match the tail of the previous contig\n"
			<< ao << '\n' << bo << '\n' << path << endl;
		seq += 'n';
		seq += s;
	}
}
Exemple #4
0
 // Erases from the sequence `c` every element that compares equal to `x`.
 // The unnamed sequence_tag parameter is used only for overload selection.
 void erase_dispatch(Sequence& c, const T& x,
                     sequence_tag)
 {
   // Step 1: std::remove moves the kept elements to the front and returns
   // the start of the leftover range.
   typename Sequence::iterator leftover =
     std::remove(c.begin(), c.end(), x);
   // Step 2: drop the leftover range from the container.
   c.erase(leftover, c.end());
 }
Exemple #5
0
/** Read a single record. */
Sequence FastaReader::read(string& id, string& comment,
		char& anchor, string& q)
{
next_record:
	id.clear();
	comment.clear();
	anchor = 0;
	q.clear();

	// Discard comments.
	while (peek() == '#')
		ignoreLines(1);

	signed char recordType = peek();
	Sequence s;

	unsigned qualityOffset = 0;
	if (eof() || recordType == EOF || ftell(m_in) >= m_end) {
		string header;
		getline(header);

		return s;
	} else if (recordType == '>' || recordType == '@') {
		// Read the header.
		string header;
		getline(header);
		istringstream headerStream(header);
		headerStream >> recordType >> id >> ws;
		std::getline(headerStream, comment);

		// Ignore SAM headers.
		if (id.length() == 2 && isupper(id[0]) && isupper(id[1])
				&& comment.length() > 2 && comment[2] == ':')
			goto next_record;

		// Casava FASTQ format
		if (comment.size() > 3
				&& comment[1] == ':' && comment[3] == ':') {
			// read, chastity, flags, index: 1:Y:0:AAAAAA
			if (opt::chastityFilter && comment[2] == 'Y') {
				m_unchaste++;
				if (recordType == '@') {
					ignoreLines(3);
				} else {
					while (peek() != '>' && peek() != '#'
							&& ignoreLines(1))
						;
				}
				goto next_record;
			}
			if (id.size() > 2 && id.rbegin()[1] != '/') {
				// Add the read number to the ID.
				id += '/';
				id += comment[0];
			}
		}

		getline(s);
		if (recordType == '>') {
			// Read a multi-line FASTA record.
			string line;
			while (peek() != '>' && peek() != '#'
					&& getline(line))
				s += line;
			if (eof())
				clear();
		}

		if (recordType == '@') {
			char c = peek();
			if (c != '+') {
				die() << s << '\n' << header << '\n';
				string line;
				getline(line);
				die() << "expected `+' and saw ";
				if (eof())
					cerr << "end-of-file\n";
				else
					cerr << "`" << c << "' near\n"
					<< c << line << "\n";
				exit(EXIT_FAILURE);
			}
			ignoreLines(1);
			getline(q);
		} else
			q.clear();

		if (s.empty()) {
			die() << "sequence with ID `" << id << "' is empty\n";
			exit(EXIT_FAILURE);
		}

		if (s.length() < opt::minLength) {
			goto next_record;
		}

		bool colourSpace = isColourSpace(s);
		if (colourSpace && !isdigit(s[0])) {
			// The first character is the primer base. The second
			// character is the dibase read of the primer and the
			// first base of the sample, which is not part of the
			// assembly.
			assert(s.length() > 2);
			anchor = colourToNucleotideSpace(s[0], s[1]);
			s.erase(0, 2);
			q.erase(0, 1);
		}

		if (!q.empty())
			checkSeqQual(s, q);

		if (opt::trimMasked && !colourSpace) {
			// Removed masked (lower case) sequence at the beginning
			// and end of the read.
			size_t trimFront = 0;
			while (trimFront <= s.length() && islower(s[trimFront]))
				trimFront++;
			size_t trimBack = s.length();
			while (trimBack > 0 && islower(s[trimBack - 1]))
				trimBack--;
			s.erase(trimBack);
			s.erase(0, trimFront);
			if (!q.empty()) {
				q.erase(trimBack);
				q.erase(0, trimFront);
			}
		}
		if (flagFoldCase())
			transform(s.begin(), s.end(), s.begin(), ::toupper);

		qualityOffset = 33;
	} else {