pair<Edit, Edit> cut_edit_at_from(const Edit& e, size_t from_off) { Edit left, right; if (from_off > e.from_length()) { return make_pair(e, right); } // from-length of left portion size_t l = e.from_length() - from_off; // from-length of right portion size_t r = e.from_length() - l; if (edit_is_match(e)) { left.set_from_length(l); left.set_to_length(l); right.set_from_length(r); right.set_to_length(r); } else if (edit_is_sub(e)) { left.set_from_length(l); left.set_to_length(l); left.set_sequence(e.sequence().substr(0, l)); right.set_from_length(r); right.set_to_length(r); right.set_sequence(e.sequence().substr(l)); } else if (edit_is_insertion(e)) { left = e; } else if (edit_is_deletion(e)) { left.set_from_length(l); right.set_from_length(r); } return make_pair(left, right); }
vector<Edit> Sampler::mutate_edit(const Edit& edit, const pos_t& position, double base_error, double indel_error, const string& bases, uniform_real_distribution<double>& rprob, uniform_int_distribution<int>& rbase) { // we will build up a mapping representing the modified edit Mapping new_mapping; //*new_mapping.mutable_position() = make_position(position); // determine to-length of edit size_t to_length = edit.to_length(); // we will keep track of the current base using this pos_t curr_pos = position; /// TODO we should punt if we aren't a pure edit // as in, we are something with mixed to and from lengths; like a block sub with an indel if (edit_is_match(edit) || edit_is_sub(edit) || edit_is_insertion(edit)) { // distribute mutations across this length for (size_t k = 0; k < to_length; ++k) { char c = 'N'; // in the case that we are in an insertion if (!edit_is_insertion(edit)) { c = pos_char(curr_pos); ++get_offset(curr_pos); } if (rprob(rng) <= base_error) { // pick another base than what c is char n; do { n = bases[rbase(rng)]; } while (n == c); // make the edit for the sub Edit* e = new_mapping.add_edit(); string s(1, n); e->set_sequence(s); e->set_from_length(1); e->set_to_length(1); // if we've got a indel // note that we're using a simple geometric indel dsitribution here } else if (rprob(rng) <= indel_error) { if (rprob(rng) < 0.5) { char n = bases[rbase(rng)]; Edit* e = new_mapping.add_edit(); string s(1, c); e->set_sequence(s); e->set_to_length(1); } else { Edit* e = new_mapping.add_edit(); e->set_from_length(1); } } else { // make the edit for the 1bp match Edit* e = new_mapping.add_edit(); e->set_from_length(1); e->set_to_length(1); } } } else if (edit_is_deletion(edit)) { // special case: 0 (deletion) // maybe we do nothing; as there is no length in the read } // simplify the mapping new_mapping = simplify(new_mapping); // copy the new edits vector<Edit> new_edits; for (size_t i = 0; i < new_mapping.edit_size(); ++i) { new_edits.push_back(new_mapping.edit(i)); } // and send them back return new_edits; }