Example #1
0
/// Split an edit into two pieces measured along its from-length.
///
/// If from_off is larger than the edit's from_length, the edit is returned
/// whole as the first piece together with a default-constructed second piece.
/// Otherwise the first piece gets from_length - from_off and the second piece
/// gets whatever from-length is left over.
///
/// - match: both pieces get equal from/to lengths, no sequence
/// - sub:   as for a match, and the sequence is divided at the first piece's length
/// - insertion: returned unchanged as the first piece; second piece stays empty
/// - deletion: only the from-lengths are set on the two pieces
///
/// An edit matching none of these predicates yields two default-constructed edits.
pair<Edit, Edit> cut_edit_at_from(const Edit& e, size_t from_off) {
    Edit head, tail;
    // offset lies beyond the edit: nothing to cut
    if (from_off > e.from_length()) {
        return make_pair(e, tail);
    }

    // from-lengths of the first and second pieces
    const size_t head_len = e.from_length() - from_off;
    const size_t tail_len = e.from_length() - head_len;

    const bool is_match = edit_is_match(e);
    if (is_match || edit_is_sub(e)) {
        // matches and subs consume the same amount on both sides
        head.set_from_length(head_len);
        head.set_to_length(head_len);
        tail.set_from_length(tail_len);
        tail.set_to_length(tail_len);
        if (!is_match) {
            // a sub also carries bases, which are divided at the same point
            head.set_sequence(e.sequence().substr(0, head_len));
            tail.set_sequence(e.sequence().substr(head_len));
        }
    } else if (edit_is_insertion(e)) {
        // no from-length to cut through; keep the insertion intact
        head = e;
    } else if (edit_is_deletion(e)) {
        head.set_from_length(head_len);
        tail.set_from_length(tail_len);
    }
    return make_pair(head, tail);
}
Example #2
0
/// Randomly perturb a single edit and return the edits that replace it.
///
/// For a match, substitution, or insertion, each of the edit's to_length bases
/// is handled independently using draws from rprob:
///   - draw <= base_error: emit a 1bp substitution to a base drawn from `bases`
///     that differs from the current base;
///   - otherwise, a second draw <= indel_error: emit a 1bp indel, where a third
///     draw < 0.5 selects an insertion of a base drawn from `bases`, else a deletion;
///   - otherwise: emit a 1bp match.
/// A deletion produces no edits. The collected edits are passed through
/// simplify() before being returned.
///
/// @param edit        the edit to mutate
/// @param position    position from which bases are read (via pos_char) for
///                    non-insertion edits
/// @param base_error  threshold compared against an rprob draw for substitutions
/// @param indel_error threshold compared against an rprob draw for indels
/// @param bases       alphabet that rbase indexes into
/// @param rprob       real-valued distribution sampled with the member rng
/// @param rbase       integer distribution sampled with the member rng;
///                    presumably spans the valid indices of `bases` -- verify at caller
/// @return the simplified list of edits
vector<Edit> Sampler::mutate_edit(const Edit& edit,
                                  const pos_t& position,
                                  double base_error,
                                  double indel_error,
                                  const string& bases,
                                  uniform_real_distribution<double>& rprob,
                                  uniform_int_distribution<int>& rbase) {

    // we will build up a mapping representing the modified edit
    Mapping new_mapping;
    // determine to-length of edit
    const size_t to_length = edit.to_length();
    // we will keep track of the current base using this
    pos_t curr_pos = position;
    /// TODO we should punt if we aren't a pure edit
    // as in, we are something with mixed to and from lengths; like a block sub with an indel
    if (edit_is_match(edit) || edit_is_sub(edit)
        || edit_is_insertion(edit)) {
        // distribute mutations across this length
        for (size_t k = 0; k < to_length; ++k) {
            char c = 'N'; // in the case that we are in an insertion
            if (!edit_is_insertion(edit)) {
                c = pos_char(curr_pos);
                ++get_offset(curr_pos);
            }
            if (rprob(rng) <= base_error) {
                // pick another base than what c is
                // NOTE(review): this never terminates if `bases` offers no
                // character other than c
                char n;
                do {
                    n = bases[rbase(rng)];
                } while (n == c);
                // make the edit for the sub
                Edit* e = new_mapping.add_edit();
                e->set_sequence(string(1, n));
                e->set_from_length(1);
                e->set_to_length(1);
            // if we've got an indel
            // note that we're using a simple geometric indel distribution here
            } else if (rprob(rng) <= indel_error) {
                if (rprob(rng) < 0.5) {
                    // insertion: insert the randomly drawn base (previously the
                    // drawn base was discarded and c was inserted instead)
                    const char n = bases[rbase(rng)];
                    Edit* e = new_mapping.add_edit();
                    e->set_sequence(string(1, n));
                    e->set_to_length(1);
                } else {
                    // deletion of a single base
                    Edit* e = new_mapping.add_edit();
                    e->set_from_length(1);
                }
            } else {
                // make the edit for the 1bp match
                Edit* e = new_mapping.add_edit();
                e->set_from_length(1);
                e->set_to_length(1);
            }

        }
    } else if (edit_is_deletion(edit)) {
        // special case: 0 (deletion)
        // maybe we do nothing; as there is no length in the read
    }
    // simplify the mapping
    new_mapping = simplify(new_mapping);
    // copy the new edits
    vector<Edit> new_edits;
    const int edit_count = new_mapping.edit_size();
    new_edits.reserve(edit_count);
    for (int i = 0; i < edit_count; ++i) {
        new_edits.push_back(new_mapping.edit(i));
    }
    // and send them back
    return new_edits;
}