// Returns a copy of `sequences` in which every residue whose codon begins
// with '-' has been dropped.
//
// All other data stored in each sequence is carried over unchanged, and the
// relative order of the remaining residues is preserved.
fasta::SequenceList fasta::remove_gaps(const fasta::SequenceList& sequences) {
  // Start from a full copy so that everything except the residue lists is
  // taken over as-is; the residue lists are rebuilt below.
  fasta::SequenceList ungapped = sequences;
  for (size_t idx = 0; idx < sequences.size(); ++idx) {
    auto& kept = ungapped[idx].residues;
    kept.clear();
    for (const auto& residue : sequences[idx].residues) {
      if (residue.codon[0] == '-') {
        continue;  // gap residue: skip it
      }
      kept.push_back(residue);
    }
  }
  return ungapped;
}
// Checks whether the leading sequences all contain the same number of
// residues as the first sequence.
//
// sequences - list of sequences to inspect.
// limit     - number of leading sequences to compare; 0 means "all of them".
//             Values larger than sequences.size() are clamped to the list
//             size, and a negative value compares nothing.
//
// Returns true when every inspected sequence has as many residues as
// sequences[0]. An empty list (nothing to compare) also yields true.
bool fasta::check_length(fasta::SequenceList const& sequences, int limit) {
  const size_t total = sequences.size();
  if (total == 0) {
    // Nothing to compare; also avoids reading sequences[0] out of bounds.
    return true;
  }

  // Number of leading sequences to inspect, never more than are available.
  size_t count = 0;
  if (limit == 0) {
    count = total;
  } else if (limit > 0) {
    const size_t requested = static_cast<size_t>(limit);
    count = requested < total ? requested : total;
  }

  const auto expected = sequences[0].residues.size();
  for (size_t i = 1; i < count; ++i) {
    if (sequences[i].residues.size() != expected) {
      return false;
    }
  }
  return true;
}
// Accumulates, for every position of the first sequence, how often each
// feature listed in m_features occurs across all sequences.
//
// sequences  - sequences to scan. Position i of every sequence is read for
//              each i below sequences[0].residues.size(), so every sequence
//              needs at least that many residues.
// identities - per-sequence weights, indexed like `sequences`; only read
//              when fade_out is true.
// fade_out   - when true, an occurrence in sequence j contributes
//              0.5 * (1 + identities[j]); otherwise it contributes 1.0.
//
// Returns a map from each feature in m_features to a vector holding one
// accumulated value per position. For an empty sequence list every feature
// maps to an empty vector.
std::unordered_map<std::string, Occurences> FeatureScores::update_occurences(
    const fasta::SequenceList& sequences,
    const std::vector<double>& identities, const bool fade_out) {
  // Guard the empty case: sequences[0] would be an out-of-bounds access.
  const size_t length = sequences.empty() ? 0 : sequences[0].residues.size();

  std::unordered_map<std::string, Occurences> p;
  for (auto& f : m_features) {
    p[f] = std::vector<double>(length, 0);
  }

  for (size_t i = 0; i < length; ++i) {
    for (size_t j = 0; j < sequences.size(); ++j) {
      for (auto& f : sequences[j].residues[i].features) {
        // One lookup serves both the sanity check and the update; every
        // feature found on a residue must be registered in m_features.
        auto it = p.find(f);
        assert(it != p.end());
        it->second[i] += fade_out ? 0.5 * (1 + identities[j]) : 1.0;
      }
    }
  }
  return p;
}
// Recomputes the per-feature occurrence values and scores and stores them in
// m_occurences and m_scores.
//
// sequences  - sequences the values are derived from; sequences[0] defines
//              how many positions are scored.
// f_set      - settings looked up with at() for every feature whose name
//              starts with "USR".
// identities - forwarded to update_occurences.
// fade_out   - forwarded to update_occurences.
void FeatureScores::update_scores(const fasta::SequenceList& sequences,
                                  const f_config::FeatureSettingsMap& f_set,
                                  const std::vector<double>& identities,
                                  const bool fade_out) {
  m_occurences = update_occurences(sequences, identities, fade_out);

  // Turn the accumulated occurrences into probabilities by dividing each
  // value by the number of sequences.
  const auto sequence_count = sequences.size();
  for (auto& entry : m_occurences) {
    for (auto& value : entry.second) {
      value = value / sequence_count;
    }
  }

  // Start with an all-zero score vector for every known feature.
  const auto length = sequences[0].residues.size();
  std::unordered_map<std::string, Scores> scores;
  for (auto& name : m_features) {
    scores[name] = std::vector<double>(length, 0);
  }

  // The feature-name prefix selects the scoring routine; features with an
  // unrecognised prefix keep their zero scores.
  for (size_t pos = 0; pos < length; ++pos) {
    for (auto& name : m_features) {
      if (name.compare(0, 2, "p_") == 0) {
        scores[name][pos] = score_ptm(pos, name);
      } else if (name.compare(0, 2, "d_") == 0) {
        scores[name][pos] = score_domain(pos, name);
      } else if (name.compare(0, 2, "m_") == 0) {
        scores[name][pos] = score_motif(pos, name);
      } else if (name.compare(0, 2, "s_") == 0) {
        scores[name][pos] = score_strct(pos, name);
      } else if (name.compare(0, 3, "USR") == 0) {
        scores[name][pos] = score_usr_feature(pos, name, f_set.at(name));
      }
    }
  }
  m_scores = scores;
}