/**
 * Classifies main_allele relative to reference_allele and returns the
 * resulting AlleleAttributes (type, length, number of 'N' bases, and
 * length difference to the reference).
 *
 * Preconditions (asserted): neither sequence is empty and the reference
 * allele is not missing.
 *
 * - A missing main allele yields Type::Missing with all counts zero.
 * - A main allele equal to the reference yields Type::Reference, using the
 *   untrimmed sequence for the length and 'N' count.
 * - Otherwise both alleles are copied and passed to fullTrimAllelePair
 *   (presumably removing sequence shared between the pair - confirm against
 *   its definition), and the trimmed copies are classified:
 *     equal lengths   -> SNP (length 1), Inversion (isInversion with a
 *                        0.95 match fraction and minimum size 10), else Complex;
 *     unequal lengths -> Deletion (trimmed main empty), Insertion (trimmed
 *                        reference empty), else Complex.
 */
AlleleAttributes alleleAttributes(Allele & main_allele, Allele & reference_allele) {

    assert(!(main_allele.seq().empty()));
    assert(!(reference_allele.seq().empty()));
    assert(!(reference_allele.isMissing()));

    if (main_allele.isMissing()) {

        return AlleleAttributes(Type::Missing, 0, 0, 0);
    }

    if (main_allele == reference_allele) {

        return AlleleAttributes(Type::Reference, main_allele.seq().size(), count(main_allele.seq().begin(), main_allele.seq().end(), 'N'), 0);
    }

    // Work on copies so the caller's alleles are left untouched by the trimming.
    Allele main_trimmed = main_allele;
    Allele reference_trimmed = reference_allele;

    fullTrimAllelePair(&main_trimmed, &reference_trimmed);

    // The alleles differ, so at least one of them must retain sequence after trimming.
    assert(!(main_trimmed.seq().empty()) or !(reference_trimmed.seq().empty()));

    uint main_length = main_trimmed.seq().size();
    uint reference_length = reference_trimmed.seq().size();
    uint main_num_ambiguous = count(main_trimmed.seq().begin(), main_trimmed.seq().end(), 'N');

    if (main_length != reference_length) {

        // Length-changing variant: pure deletion, pure insertion, or a mix of both.
        auto indel_type = Type::Complex;

        if (main_length == 0) {

            indel_type = Type::Deletion;

        } else if (reference_length == 0) {

            indel_type = Type::Insertion;
        }

        // NOTE(review): the difference is computed in unsigned arithmetic and wraps
        // when the main allele is shorter; this relies on the receiving parameter
        // being a signed integer - confirm against the AlleleAttributes constructor.
        return AlleleAttributes(indel_type, main_length, main_num_ambiguous, main_length - reference_length);
    }

    // Length-preserving variant; isInversion is only consulted when the length is not 1.
    auto substitution_type = (main_length == 1)
        ? Type::SNP
        : (isInversion(main_trimmed, reference_trimmed, 0.95, 10) ? Type::Inversion : Type::Complex);

    return AlleleAttributes(substitution_type, main_length, main_num_ambiguous, 0);
}
/**
 * Returns true if the allele's sequence contains at least one 'N' character,
 * false otherwise.
 */
bool hasAmbiguous(Allele & allele) {

    return allele.seq().find('N') != string::npos;
}
/**
 * Tests whether main_allele looks like an inversion of reference_allele.
 *
 * The reverse complement of the main allele sequence is compared position by
 * position against the reference allele sequence (read in forward direction).
 * A position counts as a match only if both bases are identical and the base
 * is not 'N'.
 *
 * @param main_allele         Candidate allele.
 * @param reference_allele    Allele to compare against.
 * @param min_match_fraction  Minimum fraction of matching positions required.
 * @param min_size            Minimum sequence length required.
 * @return true if both sequences have the same non-zero length of at least
 *         min_size and the fraction of matching positions is not below
 *         min_match_fraction; false otherwise.
 */
bool isInversion(Allele & main_allele, Allele & reference_allele, const float min_match_fraction, const uint min_size) {

    if (main_allele.seq().size() != reference_allele.seq().size()) {

        return false;
    }

    if (main_allele.seq().size() < min_size) {

        return false;
    }

    // With min_size == 0 an empty pair would otherwise reach the division below
    // as 0/0 (NaN); NaN compares false against any threshold, so the empty pair
    // would wrongly be reported as an inversion.
    if (main_allele.seq().empty()) {

        return false;
    }

    const string main_allele_rv = reverseComplementSequence(main_allele.seq());
    const auto & reference_seq = reference_allele.seq();

    assert(main_allele_rv.size() == reference_seq.size());

    uint num_correct_bases = 0;

    for (string::size_type pos = 0; pos < main_allele_rv.size(); ++pos) {

        // Ambiguous bases never count as agreement, even when both are 'N'.
        if ((main_allele_rv[pos] == reference_seq[pos]) and (main_allele_rv[pos] != 'N')) {

            num_correct_bases++;
        }
    }

    assert(num_correct_bases <= main_allele_rv.size());

    return !((static_cast<float>(num_correct_bases) / main_allele_rv.size()) < min_match_fraction);
}