// Tries each named-statement alternative in turn and reports whether any
// of them matched. Evaluation stops at the first alternative that succeeds.
bool binspector_parser_t::is_named_statement() {
    if (is_invariant()) return true;
    if (is_constant())  return true;
    if (is_skip())      return true;
    if (is_slot())      return true;
    if (is_signal())    return true;

    // Field is deliberately tried last: atoms only require an expression,
    // which most everything falls into, so the more explicit statement
    // kinds have to get their chance first.
    return is_field();
}
/* Given a list of 5' and 3' splice-site features extracted from a group,
   check whether they form valid pairs in all species.

   The list is sorted in place (ascending for '+' strand, descending
   otherwise; the strand of the first feature is assumed to hold for all).
   Every 5' splice feature that is immediately followed by a 3' splice
   feature is treated as one intron.  For each such pair the two bases at
   each site are read for every sequence in the alignment, reverse
   complemented on the '-' strand, concatenated, and tested with
   is_signal() against the pairs "GTAG", "GCAG" and "ATAC".

   offset5 is added to the 5' site start only on the '-' strand; offset3
   is added to the 3' site start only on the '+' strand.

   Returns 1 if no pair failed (including when the list has fewer than two
   features), 0 otherwise.  For each failing pair, both features are added
   to 'problems' as BAD_INTRON. */
int are_introns_okay(List *intron_splice, MSA *msa, List *problems,
                     int offset5, int offset3) {
  char *splice_pairs[3] = {"GTAG", "GCAG", "ATAC"};
  char donor[3], acceptor[3], pair[5];
  char strand;
  int idx, seq, pos5, pos3;
  int all_ok = 1;

  donor[2] = '\0';
  acceptor[2] = '\0';

  if (lst_size(intron_splice) < 2)
    return 1;

  /* assume all features are on the same strand as the first one */
  strand = ((GFF_Feature*)lst_get_ptr(intron_splice, 0))->strand;
  if (strand == '+')
    lst_qsort(intron_splice, feature_comparator_ascending);
  else
    lst_qsort(intron_splice, feature_comparator_descending);

  idx = 0;
  while (idx < lst_size(intron_splice) - 1) {
    GFF_Feature *site5 = lst_get_ptr(intron_splice, idx);
    GFF_Feature *site3 = lst_get_ptr(intron_splice, idx + 1);

    /* only a 5' splice directly followed by a 3' splice counts as a pair */
    if (!str_starts_with_charstr(site5->feature, SPLICE_5) ||
        !str_starts_with_charstr(site3->feature, SPLICE_3)) {
      idx++;
      continue;
    }

    pos5 = site5->start - 1;
    if (strand == '-')
      pos5 += offset5;

    pos3 = site3->start - 1;
    if (strand == '+')
      pos3 += offset3;

    for (seq = 0; seq < msa->nseqs; seq++) {
      donor[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5], seq, 0);
      donor[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos5 + 1], seq, 0);
      acceptor[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3], seq, 0);
      acceptor[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos3 + 1], seq, 0);

      if (strand == '-') {
        msa_reverse_compl_seq(donor, 2);
        msa_reverse_compl_seq(acceptor, 2);
      }

      /* 2 + 2 characters plus the terminator exactly fill pair[5] */
      strcpy(pair, donor);
      strcat(pair, acceptor);

      if (is_signal(pair, 3, splice_pairs, msa->is_missing))
        continue;

      /* first failing sequence is enough: flag both sites and stop */
      problem_add(problems, site5, BAD_INTRON, -1, -1);
      problem_add(problems, site3, BAD_INTRON, -1, -1);
      all_ok = 0;
      break;
    }

    idx += 2;                   /* the 3' site was consumed with its 5' site */
  }

  return all_ok;
}
/* Returns 1 if, for every sequence in the alignment, the three columns
   starting at the feature's start (feat->start - 1 as an index) pass
   is_signal() against "TAA", "TAG" and "TGA"; returns 0 as soon as one
   sequence fails.  On the '-' strand the triplet is reverse complemented
   before the test. */
static PHAST_INLINE int is_conserved_stop(GFF_Feature *feat, MSA *msa) {
  char *stop_signals[3] = { "TAA", "TAG", "TGA" };
  char codon[4];
  int first_col = feat->start - 1;
  int seq, k;

  codon[3] = '\0';

  for (seq = 0; seq < msa->nseqs; seq++) {
    for (k = 0; k < 3; k++)
      codon[k] = ss_get_char_tuple(msa, msa->ss->tuple_idx[first_col + k],
                                   seq, 0);

    if (feat->strand == '-')
      msa_reverse_compl_seq(codon, 3);

    if (!is_signal(codon, 3, stop_signals, msa->is_missing))
      return 0;
  }

  return 1;
}
/* Returns 1 if, for every sequence in the alignment, the two columns at
   the feature's 3' splice position pass is_signal() against the accepted
   dinucleotides; returns 0 as soon as one sequence fails.

   The position is feat->start - 1 (base 0 indexing), shifted by offset3
   on the '+' strand only.  On the '-' strand the dinucleotide is reverse
   complemented before the test.  When splice_strict is nonzero only the
   first entry of the signal table ("AG") is passed to is_signal();
   otherwise both "AG" and "AC" are. */
static PHAST_INLINE int is_conserved_3splice(GFF_Feature *feat, MSA *msa,
                                             int offset3, int splice_strict) {
  char *splice_signals[2] = {"AG", "AC"};
  char dinuc[3];
  int seq;
  int pos = feat->start - 1;    /* base 0 indexing */
  int nsignals = splice_strict ? 1 : 2;

  if (feat->strand == '+')
    pos += offset3;

  dinuc[2] = '\0';

  for (seq = 0; seq < msa->nseqs; seq++) {
    dinuc[0] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos], seq, 0);
    dinuc[1] = ss_get_char_tuple(msa, msa->ss->tuple_idx[pos + 1], seq, 0);

    if (feat->strand == '-')
      msa_reverse_compl_seq(dinuc, 2);

    if (!is_signal(dinuc, nsignals, splice_signals, msa->is_missing))
      return 0;
  }

  return 1;
}
// Return 'true' if current entry is signal bool NameList::current_is_signal() { const char *entry = current(); return is_signal(entry); }