/**
 * Strip redundant paths from a subset: exact duplicates and paths that are
 * substrings of a longer path, e.g.
 *   {T,TT,TT}    -> {TT}
 *   {A,C,CG,CGC} -> {A,CGC}
 *
 * The subset is sorted first if it is not already flagged as sorted.
 * Duplicates are merged into the surviving path (colours and nseen counts);
 * a shorter path is dropped only once none of its colours remain after
 * removing those it shares with the longer path.
 *
 * @param subset  subset to compact in place; subset->list.len is updated
 */
void gpath_subset_rmsubstr(GPathSubset *subset)
{
  // Nothing can be redundant with fewer than two entries
  if(subset->list.len <= 1) return;
  if(!subset->is_sorted) gpath_subset_sort(subset);

  const size_t npaths = subset->list.len;
  const size_t ncols = subset->gpset->ncols;
  GPath **paths = subset->list.b;
  size_t hi, lo, cmp_juncs, nkept;

  // Walk from the end of the list towards the front; each surviving path
  // `keep` is checked against the entries that precede it
  for(hi = npaths-1; hi > 0; hi--)
  {
    GPath *keep = paths[hi];
    if(keep == NULL) continue; // already removed

    // Visit lo = hi-1 .. 0, stopping at the first entry that doesn't match
    for(lo = hi; lo-- > 0; )
    {
      GPath *cand = paths[lo];
      if(cand == NULL) continue; // already removed

      cmp_juncs = MIN2(keep->num_juncs, cand->num_juncs);

      // cand can't be contained in keep if it is longer, if orientations
      // differ, or if the sequences differ over the shared junction count
      if(keep->num_juncs < cand->num_juncs) break;
      if(keep->orient != cand->orient) break;
      if(binary_seqs_cmp(keep->seq, cmp_juncs, cand->seq, cmp_juncs) != 0) break;

      if(keep->num_juncs == cand->num_juncs)
      {
        // Identical paths: fold cand's colours and counts into keep, drop cand
        gpath_colset_or_mt(keep, cand, ncols);
        gpath_set_nseen_sum_mt(keep, subset->gpset, cand, subset->gpset);
        paths[lo] = NULL;
      }
      else if(gpath_colset_rm_intersect(keep, cand, ncols) == 0)
      {
        // cand is a substring of keep and has no colours left of its own
        paths[lo] = NULL;
      }
    }
  }

  // Close the gaps left by removed entries, preserving order
  nkept = 0;
  for(hi = 0; hi < npaths; hi++) {
    if(paths[hi] != NULL) paths[nkept++] = paths[hi];
  }
  subset->list.len = nkept;
}
// Compare by orient, sequence int gpath_cmp(const GPath *a, const GPath *b) { int ret = (int)a->orient - (int)b->orient; return ret ? ret : binary_seqs_cmp(a->seq, a->num_juncs, b->seq, b->num_juncs); }