Ejemplo n.º 1
0
/**
 * Collapse redundant entries in a path subset: exact duplicates are merged
 * and shorter paths that are covered by a longer one are dropped, e.g.
 *  {T,TT,TT} -> {TT}
 *  {A,C,CG,CGC} -> {A,CGC}
 *
 * The subset is sorted first if it is not already flagged as sorted.
 * A shorter path is only dropped once none of its colours remain after
 * removing those it shares with the longer path.
 *
 * @param subset  subset to prune in place; its list length is updated
 */
void gpath_subset_rmsubstr(GPathSubset *subset)
{
  // Nothing can be redundant with fewer than two entries
  if(subset->list.len <= 1) return;
  if(!subset->is_sorted) gpath_subset_sort(subset);

  GPath **paths = subset->list.b;
  size_t npaths = subset->list.len;
  size_t ncols = subset->gpset->ncols;
  size_t hi, lo, shared, nkept;

  // Walk from the last entry towards the front; each surviving entry `hi`
  // absorbs or strips the entries immediately before it.
  for(hi = npaths-1; hi > 0; hi--)
  {
    GPath *keeper = paths[hi];
    if(keeper == NULL) continue; // already removed by a later entry

    // Visit hi-1, hi-2, ..., 0 (the loop ends once lo has reached zero)
    for(lo = hi; lo-- > 0; )
    {
      GPath *cand = paths[lo];
      if(cand == NULL) continue;

      // Candidate cannot be covered by keeper if it is longer...
      if(keeper->num_juncs < cand->num_juncs) break;
      // ...or faces the other way...
      if(keeper->orient != cand->orient) break;
      // ...or differs over the junctions they have in common
      shared = MIN2(keeper->num_juncs, cand->num_juncs);
      if(binary_seqs_cmp(keeper->seq, shared, cand->seq, shared) != 0) break;

      if(keeper->num_juncs == cand->num_juncs)
      {
        // Identical paths: fold candidate's colours and counts into keeper,
        // then discard the candidate
        gpath_colset_or_mt(keeper, cand, ncols);
        gpath_set_nseen_sum_mt(keeper, subset->gpset,
                               cand, subset->gpset);
        paths[lo] = NULL;
      }
      else if(gpath_colset_rm_intersect(keeper, cand, ncols) == 0)
      {
        // Candidate is shorter and, after stripping the colours keeper
        // already has, no colours are left -> discard it
        paths[lo] = NULL;
      }
    }
  }

  // Compact the array, squeezing out the NULL slots
  nkept = 0;
  for(hi = 0; hi < npaths; hi++) {
    if(paths[hi] != NULL) {
      paths[nkept] = paths[hi];
      nkept++;
    }
  }

  subset->list.len = nkept;
}
Ejemplo n.º 2
0
// Order two paths: orientation is the primary key, junction sequence the
// secondary one. Returns the orientation difference when it is non-zero,
// otherwise whatever binary_seqs_cmp() reports for the two sequences.
int gpath_cmp(const GPath *a, const GPath *b)
{
  int orient_diff = (int)a->orient - (int)b->orient;
  if(orient_diff != 0) return orient_diff;

  // Same orientation: fall back to comparing the junction sequences
  return binary_seqs_cmp(a->seq, a->num_juncs, b->seq, b->num_juncs);
}