Example #1
0
static bool chain_is_filled_and_consistent(GthChain *chain,
                                           unsigned long gen_total_length,
                                           unsigned long gen_offset)
{
  GtArray *testranges;

  /* check of file sequence numbers are defined */
  if (chain->gen_file_num == GT_UNDEF_ULONG ||
      chain->gen_seq_num  == GT_UNDEF_ULONG ||
      chain->ref_file_num == GT_UNDEF_ULONG ||
      chain->ref_seq_num  == GT_UNDEF_ULONG) {
    return false;
  }

  if (!gt_ranges_are_consecutive(chain->forwardranges))
    return false;

  testranges = gt_array_new(sizeof (GtRange));
  gt_ranges_copy_to_opposite_strand(testranges, chain->reverseranges,
                                    gen_total_length, gen_offset);
  if (!gt_ranges_are_equal(testranges, chain->forwardranges)) {
    gt_array_delete(testranges);
    return false;
  }

  gt_array_delete(testranges);

  return true;
}
Example #2
0
bool gth_sa_exons_are_forward_and_consecutive(const GthSA *sa)
{
  GtArray *ranges;
  gt_assert(sa);
  ranges = gt_array_new(sizeof (GtRange));
  gth_sa_get_exons(sa, ranges);
  if (!gt_ranges_are_consecutive(ranges)) {
    gt_array_delete(ranges);
    return false;
  }
  gt_array_delete(ranges);
  return true;
}
Example #3
0
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GtRange range;
  GthChain *gth_chain;
  unsigned long i, fragnum;

  gt_assert(chain_is_colinear(chain, fragments));

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                       COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                       "following fragments\n", COMMENTCHAR);
    for (i = 0; i < gt_chain_size(chain); i++)
      showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
  }

  /* init */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num  = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num  = info->ref_seq_num;

  /* chain has a minimum length of 1 */
  gt_assert(gt_chain_size(chain));

  /* global chain filter */
  if (globalchainislongenough(chain, fragments,
                              &gth_chain->refseqcoverage, info->gcmincoverage,
                              info->referencelength, info->stat, info->comments,
                              info->outfp)) {
    /* save all potential exons */
    for (i = 0; i < gt_chain_size(chain); i++) {
      fragnum = gt_chain_get_fragnum(chain, i);
      range.start = fragments[fragnum].startpos2;
      range.end = fragments[fragnum].endpos2;

      /* check for overlap */
      if (i > 0 &&
         range.start <=
         ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end) {
        /* overlap found -> modify last range */
        gt_assert(((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                  ->end <= range.end);
        ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end =
          range.end;
      }
      else {
#ifndef NDEBUG
        if (i > 0) {
          /* gap width is smaller or equal than the maximum gap width */
          gt_assert((range.start - 1 -
                 ((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                 ->end + 1 - 1) <= max_gap_width);
        }
#endif
        /* save range */
        gt_array_add(gth_chain->forwardranges, range);
      }
    }

    GtRange genomicrange = chain_get_genomicrange(gth_chain);

    if (info->enrichchains) {
      enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                   info->outfp);
    }

    gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

    /* copy ranges to opposite strand */
    gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                      gth_chain->forwardranges,
                                      info->gen_total_length,
                                      info->gen_offset);

    /* compute jump table if necessary */
    if (info->jump_table) {
      GthJumpTable *forward_jump_table, *reverse_jump_table;
      GtArray *chain_fragments;
      chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                     num_of_fragments,
                                                     info->enrichchains,
                                                     &genomicrange);
      forward_jump_table =
        info->jump_table_new(gt_array_get_space(chain_fragments),
                             gt_array_size(chain_fragments), info->jtdebug);
      reverse_jump_table =
        info->jump_table_new_reverse(forward_jump_table,
                                     info->gen_total_length, info->gen_offset,
                                     info->ref_total_length, info->ref_offset);
      gt_assert(!gth_chain->forward_jump_table);
      gth_chain->forward_jump_table = forward_jump_table;
      gt_assert(!gth_chain->reverse_jump_table);
      gth_chain->reverse_jump_table = reverse_jump_table;
      gt_array_delete(chain_fragments);
      gth_chain->jump_table_delete = info->jump_table_delete;
    }

    /* save array of potential exons */
    gth_chain_collection_add(info->chain_collection, gth_chain);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain with the following "
                                   "ranges has been saved\n",COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->forwardranges, info->outfp);
      gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->reverseranges, info->outfp);
    }

    /* output stored chains here
       (Mohamed needed this to compare the chaining phase of gth with CHAINER)
     */
    if (info->stopafterchaining) {
      gt_file_xprintf(info->outfp,
                      "%c gl. chain with coverage=%.2f and score %ld "
                      "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                      gth_chain->refseqcoverage, gt_chain_get_score(chain),
                      gth_chain->gen_seq_num, SHOWSTRAND(info->directmatches),
                      gth_chain->ref_seq_num);

      for (i = 0; i < gt_chain_size(chain); i++)
        showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
    }
  }
  else {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!info->paralogs);
    if (info->comments)
      gt_file_xprintf(info->outfp, "%c global chain discarded\n",
                         COMMENTCHAR);
    gth_chain_delete(gth_chain);
  }
}
Example #4
0
/* XXX: change this function: add more sophisticated extension strategy */
void gth_chain_extend_borders(GthChain *chain, const GtRange *gen_seq_bounds,
                              const GtRange *gen_seq_bounds_rc,
                              GT_UNUSED unsigned long gen_total_length,
                              GT_UNUSED unsigned long gen_offset)
{
  long tmpborder;

  /* at least one range in chain */
  gt_assert(gt_array_size(chain->forwardranges));
  /* forward range borders are in considered genomic region */
  gt_assert(gt_ranges_borders_are_in_region(chain->forwardranges,
                                            gen_seq_bounds));
  /* reverse range borders are in considered genomic region */
  gt_assert(gt_ranges_borders_are_in_region(chain->reverseranges,
                                            gen_seq_bounds_rc));
  /* chain->forwardranges is forward and consecutive */
  gt_assert(gt_ranges_are_consecutive(chain->forwardranges));
  /* valid sequence bounds */
  gt_assert(gen_seq_bounds->start <= gen_seq_bounds->end);
  gt_assert(gen_seq_bounds_rc->start <= gen_seq_bounds_rc->end);

  /* set start border, forward strand */
  tmpborder = gt_safe_cast2long(((GtRange*)
                                 gt_array_get_first(chain->forwardranges))
                                 ->start);
  tmpborder -= DPEXTENSION;
  if (tmpborder < gt_safe_cast2long(gen_seq_bounds->start))
    tmpborder = gen_seq_bounds->start;
  ((GtRange*) gt_array_get_first(chain->forwardranges))->start =
    gt_safe_cast2ulong(tmpborder);

  /* set start border, reverse complement strand */
  tmpborder = gt_safe_cast2long(((GtRange*)
                                 gt_array_get_first(chain->reverseranges))
                                ->start);
  tmpborder -= DPEXTENSION;
  if (tmpborder < gt_safe_cast2long(gen_seq_bounds_rc->start))
    tmpborder = gen_seq_bounds_rc->start;
  ((GtRange*) gt_array_get_first(chain->reverseranges))->start =
    gt_safe_cast2ulong(tmpborder);

  /* set end border, forward strand */
  tmpborder = gt_safe_cast2long(((GtRange*)
                                gt_array_get_last(chain->forwardranges))
                                ->end);
  tmpborder += DPEXTENSION;
  if (tmpborder > gt_safe_cast2long(gen_seq_bounds->end))
    tmpborder = gen_seq_bounds->end;
  ((GtRange*) gt_array_get_last(chain->forwardranges))->end =
    gt_safe_cast2ulong(tmpborder);

  /* set end border, reverse complement strand */
  tmpborder = gt_safe_cast2long(((GtRange*)
                                gt_array_get_last(chain->reverseranges))
                                ->end);
  tmpborder += DPEXTENSION;
  if (tmpborder > gt_safe_cast2long(gen_seq_bounds_rc->end))
    tmpborder = gen_seq_bounds_rc->end;
  ((GtRange*) gt_array_get_last(chain->reverseranges))->end =
    gt_safe_cast2ulong(tmpborder);

  gt_assert(chain_is_filled_and_consistent(chain, gen_total_length,
                                           gen_offset));
}