Beispiel #1
0
/* Add up the lengths of the startpos1..endpos1 ranges of all fragments in
   <chain>. Whenever a fragment starts at or before the end of the fragment
   directly preceding it in the chain, the span from its start to the
   predecessor's end is subtracted again. The result is asserted to be
   positive. */
static unsigned long totallengthoffragments(GtChain *chain,
                                            GtFragment *fragments)
{
  unsigned long idx, fragnum, start, end,
                prev_end = GT_UNDEF_ULONG;
  long sum = 0; /* signed, so that the final assertion can catch underflow */

  for (idx = 0; idx < gt_chain_size(chain); idx++) {
    fragnum = gt_chain_get_fragnum(chain, idx);
    start = fragments[fragnum].startpos1;
    end = fragments[fragnum].endpos1;

    /* only forward ranges are expected */
    gt_assert(start <= end);

    sum += end - start + 1;

    /* the first fragment has no predecessor it could overlap with */
    if (idx > 0 && start <= prev_end)
      sum -= prev_end - start + 1;

    prev_end = end;
  }

  gt_assert(sum > 0);

  return sum;
}
Beispiel #2
0
/* Return true if, for every pair of adjacent fragments in <chain>, all four
   coordinates (startpos1, endpos1, startpos2, endpos2) of the later fragment
   are strictly larger than those of the earlier one; return false otherwise.
   A chain with fewer than two fragments is always colinear. */
static bool chain_is_colinear(GtChain *chain, GtFragment *fragments)
{
  unsigned long next;

  /* starting at 1 makes the loop a no-op for chains of size 0 or 1 */
  for (next = 1; next < gt_chain_size(chain); next++) {
    GtFragment *left  = fragments + gt_chain_get_fragnum(chain, next - 1),
               *right = fragments + gt_chain_get_fragnum(chain, next);
    bool strictly_increasing = left->startpos1 < right->startpos1 &&
                               left->endpos1   < right->endpos1   &&
                               left->startpos2 < right->startpos2 &&
                               left->endpos2   < right->endpos2;

    if (!strictly_increasing)
      return false;
  }
  return true;
}
/* Chain processing callback. Logs the size and score of chain <c> and the
   coordinates of each of its fragments (looked up in <f>), and appends the
   current chain number to the <chains> array of the GtHMMERSingleHit stored
   in each fragment's <data> member. <data> must point to a GtUword holding
   the current chain number; it is incremented once per call. <nof_frags> and
   <gap_length> are unused. */
static void gt_ltrdigest_pdom_visitor_chainproc(GtChain *c, GtFragment *f,
                                             GT_UNUSED GtUword nof_frags,
                                             GT_UNUSED GtUword gap_length,
                                             void *data)
{
  GtUword i,
          chainsize = gt_chain_size(c),
          *chainno = (GtUword*) data;

  /* the chain size is unsigned and must be printed with GT_WU, the score is
     signed and is printed with GT_WD */
  gt_log_log("resulting chain has " GT_WU " GtFragments, score " GT_WD,
             chainsize,
             gt_chain_get_score(c));
  for (i = 0; i < chainsize; i++) {
    const GtFragment *frag = &f[gt_chain_get_fragnum(c, i)];
    gt_log_log("(" GT_WU " " GT_WU ") (" GT_WU " " GT_WU ")", frag->startpos1,
               frag->endpos1, frag->startpos2, frag->endpos2);
    /* remember for this hit that it is part of chain number *chainno */
    gt_array_add(((GtHMMERSingleHit*) frag->data)->chains, *chainno);
  }
  (*chainno)++;
  gt_log_log("\n");
}
/* Chain processing callback. Logs the size and score of chain <c> and the
   coordinates of each of its fragments (looked up in <f>), and appends the
   current chain number to the <chains> array of the GtHMMERSingleHit stored
   in each fragment's <data> member. <data> must point to an unsigned long
   holding the current chain number; it is incremented once per call.
   <nof_frags> and <gap_length> are unused. */
static void gt_ltrdigest_pdom_visitor_chainproc(GtChain *c, GtFragment *f,
                                             GT_UNUSED unsigned long nof_frags,
                                             GT_UNUSED unsigned long gap_length,
                                             void *data)
{
  unsigned long i,
                chainsize = gt_chain_size(c),
                *chainno = (unsigned long*) data;

  /* the chain size is unsigned (%lu), the score is signed (%ld) */
  gt_log_log("resulting chain has %lu GtFragments, score %ld",
             chainsize,
             gt_chain_get_score(c));
  for (i = 0; i < chainsize; i++) {
    const GtFragment *frag = &f[gt_chain_get_fragnum(c, i)];
    gt_log_log("(%lu %lu) (%lu %lu)", frag->startpos1, frag->endpos1,
                                      frag->startpos2, frag->endpos2);
    /* remember for this hit that it is part of chain number *chainno */
    gt_array_add(((GtHMMERSingleHit*) frag->data)->chains, *chainno);
  }
  (*chainno)++;
  gt_log_log("\n");
}
Beispiel #5
0
/* Convert <fragment> into a GthJTMatch (positions 2 become the genomic range,
   positions 1 the reference range) and append it to <matches>. */
static void add_fragment_as_jtmatch(GtArray *matches,
                                    const GtFragment *fragment)
{
  GthJTMatch jtmatch;
  jtmatch.gen_range.start = fragment->startpos2;
  jtmatch.gen_range.end   = fragment->endpos2;
  jtmatch.ref_range.start = fragment->startpos1;
  jtmatch.ref_range.end   = fragment->endpos1;
  gt_array_add(matches, jtmatch);
}

/* Return a new array of GthJTMatch elements built from <fragments>.
   Without chain enrichment (<enrichchains> is false) the array contains
   exactly the fragments of <chain>, in chain order. With chain enrichment it
   contains every one of the <num_of_fragments> fragments whose
   startpos2..endpos2 range overlaps <genomicrange>. The caller is responsible
   for deleting the returned array. */
static GtArray* make_list_of_chain_fragments(GtChain *chain,
                                             GtFragment *fragments,
                                             unsigned long num_of_fragments,
                                             bool enrichchains,
                                             const GtRange *genomicrange)
{
  unsigned long idx;
  GtArray *matches;
  gt_assert(chain && fragments && num_of_fragments);
  matches = gt_array_new(sizeof (GthJTMatch));
  if (enrichchains) {
    /* chain enrichment used -> consider all fragments, but keep only those
       overlapping the genomic range of the computed chain */
    for (idx = 0; idx < num_of_fragments; idx++) {
      GtRange candidate;
      candidate.start = fragments[idx].startpos2;
      candidate.end = fragments[idx].endpos2;
      if (gt_range_overlap(genomicrange, &candidate))
        add_fragment_as_jtmatch(matches, fragments + idx);
    }
  }
  else {
    /* no chain enrichment used -> keep exactly the fragments of the chain */
    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      add_fragment_as_jtmatch(matches,
                              fragments + gt_chain_get_fragnum(chain, idx));
    }
  }
  return matches;
}
Beispiel #6
0
/* Process one global <chain> over <fragments>: build a GthChain from it and,
   if it passes the global chain filter (globalchainislongenough()), add it to
   info->chain_collection; otherwise delete it again.
   <data> must point to a GthSaveChainInfo, which supplies sequence/file
   numbers, filter parameters, output settings and the optional jump table
   constructors. <num_of_fragments> is the total number of entries in
   <fragments>; it is passed on to chain enrichment and jump table
   construction. <max_gap_width> is only used in a debug assertion.
   NOTE(review): the signature looks like that of a chain processing callback,
   presumably invoked once per computed chain -- confirm against the caller. */
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GtRange range;
  GthChain *gth_chain;
  unsigned long i, fragnum;

  gt_assert(chain_is_colinear(chain, fragments));

  /* optional verbose output: score and fragments of the incoming chain */
  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                       COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                       "following fragments\n", COMMENTCHAR);
    for (i = 0; i < gt_chain_size(chain); i++)
      showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
  }

  /* init */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num  = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num  = info->ref_seq_num;

  /* chain has a minimum length of 1 */
  gt_assert(gt_chain_size(chain));

  /* global chain filter */
  /* (the call also receives &gth_chain->refseqcoverage, presumably to store
     the computed reference sequence coverage there -- confirm) */
  if (globalchainislongenough(chain, fragments,
                              &gth_chain->refseqcoverage, info->gcmincoverage,
                              info->referencelength, info->stat, info->comments,
                              info->outfp)) {
    /* save all potential exons */
    /* each fragment contributes its startpos2..endpos2 range; a range that
       starts at or before the end of the last stored range is merged into it
       instead of being stored separately */
    for (i = 0; i < gt_chain_size(chain); i++) {
      fragnum = gt_chain_get_fragnum(chain, i);
      range.start = fragments[fragnum].startpos2;
      range.end = fragments[fragnum].endpos2;

      /* check for overlap */
      if (i > 0 &&
         range.start <=
         ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end) {
        /* overlap found -> modify last range */
        gt_assert(((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                  ->end <= range.end);
        ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end =
          range.end;
      }
      else {
#ifndef NDEBUG
        if (i > 0) {
          /* gap width is smaller or equal than the maximum gap width */
          /* the expression simplifies to range.start - last->end - 1, i.e.
             the number of positions strictly between the two ranges; it
             cannot underflow because range.start > last->end holds in this
             branch when i > 0 */
          gt_assert((range.start - 1 -
                 ((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                 ->end + 1 - 1) <= max_gap_width);
        }
#endif
        /* save range */
        gt_array_add(gth_chain->forwardranges, range);
      }
    }

    /* the genomic range is taken before enrich_chain() is called and is used
       below to select the fragments for the jump table
       (NOTE(review): presumably because enrichment may change the forward
       ranges -- confirm) */
    GtRange genomicrange = chain_get_genomicrange(gth_chain);

    if (info->enrichchains) {
      enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                   info->outfp);
    }

    gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

    /* copy ranges to opposite strand */
    gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                      gth_chain->forwardranges,
                                      info->gen_total_length,
                                      info->gen_offset);

    /* compute jump table if necessary */
    /* the forward table is built from the list of chain fragments, the
       reverse table is derived from the forward table; the fragment list is
       only needed temporarily and is deleted afterwards */
    if (info->jump_table) {
      GthJumpTable *forward_jump_table, *reverse_jump_table;
      GtArray *chain_fragments;
      chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                     num_of_fragments,
                                                     info->enrichchains,
                                                     &genomicrange);
      forward_jump_table =
        info->jump_table_new(gt_array_get_space(chain_fragments),
                             gt_array_size(chain_fragments), info->jtdebug);
      reverse_jump_table =
        info->jump_table_new_reverse(forward_jump_table,
                                     info->gen_total_length, info->gen_offset,
                                     info->ref_total_length, info->ref_offset);
      gt_assert(!gth_chain->forward_jump_table);
      gth_chain->forward_jump_table = forward_jump_table;
      gt_assert(!gth_chain->reverse_jump_table);
      gth_chain->reverse_jump_table = reverse_jump_table;
      gt_array_delete(chain_fragments);
      /* store the matching delete function alongside the tables */
      gth_chain->jump_table_delete = info->jump_table_delete;
    }

    /* save array of potential exons */
    /* NOTE(review): gth_chain is not deleted in this branch and is still read
       below; presumably the collection takes ownership -- confirm */
    gth_chain_collection_add(info->chain_collection, gth_chain);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain with the following "
                                   "ranges has been saved\n",COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->forwardranges, info->outfp);
      gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->reverseranges, info->outfp);
    }

    /* output stored chains here
       (Mohamed needed this to compare the chaining phase of gth with CHAINER)
     */
    if (info->stopafterchaining) {
      gt_file_xprintf(info->outfp,
                      "%c gl. chain with coverage=%.2f and score %ld "
                      "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                      gth_chain->refseqcoverage, gt_chain_get_score(chain),
                      gth_chain->gen_seq_num, SHOWSTRAND(info->directmatches),
                      gth_chain->ref_seq_num);

      for (i = 0; i < gt_chain_size(chain); i++)
        showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
    }
  }
  else {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!info->paralogs);
    if (info->comments)
      gt_file_xprintf(info->outfp, "%c global chain discarded\n",
                         COMMENTCHAR);
    /* the chain did not pass the filter and is not stored anywhere */
    gth_chain_delete(gth_chain);
  }
}