Exemplo n.º 1
0
/* Round-trip check for an inverted chain: <inverted_chain> is converted back
   into a regular chain (using <gen_total_length> and <gen_offset>) and the
   result is compared with <orig_chain>.

   Returns true if both chains hold the same number of forward ranges and the
   same number of reverse ranges, and if all ranges agree in their start and
   end positions. Returns false otherwise. The temporary chain created for the
   comparison is freed on every path. */
static bool conversion_is_correct(GthChain *orig_chain,
                                  GthInvertedChain *inverted_chain,
                                  unsigned long gen_total_length,
                                  unsigned long gen_offset)
{
  GthChain *roundtrip_chain = gth_chain_new();
  const GtRange *expected, *actual;
  unsigned long idx, num_of_ranges;
  bool identical;

  convert_inverted_chain_to_chain(roundtrip_chain, inverted_chain,
                                  gen_total_length, gen_offset);

  /* the number of (potential) exons has to agree on both strands */
  identical = gt_array_size(orig_chain->forwardranges) ==
              gt_array_size(roundtrip_chain->forwardranges) &&
              gt_array_size(orig_chain->reverseranges) ==
              gt_array_size(roundtrip_chain->reverseranges);

  if (identical) {
    /* the positions of all (potential) exons have to agree, too; the reverse
       ranges are visited with the indices of the forward ranges */
    num_of_ranges = gt_array_size(orig_chain->forwardranges);
    for (idx = 0; idx < num_of_ranges; idx++) {
      expected = (GtRange*) gt_array_get(orig_chain->forwardranges, idx);
      actual = (GtRange*) gt_array_get(roundtrip_chain->forwardranges, idx);
      if (expected->start != actual->start || expected->end != actual->end) {
        identical = false;
        break;
      }
      /* the reverse ranges are only looked at if the forward ranges matched */
      expected = (GtRange*) gt_array_get(orig_chain->reverseranges, idx);
      actual = (GtRange*) gt_array_get(roundtrip_chain->reverseranges, idx);
      if (expected->start != actual->start || expected->end != actual->end) {
        identical = false;
        break;
      }
    }
  }

  gth_chain_delete(roundtrip_chain);

  return identical;
}
Exemplo n.º 2
0
/* Converts the global chain <chain>, whose elements refer to entries of
   <fragments>, into a GthChain and adds it to the chain collection stored in
   <data> (which is interpreted as a GthSaveChainInfo*).

   If the chain does not pass the global chain filter
   (globalchainislongenough()), the freshly created GthChain is deleted again
   and nothing is stored. Otherwise the second-sequence ranges of the chained
   fragments become the forward ranges (overlapping neighbours are merged), the
   chain is optionally enriched, the ranges are mirrored to the opposite
   strand, and jump tables are computed if requested.

   <num_of_fragments> is passed on to the enrichment and jump table helpers.
   <max_gap_width> is only used inside an assertion. */
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *save_info = (GthSaveChainInfo*) data;
  GthChain *new_chain;
  GtRange exon, genomicrange, *prev;
  unsigned long idx;

  gt_assert(chain_is_colinear(chain, fragments));

  if (save_info->comments) {
    gt_file_xprintf(save_info->outfp,
                    "%c process global chain with score %ld\n", COMMENTCHAR,
                    gt_chain_get_score(chain));
    gt_file_xprintf(save_info->outfp, "%c process global chain with the "
                       "following fragments\n", COMMENTCHAR);
    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      showfragment(fragments + gt_chain_get_fragnum(chain, idx),
                   save_info->outfp);
    }
  }

  /* set up the new chain with the file and sequence numbers of this run */
  new_chain = gth_chain_new();
  new_chain->gen_file_num = save_info->gen_file_num;
  new_chain->gen_seq_num  = save_info->gen_seq_num;
  new_chain->ref_file_num = save_info->ref_file_num;
  new_chain->ref_seq_num  = save_info->ref_seq_num;

  /* a chain contains at least one fragment */
  gt_assert(gt_chain_size(chain));

  /* global chain filter: chains which are not long enough are dropped */
  if (!globalchainislongenough(chain, fragments, &new_chain->refseqcoverage,
                               save_info->gcmincoverage,
                               save_info->referencelength, save_info->stat,
                               save_info->comments, save_info->outfp)) {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!save_info->paralogs);
    if (save_info->comments) {
      gt_file_xprintf(save_info->outfp, "%c global chain discarded\n",
                         COMMENTCHAR);
    }
    gth_chain_delete(new_chain);
    return;
  }

  /* store all potential exons as forward ranges */
  for (idx = 0; idx < gt_chain_size(chain); idx++) {
    const GtFragment *frag = fragments + gt_chain_get_fragnum(chain, idx);
    exon.start = frag->startpos2;
    exon.end = frag->endpos2;

    if (idx > 0) {
      /* the first fragment is always stored, i.e., a last range exists here */
      prev = (GtRange*) gt_array_get_last(new_chain->forwardranges);
      if (exon.start <= prev->end) {
        /* overlap with the previously stored range -> extend that range */
        gt_assert(prev->end <= exon.end);
        prev->end = exon.end;
        continue;
      }
#ifndef NDEBUG
      /* gap width is smaller or equal than the maximum gap width */
      gt_assert((exon.start - 1 - prev->end + 1 - 1) <= max_gap_width);
#endif
    }

    /* no overlap -> store as a new range */
    gt_array_add(new_chain->forwardranges, exon);
  }

  /* the genomic range is determined before the chain is possibly enriched */
  genomicrange = chain_get_genomicrange(new_chain);

  if (save_info->enrichchains) {
    enrich_chain(new_chain, fragments, num_of_fragments, save_info->comments,
                 save_info->outfp);
  }

  gt_assert(gt_ranges_are_consecutive(new_chain->forwardranges));

  /* derive the reverse ranges from the forward ranges */
  gt_ranges_copy_to_opposite_strand(new_chain->reverseranges,
                                    new_chain->forwardranges,
                                    save_info->gen_total_length,
                                    save_info->gen_offset);

  /* compute the jump tables, if this was requested */
  if (save_info->jump_table) {
    GthJumpTable *fwd_table, *rev_table;
    GtArray *fragment_list;

    fragment_list = make_list_of_chain_fragments(chain, fragments,
                                                 num_of_fragments,
                                                 save_info->enrichchains,
                                                 &genomicrange);
    fwd_table = save_info->jump_table_new(gt_array_get_space(fragment_list),
                                          gt_array_size(fragment_list),
                                          save_info->jtdebug);
    rev_table = save_info->jump_table_new_reverse(fwd_table,
                                                  save_info->gen_total_length,
                                                  save_info->gen_offset,
                                                  save_info->ref_total_length,
                                                  save_info->ref_offset);
    /* the new chain must not own any jump tables yet */
    gt_assert(!new_chain->forward_jump_table);
    new_chain->forward_jump_table = fwd_table;
    gt_assert(!new_chain->reverse_jump_table);
    new_chain->reverse_jump_table = rev_table;
    gt_array_delete(fragment_list);
    new_chain->jump_table_delete = save_info->jump_table_delete;
  }

  /* hand the chain (i.e., the array of potential exons) to the collection */
  gth_chain_collection_add(save_info->chain_collection, new_chain);

  if (save_info->comments) {
    gt_file_xprintf(save_info->outfp, "%c global chain with the following "
                                      "ranges has been saved\n",COMMENTCHAR);
    gt_file_xprintf(save_info->outfp, "%c forward ranges:\n", COMMENTCHAR);
    gt_file_xprintf(save_info->outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(new_chain->forwardranges, save_info->outfp);
    gt_file_xprintf(save_info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
    gt_file_xprintf(save_info->outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(new_chain->reverseranges, save_info->outfp);
  }

  /* output stored chains here
     (Mohamed needed this to compare the chaining phase of gth with CHAINER)
   */
  if (save_info->stopafterchaining) {
    gt_file_xprintf(save_info->outfp,
                    "%c gl. chain with coverage=%.2f and score %ld "
                    "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                    new_chain->refseqcoverage, gt_chain_get_score(chain),
                    new_chain->gen_seq_num,
                    SHOWSTRAND(save_info->directmatches),
                    new_chain->ref_seq_num);

    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      showfragment(fragments + gt_chain_get_fragnum(chain, idx),
                   save_info->outfp);
    }
  }
}
/* Computes the spliced alignment <sa> for <raw_chain> by calling the DNA DP
   (gth_align_dna(), if <call_dna_dp> is true) or the protein DP
   (gth_align_protein(), otherwise). <forward> selects the genomic strand: the
   forward ranges, sequences and jump table are used if it is true, the
   reverse (complement) ones otherwise.

   The DP is retried in a loop:
   - if the intron cutout technique is used and the DP returns
     GTH_ERROR_CUTOUT_NOT_IN_INTRON, the cutout delta is increased by
     introncutoutinfo->icdeltaincrease as long as iterations are left;
   - if the DP returns GTH_ERROR_MATRIX_ALLOCATION_FAILED without intron
     cutout and introncutoutinfo->autoicmaxmatrixsize is > 0, the DP is
     repeated with the intron cutout technique switched on.

   Returns 0 on success, GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED if the DP
   reported that error, GTH_ERROR_SA_COULD_NOT_BE_DETERMINED if no retry was
   possible anymore, and -1 for any other DP error. The two temporary chains
   allocated here are freed on every return path. */
static int callsahmt(bool call_dna_dp,
                     GthSA *sa,
                     bool forward,
                     GtUword gen_file_num,
                     GtUword ref_file_num,
                     GthChain *raw_chain,
                     GtUword gen_total_length,
                     GtUword gen_offset,
                     const GtRange *gen_seq_bounds,
                     const GtRange *gen_seq_bounds_rc,
                     const unsigned char *ref_seq_tran,
                     const unsigned char *ref_seq_orig,
                     GtUword ref_total_length,
                     GtUword ref_offset,
                     GthInput *input,
                     Introncutoutinfo *introncutoutinfo,
                     GthStat *stat,
                     GtUword chainctr,
                     GtUword num_of_chains,
                     GtUword translationtable,
                     bool directmatches,
                     bool proteinexonpenal,
                     GthSpliceSiteModel *splice_site_model,
                     GthDPOptionsCore *dp_options_core,
                     GthDPOptionsEST *dp_options_est,
                     GthDPOptionsPostpro *dp_options_postpro,
                     GthDNACompletePathMatrixJT dna_complete_path_matrix_jt,
                     GthProteinCompletePathMatrixJT
                     protein_complete_path_matrix_jt,
                     GthOutput *out)
{
  int rval, result = 0;
  GthChain *actual_chain, *contracted_chain, *used_chain;
  GtUword icdelta = introncutoutinfo->icinitialdelta,
                iciterations = introncutoutinfo->iciterations;
  bool useintroncutout = introncutoutinfo->introncutout;
  /* initially useintroncutout is set to the value of
     introncutoutinfo->introncutout, if the automatic intron cutout technique
     is activated it can be set to true if a matrix allocation error
     (GTH_ERROR_MATRIX_ALLOCATION_FAILED) occurs */

  gt_assert(sa);

  actual_chain = gth_chain_new();
  contracted_chain = gth_chain_new();

  for (;;) {
    /* reset the ranges of the actual chain */
    gt_array_set_size(actual_chain->forwardranges, 0);
    gt_array_set_size(actual_chain->reverseranges, 0);

    /* copy raw chain to actual chain */
    gth_chain_copy(actual_chain, raw_chain);

    /* shorten potential introns and compute spliced sequence, if the intron
       cutout technique is used */
    if (useintroncutout) {
      /* shorten potential introns */
      gth_chain_shorten_introns(actual_chain, icdelta,
                                introncutoutinfo->icminremintronlength,
                                gen_total_length, gen_offset, out->comments,
                                out->outfp);
    }
    else
      gth_chain_contract(contracted_chain, actual_chain);

    if (out->showverbose) {
      show_matrix_calculation_status(out->showverbose, forward,
                                     gth_sa_ref_strand_forward(sa),
                                     useintroncutout, chainctr, num_of_chains,
                                     icdelta, gen_file_num,
                                     gth_input_num_of_gen_files(input),
                                     ref_file_num,
                                     gth_input_num_of_ref_files(input),
                                     directmatches, out->verboseseqs,
                                     gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }

    /* allocate space for DP parameter */
    if (out->comments) {
      gt_file_xprintf(out->outfp, "%c alloc space for DP param "
                         "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR,
                         gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }
    /* with intron cutout the shortened chain is aligned, otherwise the
       contracted one */
    used_chain = useintroncutout ? actual_chain : contracted_chain;

    /* The variable 'forward' denotes the genomic strand on which the DP is
       applied. */
    if (forward) {
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->forwardranges,
                             gth_input_current_gen_seq_tran(input),
                             gth_input_current_gen_seq_orig(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds, splice_site_model, dp_options_core,
                             dp_options_est, dp_options_postpro,
                             dna_complete_path_matrix_jt,
                             raw_chain->forward_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        rval = gth_align_protein(sa, used_chain->forwardranges,
                                 gth_input_current_gen_seq_tran(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->forward_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }
    else {
      /* the DP is called with the reverse position specifiers */
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->reverseranges,
                             gth_input_current_gen_seq_tran_rc(input),
                             gth_input_current_gen_seq_orig_rc(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds_rc, splice_site_model,
                             dp_options_core, dp_options_est,
                             dp_options_postpro, dna_complete_path_matrix_jt,
                             raw_chain->reverse_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        /* NOTE(review): unlike the reverse DNA call above, this call passes
           gen_seq_bounds instead of gen_seq_bounds_rc -- confirm against
           gth_align_protein() whether this is intended */
        rval = gth_align_protein(sa, used_chain->reverseranges,
                                 gth_input_current_gen_seq_tran_rc(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->reverse_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }

    if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) {
      /* leave via the common cleanup below, so that the chains are freed */
      result = GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED;
      break;
    }

    /* handling of special error codes GTH_ERROR_CUTOUT_NOT_IN_INTRON and
       GTH_ERROR_MATRIX_ALLOCATION_FAILED from DP; both are mapped to
       GTH_ERROR_SA_COULD_NOT_BE_DETERMINED if no retry is possible */
#ifndef NDEBUG
    if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON);
#endif
    if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) {
      /* the intron cutout technique failed -> increase counter */
      gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat);
      /* compare before decrementing: iciterations is unsigned, so
         decrementing a value of 0 would wrap around instead of terminating */
      if (iciterations > 1) {
        /* if an iteration is left, decrease the remaining iterations,
           increase icdelta, and continue the loop */
        iciterations--;
        icdelta += introncutoutinfo->icdeltaincrease;
        continue;
      }
      /* no iteration left, discard SA */
      gth_stat_increment_numofundeterminedSAs(stat);
      result = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) {
      if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) {
        /* if the automatic intron cutout technique is enabled and a ``normal''
           DP returned with the matrix allocation error, set useintroncutout,
           increase counter, and continue */
        if (out->showverbose) {
          out->showverbose("matrix allocation failed, use intron cutout "
                           "technique");
        }
        gth_stat_increment_numofautointroncutoutcalls(stat);
        useintroncutout = true;
        continue;
      }
      /* otherwise increase relevant statistics and return with error */
      gth_stat_increment_numoffailedmatrixallocations(stat);
      gth_stat_increment_numofundeterminedSAs(stat);
      result = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval) {
      /* any other DP error */
      result = -1;
    }
    break;
  }

#if 0
  if (!result && out->comments) {
    gt_file_xprintf(out->outfp, "%c this SA has been computed:\n", COMMENTCHAR);
    gth_sa_show(sa, input, out->outfp);
  }
#endif

  /* free the temporary chains (common to success and all error paths) */
  gth_chain_delete(actual_chain);
  gth_chain_delete(contracted_chain);

  return result;
}