Ejemplo n.º 1
0
/* Post-processes the potential introns stored in <intronstoprocess> (an array
   of GtRange) in place.
   Every range whose length is at least 2 * <icdelta> + <icminremintronlength>
   is kept, with its start increased and its end decreased by <icdelta>.
   Shorter ranges are dropped. The relative order of the kept ranges is
   preserved. */
static void potentialintronspostpro(GtArray *intronstoprocess,
                                    unsigned long icdelta,
                                    unsigned long icminremintronlength)
{
  const unsigned long required_length = 2 * icdelta + icminremintronlength;
  GtArray *saved = gt_array_new(sizeof (GtRange));
  unsigned long idx;

  /* move all candidates aside and start over with an empty result array */
  gt_array_add_array(saved, intronstoprocess);
  gt_array_set_size(intronstoprocess, 0);

  for (idx = 0; idx < gt_array_size(saved); idx++) {
    GtRange candidate = *(GtRange*) gt_array_get(saved, idx);

    /* too short: this intron is not cut out later */
    if (candidate.end - candidate.start + 1 < required_length)
      continue;

    /* long enough: shrink by the delta on both sides and keep it, that is,
       this intron is cut out later */
    candidate.start += icdelta;
    candidate.end -= icdelta;
    gt_array_add(intronstoprocess, candidate);
  }

  gt_array_delete(saved);
}
Ejemplo n.º 2
0
/* Applies the pending start cutoffs of <bp> and clears them afterwards.
   - genomic cutoff: moves gen_dp_start forward and shrinks gen_dp_length
   - reference cutoff: moves ref_dp_start forward and shrinks ref_dp_length
   - eop cutoff: shrinks the edit operation array by that many entries
   All four DP range values must be defined when this function is called. */
void gth_backtrace_path_cutoff_start(GthBacktracePath *bp)
{
  gt_assert(bp);
  gt_assert(bp->gen_dp_start != GT_UNDEF_ULONG);
  gt_assert(bp->gen_dp_length != GT_UNDEF_ULONG);
  gt_assert(bp->ref_dp_start != GT_UNDEF_ULONG);
  gt_assert(bp->ref_dp_length != GT_UNDEF_ULONG);

  /* genomic side */
  if (bp->cutoffs.start.genomiccutoff != 0) {
    /* the cutoff must not exceed the remaining genomic DP length */
    gt_assert(bp->gen_dp_length >= bp->cutoffs.start.genomiccutoff);
    bp->gen_dp_start  += bp->cutoffs.start.genomiccutoff;
    bp->gen_dp_length -= bp->cutoffs.start.genomiccutoff;
    bp->cutoffs.start.genomiccutoff = 0;
  }

  /* reference side */
  if (bp->cutoffs.start.referencecutoff != 0) {
    /* the cutoff must not exceed the remaining reference DP length */
    gt_assert(bp->ref_dp_length >= bp->cutoffs.start.referencecutoff);
    bp->ref_dp_start  += bp->cutoffs.start.referencecutoff;
    bp->ref_dp_length -= bp->cutoffs.start.referencecutoff;
    bp->cutoffs.start.referencecutoff = 0;
  }

  /* edit operations: drop the trailing <eopcutoff> array entries */
  if (bp->cutoffs.start.eopcutoff != 0) {
    gt_array_set_size(bp->editoperations,
                      gt_array_size(bp->editoperations)
                      - bp->cutoffs.start.eopcutoff);
    bp->cutoffs.start.eopcutoff = 0;
  }
}
Ejemplo n.º 3
0
/* Resets <bp>: marks the alphabet type and the dummy index as undefined and
   empties the edit operation array (the array object itself is kept). */
void gth_backtrace_path_reset(GthBacktracePath *bp)
{
  gt_assert(bp);

  bp->dummy_index = GT_UNDEF_ULONG;
  bp->alphatype = UNDEF_ALPHA;
  gt_array_set_size(bp->editoperations, 0);
}
Ejemplo n.º 4
0
/* Shortens the potential introns of <chain>.
   The chain is converted into an inverted chain, whose forward ranges are
   post-processed as potential introns using <icdelta> and
   <icminremintronlength>. The forward and reverse ranges of <chain> are then
   rebuilt from the inverted chain, using <gen_total_length> and <gen_offset>.
   If <comments> is true, the forward ranges of <chain> are shown on <outfp>
   before and after the post processing. */
void gth_chain_shorten_introns(GthChain *chain, unsigned long icdelta,
                               unsigned long icminremintronlength,
                               unsigned long gen_total_length,
                               unsigned long gen_offset, bool comments,
                               GtFile *outfp)
{
  GthInvertedChain inverted_chain;

  gt_assert(chain);

  inverted_chain_init(&inverted_chain);

  if (comments) {
    gt_file_xprintf(outfp,
                    "%c forward DP ranges (before post processing "
                    "of potential introns):\n",
                    COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  /* step 1: chain -> inverted_chain */
  convert_chain_to_inverted_chain(&inverted_chain, chain);
  gt_assert(conversion_is_correct(chain, &inverted_chain, gen_total_length,
                                  gen_offset));

  /* step 2: post-process the potential introns */
  potentialintronspostpro(inverted_chain.forwardranges, icdelta,
                          icminremintronlength);

  /* step 3: empty the chain and refill it, inverted_chain -> chain */
  gt_array_set_size(chain->forwardranges, 0);
  gt_array_set_size(chain->reverseranges, 0);
  convert_inverted_chain_to_chain(chain, &inverted_chain, gen_total_length,
                                  gen_offset);

  if (comments) {
    gt_file_xprintf(outfp,
                    "%c forward DP ranges (after post processing "
                    "of potential introns):\n",
                    COMMENTCHAR);
    gt_file_xprintf(outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(chain->forwardranges, outfp);
  }

  inverted_chain_free(&inverted_chain);
}
Ejemplo n.º 5
0
/* Cuts off the part of the backtrace path <bp> which the path walker <pw> has
   already walked. Only forward path walkers are supported; the reverse case
   is not implemented and triggers an assertion.
   The completely walked edit operations are removed from the end of the edit
   operation array, the length of the partially walked edit operation (if any)
   is reduced by the steps taken in it, and the genomic and reference DP
   ranges are moved by the distances the walker has covered.
   If <showeops> is true, the path walker and the backtrace path (before and
   after the edit operation removal) are shown on <outfp>. */
void gth_backtrace_path_cutoff_walked_path(GthBacktracePath *bp,
                                           const GthPathWalker *pw,
                                           bool showeops, GtFile *outfp)
{
  unsigned int length;

  gt_assert(bp && pw);

  if (!gth_path_walker_is_forward(pw)) {
    gt_assert(0); /* XXX: implement reverse case */
    gt_assert(!backtrace_path_end_cutoffs_are_set(bp));
    return;
  }

  gt_assert(!backtrace_path_start_cutoffs_are_set(bp));

  if (showeops) {
    gt_file_xprintf(outfp, "%s(): show path walker\n", __func__);
    gth_path_walker_show(pw, outfp);
    gt_file_xprintf(outfp,
                    "%s(): show backtrace path (before eop removal)\n",
                    __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* drop the edit operations which have been walked completely */
  gt_array_set_size(bp->editoperations,
                    gt_array_size(bp->editoperations)
                    - gth_path_walker_actual_eops(pw));

  if (showeops) {
    gt_file_xprintf(outfp,
                    "%s(): show backtrace path (after eop removal)\n",
                    __func__);
    gth_backtrace_path_show(bp, false, 0, outfp);
  }

  /* shorten the (now last) edit operation which has been walked partially */
  if (gth_path_walker_steps_in_current_eop(pw)) {
    length = gt_editoperation_length(
                         *(Editoperation*) gt_array_get_last(bp->editoperations),
                         bp->alphatype == PROTEIN_ALPHA);
    gt_assert(length > gth_path_walker_steps_in_current_eop(pw));
    gt_editoperation_set_length(
                         gt_array_get_last(bp->editoperations),
                         length - gth_path_walker_steps_in_current_eop(pw),
                         bp->alphatype == PROTEIN_ALPHA);
  }

  /* move the genomic and reference DP ranges past the walked distances */
  bp->gen_dp_start  += gth_path_walker_gen_distance(pw);
  bp->gen_dp_length -= gth_path_walker_gen_distance(pw);
  bp->ref_dp_start  += gth_path_walker_ref_distance(pw);
  bp->ref_dp_length -= gth_path_walker_ref_distance(pw);
}
Ejemplo n.º 6
0
/* Rewrites <editoperations> (an array of Editoperation) in place such that
   the first match edit operation following (in array order) an intron with 1
   or 2 bases left has length 1: a longer match is split into a match of
   length 1 followed by a match with the remaining length.
   A mismatch (with or without 1 gap) following such an intron ends the search
   for a match to split; all other edit operations are copied unchanged.
   An empty array is left untouched. */
static void ensure_eop_of_len_1_before_introns(GtArray *editoperations)
{
  Editoperation eop, *eopptr, *eopend;
  Eoptype eoptype;
  unsigned long eoplength;
  GtArray *backup;
  bool processing_necessary = false,
       split_match          = false;

  /* nothing to do for an empty array; returning here also avoids forming the
     pointer ``space + 0 - 1'' below, which would lie before the array */
  if (gt_array_size(editoperations) == 0)
    return;

  /* check if processing is necessary
     the check is rather simple, it might be possible that
     ``processing_necessary'' is set to ``true'' whereas in fact no processing
     is necessary
     (the last edit operation is deliberately not inspected: nothing follows
     it which could be split) */
  eopend = (Editoperation*) gt_array_get_space(editoperations) +
           gt_array_size(editoperations) - 1;
  for (eopptr = gt_array_get_space(editoperations);
       eopptr < eopend;
       eopptr++) {
    eoptype = gt_editoperation_type(*eopptr, true);
    if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
        eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
      processing_necessary = true;
      break;
    }
  }

  if (processing_necessary) {
    /* init backup for the editoperations */
    backup = gt_array_new(sizeof (Editoperation));

    /* fill backup */
    gt_array_add_array(backup, editoperations);

    /* reset the original edit operations */
    gt_array_set_size(editoperations, 0);

    /* process the backup and fill the original editoperations */
    eopend = (Editoperation*) gt_array_get_space(backup) +
             gt_array_size(backup);
    for (eopptr = gt_array_get_space(backup); eopptr < eopend; eopptr++) {

      /* classify by the TYPE of the edit operation (not by its length), in
         agreement with the detection loop above */
      eoptype = gt_editoperation_type(*eopptr, true);

      if (eoptype == EOP_TYPE_INTRON_WITH_1_BASE_LEFT ||
          eoptype == EOP_TYPE_INTRON_WITH_2_BASES_LEFT) {
        split_match = true;
      }
      else if (split_match) {
        if (eoptype == EOP_TYPE_MATCH) {
          split_match = false;
          if ((eoplength = gt_editoperation_length(*eopptr, true)) > 1) {
            /* replace the match by two matches: one of length 1 and one with
               the remaining length
               NOTE(review): this relies on a match edit operation being
               encoded as its plain length -- confirm against the
               Editoperation encoding */
            eop = 1;
            gt_array_add(editoperations, eop);
            eop = eoplength - 1;
            gt_array_add(editoperations, eop);
            continue;
          }
        }
        else if (eoptype == EOP_TYPE_MISMATCH ||
                 eoptype == EOP_TYPE_MISMATCH_WITH_1_GAP) {
          split_match = false;
        }
      }
      gt_array_add(editoperations, *eopptr);
    }

    /* free backup */
    gt_array_delete(backup);
  }
}
/* Computes the spliced alignment <sa> for <raw_chain> by calling the DNA DP
   (if <call_dna_dp> is true) or the protein DP (otherwise) on the genomic
   strand denoted by <forward>.

   The DP is retried in a loop:
   - if the intron cutout technique is in use and the DP reports
     GTH_ERROR_CUTOUT_NOT_IN_INTRON, the intron cutout delta is increased by
     introncutoutinfo->icdeltaincrease, as long as iterations are left;
   - if the DP reports GTH_ERROR_MATRIX_ALLOCATION_FAILED, the automatic
     intron cutout technique is enabled (introncutoutinfo->autoicmaxmatrixsize
     > 0) and the intron cutout technique was not used yet, the DP is repeated
     with the intron cutout technique.

   Returns 0 on success, GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED if the DP
   reported that error, GTH_ERROR_SA_COULD_NOT_BE_DETERMINED if no iteration
   is left or the matrix allocation failed for good, and -1 for any other DP
   error. The temporary chains are freed on every return path. */
static int callsahmt(bool call_dna_dp,
                     GthSA *sa,
                     bool forward,
                     GtUword gen_file_num,
                     GtUword ref_file_num,
                     GthChain *raw_chain,
                     GtUword gen_total_length,
                     GtUword gen_offset,
                     const GtRange *gen_seq_bounds,
                     const GtRange *gen_seq_bounds_rc,
                     const unsigned char *ref_seq_tran,
                     const unsigned char *ref_seq_orig,
                     GtUword ref_total_length,
                     GtUword ref_offset,
                     GthInput *input,
                     Introncutoutinfo *introncutoutinfo,
                     GthStat *stat,
                     GtUword chainctr,
                     GtUword num_of_chains,
                     GtUword translationtable,
                     bool directmatches,
                     bool proteinexonpenal,
                     GthSpliceSiteModel *splice_site_model,
                     GthDPOptionsCore *dp_options_core,
                     GthDPOptionsEST *dp_options_est,
                     GthDPOptionsPostpro *dp_options_postpro,
                     GthDNACompletePathMatrixJT dna_complete_path_matrix_jt,
                     GthProteinCompletePathMatrixJT
                     protein_complete_path_matrix_jt,
                     GthOutput *out)
{
  int rval,
      had_err = 0; /* the value returned after the common cleanup */
  GthChain *actual_chain, *contracted_chain, *used_chain;
  GtUword icdelta = introncutoutinfo->icinitialdelta,
                iciterations = introncutoutinfo->iciterations;
  bool useintroncutout = introncutoutinfo->introncutout;
  /* initially useintroncutout is set to the value of
     introncutoutinfo->introncutout, if the automatic intron cutout technique
     is activated it can be set to true if a matrix allocation error
     (GTH_ERROR_MATRIX_ALLOCATION_FAILED) occurs
   */

  gt_assert(sa);

  actual_chain = gth_chain_new();
  contracted_chain = gth_chain_new();

  for (;;) {
    /* reset actualDPrange; */
    gt_array_set_size(actual_chain->forwardranges, 0);
    gt_array_set_size(actual_chain->reverseranges, 0);

    /* copy raw chain to actual chain */
    gth_chain_copy(actual_chain, raw_chain);

    /* shorten potential introns and compute spliced sequence, if the intron
       cutout technique is used */
    if (useintroncutout) {
      /* shorten potential introns */
      gth_chain_shorten_introns(actual_chain, icdelta,
                                introncutoutinfo->icminremintronlength,
                                gen_total_length, gen_offset, out->comments,
                                out->outfp);
    }
    else
      gth_chain_contract(contracted_chain, actual_chain);

    if (out->showverbose) {
      show_matrix_calculation_status(out->showverbose, forward,
                                     gth_sa_ref_strand_forward(sa),
                                     useintroncutout, chainctr, num_of_chains,
                                     icdelta, gen_file_num,
                                     gth_input_num_of_gen_files(input),
                                     ref_file_num,
                                     gth_input_num_of_ref_files(input),
                                     directmatches, out->verboseseqs,
                                     gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }

    /* allocate space for DP parameter */
    if (out->comments) {
      gt_file_xprintf(out->outfp, "%c alloc space for DP param "
                         "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR,
                         gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }
    used_chain = useintroncutout ? actual_chain : contracted_chain;

    /* The variable 'forward' denotes the genomic strand on which the DP is
       applied. */
    if (forward) {
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->forwardranges,
                             gth_input_current_gen_seq_tran(input),
                             gth_input_current_gen_seq_orig(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds, splice_site_model, dp_options_core,
                             dp_options_est, dp_options_postpro,
                             dna_complete_path_matrix_jt,
                             raw_chain->forward_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        rval = gth_align_protein(sa, used_chain->forwardranges,
                                 gth_input_current_gen_seq_tran(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->forward_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }
    else {
      /* the DP is called with the reverse position specifiers */
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->reverseranges,
                             gth_input_current_gen_seq_tran_rc(input),
                             gth_input_current_gen_seq_orig_rc(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds_rc, splice_site_model,
                             dp_options_core, dp_options_est,
                             dp_options_postpro, dna_complete_path_matrix_jt,
                             raw_chain->reverse_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        /* NOTE(review): the reverse complemented sequence and the reverse
           ranges are passed here, therefore the reverse complement sequence
           bounds are passed as well (as in the DNA case above) -- confirm
           against gth_align_protein() */
        rval = gth_align_protein(sa, used_chain->reverseranges,
                                 gth_input_current_gen_seq_tran_rc(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable,
                                 gen_seq_bounds_rc, splice_site_model,
                                 dp_options_core, dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->reverse_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }

    if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) {
      /* leave the loop instead of returning directly, so that the chains are
         freed below */
      had_err = GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED;
      break;
    }

    /* handling of special error codes GTH_ERROR_CUTOUT_NOT_IN_INTRON and
       GTH_ERROR_MATRIX_ALLOCATION_FAILED from DP
       apart from GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED (handled above) the
       only possible special error code given back by this function is
       GTH_ERROR_SA_COULD_NOT_BE_DETERMINED */
#ifndef NDEBUG
    if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON);
#endif
    if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) {
      /* the intron cutout technique failed -> increase counter */
      gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat);
      /* iciterations is unsigned: test before decrementing, so that a value
         of 0 cannot wrap around and cause (nearly) endless retries */
      if (iciterations > 1) {
        /* if an iteration is left, increase icdelta, decrease the remaining
           iterations, and continue the loop */
        iciterations--;
        icdelta += introncutoutinfo->icdeltaincrease;
        continue;
      }
      /* no iteration left, discard SA */
      gth_stat_increment_numofundeterminedSAs(stat);
      had_err = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) {
      if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) {
        /* if the automatic intron cutout technique is enabled and a ``normal''
           DP returned with the matrix allocation error, set useintroncutout,
           increase counter, and continue */
        if (out->showverbose) {
          out->showverbose("matrix allocation failed, use intron cutout "
                           "technique");
        }
        gth_stat_increment_numofautointroncutoutcalls(stat);
        useintroncutout = true;
        continue;
      }
      /* otherwise increase relevant statistics and return with error */
      gth_stat_increment_numoffailedmatrixallocations(stat);
      gth_stat_increment_numofundeterminedSAs(stat);
      had_err = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval) /* any other DP error */
      had_err = -1;
    break;
  }

#if 0
  if (!had_err && out->comments) {
    gt_file_xprintf(out->outfp, "%c this SA has been computed:\n", COMMENTCHAR);
    gth_sa_show(sa, input, out->outfp);
  }
#endif

  /* free (common to the success path and all error paths) */
  gth_chain_delete(actual_chain);
  gth_chain_delete(contracted_chain);

  return had_err;
}