Example #1
0
/*
 * Runs gt_remove_zero_base_exons() on the edit operations of <bp>.
 *
 * Thin wrapper: the edit operation array of <bp> and its length are handed to
 * gt_remove_zero_base_exons(), and <stat> is passed through unchanged.
 * <bp> must not be NULL (asserted).
 */
void gth_backtrace_path_remove_zero_base_exons(GthBacktracePath *bp,
                                               GthStat *stat)
{
  gt_assert(bp);
  gt_remove_zero_base_exons(gth_backtrace_path_get(bp),
                         gth_backtrace_path_length(bp), stat);
}
Example #2
0
/*
 * Checks the part of the edit operations of <bp> that lies inside the cutoffs
 * with contains_no_zero_base_exons() and returns its result.
 *
 * The checked range begins <cutoffs.end.eopcutoff> elements into the edit
 * operation array; its length is the full path length reduced by both the
 * start and the end eopcutoff.
 * NOTE(review): skipping the *end* cutoff at the front of the array suggests
 * the edit operations are stored in reverse order -- confirm.
 * NOTE(review): the length is computed by plain subtraction, so this
 * presumably relies on the two cutoffs never exceeding the path length --
 * confirm.
 * <bp> must not be NULL (asserted).
 */
bool gth_backtrace_path_contains_no_zero_base_exons(const GthBacktracePath *bp)
{
  gt_assert(bp);
  return contains_no_zero_base_exons(gth_backtrace_path_get(bp) +
                                     bp->cutoffs.end.eopcutoff,
                                     gth_backtrace_path_length(bp) -
                                     bp->cutoffs.start.eopcutoff -
                                     bp->cutoffs.end.eopcutoff);
}
Example #3
0
/*
 * Shows the edit operations of <bp> by delegating to gt_editoperation_show().
 *
 * <xmlout>, <indentlevel> and <outfp> are forwarded unchanged. The flag handed
 * to gt_editoperation_show() is true exactly when the alphabet type of <bp> is
 * PROTEIN_ALPHA.
 * <bp> must not be NULL and its alphabet type must be either DNA_ALPHA or
 * PROTEIN_ALPHA (both asserted).
 */
void gth_backtrace_path_show(const GthBacktracePath *bp, bool xmlout,
                             unsigned int indentlevel, GtFile *outfp)
{
  bool is_protein;

  gt_assert(bp);
  gt_assert(bp->alphatype == DNA_ALPHA || bp->alphatype == PROTEIN_ALPHA);

  is_protein = bp->alphatype == PROTEIN_ALPHA;
  gt_editoperation_show(gth_backtrace_path_get(bp),
                        gth_backtrace_path_length(bp),
                        is_protein, xmlout, indentlevel, outfp);
}
Example #4
0
/*
 * Allocates and returns a new path walker for the backtrace path <bp>.
 *
 * The walker is allocated zero-initialized with gt_calloc(). It records the
 * edit operation array and length of <bp>, whether <bp> uses the protein
 * alphabet, and positions its edit operation pointer on the last element of
 * the array. The last seen edit operation type starts out as
 * NUM_OF_EOP_TYPES.
 * <bp> must not be NULL and <forward> must be true (both asserted); reverse
 * walking is not implemented.
 */
GthPathWalker* gth_path_walker_new(const GthBacktracePath *bp, bool forward)
{
  GthPathWalker *walker;

  gt_assert(bp);
  gt_assert(forward); /* XXX: implement reverse walking */

  walker = gt_calloc(1, sizeof (*walker));

  /* walking direction and state not derived from the path contents */
  walker->forward = forward;
  walker->last_eop_type = NUM_OF_EOP_TYPES;

  /* state taken from the backtrace path */
  walker->proteineop = (gth_backtrace_path_alphatype(bp) == PROTEIN_ALPHA);
  walker->alignment = gth_backtrace_path_get(bp);
  walker->alignmentlength = gth_backtrace_path_length(bp);

  /* start on the last array element */
  walker->eopptr = walker->alignment + walker->alignmentlength - 1;

  return walker;
}
Example #5
0
/*
 * Determines the leading and terminal cutoffs of the backtrace path <bp>.
 *
 * The edit operations of <bp> are traversed twice with gthtraversealignment():
 * the first pass fills bp->cutoffs.start according to <leadcutoffsmode>, the
 * second pass fills bp->cutoffs.end according to <termcutoffsmode>. Each mode
 * (RELAXED, STRICT or MINIMAL) selects its own set of traverse functions and
 * its own data object; any other mode value triggers an assertion.
 * <cutoffsminexonlen> is only used in STRICT mode.
 * The path must be valid (gth_backtrace_path_is_valid()) both before and
 * after the cutoffs have been determined (asserted).
 */
static void determine_cutoffs(GthBacktracePath *bp,
                              GthCutoffmode leadcutoffsmode,
                              GthCutoffmode termcutoffsmode,
                              unsigned long cutoffsminexonlen)
{
  Traversealignmentfunctions travfunctions;
  Traversealignmentstate travstate;
  Relaxedcutoffsdata relaxedcutoffsdata;
  Strictcutoffsdata strictcutoffsdata;
  Minimalcutoffsdata minimalcutoffsdata;
  bool proteineop = bp->alphatype == PROTEIN_ALPHA;

  /* sum of edit operations equals referencelength (before cutoffs) */
  gt_assert(gth_backtrace_path_is_valid(bp));

  /* setting the traverse alignment state; for the leading pass the edit
     operation pointer starts on the last element of the array */
  travstate.proteineop      = proteineop;
  travstate.processing_intron_with_1_base_left  = false;
  travstate.processing_intron_with_2_bases_left = false;
  travstate.alignment       = gth_backtrace_path_get(bp);
  travstate.alignmentlength = gth_backtrace_path_length(bp);
  travstate.eopptr          = travstate.alignment +
                              travstate.alignmentlength - 1;
  travstate.genomicptr      = 0;
  travstate.referenceptr    = 0;

  /* cutting off leading indels in the sequences (first argument of
     gthtraversealignment() is true for this pass;
     NOTE(review): presumably the "forward" flag -- confirm) */
  switch (leadcutoffsmode) {
    case RELAXED:
      gt_initRelaxedcutoffsTravfunctions(&travfunctions);
      gt_initRelaxedcutoffsdata(&relaxedcutoffsdata, &bp->cutoffs.start);
      gthtraversealignment(true, &travstate, proteineop, &relaxedcutoffsdata,
                           &travfunctions);
      break;
    case STRICT:
      gt_initStrictcutoffsTravfunctions(&travfunctions);
      gt_initStrictcutoffsdata(&strictcutoffsdata, &bp->cutoffs.start,
                            cutoffsminexonlen);
      gthtraversealignment(true , &travstate , proteineop, &strictcutoffsdata,
                           &travfunctions);
      break;
    case MINIMAL:
      gt_initMinimalcutoffsTravfunctions(&travfunctions);
      gt_initMinimalcutoffsdata(&minimalcutoffsdata, &bp->cutoffs.start);
      gthtraversealignment(true, &travstate, proteineop, &minimalcutoffsdata,
                           &travfunctions);
      break;
    /* any other mode value is a programming error */
    default: gt_assert(0);
  }

  /* resetting the traverse alignment state; for the terminal pass the edit
     operation pointer starts on the first element of the array (alignment,
     alignmentlength and proteineop keep their values from above) */
  travstate.processing_intron_with_1_base_left  = false;
  travstate.processing_intron_with_2_bases_left = false;
  travstate.eopptr = gth_backtrace_path_get(bp);
  travstate.genomicptr = 0;
  travstate.referenceptr = 0;

  /* cutting off terminal indels in the sequences (first argument of
     gthtraversealignment() is false for this pass) */
  switch (termcutoffsmode) {
    case RELAXED:
      gt_initRelaxedcutoffsTravfunctions(&travfunctions);
      gt_initRelaxedcutoffsdata(&relaxedcutoffsdata, &bp->cutoffs.end);
      gthtraversealignment(false, &travstate, proteineop, &relaxedcutoffsdata,
                           &travfunctions);
      break;
    case STRICT:
      gt_initStrictcutoffsTravfunctions(&travfunctions);
      gt_initStrictcutoffsdata(&strictcutoffsdata, &bp->cutoffs.end,
                            cutoffsminexonlen);
      gthtraversealignment(false, &travstate, proteineop, &strictcutoffsdata,
                           &travfunctions);
      break;
    case MINIMAL:
      gt_initMinimalcutoffsTravfunctions(&travfunctions);
      gt_initMinimalcutoffsdata(&minimalcutoffsdata, &bp->cutoffs.end);
      gthtraversealignment(false, &travstate, proteineop, &minimalcutoffsdata,
                           &travfunctions);
      break;
    /* any other mode value is a programming error */
    default: gt_assert(0);
  }

  /* sum of edit operations equals referencelength (after cutoffs) */
  gt_assert(gth_backtrace_path_is_valid(bp));
}
Example #6
0
/*
 * Returns the length of the backtrace path stored in the spliced alignment
 * <sa>, as reported by gth_backtrace_path_length().
 * <sa> must not be NULL (asserted).
 */
GtUword gth_sa_get_editoperations_length(const GthSA *sa)
{
  gt_assert(sa);
  return gth_backtrace_path_length(sa->backtrace_path);
}
Example #7
0
bool gth_sas_are_equal(const GthSA *saA, const GthSA *saB)
{
  Exoninfo *exoninfoA, *exoninfoB;
  Introninfo *introninfoA, *introninfoB;
  GtUword i;

  /* compare element 0 */
  if (gth_sa_alphatype(saA) != gth_sa_alphatype(saB))
    return false;

  /* compare element 1 */
  if (gth_backtrace_path_length(saA->backtrace_path) !=
      gth_backtrace_path_length(saB->backtrace_path)) {
    return false;
  }
  for (i = 0; i < gth_backtrace_path_length(saA->backtrace_path); i++) {
    if (((Editoperation*) gth_backtrace_path_get(saA->backtrace_path))[i] !=
        ((Editoperation*) gth_backtrace_path_get(saB->backtrace_path))[i]) {
      return false;
    }
  }

  /* element 2 has been removed (indelcount) */

  /* compare element 3 */
  if (gth_sa_gen_dp_length(saA) != gth_sa_gen_dp_length(saB))
    return false;

  /* compare element 4 */
  if (saA->gen_total_length != saB->gen_total_length)
    return false;

  /* compare element 5 */
  if (saA->gen_offset != saB->gen_offset)
    return false;

  /* compare element 6 */
  if (gth_sa_ref_total_length(saA) != gth_sa_ref_total_length(saB))
    return false;

  /* compare element 7 */
  if (gth_sa_gen_dp_start(saA) != gth_sa_gen_dp_start(saB))
    return false;

  /* element 8 has been removed (gen_dp_end) */

  /* compare element 9 */
  if (saA->gen_file_num != saB->gen_file_num)
    return false;

  /* compare element 10 */
  if (saA->gen_seq_num != saB->gen_seq_num)
    return false;

  /* compare element 11 */
  if (saA->ref_file_num != saB->ref_file_num)
    return false;

  /* compare element 12 */
  if (saA->ref_seq_num != saB->ref_seq_num)
    return false;

  /* compare element 13 */
  if (gt_str_cmp(saA->gen_id, saB->gen_id))
    return false;

  /* compare element 14 */
  if (gt_str_cmp(saA->ref_id, saB->ref_id))
    return false;

  /* compare element 15 */
  if (saA->gen_strand_forward != saB->gen_strand_forward)
    return false;

  /* compare element 16 */
  if (saA->ref_strand_forward != saB->ref_strand_forward)
    return false;

  /* compare element 17 */
  if (gth_sa_genomiccutoff_start(saA) != gth_sa_genomiccutoff_start(saB))
    return false;
  if (gth_sa_referencecutoff_start(saA) != gth_sa_referencecutoff_start(saB))
    return false;
  if (gth_sa_eopcutoff_start(saA) != gth_sa_eopcutoff_start(saB))
    return false;
  if (gth_sa_genomiccutoff_end(saA) != gth_sa_genomiccutoff_end(saB))
    return false;
  if (gth_sa_referencecutoff_end(saA) != gth_sa_referencecutoff_end(saB))
    return false;
  if (gth_sa_eopcutoff_end(saA) != gth_sa_eopcutoff_end(saB))
    return false;

  /* compare element 18 */
  if (gt_array_size(saA->exons) != gt_array_size(saB->exons))
    return false;
  for (i = 0; i < gt_array_size(saA->exons); i++) {
    exoninfoA = (Exoninfo*) gt_array_get(saA->exons, i);
    exoninfoB = (Exoninfo*) gt_array_get(saB->exons, i);
    if (exoninfoA->leftgenomicexonborder != exoninfoB->leftgenomicexonborder)
      return false;
    if (exoninfoA->rightgenomicexonborder != exoninfoB->rightgenomicexonborder)
      return false;
    if (exoninfoA->leftreferenceexonborder !=
        exoninfoB->leftreferenceexonborder) {
      return false;
    }
    if (exoninfoA->rightreferenceexonborder !=
        exoninfoB->rightreferenceexonborder) {
      return false;
    }
    if (!gt_double_equals_double(exoninfoA->exonscore, exoninfoB->exonscore)) {
      return false;
    }
  }

  /* compare element 19 */
  if (gt_array_size(saA->introns) != gt_array_size(saB->introns))
    return false;
  for (i = 0; i < gt_array_size(saA->introns); i++) {
    introninfoA = (Introninfo*) gt_array_get(saA->introns, i);
    introninfoB = (Introninfo*) gt_array_get(saB->introns, i);
    if (!gt_double_equals_double(introninfoA->donorsiteprobability,
                                 introninfoB->donorsiteprobability)) {
      return false;
    }
    if (!gt_double_equals_double(introninfoA->acceptorsiteprobability,
                                 introninfoB->acceptorsiteprobability)) {
      return false;
    }
    if (!gt_double_equals_double(introninfoA->donorsitescore,
                                 introninfoB->donorsitescore)) {
      return false;
    }
    if (!gt_double_equals_double(introninfoA->acceptorsitescore,
                                 introninfoB->acceptorsitescore)) {
      return false;
    }
  }

  /* compare element 20 */
  if (saA->polyAtailpos.start != saB->polyAtailpos.start)
    return false;
  if (saA->polyAtailpos.end != saB->polyAtailpos.end)
    return false;

  /* compare element 21 */
  if (saA->alignmentscore != saB->alignmentscore)
    return false;

  /* compare element 22 */
  if (saA->coverage != saB->coverage)
    return false;

  /* compare element 23 */
  if (saA->genomic_cov_is_highest != saB->genomic_cov_is_highest)
    return false;

  /* compare element 24 */
  if (saA->cumlen_scored_exons != saB->cumlen_scored_exons)
    return false;

  return true;
}