Esempio n. 1
0
/* Start a new exon at the current traversal position, if one is pending.

   Does nothing unless <*newexon> is set. Otherwise the left borders of <exon>
   are taken from <travstate> (the genomic border is shifted by <gen_dp_start>),
   <*newexon> is cleared and <*newintron> is set. For the first exon only
   <*firstexon> is cleared, because no intron precedes it. For every later exon
   the acceptor site probability and score of <intron> are filled in and the
   intron is appended to <sa>. */
static void evalnewexonifpossible(bool proteineop, bool *newexon,
                                  bool *newintron, bool *firstexon,
                                  bool introncutout, GthSplicedSeq *spliced_seq,
                                  Exoninfo *exon, Introninfo *intron,
                                  GthSA *sa,
                                  Traversealignmentstate *travstate,
                                  GtAlphabet *gen_alphabet,
                                  GthDPParam *dp_param,
                                  GthDPOptionsEST *dp_options_est,
                                  const unsigned char *gen_seq_tran,
                                  const unsigned char *ref_seq_tran,
                                  unsigned long gen_dp_start)
{
  unsigned long splicedpos;

  /* no exon pending -> nothing to evaluate */
  if (!*newexon)
    return;

  /* the exon begins here; from now on an intron is the next thing to close */
  exon->leftgenomicexonborder = gen_dp_start + travstate->genomicptr;
  exon->leftreferenceexonborder = gt_safe_cast2ulong(travstate->referenceptr);
  *newexon   = false;
  *newintron = true;

  /* the first exon has no intron in front of it, so nothing is saved */
  if (*firstexon) {
    *firstexon = false;
    return;
  }

  /* acceptor site probability of the intron which ends right before the exon */
  if (!introncutout) {
    intron->acceptorsiteprobability = (GthFlt) exp((double)
                              dp_param->log_Pacceptor[travstate->genomicptr-1]);
  }
  else {
    /* with intron cutout the probability table is indexed by the position in
       the spliced sequence, so the original position is mapped first */
    splicedpos =
      gth_spliced_seq_orig_to_spliced_pos(spliced_seq,
                  gt_safe_cast2ulong(travstate->genomicptr - 1 + gen_dp_start));
    if (splicedpos != GT_UNDEF_ULONG) {
      intron->acceptorsiteprobability = (GthFlt)
                              exp((double) dp_param->log_Pacceptor[splicedpos]);
    }
    else {
      /* XXX: no spliced position has been found -> this is an artificially
         introduced intron, use 0.0 as acceptor site probability */
      intron->acceptorsiteprobability = 0.0;
    }
  }

  /* the acceptor site score is only calculated for cDNAs/ESTs (going forward
     from the current position); it stays undefined for proteins */
  if (proteineop)
    intron->acceptorsitescore = UNDEFINED_SPLICE_SITE_SCORE;
  else {
    gthcalcsplicesitescore(&intron->acceptorsitescore, travstate,
                           gen_seq_tran, ref_seq_tran, gen_alphabet,
                           dp_options_est, true);
  }

  /* the intron is complete now -> store it */
  gth_sa_add_intron(sa, intron);
}
Esempio n. 2
0
/* Close the current exon at the current traversal position, if an intron is
   pending.

   Does nothing unless <*newintron> is set. Otherwise the right borders of
   <exon> are taken from <travstate> (the genomic border is shifted by
   <gen_dp_start>), <*newintron> is cleared and <*newexon> is set. The exon
   score is the ratio of <*singleexonweight> to <*maxsingleexonweight> (0.0 if
   the maximum is not positive). The overall weights and the cumulative length
   of scored exons are updated, the exon is appended to <sa> and the single exon
   weights are reset to 0.0. Unless <lastintron> is set, the donor site
   probability and score of <intron> are filled in as well. */
static void evalnewintronifpossible(bool proteineop, bool *newexon,
                                    bool *newintron, bool lastintron,
                                    bool introncutout, bool gs2out,
                                    GthSplicedSeq *spliced_seq,
                                    Exoninfo *exon, Introninfo *intron,
                                    GthFlt *singleexonweight,
                                    GthFlt *maxsingleexonweight,
                                    GthFlt *overallexonweight,
                                    GthFlt *maxoverallexonweight,
                                    unsigned long
                                    *cumulativelengthofscoredexons,
                                    GthSA *sa,
                                    Traversealignmentstate *travstate,
                                    GtAlphabet *gen_alphabet,
                                    GthDPParam *dp_param,
                                    GthDPOptionsEST *dp_options_est,
                                    const unsigned char *gen_seq_tran,
                                    const unsigned char *ref_seq_tran,
                                    unsigned long gen_dp_start,
                                    unsigned long scoreminexonlen)
{
  unsigned long genomicexonlength, splicedpos;
  bool exonisscored;

  /* no intron pending -> nothing to evaluate */
  if (!*newintron)
    return;

  /* the exon ends one position before the current one */
  exon->rightgenomicexonborder = gen_dp_start + travstate->genomicptr - 1;
  exon->rightreferenceexonborder = gt_safe_cast2ulong(travstate
                                                      ->referenceptr - 1);
  *newintron = false;
  *newexon   = true;

  /* exon score: achieved weight relative to the maximal achievable weight */
  exon->exonscore = (*maxsingleexonweight > 0.0)
                    ? (GthDbl) ((*singleexonweight) / (*maxsingleexonweight))
                    : 0.0;

  /* needed for the alignment score and the cumulative length of scored
     exons */
  genomicexonlength = exon->rightgenomicexonborder -
                      exon->leftgenomicexonborder + 1;
  exonisscored = genomicexonlength >= scoreminexonlen;

  /* short exons do not contribute to the alignment score */
  if (exonisscored) {
    *overallexonweight    += *singleexonweight;
    *maxoverallexonweight += *maxsingleexonweight;
  }

  /* the coverage includes short exons as well, except for gs2out */
  if (!gs2out || exonisscored)
    *cumulativelengthofscoredexons += genomicexonlength;

  /* store the exon */
  gt_assert(exon->leftgenomicexonborder <= exon->rightgenomicexonborder);
  gth_sa_add_exon(sa, exon);

  /* start over with the weights for the next exon */
  *singleexonweight    = (GthFlt) 0.0;
  *maxsingleexonweight = (GthFlt) 0.0;

  /* if this is the last intron, the function has only been called to save the
     last exon, therefore no donor site information has to be saved */
  if (lastintron)
    return;

  /* donor site probability of the intron which starts right after the exon */
  if (!introncutout) {
    intron->donorsiteprobability = (GthFlt)
                      exp((double) dp_param->log_Pdonor[travstate->genomicptr]);
  }
  else {
    /* with intron cutout the probability table is indexed by the position in
       the spliced sequence, so the original position is mapped first */
    splicedpos =
      gth_spliced_seq_orig_to_spliced_pos(spliced_seq,
                      gt_safe_cast2ulong(travstate->genomicptr + gen_dp_start));
    if (splicedpos != GT_UNDEF_ULONG) {
      intron->donorsiteprobability = (GthFlt)
                                 exp((double) dp_param->log_Pdonor[splicedpos]);
    }
    else {
      /* XXX: no spliced position has been found -> this is an artificially
         introduced intron, use 0.0 as donor site probability */
      intron->donorsiteprobability = 0.0;
    }
  }

  /* the donor site score is only calculated for cDNAs/ESTs (going back from
     the current position); it stays undefined for proteins */
  if (proteineop)
    intron->donorsitescore = UNDEFINED_SPLICE_SITE_SCORE;
  else {
    gthcalcsplicesitescore(&intron->donorsitescore, travstate, gen_seq_tran,
                           ref_seq_tran, gen_alphabet, dp_options_est,
                           false);
  }
}
Esempio n. 3
0
/* Move the start of <range> DPEXTENSION positions to the left, but never
   below the start of <bounds>. */
static void chain_extend_range_start(GtRange *range, const GtRange *bounds)
{
  long border = gt_safe_cast2long(range->start);

  border -= DPEXTENSION;
  if (border < gt_safe_cast2long(bounds->start))
    border = bounds->start;
  range->start = gt_safe_cast2ulong(border);
}

/* Move the end of <range> DPEXTENSION positions to the right, but never
   beyond the end of <bounds>. */
static void chain_extend_range_end(GtRange *range, const GtRange *bounds)
{
  long border = gt_safe_cast2long(range->end);

  border += DPEXTENSION;
  if (border > gt_safe_cast2long(bounds->end))
    border = bounds->end;
  range->end = gt_safe_cast2ulong(border);
}

/* Extend the outer borders of <chain> by DPEXTENSION on both strands: the
   start of the first range is moved to the left and the end of the last range
   is moved to the right. Forward ranges are clipped to <gen_seq_bounds>,
   reverse ranges to <gen_seq_bounds_rc>. <gen_total_length> and <gen_offset>
   are only used in the final consistency assertion.
   XXX: change this function: add more sophisticated extension strategy */
void gth_chain_extend_borders(GthChain *chain, const GtRange *gen_seq_bounds,
                              const GtRange *gen_seq_bounds_rc,
                              GT_UNUSED unsigned long gen_total_length,
                              GT_UNUSED unsigned long gen_offset)
{
  /* at least one range in chain */
  gt_assert(gt_array_size(chain->forwardranges));
  /* forward range borders are in considered genomic region */
  gt_assert(gt_ranges_borders_are_in_region(chain->forwardranges,
                                            gen_seq_bounds));
  /* reverse range borders are in considered genomic region */
  gt_assert(gt_ranges_borders_are_in_region(chain->reverseranges,
                                            gen_seq_bounds_rc));
  /* chain->forwardranges is forward and consecutive */
  gt_assert(gt_ranges_are_consecutive(chain->forwardranges));
  /* valid sequence bounds */
  gt_assert(gen_seq_bounds->start <= gen_seq_bounds->end);
  gt_assert(gen_seq_bounds_rc->start <= gen_seq_bounds_rc->end);

  /* start borders: forward strand first, then reverse complement strand */
  chain_extend_range_start((GtRange*)
                           gt_array_get_first(chain->forwardranges),
                           gen_seq_bounds);
  chain_extend_range_start((GtRange*)
                           gt_array_get_first(chain->reverseranges),
                           gen_seq_bounds_rc);

  /* end borders: forward strand first, then reverse complement strand */
  chain_extend_range_end((GtRange*) gt_array_get_last(chain->forwardranges),
                         gen_seq_bounds);
  chain_extend_range_end((GtRange*) gt_array_get_last(chain->reverseranges),
                         gen_seq_bounds_rc);

  gt_assert(chain_is_filled_and_consistent(chain, gen_total_length,
                                           gen_offset));
}
Esempio n. 4
0
/* Determine the position of a poly-A tail in the reference sequence of <sa>
   and store it in <sa>->polyAtailpos (both borders are 0 if none is found).

   <ref_seq_tran> is the reference sequence, which is compared against symbols
   encoded with <ref_alphabet>.

   First, the part of the reference after the last exon is scanned from left to
   right for a run of at least MINIMUMPOLYATAILLENGTH 'A's; the scan is limited
   to the last positions of the reference (window of size
   CALCPOLYATAILWINDOW). A single non-'A' symbol is tolerated per run.
   If no such run is found, the part of the reference before the first exon is
   scanned from right to left for a run of 'T's in the same manner (presumably
   the poly-A tail of a reverse complemented sequence -- TODO confirm). In that
   case the borders are stored as start = i + ppa + mma and end = i + 1. */
void gth_sa_calc_polyAtailpos(GthSA *sa, const unsigned char *ref_seq_tran,
                              GtAlphabet *ref_alphabet)
{
  GtUword ppa, mma, rightreferenceborder, referencelength, windowstart;
  GtWord i, leftreferenceborder;

  sa->polyAtailpos.start = 0;
  sa->polyAtailpos.end = 0;
  ppa = mma = 0; /* ppa: matching symbols in run, mma: tolerated mismatches */

  rightreferenceborder = ((Exoninfo*) gt_array_get_last(sa->exons))
                         ->rightreferenceexonborder;
  leftreferenceborder  = ((Exoninfo*) gt_array_get_first(sa->exons))
                         ->leftreferenceexonborder;

  /* setting i: start at the first position after the last exon or at the
     begin of the window, whichever comes later.
     The begin of the window is clamped to 0 _before_ it is compared, because
     the unsigned subtraction wraps around for references shorter than
     1 + CALCPOLYATAILWINDOW. Comparing against the wrapped value made the scan
     start at position 0 (i.e., possibly inside the aligned region) for such
     short references. */
  referencelength = gth_sa_ref_total_length(sa);
  if (referencelength < 1 + CALCPOLYATAILWINDOW)
    windowstart = 0;
  else
    windowstart = referencelength - 1 - CALCPOLYATAILWINDOW;

  if ((rightreferenceborder + 1) >= windowstart)
    i = gt_safe_cast2long(rightreferenceborder + 1);
  else
    i = gt_safe_cast2long(windowstart);

  for (/* i already set */; i < gt_safe_cast2long(referencelength); i++) {
    if (ref_seq_tran[i] == gt_alphabet_encode(ref_alphabet, 'A')) {
      ppa++;
      continue;
    }
    /* the first mismatch inside a run is tolerated */
    if (ppa > 0 && mma < 1) {
      mma++;
      continue;
    }
    /* run is over: stop if it is long enough, otherwise start a new one */
    if (ppa >= MINIMUMPOLYATAILLENGTH)
      break;
    ppa = mma = 0;
  }

  if (ppa >= MINIMUMPOLYATAILLENGTH) {
    sa->polyAtailpos.start = gt_safe_cast2ulong(i - ppa - mma);
    sa->polyAtailpos.end = i - 1;
    return;
  }

  /* no poly-A run found after the last exon -> look for a run of 'T's before
     the first exon, scanning towards the begin of the reference */
  ppa = mma = 0;

  /* setting i */
  if ((leftreferenceborder - 1) <= CALCPOLYATAILWINDOW)
    i = leftreferenceborder - 1;
  else
    i =  CALCPOLYATAILWINDOW - 1;

  for (/* i already set */; i >= 0; i--) {
    if (ref_seq_tran[i] == gt_alphabet_encode(ref_alphabet, 'T')) {
      ppa++;
      continue;
    }
    /* the first mismatch inside a run is tolerated */
    if (ppa > 0 && mma < 1) {
      mma++;
      continue;
    }
    /* run is over: stop if it is long enough, otherwise start a new one */
    if (ppa >= MINIMUMPOLYATAILLENGTH)
      break;
    ppa = mma = 0;
  }

  if (ppa >= MINIMUMPOLYATAILLENGTH) {
    sa->polyAtailpos.start  = gt_safe_cast2ulong(i + ppa + mma);
    sa->polyAtailpos.end = i + 1;
  }
}