Beispiel #1
0
static void addSAtointrondistribution(GtDiscDistri *introndistribution,
                                      GthSA *sa)
{
  unsigned long i;

  /* add values to introndistribution */
  for (i = 0; i < gth_sa_num_of_introns(sa); i++) {
    gt_disc_distri_add(introndistribution,
                       gth_sa_get_exon(sa, i+1)
                       ->leftgenomicexonborder -
                       gth_sa_get_exon(sa, i)
                       ->rightgenomicexonborder - 1);
  }
}
static void showintrons(GthSA *sa, bool dnaalpha,
                        unsigned int indentlevel, GtFile *outfp)
{
  Introninfo *introninfo;
  GtUword i;

  for (i = 0; i < gth_sa_num_of_introns(sa); i++) {
    introninfo = gth_sa_get_intron(sa, i);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<introninfo>\n");
    indentlevel++;

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<donorsiteprobability>%.*f</donorsiteprobability>\n",
                    PRECISION, introninfo->donorsiteprobability);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<acceptorsiteprobability>%.*f</acceptorsiteprobability>\n",
                    PRECISION, introninfo->acceptorsiteprobability);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<donorsitescore>%.*f</donorsitescore>\n",
                       PRECISION,
                       dnaalpha ? introninfo->donorsitescore
                                : UNDEFINED_SPLICE_SITE_SCORE);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<acceptorsitescore>%.*f</acceptorsitescore>\n",
                       PRECISION,
                       dnaalpha ? introninfo->acceptorsitescore
                                : UNDEFINED_SPLICE_SITE_SCORE);

    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</introninfo>\n");
  }
}
Beispiel #3
0
void gth_compute_scores(GthSA *sa,
                        bool proteineop,
                        GthDPParam *dp_param,
                        void *dp_options_est,
                        const unsigned char *gen_seq_tran,
                        const unsigned char *ref_seq_tran,
                        const unsigned char *ref_seq_orig,
                        const GtTransTable *transtable,
                        unsigned long gen_dp_start,
                        unsigned long scoreminexonlen,
                        bool introncutout,
                        bool gs2out,
                        GthSplicedSeq *spliced_seq,
                        unsigned long ref_dp_length,
                        GtAlphabet *gen_alphabet,
                        GtAlphabet *ref_alphabet,
                        GthDPScoresProtein *dp_scores_protein)
{
  Traversealignmentfunctions travfunctions;
  Traversealignmentstate travstate;
  Computebordersandscoresdata data;
  GthFlt score, coverageofgenomicsegment, coverageofreferencesegment;

  gt_assert(!gth_sa_num_of_exons(sa));
  gt_assert(!gth_sa_num_of_introns(sa));

  travfunctions.processmismatch  = computescoresprocmismatch;
  travfunctions.processdeletion  = computescoresprocdeletion;
  travfunctions.processinsertion = computebordersandscoresprocinsertion;
  travfunctions.processmatch     = computebordersandscoresprocmatch;
  travfunctions.processintron    = computebordersandscoresprocintron;
  travfunctions.breakcondition   = NULL;

  /* additional functions for protein edit operations */
  travfunctions.processintron_with_1_base_left  =
    computebordersandscoresprocintron;
  travfunctions.processintron_with_2_bases_left =
    computebordersandscoresprocintron;
  travfunctions.processmismatch_with_1_gap      =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processmismatch_with_2_gaps     =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processdeletion_with_1_gap      =
    computescoresprocmismatchordeletionwithgap;
  travfunctions.processdeletion_with_2_gaps     =
    computescoresprocmismatchordeletionwithgap;

  travstate.proteineop = proteineop;
  travstate.processing_intron_with_1_base_left  = false;
  travstate.processing_intron_with_2_bases_left = false;
  travstate.alignment = gth_sa_get_editoperations(sa);
  travstate.alignmentlength =  gth_sa_get_editoperations_length(sa);
  travstate.eopptr       = travstate.alignment + travstate.alignmentlength - 1;
  travstate.genomicptr   = gth_sa_genomiccutoff_start(sa);
  travstate.referenceptr = gth_sa_referencecutoff_start(sa);

  if (travstate.alignmentlength <= 0) {
    /* in this case the alignmentscore is set to 0, which leads to discarding
       this alignment later */
    gth_sa_set_score(sa, 0.0);
    return;
  }

  /* editoperations contain no zero base exons */
  gt_assert(gth_sa_contains_no_zero_base_exons(sa));
  /* editoperations contain no leading or terminal introns or insertions */
  gt_assert(containsnoleadingorterminalintronsorinsertions(travstate.alignment,
                                                        travstate
                                                        .alignmentlength,
                                                        proteineop));
  /* sum of edit operations equals referencelength */
  gt_assert(gt_eops_equal_referencelength(travstate.alignment,
                                       travstate.alignmentlength,
                                       ref_dp_length
                                       - gth_sa_referencecutoff_start(sa)
                                       - gth_sa_referencecutoff_end(sa),
                                       proteineop));

  data.proteineop                     = proteineop;
  data.newexon                        = true;
  data.newintron                      = true;
  data.firstexon                      = true;
  data.introncutout                   = introncutout;
  data.gs2out                         = gs2out;
  data.spliced_seq                    = spliced_seq;
  data.singleexonweight               = (GthFlt) 0.0;
  data.maxsingleexonweight            = (GthFlt) 0.0;
  data.overallexonweight              = (GthFlt) 0.0;
  data.maxoverallexonweight           = (GthFlt) 0.0;
  data.cumulativelengthofscoredexons  = 0;

  data.exon.leftgenomicexonborder     = GT_UNDEF_ULONG;
  data.exon.rightgenomicexonborder    = GT_UNDEF_ULONG;
  data.exon.leftreferenceexonborder   = GT_UNDEF_ULONG;
  data.exon.rightreferenceexonborder  = GT_UNDEF_ULONG;
  data.exon.exonscore                 = GTH_UNDEF_GTHDBL;

  data.intron.donorsiteprobability    = GTH_UNDEF_GTHFLT;
  data.intron.acceptorsiteprobability = GTH_UNDEF_GTHFLT;
  data.intron.donorsitescore          = GTH_UNDEF_GTHDBL;
  data.intron.acceptorsitescore       = GTH_UNDEF_GTHDBL;

  data.sa                             = sa;
  data.dp_param                       = dp_param;
  data.dp_options_est                 = dp_options_est;
  data.gen_seq_tran                   = gen_seq_tran;
  data.ref_seq_tran                   = ref_seq_tran;
  data.ref_seq_orig                   = ref_seq_orig;
  data.transtable                     = transtable;
  data.gen_dp_start                   = gen_dp_start;
  data.scoreminexonlen                = scoreminexonlen;
  data.ref_dp_length                  = ref_dp_length;
  data.gen_alphabet                   = gen_alphabet;
  data.gen_alphabet_characters        = gen_alphabet
                                        ? gt_alphabet_characters(gen_alphabet)
                                        : NULL;
  data.dp_scores_protein              = dp_scores_protein;

  gthtraversealignment(true, &travstate, proteineop, &data, &travfunctions);

  /* this is for saving the last exon */
  evalnewintronifpossible(proteineop, &data.newexon, &data.newintron, true,
                          data.introncutout, data.gs2out, data.spliced_seq,
                          &data.exon, &data.intron, &data.singleexonweight,
                          &data.maxsingleexonweight, &data.overallexonweight,
                          &data.maxoverallexonweight,
                          &data.cumulativelengthofscoredexons, sa, &travstate,
                          gen_alphabet, data.dp_param, data.dp_options_est,
                          data.gen_seq_tran, data.ref_seq_tran,
                          data.gen_dp_start, data.scoreminexonlen);

  /* saving the scores for the whole alignment */
  if (data.maxoverallexonweight > 0.0) {
    score = data.overallexonweight / data.maxoverallexonweight;
    /* XXX: the way the alignmentscore is computed, it is possible to get a
       score > 1.0. Since we don't want this, we cap it */
    if (score > 1.0)
      score = 1.0;
  }
  else
    score = 0.0;
  gth_sa_set_score(sa, score);
  gth_sa_set_cumlen_scored_exons(sa, data.cumulativelengthofscoredexons);

  /* fraction of the gen_dp_length which is scored/weighted */
  coverageofgenomicsegment   = (GthFlt) data.cumulativelengthofscoredexons /
                               (GthFlt) gth_sa_gen_dp_length(sa);
  /* coverage of genomic segment is valid value */
  gt_assert(coverageofgenomicsegment >= 0.0 && coverageofgenomicsegment <= 1.0);

  /* fraction of the referencelength which is scored/weighted */
  coverageofreferencesegment = (GthFlt) data.cumulativelengthofscoredexons /
                               (GthFlt) ((proteineop ? GT_CODON_LENGTH : 1) *
                                         gth_sa_ref_total_length(sa));

  if (coverageofgenomicsegment > coverageofreferencesegment) {
    gth_sa_set_coverage(sa, coverageofgenomicsegment);
    gth_sa_set_highest_cov(sa, true);
  }
  else {
    gth_sa_set_coverage(sa, coverageofreferencesegment);
    gth_sa_set_highest_cov(sa, false);
  }

  /* test the assumption that the coverage is never larger then the default */
  gt_assert(gth_sa_coverage(sa) <= GTH_DEFAULT_MAX_COVERAGE);

  /* compute poly(A) tail position */
  gth_sa_calc_polyAtailpos(sa, ref_seq_tran, ref_alphabet);

  /* determined exons are forward and consecutive */
  gt_assert(gth_sa_exons_are_forward_and_consecutive(sa));
}