Exemplo n.º 1
0
/*
  If <*newexon> is set, record the left borders of the exon starting at the
  current traversal position, clear <*newexon> and set <*newintron>.

  For every exon except the first one, the acceptor site values of <intron>
  are filled in as well and <intron> is then added to <sa>:
  - the acceptor site probability is exp() of the matching
    dp_param->log_Pacceptor entry (looked up via <spliced_seq> if
    <introncutout> is set);
  - the acceptor site score is UNDEFINED_SPLICE_SITE_SCORE if <proteineop>
    is set, otherwise it is computed by gthcalcsplicesitescore().

  If <*newexon> is not set, nothing happens.
*/
static void evalnewexonifpossible(bool proteineop, bool *newexon,
                                  bool *newintron, bool *firstexon,
                                  bool introncutout, GthSplicedSeq *spliced_seq,
                                  Exoninfo *exon, Introninfo *intron,
                                  GthSA *sa,
                                  Traversealignmentstate *travstate,
                                  GtAlphabet *gen_alphabet,
                                  GthDPParam *dp_param,
                                  GthDPOptionsEST *dp_options_est,
                                  const unsigned char *gen_seq_tran,
                                  const unsigned char *ref_seq_tran,
                                  unsigned long gen_dp_start)
{
  unsigned long acceptor_pos;

  /* no exon is pending -> nothing to evaluate */
  if (!*newexon)
    return;

  /* remember where the exon begins and switch the state flags */
  exon->leftgenomicexonborder = gen_dp_start + travstate->genomicptr;
  exon->leftreferenceexonborder = gt_safe_cast2ulong(travstate->referenceptr);
  *newexon   = false;
  *newintron = true;

  /* the first exon has no preceding intron which could be saved */
  if (*firstexon) {
    *firstexon = false;
    return;
  }

  /* acceptor site probability of the intron ending here */
  if (!introncutout) {
    intron->acceptorsiteprobability = (GthFlt)
      exp((double) dp_param->log_Pacceptor[travstate->genomicptr-1]);
  }
  else {
    acceptor_pos =
      gth_spliced_seq_orig_to_spliced_pos(spliced_seq,
                  gt_safe_cast2ulong(travstate->genomicptr - 1 + gen_dp_start));
    if (acceptor_pos != GT_UNDEF_ULONG) {
      intron->acceptorsiteprobability = (GthFlt)
        exp((double) dp_param->log_Pacceptor[acceptor_pos]);
    }
    else {
      /* XXX: no spliced position has been found -> this is an artificially
         introduced intron, use 0.0 as acceptor site probability */
      intron->acceptorsiteprobability = 0.0;
    }
  }

  /* acceptor site score: only calculated for cDNAs/ESTs (going forward from
     the current position), undefined for protein edit operations */
  if (!proteineop) {
    gthcalcsplicesitescore(&intron->acceptorsitescore, travstate,
                           gen_seq_tran, ref_seq_tran, gen_alphabet,
                           dp_options_est, true);
  }
  else
    intron->acceptorsitescore = UNDEFINED_SPLICE_SITE_SCORE;

  /* the intron is complete now, store it in the spliced alignment */
  gth_sa_add_intron(sa, intron);
}
Exemplo n.º 2
0
/*
  Handles the end of an XML element named <name>; <info> is interpreted as a
  pointer to the Parseinfo state of the running parse.
  NOTE(review): the signature suggests this is registered as the end-element
  callback of an expat parser -- the registration is not visible here.

  The character data collected in parseinfo->databuf is interpreted according
  to <name> and stored either in the spliced alignment currently under
  construction (parseinfo->currentSA) or in one of the scratch members of
  <parseinfo> (eoptype, cutoffs, exoninfo, introninfo, file name buffers).
  When the closing SPLICEDALIGNMENT_TAG is seen, the finished alignment is
  handed to parseinfo->saprocessfunc and parseinfo->currentSA is reset.

  Element names which match none of the known tags are silently ignored.

  SCANUINT, SCANDOUBLE and ILLEGAL_DATA are macros defined outside of this
  function. NOTE(review): judging from their use below, SCANUINT presumably
  parses <data> into <ret>, SCANDOUBLE parses <data> into <retdouble>, and
  ILLEGAL_DATA reports unparsable data -- confirm against the macro
  definitions, which apparently rely on the local variable names used here.
*/
static void end_element_handler(void *info, const XML_Char *name)
{
  Parseinfo *parseinfo = (Parseinfo*) info;
  GthSA *sa = parseinfo->currentSA;
  GtUword datalength;
  double retdouble;
  GtWord ret;
  char *data;

  /* save data and data length */
  data       = gt_str_get(parseinfo->databuf);
  datalength = gt_str_length(parseinfo->databuf);

  /* perform actions depending on end tag */
  if (strcmp(name, SPLICEDALIGNMENT_TAG) == 0) {
    /* before we store the spliced alignment we have to reverse its edit
       operations */
    gt_assert(sa && gth_sa_backtrace_path(sa));
    gth_backtrace_path_reverse(gth_sa_backtrace_path(sa));

    /* ensure that before an intron which is not in phase the edit operation
       has length 1 (only for protein spliced alignments) */
    gth_backtrace_path_ensure_length_1_before_introns(
                                                     gth_sa_backtrace_path(sa));

    /* hand the completed spliced alignment to the processing callback; a
       non-zero return value is treated as a fatal error */
    if (parseinfo->saprocessfunc(parseinfo->data , sa,
                                 parseinfo->outputfilename, parseinfo->err)) {
      /* XXX: the error is not propagated to the caller, the program is
         terminated instead */
      fprintf(stderr, "error: %s\n", gt_error_get(parseinfo->err));
      exit(EXIT_FAILURE);
    }
    /* reset current spliced alignment */
    parseinfo->currentSA = NULL;
 }
  else if (strcmp(name, REFERENCEALPHATYPE_TAG) == 0) {
    /* alphabet type of the reference sequence, given by name */
    if (strcmp(data, "DNA_ALPHA") == 0)
      gth_sa_set_alphatype(sa, DNA_ALPHA);
    else if (strcmp(data, "PROTEIN_ALPHA") == 0) {
      gth_sa_set_alphatype(sa, PROTEIN_ALPHA);
    }
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_TYPE_TAG) == 0) {
    /* remember the edit operation type; it is added to the backtrace path
       when the corresponding length tag is closed */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_LENGTH_TAG) == 0) {
    /* add the edit operation with the previously remembered type */
    SCANUINT;
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, PROTEIN_EOP_TYPE_TAG) == 0) {
    /* same as for DNA edit operations, plus the additional type names which
       are accepted only here */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else if (strcmp(data, "mismatch_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_1_GAP;
    else if (strcmp(data, "mismatch_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_2_GAPS;
    else if (strcmp(data, "deletion_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_1_GAP;
    else if (strcmp(data, "deletion_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_2_GAPS;
    else if (strcmp(data, "intron_with_1_base_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_1_BASE_LEFT;
    else if (strcmp(data, "intron_with_2_bases_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_2_BASES_LEFT;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, PROTEIN_EOP_LENGTH_TAG) == 0) {
    /* add the edit operation with the previously remembered type */
    SCANUINT;
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, INDELCOUNT_TAG) == 0) {
    SCANUINT;
    /* ignore indelcount, gets recomputed anyway */
  }
  else if (strcmp(name, GENOMICLENGTHDP_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_length(sa, ret);
  }
  else if (strcmp(name, GENOMICLENGTHTOTAL_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_total_length(sa, ret);
  }
  else if (strcmp(name, GENOMICOFFSET_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_offset(sa, ret);
  }
  else if (strcmp(name, REFERENCELENGTH_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_total_length(sa, ret);
  }
  else if (strcmp(name, DPSTARTPOS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_start(sa, ret);
  }
  else if (strcmp(name, DPENDPOS_TAG) == 0) {
    SCANUINT;
    /* ignore DP end pos, gets recomputed from gen_dp_length anyway; the
       parsed value is only compared against the recomputed one */
    gt_assert(gth_sa_gen_dp_end(sa) == ret);
  }
  else if (strcmp(name, GENOMICFILENAME_TAG) == 0) {
    /* save genomic file name, it is consumed when the hash tag is closed */
    gt_str_append_cstr_nt(parseinfo->genomicfilename, data, datalength);
  }
  else if (strcmp(name, GENOMICFILEHASH_TAG) == 0) {
    /* the saved genomic file name and the hash given in <data> are passed to
       process_file(), its result is stored as genomic file number */
    gth_sa_set_gen_file_num(sa, process_file(parseinfo->input,
                            gt_str_get(parseinfo->genomicfilename), data, false,
                            UNDEF_ALPHA));
    /* reset genomic filename */
    gt_str_reset(parseinfo->genomicfilename);
  }
  else if (strcmp(name, GENOMICSEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_seq_num(sa, ret);
  }
  else if (strcmp(name, REFERENCEFILENAME_TAG) == 0) {
    /* save reference file name, it is consumed when the hash tag is closed */
    gt_str_append_cstr_nt(parseinfo->referencefilename, data, datalength);
  }
  else if (strcmp(name, REFERENCEFILEHASH_TAG) == 0) {
    /* like the genomic case, but the alphabet type already stored in <sa> is
       passed along */
    gth_sa_set_ref_file_num(sa, process_file(parseinfo->input,
                                       gt_str_get(parseinfo->referencefilename),
                                                  data, true,
                                                  gth_sa_alphatype(sa)));

    /* reset reference filename */
    gt_str_reset(parseinfo->referencefilename);
  }
  else if (strcmp(name, REFERENCESEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_seq_num(sa, ret);
  }
  else if (strcmp(name, GENOMICID_TAG) == 0)
    gth_sa_set_gen_id(sa, data);
  else if (strcmp(name, REFERENCEID_TAG) == 0)
    gth_sa_set_ref_id(sa, data);
  else if (strcmp(name, GENOMICSTRANDISFORWARD_TAG) == 0)
    gth_sa_set_gen_strand(sa, parse_boolean(data, parseinfo));
  else if (strcmp(name, REFERENCESTRANDISFORWARD_TAG) == 0)
    gth_sa_set_ref_strand(sa, parse_boolean(data, parseinfo));
  /* the three cutoff values are collected in parseinfo->cutoffs and stored in
     <sa> when the enclosing start or end cutoffs tag is closed */
  else if (strcmp(name, GENOMICCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.genomiccutoff = ret;
  }
  else if (strcmp(name, REFERENCECUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.referencecutoff = ret;
  }
  else if (strcmp(name, EOPCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.eopcutoff = ret;
  }
  else if (strcmp(name, CUTOFFSSTART_TAG) == 0)
    gth_sa_set_cutoffs_start(sa, &parseinfo->cutoffs);
  else if (strcmp(name, CUTOFFSEND_TAG) == 0)
    gth_sa_set_cutoffs_end(sa, &parseinfo->cutoffs);
  /* the exon values are collected in parseinfo->exoninfo and added to <sa>
     when the exon info tag is closed */
  else if (strcmp(name, LEFTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftgenomicexonborder = ret;
  }
  else if (strcmp(name, RIGHTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightgenomicexonborder = ret;
  }
  else if (strcmp(name, LEFTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftreferenceexonborder = ret;
  }
  else if (strcmp(name, RIGHTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightreferenceexonborder = ret;
  }
  else if (strcmp(name, EXONSCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->exoninfo.exonscore = retdouble;
  }
  else if (strcmp(name, EXONINFO_TAG) == 0)
    gth_sa_add_exon(sa, &parseinfo->exoninfo);
  /* the intron values are collected in parseinfo->introninfo and added to
     <sa> when the intron info tag is closed */
  else if (strcmp(name, DONORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, ACCEPTORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, DONORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsitescore = retdouble;
  }
  else if (strcmp(name, ACCEPTORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsitescore = retdouble;
  }
  else if (strcmp(name, INTRONINFO_TAG) == 0)
    gth_sa_add_intron(sa, &parseinfo->introninfo);
  else if (strcmp(name, POLYASTART_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_start(sa, ret);
  }
  else if (strcmp(name, POLYAEND_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_stop(sa, ret);
  }
  else if (strcmp(name, ALIGNMENTSCORE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_score(sa, retdouble);
  }
  else if (strcmp(name, COVERAGE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_coverage(sa, retdouble);
  }
  else if (strcmp(name, COVERAGEOFGENOMICSEGMENTISHIGHEST_TAG) == 0) {
    gth_sa_set_highest_cov(sa, parse_boolean(data, parseinfo));
  }
  else if (strcmp(name, CUMULATIVELENGTHOFSCOREDEXONS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_cumlen_scored_exons(sa, ret);
  }
}