コード例 #1
0
ファイル: xml_final_sa_visitor.c プロジェクト: 9beckert/TIR
/*
  Writes the <gDNA_segment> XML element describing the genomic template of
  the spliced alignment <sa> to <outfp>. The outermost tag is indented by
  <indentlevel>; nested tags are indented one level deeper each.
  The genomic file number of <sa> must be defined.
*/
static void xml_showgthgenomicinformation(GthSA *sa,
                                          GthInput *input,
                                          unsigned int indentlevel,
                                          GtFile *outfp)
{
  /* nesting depths of the three element layers written below */
  const unsigned int segment_depth = indentlevel,
                     template_depth = indentlevel + 1,
                     position_depth = indentlevel + 2;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_ULONG);

  /* <gDNA_segment> */
  gth_indent(outfp, segment_depth);
  gt_file_xprintf(outfp, "<gDNA_segment>\n");

  /* <template ...>: the temp_description attribute is left open here, the
     description is echoed by a separate call and the quote is closed
     afterwards */
  gth_indent(outfp, template_depth);
  gt_file_xprintf(outfp,
                  "<template temp_file=\"%s\" temp_id=\"%s\" "
                  "temp_strand=\"%c\" temp_description=\"",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  gth_input_echo_genomic_description(input, gth_sa_gen_file_num(sa),
                                     gth_sa_gen_seq_num(sa), outfp);
  gt_file_xprintf(outfp, "\">\n");

  /* <position .../> */
  gth_indent(outfp, position_depth);
  gt_file_xprintf(outfp, "<position start=\"%lu\" stop=\"%lu\"/>\n",
                  gth_sa_gen_dp_start_show(sa),
                  gth_sa_gen_dp_end_show(sa));

  /* closing tags, innermost first */
  gth_indent(outfp, template_depth);
  gt_file_xprintf(outfp, "</template>\n");
  gth_indent(outfp, segment_depth);
  gt_file_xprintf(outfp, "</gDNA_segment>\n");
}
コード例 #2
0
ファイル: xml_final_sa_visitor.c プロジェクト: 9beckert/TIR
/*
  Prints the "classic" GeneSeqer2 MATCH line of the spliced alignment <sa> as
  a <MATCH_line> XML element to <outfp>. The element itself is indented by
  <indentlevel>, its children by one level more. The ref_strand attribute is
  only written if the reference alphabet is DNA.
*/
static void xml_showmatchline(GthSA *sa, unsigned int indentlevel,
                              GtFile *outfp)
{
  const unsigned int element_depth = indentlevel,
                     child_depth = indentlevel + 1;

  /* start tag: genomic attributes first ... */
  gth_indent(outfp, element_depth);
  gt_file_xprintf(outfp, "<MATCH_line gen_id=\"%s\" gen_strand=\"%c\" ",
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));

  /* ... then the reference attributes, which also close the start tag */
  if (gth_sa_alphatype(sa) != DNA_ALPHA)
    gt_file_xprintf(outfp, "ref_id=\"%s\">\n", gth_sa_ref_id(sa));
  else {
    gt_file_xprintf(outfp, "ref_id=\"%s\" ref_strand=\"%c\">\n",
                    gth_sa_ref_id(sa),
                    gth_sa_ref_strand_char(sa));
  }

  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp,
                  "<total_alignment_score>%.3f</total_alignment_score>\n",
                  gth_sa_score(sa));

  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp,
                  "<cumulative_length_of_scored_exons>%lu"
                  "</cumulative_length_of_scored_exons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  /* <coverage .../> is assembled from three writes: attribute prefix, the
     single coverage character, and the closing quote plus tag end */
  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "<coverage percentage=\"%.3f\" high_type=\"",
                  gth_sa_coverage(sa));
  gt_file_xfputc(gth_sa_coverage_char(sa), outfp);
  gt_file_xprintf(outfp, "\"/>\n");

  gth_indent(outfp, element_depth);
  gt_file_xprintf(outfp, "</MATCH_line>\n");
}
コード例 #3
0
/*
  Prints the "classic" GeneSeqer2 MATCH line of the spliced alignment <sa> to
  <outfp>: a single tab-separated line holding the genomic id and strand, the
  reference id and strand, the score, the cumulative length of the scored
  exons, the coverage and the coverage character.
*/
static void showmatchline(GthSA *sa, GtFile *outfp)
{
  const char *gen_id = gth_sa_gen_id(sa),
             *ref_id = gth_sa_ref_id(sa);
  const char gen_strand = gth_sa_gen_strand_char(sa),
             ref_strand = gth_sa_ref_strand_char(sa);

  gt_file_xprintf(outfp,
                  "MATCH\t%s%c\t%s%c\t%5.3f\t"GT_WU"\t%5.3f\t%c\n",
                  gen_id, gen_strand,
                  ref_id, ref_strand,
                  gth_sa_score(sa),
                  gth_sa_cumlen_scored_exons(sa),
                  gth_sa_coverage(sa),
                  gth_sa_coverage_char(sa));
}
コード例 #4
0
/*
  Prints the "classic" GeneSeqer2 PGS line of the spliced alignment <sa> to
  <outfp>. The line has the form

    PGS_<gen_id><gen_strand>_<ref_id><ref_strand>\t(<l1>  <r1>,<l2>  <r2>,...)

  where <li> and <ri> are the left and right genomic borders of exon i. The
  line is followed by an empty line.
*/
static void showpgsline(GthSA *sa, GtFile *outfp)
{
  GtUword i, numofexons;
  gt_assert(sa);
  numofexons = gth_sa_num_of_exons(sa);
  gt_file_xprintf(outfp, "PGS_%s%c_%s%c\t(",
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa),
                  gth_sa_ref_id(sa),
                  gth_sa_ref_strand_char(sa));

  for (i = 0; i < numofexons; i++) {
    /* separator goes in front of every exon except the first */
    if (i > 0)
      gt_file_xfputc(',', outfp);
    gt_file_xprintf(outfp, GT_WU "  " GT_WU,
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i));
  }

  /* close the parenthesis after the loop rather than on the last iteration,
     so that the line is terminated even if the alignment has no exons */
  gt_file_xprintf(outfp, ")\n\n");
}
コード例 #5
0
ファイル: xml_final_sa_visitor.c プロジェクト: 9beckert/TIR
/*
  Prints the "classic" GeneSeqer2 PGS line of the spliced alignment <sa> as a
  <PGS_line> XML element to <outfp>. The element is indented by <indentlevel>;
  its children (<gDNA>, <rDNA> or <rProt>, <gDNA_exon_coordinates>) are one
  level deeper and the individual <exon> entries two levels deeper.
*/
static void xml_showpgsline(GthSA *sa, unsigned int indentlevel,
                            GtFile *outfp)
{
  const unsigned int line_depth = indentlevel,
                     child_depth = indentlevel + 1,
                     exon_depth = indentlevel + 2;
  unsigned long exon, exon_count;

  gt_assert(sa);
  exon_count = gth_sa_num_of_exons(sa);

  gth_indent(outfp, line_depth);
  gt_file_xprintf(outfp, "<PGS_line>\n");

  /* genomic sequence */
  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "<gDNA gen_id=\"%s\" gen_strand=\"%c\"/>\n",
                  gth_sa_gen_id(sa), gth_sa_gen_strand_char(sa));

  /* reference sequence: only a DNA reference carries a strand */
  gth_indent(outfp, child_depth);
  if (gth_sa_alphatype(sa) != DNA_ALPHA) {
    gt_file_xprintf(outfp, "<rProt rProt_id=\"%s\"/>\n",
                    gth_sa_ref_id(sa));
  }
  else {
    gt_file_xprintf(outfp, "<rDNA rDNA_id=\"%s\" rDNA_strand=\"%c\"/>\n",
                    gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
  }

  /* one <exon> entry per exon, in exon order */
  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
  for (exon = 0; exon < exon_count; exon++) {
    gth_indent(outfp, exon_depth);
    gt_file_xprintf(outfp, "<exon e_start=\"%lu\" e_stop=\"%lu\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, exon),
                    gth_sa_right_genomic_exon_border(sa, exon));
  }
  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");

  gth_indent(outfp, line_depth);
  gt_file_xprintf(outfp, "</PGS_line>\n");
}
コード例 #6
0
/*
  Writes the spliced alignment <sa> as a <spliced_alignment> XML element to
  <outfp>. The element itself is indented by <indentlevel>, all of its direct
  children by one level more. <input> is handed on to the helpers which show
  the genomic and reference file names.
*/
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  /* depth of all direct children of <spliced_alignment> */
  const unsigned int depth = indentlevel + 1;
  /* stays true unless the reference alphabet turns out to be protein */
  bool ref_is_dna = true;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");

  /* reference alphabet type */
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "DNA_ALPHA");
      break;
    case PROTEIN_ALPHA:
      ref_is_dna = false;
      gt_file_xprintf(outfp, "PROTEIN_ALPHA");
      break;
    default:
      gt_assert(0);
      break;
  }
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  /* edit operations, shown one level below the enclosing tag */
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<editoperations>\n");
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true,
                                   depth + 1, outfp);
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "</editoperations>\n");

  /* lengths, offsets and DP positions */
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<indelcount>" GT_WU "</indelcount>\n",
                  gth_sa_indelcount(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<genomiclengthDP>" GT_WU "</genomiclengthDP>\n",
                  gth_sa_gen_dp_length(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp,
                  "<genomiclengthtotal>" GT_WU "</genomiclengthtotal>\n",
                  gth_sa_gen_total_length(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<genomicoffset>" GT_WU "</genomicoffset>\n",
                  gth_sa_gen_offset(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<referencelength>" GT_WU "</referencelength>\n",
                  gth_sa_ref_total_length(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<dpstartpos>" GT_WU "</dpstartpos>\n",
                  gth_sa_gen_dp_start(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<dpendpos>" GT_WU "</dpendpos>\n",
                  gth_sa_gen_dp_end(sa));

  /* genomic file and sequence number */
  showgenomicfilename(sa, input, depth, outfp);

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<genomicseqnum>" GT_WU "</genomicseqnum>\n",
                  gth_sa_gen_seq_num(sa));

  /* reference file and sequence number */
  showreferencefilename(sa, input, depth, outfp);

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<referenceseqnum>" GT_WU "</referenceseqnum>\n",
                  gth_sa_ref_seq_num(sa));

  /* ids and strand orientation */
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp,
                  "<referencestrandisforward>%s</referencestrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  /* structured parts, each written by its own helper */
  showalignmentcutoffs(sa, depth, outfp);
  showexons(sa, depth, outfp);
  showintrons(sa, ref_is_dna, depth, outfp);
  showpolyAtailpos(sa, depth, outfp);

  /* score and coverage summary */
  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n",
                  PRECISION, gth_sa_score(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n",
                  PRECISION, gth_sa_coverage(sa));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp,
                  "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));

  gth_indent(outfp, depth);
  gt_file_xprintf(outfp,
                  "<cumulativelengthofscoredexons>" GT_WU
                  "</cumulativelengthofscoredexons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
コード例 #7
0
/*
  Computes the spliced alignment <sa> for <raw_chain> by calling the DNA DP
  (if <call_dna_dp> is true) or the protein DP (otherwise) on the forward
  (<forward> is true) or reverse genomic strand.

  The DP is called in a loop: if the intron cutout technique is used and the
  DP reports GTH_ERROR_CUTOUT_NOT_IN_INTRON, the call is repeated with an
  increased icdelta as long as iterations are left. If the DP reports
  GTH_ERROR_MATRIX_ALLOCATION_FAILED for a DP without intron cutout and the
  automatic intron cutout is enabled (autoicmaxmatrixsize > 0), the call is
  repeated once with the intron cutout technique switched on.

  Returns
  - 0 if the DP succeeded,
  - GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED if the DP returned that code,
  - GTH_ERROR_SA_COULD_NOT_BE_DETERMINED if no intron cutout iteration is
    left or the matrix allocation failed without a possible fallback,
  - -1 if the DP returned any other non-zero value.

  The two temporary chains allocated here are released on every exit path.
*/
static int callsahmt(bool call_dna_dp,
                     GthSA *sa,
                     bool forward,
                     GtUword gen_file_num,
                     GtUword ref_file_num,
                     GthChain *raw_chain,
                     GtUword gen_total_length,
                     GtUword gen_offset,
                     const GtRange *gen_seq_bounds,
                     const GtRange *gen_seq_bounds_rc,
                     const unsigned char *ref_seq_tran,
                     const unsigned char *ref_seq_orig,
                     GtUword ref_total_length,
                     GtUword ref_offset,
                     GthInput *input,
                     Introncutoutinfo *introncutoutinfo,
                     GthStat *stat,
                     GtUword chainctr,
                     GtUword num_of_chains,
                     GtUword translationtable,
                     bool directmatches,
                     bool proteinexonpenal,
                     GthSpliceSiteModel *splice_site_model,
                     GthDPOptionsCore *dp_options_core,
                     GthDPOptionsEST *dp_options_est,
                     GthDPOptionsPostpro *dp_options_postpro,
                     GthDNACompletePathMatrixJT dna_complete_path_matrix_jt,
                     GthProteinCompletePathMatrixJT
                     protein_complete_path_matrix_jt,
                     GthOutput *out)
{
  int rval,
      retval = 0; /* value returned to the caller after the cleanup */
  GthChain *actual_chain, *contracted_chain, *used_chain;
  GtUword icdelta = introncutoutinfo->icinitialdelta,
                iciterations = introncutoutinfo->iciterations;
  /* initially useintroncutout is set to the value of
     introncutoutinfo->introncutout, if the automatic intron cutout technique
     is activated it can be set to true if a matrix allocation error
     (GTH_ERROR_MATRIX_ALLOCATION_FAILED) occurs */
  bool useintroncutout = introncutoutinfo->introncutout;

  gt_assert(sa);

  actual_chain = gth_chain_new();
  contracted_chain = gth_chain_new();

  for (;;) {
    /* reset actualDPrange; */
    gt_array_set_size(actual_chain->forwardranges, 0);
    gt_array_set_size(actual_chain->reverseranges, 0);

    /* copy raw chain to actual chain */
    gth_chain_copy(actual_chain, raw_chain);

    /* shorten potential introns and compute spliced sequence, if the intron
       cutout technique is used */
    if (useintroncutout) {
      /* shorten potential introns */
      gth_chain_shorten_introns(actual_chain, icdelta,
                                introncutoutinfo->icminremintronlength,
                                gen_total_length, gen_offset, out->comments,
                                out->outfp);
    }
    else
      gth_chain_contract(contracted_chain, actual_chain);

    if (out->showverbose) {
      show_matrix_calculation_status(out->showverbose, forward,
                                     gth_sa_ref_strand_forward(sa),
                                     useintroncutout, chainctr, num_of_chains,
                                     icdelta, gen_file_num,
                                     gth_input_num_of_gen_files(input),
                                     ref_file_num,
                                     gth_input_num_of_ref_files(input),
                                     directmatches, out->verboseseqs,
                                     gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }

    /* allocate space for DP parameter */
    if (out->comments) {
      gt_file_xprintf(out->outfp, "%c alloc space for DP param "
                         "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR,
                         gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }
    used_chain = useintroncutout ? actual_chain : contracted_chain;

    /* The variable 'forward' denotes the genomic strand on which the DP is
       applied. */
    if (forward) {
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->forwardranges,
                             gth_input_current_gen_seq_tran(input),
                             gth_input_current_gen_seq_orig(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds, splice_site_model, dp_options_core,
                             dp_options_est, dp_options_postpro,
                             dna_complete_path_matrix_jt,
                             raw_chain->forward_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        rval = gth_align_protein(sa, used_chain->forwardranges,
                                 gth_input_current_gen_seq_tran(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->forward_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }
    else {
      /* the DP is called with the reverse position specifiers */
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->reverseranges,
                             gth_input_current_gen_seq_tran_rc(input),
                             gth_input_current_gen_seq_orig_rc(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds_rc, splice_site_model,
                             dp_options_core, dp_options_est,
                             dp_options_postpro, dna_complete_path_matrix_jt,
                             raw_chain->reverse_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        /* NOTE(review): the reverse-strand DNA call above passes
           gen_seq_bounds_rc, whereas this call passes gen_seq_bounds --
           confirm that this is intended */
        rval = gth_align_protein(sa, used_chain->reverseranges,
                                 gth_input_current_gen_seq_tran_rc(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->reverse_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }

    if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) {
      /* leave via the common cleanup below instead of returning directly,
         otherwise both temporary chains would leak */
      retval = GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED;
      break;
    }

    /* handling of special error codes GTH_ERROR_CUTOUT_NOT_IN_INTRON and
       GTH_ERROR_MATRIX_ALLOCATION_FAILED from DP
       the only special error code produced by this handling is
       GTH_ERROR_SA_COULD_NOT_BE_DETERMINED */
#ifndef NDEBUG
    if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON);
#endif
    if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) {
      /* the intron cutout technique failed -> increase counter */
      gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat);
      /* NOTE(review): if GtUword is unsigned (as its name suggests), an
         initial iciterations of 0 wraps around here -- presumably callers
         guarantee a value >= 1; confirm */
      if (--iciterations > 0) {
        /* if an iteration is left, increase icdelta and run the loop again
           (the number of remaining iterations has just been decreased) */
        icdelta += introncutoutinfo->icdeltaincrease;
        continue;
      }
      /* no iteration left, discard SA */
      gth_stat_increment_numofundeterminedSAs(stat);
      retval = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) {
      if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) {
        /* if the automatic intron cutout technique is enabled and a ``normal''
           DP returned with the matrix allocation error, set useintroncutout,
           increase counter, and continue */
        if (out->showverbose) {
          out->showverbose("matrix allocation failed, use intron cutout "
                           "technique");
        }
        gth_stat_increment_numofautointroncutoutcalls(stat);
        useintroncutout = true;
        continue;
      }
      /* otherwise increase relevant statistics and return with error */
      gth_stat_increment_numoffailedmatrixallocations(stat);
      gth_stat_increment_numofundeterminedSAs(stat);
      retval = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval) {
      /* any other DP failure; as above, the chains must still be freed */
      retval = -1;
    }
    break;
  }

  /* free the temporary chains on success and on every error path */
  gth_chain_delete(actual_chain);
  gth_chain_delete(contracted_chain);

  return retval;
}