コード例 #1
0
/* Write one <spliced_alignment> XML element describing <sa> to <outfp>.

   The opening and closing tags are indented by <indentlevel>; every child
   element is indented one level deeper, and the content of <editoperations>
   two levels deeper. <input> is only handed on to the helpers that print the
   genomic and reference file names. */
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  const unsigned int outer = indentlevel,      /* level of the element itself */
                     inner = indentlevel + 1;  /* level of its children */
  const char *alphaname = NULL;
  bool is_dna = true;

  /* opening tag */
  gth_indent(outfp, outer);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");

  /* alphabet type of the reference; also remembered for showintrons() */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      alphaname = "DNA_ALPHA";
      break;
    case PROTEIN_ALPHA:
      alphaname = "PROTEIN_ALPHA";
      is_dna = false;
      break;
    default: gt_assert(0);
  }
  if (alphaname)
    gt_file_xprintf(outfp, "%s", alphaname);
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  /* edit operations (nested one level deeper than the other children) */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<editoperations>\n");
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true, inner + 1,
                                   outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "</editoperations>\n");

  /* lengths, offsets and DP range */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<indelcount>" GT_WU "</indelcount>\n",
                  gth_sa_indelcount(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomiclengthDP>" GT_WU "</genomiclengthDP>\n",
                  gth_sa_gen_dp_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomiclengthtotal>" GT_WU "</genomiclengthtotal>\n",
                  gth_sa_gen_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicoffset>" GT_WU "</genomicoffset>\n",
                  gth_sa_gen_offset(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencelength>" GT_WU "</referencelength>\n",
                  gth_sa_ref_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpstartpos>" GT_WU "</dpstartpos>\n",
                  gth_sa_gen_dp_start(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpendpos>" GT_WU "</dpendpos>\n",
                  gth_sa_gen_dp_end(sa));

  /* file names and sequence numbers */
  showgenomicfilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicseqnum>" GT_WU "</genomicseqnum>\n",
                  gth_sa_gen_seq_num(sa));
  showreferencefilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceseqnum>" GT_WU "</referenceseqnum>\n",
                  gth_sa_ref_seq_num(sa));

  /* sequence ids (printed verbatim) and strand orientation */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n", gth_sa_ref_id(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<referencestrandisforward>%s</referencestrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  /* sections delegated to helpers */
  showalignmentcutoffs(sa, inner, outfp);
  showexons(sa, inner, outfp);
  showintrons(sa, is_dna, inner, outfp);
  showpolyAtailpos(sa, inner, outfp);

  /* scores and coverage */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n", PRECISION,
                  gth_sa_score(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n", PRECISION,
                  gth_sa_coverage(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<cumulativelengthofscoredexons>" GT_WU
                  "</cumulativelengthofscoredexons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  /* closing tag */
  gth_indent(outfp, outer);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
コード例 #2
0
ファイル: sa.c プロジェクト: AnnSeidel/genometools
/* Print the alignment stored in <sa> to <outfp>.

   The reference file belonging to <sa> is (re)loaded into <input> first. The
   leading and trailing cutoffs of <sa> are then trimmed from both sequences
   and the result is passed to gthshowalignmentdna() or
   gthshowalignmentprotein(), depending on the alphabet type of <sa>.
   <showintronmaxlen> and <wildcardimplosion> are forwarded unchanged;
   <translationtable> is only used for protein alignments. */
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen,
                           GtUword translationtable,
                           bool wildcardimplosion, GthInput *input,
                           GtFile *outfp)
{
  GtUword gen_cut_start, gen_cut_end, ref_cut_start, ref_cut_end,
          gen_len, ref_len, gen_pos;
  const unsigned char *gen_seq, *ref_seq;
  GthSeqCon *ref_seq_con;
  GtAlphabet *ref_alphabet;
  bool reverse_subject_pos;

  gt_assert(sa && input);

  /* cutoffs (only for cosmetic reasons) */
  gen_cut_start = gth_sa_genomiccutoff_start(sa);
  gen_cut_end   = gth_sa_genomiccutoff_end(sa);
  ref_cut_start = gth_sa_referencecutoff_start(sa);
  ref_cut_end   = gth_sa_referencecutoff_end(sa);

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);
  ref_alphabet = gth_input_current_ref_alphabet(input);

  /* If the reverse complement of the genomic DNA is considered, this option
     is needed for correct output of the genomic sequence positions. */
  reverse_subject_pos = !gth_sa_gen_strand_forward(sa);

  /* genomic sequence: start of the DP range, shifted past the start cutoff */
  gen_seq = gth_input_original_genomic_sequence(input, sa->gen_file_num,
                                                sa->gen_strand_forward)
            + gth_sa_gen_dp_start(sa) + gen_cut_start;
  gen_len = gth_sa_gen_dp_length(sa) - (gen_cut_start + gen_cut_end);

  /* reference sequence (forward or reverse complement), shifted likewise */
  ref_seq = (gth_sa_ref_strand_forward(sa)
             ? gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa))
             : gth_seq_con_get_orig_seq_rc(ref_seq_con,
                                           gth_sa_ref_seq_num(sa)))
            + ref_cut_start;
  ref_len = gth_sa_ref_total_length(sa) - (ref_cut_start + ref_cut_end);

  /* position of the first shown genomic base relative to the genomic offset */
  gen_pos = gth_sa_gen_dp_start(sa) + gen_cut_start - gth_sa_gen_offset(sa);

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gthshowalignmentdna(outfp, ALIGNMENTLINEWIDTH,
                          gth_sa_get_editoperations(sa),
                          gth_sa_get_editoperations_length(sa),
                          gth_sa_indelcount(sa), gen_seq, gen_len, ref_seq,
                          ref_len, gen_pos, ref_cut_start,
                          gth_sa_gen_total_length(sa), showintronmaxlen,
                          ref_alphabet, reverse_subject_pos,
                          wildcardimplosion);
      break;
    case PROTEIN_ALPHA:
      gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH,
                              gth_sa_get_editoperations(sa),
                              gth_sa_get_editoperations_length(sa),
                              gth_sa_indelcount(sa), gen_seq, gen_len, ref_seq,
                              ref_len, gen_pos, ref_cut_start,
                              gth_sa_gen_total_length(sa), showintronmaxlen,
                              ref_alphabet, translationtable,
                              gth_input_score_matrix(input),
                              gth_input_score_matrix_alpha(input),
                              reverse_subject_pos, wildcardimplosion);
      break;
    default: gt_assert(0);
  }
}
コード例 #3
0
ファイル: sa.c プロジェクト: AnnSeidel/genometools
/* Compute the alignment lines for <sa> and store them in <first_line>,
   <second_line> and <third_line>.

   The reference file belonging to <sa> is (re)loaded into <input> first. The
   leading and trailing cutoffs of <sa> are trimmed from both sequences before
   the lines are filled.

   For DNA alignments two lines are filled and <*third_line> is set to NULL.
   For protein alignments all three lines are filled, using
   <translationtable>.

   Returns the value delivered by the respective fill function (stored in the
   local <cols>). If the alphabet type is neither DNA_ALPHA nor PROTEIN_ALPHA
   and assertions are compiled out, 0 is returned and the line pointers are
   left untouched. */
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                         unsigned char **first_line,
                                         unsigned char **second_line,
                                         unsigned char **third_line,
                                         GtUword translationtable,
                                         GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
                referencestartcutoff, referenceendcutoff, referencetotalcutoff;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GtUword cols = 0;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence, positioned at the start of the DP range */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence (forward or reverse complement) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig +
                                         genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa) -
                                         genomictotalcutoff,
                                         ref_seq_orig +
                                         referencestartcutoff,
                                         gth_sa_ref_total_length(sa) -
                                         referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      /* a DNA alignment has no third line */
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig +
                                           genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa) -
                                           genomictotalcutoff,
                                           ref_seq_orig +
                                           referencestartcutoff,
                                           gth_sa_ref_total_length(sa) -
                                           referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
コード例 #4
0
/* Run the DP (DNA if <call_dna_dp> is true, protein otherwise) for
   <raw_chain> on the genomic strand selected by <forward> and store the
   result in <sa>.

   The DP is retried in a loop:
   - if the intron cutout technique is in use and the DP reports
     GTH_ERROR_CUTOUT_NOT_IN_INTRON, icdelta is increased by
     introncutoutinfo->icdeltaincrease and the DP is repeated, as long as
     iterations (introncutoutinfo->iciterations) are left;
   - if the DP reports GTH_ERROR_MATRIX_ALLOCATION_FAILED without intron
     cutout and introncutoutinfo->autoicmaxmatrixsize > 0, the intron cutout
     technique is switched on and the DP is repeated.

   Returns
     0                                         on success,
     GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED  if the DP returned this code,
     GTH_ERROR_SA_COULD_NOT_BE_DETERMINED      if no retry is left/possible,
     -1                                        for any other non-zero DP result.

   Both temporary chains are released on every return path. */
static int callsahmt(bool call_dna_dp,
                     GthSA *sa,
                     bool forward,
                     GtUword gen_file_num,
                     GtUword ref_file_num,
                     GthChain *raw_chain,
                     GtUword gen_total_length,
                     GtUword gen_offset,
                     const GtRange *gen_seq_bounds,
                     const GtRange *gen_seq_bounds_rc,
                     const unsigned char *ref_seq_tran,
                     const unsigned char *ref_seq_orig,
                     GtUword ref_total_length,
                     GtUword ref_offset,
                     GthInput *input,
                     Introncutoutinfo *introncutoutinfo,
                     GthStat *stat,
                     GtUword chainctr,
                     GtUword num_of_chains,
                     GtUword translationtable,
                     bool directmatches,
                     bool proteinexonpenal,
                     GthSpliceSiteModel *splice_site_model,
                     GthDPOptionsCore *dp_options_core,
                     GthDPOptionsEST *dp_options_est,
                     GthDPOptionsPostpro *dp_options_postpro,
                     GthDNACompletePathMatrixJT dna_complete_path_matrix_jt,
                     GthProteinCompletePathMatrixJT
                     protein_complete_path_matrix_jt,
                     GthOutput *out)
{
  int rval, had_err = 0;
  GthChain *actual_chain, *contracted_chain, *used_chain;
  GtUword icdelta = introncutoutinfo->icinitialdelta,
                iciterations = introncutoutinfo->iciterations;
  bool useintroncutout = introncutoutinfo->introncutout;
  /* initially useintroncutout is set to the value of
     introncutoutinfo->introncutout; if the automatic intron cutout technique
     is activated it can be set to true if a matrix allocation error
     (GTH_ERROR_MATRIX_ALLOCATION_FAILED) occurs */

  gt_assert(sa);

  actual_chain = gth_chain_new();
  contracted_chain = gth_chain_new();

  for (;;) {
    /* reset the ranges of the actual chain */
    gt_array_set_size(actual_chain->forwardranges, 0);
    gt_array_set_size(actual_chain->reverseranges, 0);

    /* copy raw chain to actual chain */
    gth_chain_copy(actual_chain, raw_chain);

    /* shorten potential introns and compute spliced sequence, if the intron
       cutout technique is used */
    if (useintroncutout) {
      /* shorten potential introns */
      gth_chain_shorten_introns(actual_chain, icdelta,
                                introncutoutinfo->icminremintronlength,
                                gen_total_length, gen_offset, out->comments,
                                out->outfp);
    }
    else
      gth_chain_contract(contracted_chain, actual_chain);

    if (out->showverbose) {
      show_matrix_calculation_status(out->showverbose, forward,
                                     gth_sa_ref_strand_forward(sa),
                                     useintroncutout, chainctr, num_of_chains,
                                     icdelta, gen_file_num,
                                     gth_input_num_of_gen_files(input),
                                     ref_file_num,
                                     gth_input_num_of_ref_files(input),
                                     directmatches, out->verboseseqs,
                                     gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }

    /* allocate space for DP parameter */
    if (out->comments) {
      gt_file_xprintf(out->outfp, "%c alloc space for DP param "
                         "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR,
                         gth_sa_gen_id(sa), gth_sa_ref_id(sa));
    }
    used_chain = useintroncutout ? actual_chain : contracted_chain;

    /* The variable 'forward' denotes the genomic strand on which the DP is
       applied. */
    if (forward) {
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->forwardranges,
                             gth_input_current_gen_seq_tran(input),
                             gth_input_current_gen_seq_orig(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds, splice_site_model, dp_options_core,
                             dp_options_est, dp_options_postpro,
                             dna_complete_path_matrix_jt,
                             raw_chain->forward_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        rval = gth_align_protein(sa, used_chain->forwardranges,
                                 gth_input_current_gen_seq_tran(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->forward_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }
    else {
      /* the DP is called with the reverse position specifiers */
      if (call_dna_dp) {
        rval = gth_align_dna(sa, used_chain->reverseranges,
                             gth_input_current_gen_seq_tran_rc(input),
                             gth_input_current_gen_seq_orig_rc(input),
                             ref_seq_tran, ref_seq_orig, ref_total_length,
                             gth_input_current_gen_alphabet(input),
                             gth_input_current_ref_alphabet(input),
                             useintroncutout,
                             introncutoutinfo->autoicmaxmatrixsize,
                             out->showeops, out->comments, out->gs2out,
                             gen_seq_bounds_rc, splice_site_model,
                             dp_options_core, dp_options_est,
                             dp_options_postpro, dna_complete_path_matrix_jt,
                             raw_chain->reverse_jump_table, ref_offset, stat,
                             out->outfp);
      }
      else { /* call_protein_dp */
        /* NOTE(review): this reverse-strand call passes gen_seq_bounds while
           the reverse-strand DNA call above passes gen_seq_bounds_rc --
           confirm that this asymmetry is intended. */
        rval = gth_align_protein(sa, used_chain->reverseranges,
                                 gth_input_current_gen_seq_tran_rc(input),
                                 ref_seq_tran, ref_seq_orig, ref_total_length,
                                 gth_input_current_gen_alphabet(input),
                                 gth_input_current_ref_alphabet(input),
                                 input, useintroncutout,
                                 introncutoutinfo->autoicmaxmatrixsize,
                                 proteinexonpenal, out->showeops, out->comments,
                                 out->gs2out, translationtable, gen_seq_bounds,
                                 splice_site_model, dp_options_core,
                                 dp_options_postpro,
                                 protein_complete_path_matrix_jt,
                                 raw_chain->reverse_jump_table, ref_offset,
                                 stat, out->outfp);
      }
    }

    if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) {
      /* pass this code through unchanged (after cleanup below) */
      had_err = GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED;
      break;
    }

    /* handling of special error codes GTH_ERROR_CUTOUT_NOT_IN_INTRON and
       GTH_ERROR_MATRIX_ALLOCATION_FAILED from DP;
       the only special error code produced by this handling is
       GTH_ERROR_SA_COULD_NOT_BE_DETERMINED */
#ifndef NDEBUG
    if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON);
#endif
    if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) {
      /* the intron cutout technique failed -> increase counter */
      gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat);
      /* test before decrementing: iciterations is unsigned, so decrementing
         a value of 0 would wrap around instead of terminating the loop */
      if (iciterations > 1) {
        /* an iteration is left: consume it, increase icdelta, and retry */
        iciterations--;
        icdelta += introncutoutinfo->icdeltaincrease;
        continue;
      }
      /* no iteration left, discard SA */
      gth_stat_increment_numofundeterminedSAs(stat);
      had_err = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) {
      if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) {
        /* if the automatic intron cutout technique is enabled and a ``normal''
           DP returned with the matrix allocation error, set useintroncutout,
           increase counter, and retry */
        if (out->showverbose) {
          out->showverbose("matrix allocation failed, use intron cutout "
                           "technique");
        }
        gth_stat_increment_numofautointroncutoutcalls(stat);
        useintroncutout = true;
        continue;
      }
      /* otherwise increase relevant statistics and return with error */
      gth_stat_increment_numoffailedmatrixallocations(stat);
      gth_stat_increment_numofundeterminedSAs(stat);
      had_err = GTH_ERROR_SA_COULD_NOT_BE_DETERMINED;
    }
    else if (rval) /* any other DP failure */
      had_err = -1;
    break;
  }

#if 0
  if (out->comments) {
    gt_file_xprintf(out->outfp, "%c this SA has been computed:\n", COMMENTCHAR);
    gth_sa_show(sa, input, out->outfp);
  }
#endif

  /* free (reached on success and on every error path) */
  gth_chain_delete(actual_chain);
  gth_chain_delete(contracted_chain);

  return had_err;
}