/* Write the <gDNA_segment> XML element describing the genomic template of
   <sa> to <outfp>, starting at indentation depth <indentlevel>.

   The element contains a <template> element (genomic file name, genomic id,
   strand character and the sequence description echoed from <input>) which in
   turn encloses a <position> element with the start and stop positions
   reported by gth_sa_gen_dp_start_show() and gth_sa_gen_dp_end_show().

   The genomic file number of <sa> must be defined (asserted). */
static void xml_showgthgenomicinformation(GthSA *sa, GthInput *input,
                                          unsigned int indentlevel,
                                          GtFile *outfp)
{
  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_ULONG);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_segment>\n");
  indentlevel++;

  /* the description attribute is left open here, because the description is
     written directly to <outfp> by the echo function below */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<template temp_file=\"%s\" temp_id=\"%s\" "
                  "temp_strand=\"%c\" temp_description=\"",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  gth_input_echo_genomic_description(input, gth_sa_gen_file_num(sa),
                                     gth_sa_gen_seq_num(sa), outfp);
  gt_file_xprintf(outfp, "\">\n");
  indentlevel++;

  /* GT_WU (instead of a hard-coded "%lu") keeps the conversion in sync with
     the width of GtUword, as in the other output functions of this file */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<position start=\""GT_WU"\" stop=\""GT_WU"\"/>\n",
                  gth_sa_gen_dp_start_show(sa),
                  gth_sa_gen_dp_end_show(sa));
  indentlevel--;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</template>\n");
  indentlevel--;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_segment>\n");
}
/* Write the XML counterpart of the "classic" GeneSeqer2 MATCH line, i.e. a
   <MATCH_line> element, for <sa> to <outfp> at indentation depth
   <indentlevel>.

   The opening tag carries the genomic id and strand. The reference strand is
   only written if the reference alphabet is DNA_ALPHA; otherwise only the
   reference id is written. The element body contains the total alignment
   score, the cumulative length of scored exons and the coverage (percentage
   plus the coverage character). */
static void xml_showmatchline(GthSA *sa, unsigned int indentlevel,
                              GtFile *outfp)
{
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<MATCH_line gen_id=\"%s\" gen_strand=\"%c\" ",
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  if (gth_sa_alphatype(sa) == DNA_ALPHA) {
    gt_file_xprintf(outfp, "ref_id=\"%s\" ref_strand=\"%c\">\n",
                    gth_sa_ref_id(sa),
                    gth_sa_ref_strand_char(sa));
  }
  else {
    gt_file_xprintf(outfp, "ref_id=\"%s\">\n", gth_sa_ref_id(sa));
  }
  indentlevel++;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<total_alignment_score>%.3f</total_alignment_score>\n",
                  gth_sa_score(sa));

  /* GT_WU matches how the same value is printed by the plain-text MATCH line
     and keeps the conversion correct for every GtUword width */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<cumulative_length_of_scored_exons>"GT_WU""
                  "</cumulative_length_of_scored_exons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<coverage percentage=\"%.3f\" high_type=\"",
                  gth_sa_coverage(sa));
  gt_file_xfputc(gth_sa_coverage_char(sa), outfp);
  gt_file_xprintf(outfp, "\"/>\n");
  indentlevel--;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</MATCH_line>\n");
}
/* Print the "classic" GeneSeqer2 MATCH line for <sa> to <outfp>.

   The line consists of tab separated columns: the literal "MATCH", genomic id
   immediately followed by the genomic strand character, reference id
   immediately followed by the reference strand character, alignment score,
   cumulative length of scored exons, coverage, and the coverage character. */
static void showmatchline(GthSA *sa, GtFile *outfp)
{
  gt_file_xprintf(outfp,
                  "MATCH\t"
                  "%s%c\t"      /* genomic id and strand */
                  "%s%c\t"      /* reference id and strand */
                  "%5.3f\t"     /* alignment score */
                  GT_WU"\t"     /* cumulative length of scored exons */
                  "%5.3f\t"     /* coverage */
                  "%c\n",       /* coverage character */
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa),
                  gth_sa_ref_id(sa),
                  gth_sa_ref_strand_char(sa),
                  gth_sa_score(sa),
                  gth_sa_cumlen_scored_exons(sa),
                  gth_sa_coverage(sa),
                  gth_sa_coverage_char(sa));
}
/* Print the "classic" GeneSeqer2 PGS line for <sa> to <outfp>.

   Output format:
     PGS_<gen_id><gen_strand>_<ref_id><ref_strand>\t(<l1> <r1>,<l2> <r2>,...)
   followed by an empty line, where <lN>/<rN> are the left and right genomic
   borders of exon N.

   Note: the closing parenthesis and the trailing newlines are emitted together
   with the last exon, so nothing is written after the opening parenthesis if
   <sa> has no exons. */
static void showpgsline(GthSA *sa, GtFile *outfp)
{
  GtUword exon, exon_count;

  gt_assert(sa);

  exon_count = gth_sa_num_of_exons(sa);

  gt_file_xprintf(outfp, "PGS_%s%c_%s%c\t(",
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa),
                  gth_sa_ref_id(sa),
                  gth_sa_ref_strand_char(sa));

  for (exon = 0; exon < exon_count; exon++) {
    gt_file_xprintf(outfp, ""GT_WU" "GT_WU"",
                    gth_sa_left_genomic_exon_border(sa, exon),
                    gth_sa_right_genomic_exon_border(sa, exon));
    if (exon + 1 < exon_count) {
      /* more exons follow -> separate them */
      gt_file_xfputc(',', outfp);
    }
    else {
      /* last exon -> terminate the coordinate list */
      gt_file_xprintf(outfp, ")\n\n");
    }
  }
}
/* Write the XML counterpart of the "classic" GeneSeqer2 PGS line, i.e. a
   <PGS_line> element, for <sa> to <outfp> at indentation depth <indentlevel>.

   The element contains a <gDNA> element (genomic id and strand), either an
   <rDNA> element (reference id and strand, if the reference alphabet is
   DNA_ALPHA) or an <rProt> element (reference id only), and a
   <gDNA_exon_coordinates> element with one <exon> child per exon of <sa>
   holding its left and right genomic border. */
static void xml_showpgsline(GthSA *sa, unsigned int indentlevel,
                            GtFile *outfp)
{
  /* GtUword/GT_WU (instead of unsigned long/"%lu") are used to agree with the
     plain-text PGS line, which prints the same exon borders */
  GtUword i, numofexons;

  gt_assert(sa);

  numofexons = gth_sa_num_of_exons(sa);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<PGS_line>\n");
  indentlevel++;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA gen_id=\"%s\" gen_strand=\"%c\"/>\n",
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));

  gth_indent(outfp, indentlevel);
  if (gth_sa_alphatype(sa) == DNA_ALPHA) {
    gt_file_xprintf(outfp, "<rDNA rDNA_id=\"%s\" rDNA_strand=\"%c\"/>\n",
                    gth_sa_ref_id(sa),
                    gth_sa_ref_strand_char(sa));
  }
  else {
    gt_file_xprintf(outfp, "<rProt rProt_id=\"%s\"/>\n", gth_sa_ref_id(sa));
  }

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
  indentlevel++;

  for (i = 0; i < numofexons; i++) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<exon e_start=\""GT_WU"\" e_stop=\""GT_WU"\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i));
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</PGS_line>\n");
}
/* Write the complete <spliced_alignment> XML element (intermediate output
   format) for <sa> to <outfp>, with the opening tag placed at indentation
   depth <indentlevel> and all children one level deeper.

   The children are written in a fixed order: reference alphabet type, edit
   operations, various lengths/offsets/DP positions, genomic and reference
   file names and sequence numbers, ids, strand flags, alignment cutoffs,
   exons, introns, poly-A tail position, alignment score, coverage, the
   "coverage of genomic segment is highest" flag and the cumulative length of
   scored exons. */
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  /* stays true unless the reference alphabet turns out to be protein;
     forwarded to showintrons() below */
  bool ref_is_dna = true;
  /* indentation depth of all direct children of <spliced_alignment> */
  unsigned int child_level = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");

  /* reference alphabet type */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "PROTEIN_ALPHA");
      ref_is_dna = false;
      break;
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "DNA_ALPHA");
      break;
    default:
      gt_assert(0);
  }
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  /* edit operations (the backtrace path is shown one level deeper) */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<editoperations>\n");
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true,
                                   child_level + 1, outfp);
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "</editoperations>\n");

  /* lengths, offsets and DP positions */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<indelcount>"GT_WU"</indelcount>\n",
                  gth_sa_indelcount(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomiclengthDP>"GT_WU"</genomiclengthDP>\n",
                  gth_sa_gen_dp_length(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomiclengthtotal>"GT_WU"</genomiclengthtotal>\n",
                  gth_sa_gen_total_length(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomicoffset>"GT_WU"</genomicoffset>\n",
                  gth_sa_gen_offset(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<referencelength>"GT_WU"</referencelength>\n",
                  gth_sa_ref_total_length(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<dpstartpos>"GT_WU"</dpstartpos>\n",
                  gth_sa_gen_dp_start(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<dpendpos>"GT_WU"</dpendpos>\n",
                  gth_sa_gen_dp_end(sa));

  /* genomic file and sequence number */
  showgenomicfilename(sa, input, child_level, outfp);
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomicseqnum>"GT_WU"</genomicseqnum>\n",
                  gth_sa_gen_seq_num(sa));

  /* reference file and sequence number */
  showreferencefilename(sa, input, child_level, outfp);
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<referenceseqnum>"GT_WU"</referenceseqnum>\n",
                  gth_sa_ref_seq_num(sa));

  /* ids */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));

  /* strand flags */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<referencestrandisforward>%s</referencestrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  /* structural parts, each written by its own helper */
  showalignmentcutoffs(sa, child_level, outfp);
  showexons(sa, child_level, outfp);
  showintrons(sa, ref_is_dna, child_level, outfp);
  showpolyAtailpos(sa, child_level, outfp);

  /* score and coverage information */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n",
                  PRECISION, gth_sa_score(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n",
                  PRECISION, gth_sa_coverage(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<cumulativelengthofscoredexons>"GT_WU""
                  "</cumulativelengthofscoredexons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  /* closing tag at the depth of the opening tag */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
static int callsahmt(bool call_dna_dp, GthSA *sa, bool forward, GtUword gen_file_num, GtUword ref_file_num, GthChain *raw_chain, GtUword gen_total_length, GtUword gen_offset, const GtRange *gen_seq_bounds, const GtRange *gen_seq_bounds_rc, const unsigned char *ref_seq_tran, const unsigned char *ref_seq_orig, GtUword ref_total_length, GtUword ref_offset, GthInput *input, Introncutoutinfo *introncutoutinfo, GthStat *stat, GtUword chainctr, GtUword num_of_chains, GtUword translationtable, bool directmatches, bool proteinexonpenal, GthSpliceSiteModel *splice_site_model, GthDPOptionsCore *dp_options_core, GthDPOptionsEST *dp_options_est, GthDPOptionsPostpro *dp_options_postpro, GthDNACompletePathMatrixJT dna_complete_path_matrix_jt, GthProteinCompletePathMatrixJT protein_complete_path_matrix_jt, GthOutput *out) { int rval; GthChain *actual_chain, *contracted_chain, *used_chain; GtUword icdelta = introncutoutinfo->icinitialdelta, iciterations = introncutoutinfo->iciterations; bool useintroncutout = introncutoutinfo->introncutout; /* initially useintron is set to the value of introncutoutinfo->introncutout, if the automatic intron cutotu technique is acitvated it can be set to true if an matrix allocation error (ERROR_MATRIX_ALLOCATION_FAILED) occurs */ gt_assert(sa); actual_chain = gth_chain_new(); contracted_chain = gth_chain_new(); for (;;) { /* reset actualDPrange; */ gt_array_set_size(actual_chain->forwardranges, 0); gt_array_set_size(actual_chain->reverseranges, 0); /* copy raw chain to actual chain */ gth_chain_copy(actual_chain, raw_chain); /* shorten potential introns and compute spliced sequence, if the intron cutout technique is used */ if (useintroncutout) { /* shorten potential introns */ gth_chain_shorten_introns(actual_chain, icdelta, introncutoutinfo->icminremintronlength, gen_total_length, gen_offset, out->comments, out->outfp); } else gth_chain_contract(contracted_chain, actual_chain); if (out->showverbose) { 
show_matrix_calculation_status(out->showverbose, forward, gth_sa_ref_strand_forward(sa), useintroncutout, chainctr, num_of_chains, icdelta, gen_file_num, gth_input_num_of_gen_files(input), ref_file_num, gth_input_num_of_ref_files(input), directmatches, out->verboseseqs, gth_sa_gen_id(sa), gth_sa_ref_id(sa)); } /* allocate space for DP parameter */ if (out->comments) { gt_file_xprintf(out->outfp, "%c alloc space for DP param " "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR, gth_sa_gen_id(sa), gth_sa_ref_id(sa)); } used_chain = useintroncutout ? actual_chain : contracted_chain; /* The variable 'forward' denotes the genomic strand on which the DP is applied. */ if (forward) { if (call_dna_dp) { rval = gth_align_dna(sa, used_chain->forwardranges, gth_input_current_gen_seq_tran(input), gth_input_current_gen_seq_orig(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), useintroncutout, introncutoutinfo->autoicmaxmatrixsize, out->showeops, out->comments, out->gs2out, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_est, dp_options_postpro, dna_complete_path_matrix_jt, raw_chain->forward_jump_table, ref_offset, stat, out->outfp); } else { /* call_protein_dp */ rval = gth_align_protein(sa, used_chain->forwardranges, gth_input_current_gen_seq_tran(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), input, useintroncutout, introncutoutinfo->autoicmaxmatrixsize, proteinexonpenal, out->showeops, out->comments, out->gs2out, translationtable, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_postpro, protein_complete_path_matrix_jt, raw_chain->forward_jump_table, ref_offset, stat, out->outfp); } } else { /* the DP is called with the revers positions specifiers */ if (call_dna_dp) { rval = gth_align_dna(sa, used_chain->reverseranges, gth_input_current_gen_seq_tran_rc(input), 
gth_input_current_gen_seq_orig_rc(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), useintroncutout, introncutoutinfo->autoicmaxmatrixsize, out->showeops, out->comments, out->gs2out, gen_seq_bounds_rc, splice_site_model, dp_options_core, dp_options_est, dp_options_postpro, dna_complete_path_matrix_jt, raw_chain->reverse_jump_table, ref_offset, stat, out->outfp); } else { /* call_protein_dp */ rval = gth_align_protein(sa, used_chain->reverseranges, gth_input_current_gen_seq_tran_rc(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), input, useintroncutout, introncutoutinfo->autoicmaxmatrixsize, proteinexonpenal, out->showeops, out->comments, out->gs2out, translationtable, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_postpro, protein_complete_path_matrix_jt, raw_chain->reverse_jump_table, ref_offset, stat, out->outfp); } } if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) return GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED; /* handling of special error codes ERROR_CUTOUT_NOT_IN_INTRON and ERROR_MATRIX_ALLOCATION_FAILED from DP the only possible special error code given back by this function is ERROR_SA_COULD_NOT_BE_DETERMINED */ #ifndef NDEBUG if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON); #endif if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) { /* the intron cutout technique failed -> increase counter */ gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat); if (--iciterations > 0) { /* if an iterations is left, increase icdelta, decrease the remaining iterations, and continue the while-loop */ icdelta += introncutoutinfo->icdeltaincrease; continue; } else { /* no iteration left, discard SA */ gth_stat_increment_numofundeterminedSAs(stat); gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return 
GTH_ERROR_SA_COULD_NOT_BE_DETERMINED; } } else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) { if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) { /* if the automatic intron cutout technique is enabled and a ``normal'' DP returned with the matrix allocation error, set useintroncutout, increase counter, and continue */ if (out->showverbose) { out->showverbose("matrix allocation failed, use intron cutout " "technique"); } gth_stat_increment_numofautointroncutoutcalls(stat); useintroncutout = true; continue; } else { /* otherwise increase relevant statistics, free space and return with error */ gth_stat_increment_numoffailedmatrixallocations(stat); gth_stat_increment_numofundeterminedSAs(stat); gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return GTH_ERROR_SA_COULD_NOT_BE_DETERMINED; } } else if (rval) /* ``normal'' DP */ return -1; break; } #if 0 if (out->comments) { gt_file_xprintf(out->outfp, "%c this SA has been computed:\n", COMMENTCHAR); gth_sa_show(sa, input, out->outfp); } #endif /* free */ gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return 0; }