/*
 * Callback invoked with the name of an XML element that has just been closed
 * (the signature has the shape of an Expat end-element handler).
 *
 *   info  the Parseinfo of the running parse, passed as void*
 *   name  name of the closing tag
 *
 * The character data accumulated in parseinfo->databuf is interpreted
 * according to <name>. Depending on the tag it is stored in the spliced
 * alignment currently under construction (parseinfo->currentSA) or in one of
 * the scratch members of <parseinfo> (eoptype, cutoffs, exoninfo, introninfo,
 * genomicfilename, referencefilename). A closing SPLICEDALIGNMENT_TAG
 * finalizes the current alignment and passes it to parseinfo->saprocessfunc.
 * A tag name that matches none of the branches below is ignored.
 *
 * NOTE(review): SCANUINT, SCANDOUBLE and ILLEGAL_DATA are macros defined
 * outside this function. <ret> and <retdouble> are never assigned explicitly
 * in this body, so they are only ever written by these macros; presumably
 * SCANUINT parses <data> into <ret>, SCANDOUBLE parses it into <retdouble>
 * and ILLEGAL_DATA reports malformed content -- confirm against the macro
 * definitions before renaming any local used here.
 */
static void end_element_handler(void *info, const XML_Char *name)
{
  Parseinfo *parseinfo = (Parseinfo*) info;
  GthSA *sa = parseinfo->currentSA;
  GtUword datalength;
  double retdouble;
  GtWord ret;
  char *data;

  /* save data and data length */
  data = gt_str_get(parseinfo->databuf);
  datalength = gt_str_length(parseinfo->databuf);

  /* perform actions depending on end tag */
  if (strcmp(name, SPLICEDALIGNMENT_TAG) == 0) {
    /* before we store the spliced alignment we have to reverse its edit
       operations */
    gt_assert(sa && gth_sa_backtrace_path(sa));
    gth_backtrace_path_reverse(gth_sa_backtrace_path(sa));

    /* ensure that before an intron which is not in phase the edit operation
       has length 1 (only for protein spliced alignments) */
    gth_backtrace_path_ensure_length_1_before_introns(
      gth_sa_backtrace_path(sa));

    /* hand the finished alignment to the user supplied processing function;
       a non-zero return value is treated as failure */
    if (parseinfo->saprocessfunc(parseinfo->data , sa,
                                 parseinfo->outputfilename, parseinfo->err)) {
      /* XXX: this callback returns void, so the error cannot be propagated
         from here; the process is terminated instead */
      fprintf(stderr, "error: %s\n", gt_error_get(parseinfo->err));
      exit(EXIT_FAILURE);
    }

    /* reset current spliced alignment */
    parseinfo->currentSA = NULL;
  }
  else if (strcmp(name, REFERENCEALPHATYPE_TAG) == 0) {
    /* only the two literal alphabet names below are accepted */
    if (strcmp(data, "DNA_ALPHA") == 0)
      gth_sa_set_alphatype(sa, DNA_ALPHA);
    else if (strcmp(data, "PROTEIN_ALPHA") == 0) {
      gth_sa_set_alphatype(sa, PROTEIN_ALPHA);
    }
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_TYPE_TAG) == 0) {
    /* remember the edit operation type; it is added to the backtrace path
       when the corresponding length tag is closed */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, DNA_EOP_LENGTH_TAG) == 0) {
    SCANUINT;
    /* combine the previously stored type with the length just read */
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, PROTEIN_EOP_TYPE_TAG) == 0) {
    /* same as the DNA case, plus the edit operation types that are only
       accepted for the protein tag */
    if (strcmp(data, "match") == 0)
      parseinfo->eoptype = EOP_TYPE_MATCH;
    else if (strcmp(data, "deletion") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION;
    else if (strcmp(data, "insertion") == 0)
      parseinfo->eoptype = EOP_TYPE_INSERTION;
    else if (strcmp(data, "mismatch") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH;
    else if (strcmp(data, "intron") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON;
    else if (strcmp(data, "mismatch_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_1_GAP;
    else if (strcmp(data, "mismatch_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_MISMATCH_WITH_2_GAPS;
    else if (strcmp(data, "deletion_with_1_gap") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_1_GAP;
    else if (strcmp(data, "deletion_with_2_gaps") == 0)
      parseinfo->eoptype = EOP_TYPE_DELETION_WITH_2_GAPS;
    else if (strcmp(data, "intron_with_1_base_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_1_BASE_LEFT;
    else if (strcmp(data, "intron_with_2_bases_left") == 0)
      parseinfo->eoptype = EOP_TYPE_INTRON_WITH_2_BASES_LEFT;
    else {
      ILLEGAL_DATA;
    }
  }
  else if (strcmp(name, PROTEIN_EOP_LENGTH_TAG) == 0) {
    SCANUINT;
    /* combine the previously stored type with the length just read */
    gth_backtrace_path_add_eop(gth_sa_backtrace_path(sa), parseinfo->eoptype,
                               ret);
  }
  else if (strcmp(name, INDELCOUNT_TAG) == 0) {
    SCANUINT;
    /* ignore indelcount, gets recomputed anyway */
  }
  else if (strcmp(name, GENOMICLENGTHDP_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_length(sa, ret);
  }
  else if (strcmp(name, GENOMICLENGTHTOTAL_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_total_length(sa, ret);
  }
  else if (strcmp(name, GENOMICOFFSET_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_offset(sa, ret);
  }
  else if (strcmp(name, REFERENCELENGTH_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_total_length(sa, ret);
  }
  else if (strcmp(name, DPSTARTPOS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_dp_start(sa, ret);
  }
  else if (strcmp(name, DPENDPOS_TAG) == 0) {
    SCANUINT;
    /* ignore DP end pos, gets recomputed from gen_dp_length anyway */
    /* the value read is not stored, only cross-checked by this assertion */
    gt_assert(gth_sa_gen_dp_end(sa) == ret);
  }
  else if (strcmp(name, GENOMICFILENAME_TAG) == 0) {
    /* save genomic file name */
    /* it is consumed when the genomic file hash tag is closed */
    gt_str_append_cstr_nt(parseinfo->genomicfilename, data, datalength);
  }
  else if (strcmp(name, GENOMICFILEHASH_TAG) == 0) {
    /* pass the buffered file name together with the hash (<data>) to
       process_file() and store the result as genomic file number */
    gth_sa_set_gen_file_num(sa, process_file(parseinfo->input,
                                    gt_str_get(parseinfo->genomicfilename),
                                    data, false, UNDEF_ALPHA));
    /* reset genomic filename */
    gt_str_reset(parseinfo->genomicfilename);
  }
  else if (strcmp(name, GENOMICSEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_gen_seq_num(sa, ret);
  }
  else if (strcmp(name, REFERENCEFILENAME_TAG) == 0) {
    /* save reference file name */
    /* it is consumed when the reference file hash tag is closed */
    gt_str_append_cstr_nt(parseinfo->referencefilename, data, datalength);
  }
  else if (strcmp(name, REFERENCEFILEHASH_TAG) == 0) {
    /* like the genomic case, but the alphabet type already stored in <sa> is
       passed along */
    gth_sa_set_ref_file_num(sa, process_file(parseinfo->input,
                                    gt_str_get(parseinfo->referencefilename),
                                    data, true, gth_sa_alphatype(sa)));
    /* reset reference filename */
    gt_str_reset(parseinfo->referencefilename);
  }
  else if (strcmp(name, REFERENCESEQNUM_TAG) == 0) {
    SCANUINT;
    gth_sa_set_ref_seq_num(sa, ret);
  }
  else if (strcmp(name, GENOMICID_TAG) == 0)
    gth_sa_set_gen_id(sa, data);
  else if (strcmp(name, REFERENCEID_TAG) == 0)
    gth_sa_set_ref_id(sa, data);
  else if (strcmp(name, GENOMICSTRANDISFORWARD_TAG) == 0)
    gth_sa_set_gen_strand(sa, parse_boolean(data, parseinfo));
  else if (strcmp(name, REFERENCESTRANDISFORWARD_TAG) == 0)
    gth_sa_set_ref_strand(sa, parse_boolean(data, parseinfo));
  /* the three cutoff values are collected in parseinfo->cutoffs and copied
     into <sa> when the enclosing cutoffs start/end tag is closed */
  else if (strcmp(name, GENOMICCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.genomiccutoff = ret;
  }
  else if (strcmp(name, REFERENCECUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.referencecutoff = ret;
  }
  else if (strcmp(name, EOPCUTOFF_TAG) == 0) {
    SCANUINT;
    parseinfo->cutoffs.eopcutoff = ret;
  }
  else if (strcmp(name, CUTOFFSSTART_TAG) == 0)
    gth_sa_set_cutoffs_start(sa, &parseinfo->cutoffs);
  else if (strcmp(name, CUTOFFSEND_TAG) == 0)
    gth_sa_set_cutoffs_end(sa, &parseinfo->cutoffs);
  /* exon fields are collected in parseinfo->exoninfo and added to <sa> when
     the exon info tag is closed */
  else if (strcmp(name, LEFTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftgenomicexonborder = ret;
  }
  else if (strcmp(name, RIGHTGENOMICEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightgenomicexonborder = ret;
  }
  else if (strcmp(name, LEFTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.leftreferenceexonborder = ret;
  }
  else if (strcmp(name, RIGHTREFERENCEEXONBORDER_TAG) == 0) {
    SCANUINT;
    parseinfo->exoninfo.rightreferenceexonborder = ret;
  }
  else if (strcmp(name, EXONSCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->exoninfo.exonscore = retdouble;
  }
  else if (strcmp(name, EXONINFO_TAG) == 0)
    gth_sa_add_exon(sa, &parseinfo->exoninfo);
  /* intron fields are collected in parseinfo->introninfo and added to <sa>
     when the intron info tag is closed */
  else if (strcmp(name, DONORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, ACCEPTORSITEPROBABILITY_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsiteprobability = (GthFlt) retdouble;
  }
  else if (strcmp(name, DONORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.donorsitescore = retdouble;
  }
  else if (strcmp(name, ACCEPTORSITESCORE_TAG) == 0) {
    SCANDOUBLE;
    parseinfo->introninfo.acceptorsitescore = retdouble;
  }
  else if (strcmp(name, INTRONINFO_TAG) == 0)
    gth_sa_add_intron(sa, &parseinfo->introninfo);
  else if (strcmp(name, POLYASTART_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_start(sa, ret);
  }
  else if (strcmp(name, POLYAEND_TAG) == 0) {
    SCANUINT;
    gth_sa_set_polyAtail_stop(sa, ret);
  }
  else if (strcmp(name, ALIGNMENTSCORE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_score(sa, retdouble);
  }
  else if (strcmp(name, COVERAGE_TAG) == 0) {
    SCANDOUBLE;
    gth_sa_set_coverage(sa, retdouble);
  }
  else if (strcmp(name, COVERAGEOFGENOMICSEGMENTISHIGHEST_TAG) == 0) {
    gth_sa_set_highest_cov(sa, parse_boolean(data, parseinfo));
  }
  else if (strcmp(name, CUMULATIVELENGTHOFSCOREDEXONS_TAG) == 0) {
    SCANUINT;
    gth_sa_set_cumlen_scored_exons(sa, ret);
  }
}
static int call_dna_DP(bool directmatches, GthCallInfo *call_info, GthInput *input, GthStat *stat, GthSACollection *sa_collection, GthSA *saA, GtUword gen_file_num, GtUword ref_file_num, GtUword gen_total_length, GtUword gen_offset, const GtRange *gen_seq_bounds, const GtRange *gen_seq_bounds_rc, GtUword ref_total_length, GtUword ref_offset, GtUword chainctr, GtUword num_of_chains, GthMatchInfo *match_info, const unsigned char *ref_seq_tran, const unsigned char *ref_seq_orig, const unsigned char *ref_seq_tran_rc, const unsigned char *ref_seq_orig_rc, GthChain *chain, GthDNACompletePathMatrixJT dna_complete_path_matrix_jt, GthProteinCompletePathMatrixJT protein_complete_path_matrix_jt) { int rval; bool bothstrandsanalyzed, firstdp = true, GT_UNUSED gs2outdirectmatches = directmatches; GthSA *saB = NULL; GtFile *outfp = call_info->out->outfp; if (directmatches ? gth_input_forward(input) : gth_input_reverse(input)) { /* calculate alignment */ rval = callsahmt(true, saA, directmatches, gen_file_num, ref_file_num, chain, gen_total_length, gen_offset, gen_seq_bounds, gen_seq_bounds_rc, ref_seq_tran, ref_seq_orig, ref_total_length, ref_offset, input, &call_info->simfilterparam.introncutoutinfo, stat, chainctr, num_of_chains, call_info->translationtable, directmatches, call_info->proteinexonpenal, call_info->splice_site_model, call_info->dp_options_core, call_info->dp_options_est, call_info->dp_options_postpro, dna_complete_path_matrix_jt, protein_complete_path_matrix_jt, call_info->out); if (rval && rval != GTH_ERROR_SA_COULD_NOT_BE_DETERMINED) { /* ^ this error is treated below */ return rval; } firstdp = false; bothstrandsanalyzed = gth_input_both(input); if (rval == GTH_ERROR_SA_COULD_NOT_BE_DETERMINED || isunsuccessfulalignment(saA, call_info->out->comments, outfp)) { match_info->call_number--; /* if the spliced alignment was unsuccessful, it is deleted and the next hit is considered. 
*/ gth_sa_delete(saA); return 0; /* continue */ } /* if not both strands are analyzed, we can save this alignment now. Otherwise we have to calculate the alignment to the other strand first and then save the better one. */ if (!bothstrandsanalyzed) save_sa(sa_collection, saA, call_info->sa_filter, match_info, stat); } if (directmatches ? gth_input_reverse(input) : gth_input_forward(input)) { if ((firstdp || gth_sa_is_poor(saA, call_info->minaveragessp)) && !call_info->cdnaforwardonly) { if (firstdp) { /* space for first alignment is already allocated, bu we have to change the direction of the genomic and the reference strand */ gth_sa_set_gen_strand(saA, !directmatches); gth_sa_set_ref_strand(saA, false); } else { /* allocating space for second alignment */ saB = gth_sa_new_and_set(!directmatches, false, input, chain->gen_file_num, chain->gen_seq_num, chain->ref_file_num, chain->ref_seq_num, match_info->call_number, gen_total_length, gen_offset, ref_total_length); } /* setting gs2outdirectmatches (for compatibility) */ gs2outdirectmatches = (bool) !directmatches; /* calculate alignment */ rval = callsahmt(true, firstdp ? saA : saB, !directmatches, gen_file_num, ref_file_num, chain, gen_total_length, gen_offset, gen_seq_bounds, gen_seq_bounds_rc, ref_seq_tran_rc, ref_seq_orig_rc, ref_total_length, ref_offset, input, &call_info->simfilterparam.introncutoutinfo, stat, chainctr, num_of_chains, call_info->translationtable, directmatches, call_info->proteinexonpenal, call_info->splice_site_model, call_info->dp_options_core, call_info->dp_options_est, call_info->dp_options_postpro, dna_complete_path_matrix_jt, protein_complete_path_matrix_jt, call_info->out); if (rval && rval != GTH_ERROR_SA_COULD_NOT_BE_DETERMINED) { /* ^ this error is treated below */ return rval; } if (firstdp) { if (rval == GTH_ERROR_SA_COULD_NOT_BE_DETERMINED || isunsuccessfulalignment(saA, call_info->out->comments, outfp)) { /* for compatibility with GS2 */ /* XXX: makes no sense. 
Possibly only if -gs2out is used. */ match_info->significant_match_found= true; /* if the spliced alignment was unsuccessful, it is deleted and the next hit is considered. */ gth_sa_delete(saA); return 0; /* continue */ } save_sa(sa_collection, saA, call_info->sa_filter, match_info, stat); } else /* !firstdp */ { if (rval == GTH_ERROR_SA_COULD_NOT_BE_DETERMINED || isunsuccessfulalignment(saB, call_info->out->comments, outfp) || !gth_sa_B_is_better_than_A(saA, saB)) { /* insert first SA */ save_sa(sa_collection, saA, call_info->sa_filter, match_info, stat); /* discard second SA */ gth_sa_delete(saB); } else { /* insert second SA */ save_sa(sa_collection, saB, call_info->sa_filter, match_info, stat); /* free first SA */ gth_sa_delete(saA); } } } else save_sa(sa_collection, saA, call_info->sa_filter, match_info, stat); } return 0; }