/*
  Sanity check for the chain <-> inverted chain conversion: convert
  <inverted_chain> back into a regular chain and report whether the result
  has the same forward and reverse ranges as <orig_chain>.

  Returns true if both chains contain the same number of forward ranges, the
  same number of reverse ranges, and every range pair agrees in start and end
  position; false otherwise. The temporary chain built for the comparison is
  always released before returning.
*/
static bool conversion_is_correct(GthChain *orig_chain,
                                  GthInvertedChain *inverted_chain,
                                  unsigned long gen_total_length,
                                  unsigned long gen_offset)
{
  GthChain *roundtrip_chain;
  const GtRange *expected, *actual;
  unsigned long idx, num_of_ranges;
  bool identical = false;

  /* rebuild a regular chain from the inverted one */
  roundtrip_chain = gth_chain_new();
  convert_inverted_chain_to_chain(roundtrip_chain, inverted_chain,
                                  gen_total_length, gen_offset);

  /* the number of (potential) exons has to agree on both strands before the
     positions are worth comparing */
  num_of_ranges = gt_array_size(orig_chain->forwardranges);
  if (num_of_ranges == gt_array_size(roundtrip_chain->forwardranges) &&
      gt_array_size(orig_chain->reverseranges) ==
      gt_array_size(roundtrip_chain->reverseranges)) {
    /* compare the positions of each (potential) exon; the loop is left early
       on the first mismatch */
    for (idx = 0; idx < num_of_ranges; idx++) {
      expected = gt_array_get(orig_chain->forwardranges, idx);
      actual = gt_array_get(roundtrip_chain->forwardranges, idx);
      if (expected->start != actual->start || expected->end != actual->end)
        break;

      /* reverse ranges are only looked at if the forward ranges matched */
      expected = gt_array_get(orig_chain->reverseranges, idx);
      actual = gt_array_get(roundtrip_chain->reverseranges, idx);
      if (expected->start != actual->start || expected->end != actual->end)
        break;
    }
    /* all ranges matched iff the loop ran to completion */
    identical = (idx == num_of_ranges);
  }

  gth_chain_delete(roundtrip_chain);
  return identical;
}
/*
  Process the global chain <chain> (built over <fragments>) and, if it passes
  the global chain filter, store it as a GthChain in info->chain_collection.

  chain             the global chain to process; must contain at least one
                    fragment and be colinear (both are asserted)
  fragments         fragment array the fragment numbers of <chain> index into
  num_of_fragments  number of entries in <fragments>
  max_gap_width     only used in a debug assertion on the gap between two
                    consecutive saved ranges
  data              pointer to a GthSaveChainInfo carrying the configuration,
                    output file and target chain collection

  For an accepted chain the function
    - merges the genomic ranges (startpos2/endpos2) of the chain fragments
      into forward ranges, joining ranges that overlap,
    - optionally enriches the chain (info->enrichchains),
    - derives the reverse ranges from the forward ranges,
    - optionally computes forward and reverse jump tables (info->jump_table),
    - adds the new GthChain to info->chain_collection.
  A rejected chain is deleted again.
*/
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width, void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GtRange range;
  GthChain *gth_chain;
  unsigned long i, fragnum;

  gt_assert(chain_is_colinear(chain, fragments));

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                    COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                    "following fragments\n", COMMENTCHAR);
    for (i = 0; i < gt_chain_size(chain); i++)
      showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
  }

  /* init */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num  = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num  = info->ref_seq_num;

  /* chain has a minimum length of 1 */
  gt_assert(gt_chain_size(chain));

  /* global chain filter; the filter receives the address of the coverage
     field of the new chain (the '&' had been lost, leaving an invalid
     expression) */
  if (globalchainislongenough(chain, fragments,
                              &gth_chain->refseqcoverage, info->gcmincoverage,
                              info->referencelength, info->stat, info->comments,
                              info->outfp)) {
    /* save all potential exons */
    for (i = 0; i < gt_chain_size(chain); i++) {
      fragnum = gt_chain_get_fragnum(chain, i);
      range.start = fragments[fragnum].startpos2;
      range.end = fragments[fragnum].endpos2;

      /* check for overlap with the previously saved range (there is none for
         the first fragment) */
      if (i > 0 &&
          range.start <=
          ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end) {
        /* overlap found -> modify last range */
        gt_assert(((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                  ->end <= range.end);
        ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end =
          range.end;
      }
      else {
#ifndef NDEBUG
        if (i > 0) {
          /* gap width is smaller or equal than the maximum gap width */
          gt_assert((range.start - 1 -
                     ((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                     ->end + 1 - 1) <= max_gap_width);
        }
#endif
        /* save range */
        gt_array_add(gth_chain->forwardranges, range);
      }
    }

    /* the genomic range is taken before the chain is (optionally) enriched */
    GtRange genomicrange = chain_get_genomicrange(gth_chain);

    if (info->enrichchains) {
      enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                   info->outfp);
    }

    gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

    /* copy ranges to opposite strand */
    gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                      gth_chain->forwardranges,
                                      info->gen_total_length,
                                      info->gen_offset);

    /* compute jump table if necessary */
    if (info->jump_table) {
      GthJumpTable *forward_jump_table, *reverse_jump_table;
      GtArray *chain_fragments;
      chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                     num_of_fragments,
                                                     info->enrichchains,
                                                     &genomicrange);
      forward_jump_table =
        info->jump_table_new(gt_array_get_space(chain_fragments),
                             gt_array_size(chain_fragments), info->jtdebug);
      reverse_jump_table =
        info->jump_table_new_reverse(forward_jump_table,
                                     info->gen_total_length, info->gen_offset,
                                     info->ref_total_length, info->ref_offset);
      /* a freshly created chain must not carry jump tables yet */
      gt_assert(!gth_chain->forward_jump_table);
      gth_chain->forward_jump_table = forward_jump_table;
      gt_assert(!gth_chain->reverse_jump_table);
      gth_chain->reverse_jump_table = reverse_jump_table;
      gt_array_delete(chain_fragments);
      gth_chain->jump_table_delete = info->jump_table_delete;
    }

    /* save array of potential exons */
    gth_chain_collection_add(info->chain_collection, gth_chain);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain with the following "
                                   "ranges has been saved\n",COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->forwardranges, info->outfp);
      gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->reverseranges, info->outfp);
    }

    /* output stored chains here (Mohamed needed this to compare the chaining
       phase of gth with CHAINER) */
    if (info->stopafterchaining) {
      gt_file_xprintf(info->outfp,
                      "%c gl. chain with coverage=%.2f and score %ld "
                      "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                      gth_chain->refseqcoverage, gt_chain_get_score(chain),
                      gth_chain->gen_seq_num, SHOWSTRAND(info->directmatches),
                      gth_chain->ref_seq_num);

      for (i = 0; i < gt_chain_size(chain); i++)
        showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
    }
  }
  else {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!info->paralogs);
    if (info->comments)
      gt_file_xprintf(info->outfp, "%c global chain discarded\n", COMMENTCHAR);
    /* the chain was not handed over to the collection, so free it here */
    gth_chain_delete(gth_chain);
  }
}
static int callsahmt(bool call_dna_dp, GthSA *sa, bool forward, GtUword gen_file_num, GtUword ref_file_num, GthChain *raw_chain, GtUword gen_total_length, GtUword gen_offset, const GtRange *gen_seq_bounds, const GtRange *gen_seq_bounds_rc, const unsigned char *ref_seq_tran, const unsigned char *ref_seq_orig, GtUword ref_total_length, GtUword ref_offset, GthInput *input, Introncutoutinfo *introncutoutinfo, GthStat *stat, GtUword chainctr, GtUword num_of_chains, GtUword translationtable, bool directmatches, bool proteinexonpenal, GthSpliceSiteModel *splice_site_model, GthDPOptionsCore *dp_options_core, GthDPOptionsEST *dp_options_est, GthDPOptionsPostpro *dp_options_postpro, GthDNACompletePathMatrixJT dna_complete_path_matrix_jt, GthProteinCompletePathMatrixJT protein_complete_path_matrix_jt, GthOutput *out) { int rval; GthChain *actual_chain, *contracted_chain, *used_chain; GtUword icdelta = introncutoutinfo->icinitialdelta, iciterations = introncutoutinfo->iciterations; bool useintroncutout = introncutoutinfo->introncutout; /* initially useintron is set to the value of introncutoutinfo->introncutout, if the automatic intron cutotu technique is acitvated it can be set to true if an matrix allocation error (ERROR_MATRIX_ALLOCATION_FAILED) occurs */ gt_assert(sa); actual_chain = gth_chain_new(); contracted_chain = gth_chain_new(); for (;;) { /* reset actualDPrange; */ gt_array_set_size(actual_chain->forwardranges, 0); gt_array_set_size(actual_chain->reverseranges, 0); /* copy raw chain to actual chain */ gth_chain_copy(actual_chain, raw_chain); /* shorten potential introns and compute spliced sequence, if the intron cutout technique is used */ if (useintroncutout) { /* shorten potential introns */ gth_chain_shorten_introns(actual_chain, icdelta, introncutoutinfo->icminremintronlength, gen_total_length, gen_offset, out->comments, out->outfp); } else gth_chain_contract(contracted_chain, actual_chain); if (out->showverbose) { 
show_matrix_calculation_status(out->showverbose, forward, gth_sa_ref_strand_forward(sa), useintroncutout, chainctr, num_of_chains, icdelta, gen_file_num, gth_input_num_of_gen_files(input), ref_file_num, gth_input_num_of_ref_files(input), directmatches, out->verboseseqs, gth_sa_gen_id(sa), gth_sa_ref_id(sa)); } /* allocate space for DP parameter */ if (out->comments) { gt_file_xprintf(out->outfp, "%c alloc space for DP param " "(genomicid=%s, referenceid=%s)\n", COMMENTCHAR, gth_sa_gen_id(sa), gth_sa_ref_id(sa)); } used_chain = useintroncutout ? actual_chain : contracted_chain; /* The variable 'forward' denotes the genomic strand on which the DP is applied. */ if (forward) { if (call_dna_dp) { rval = gth_align_dna(sa, used_chain->forwardranges, gth_input_current_gen_seq_tran(input), gth_input_current_gen_seq_orig(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), useintroncutout, introncutoutinfo->autoicmaxmatrixsize, out->showeops, out->comments, out->gs2out, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_est, dp_options_postpro, dna_complete_path_matrix_jt, raw_chain->forward_jump_table, ref_offset, stat, out->outfp); } else { /* call_protein_dp */ rval = gth_align_protein(sa, used_chain->forwardranges, gth_input_current_gen_seq_tran(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), input, useintroncutout, introncutoutinfo->autoicmaxmatrixsize, proteinexonpenal, out->showeops, out->comments, out->gs2out, translationtable, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_postpro, protein_complete_path_matrix_jt, raw_chain->forward_jump_table, ref_offset, stat, out->outfp); } } else { /* the DP is called with the revers positions specifiers */ if (call_dna_dp) { rval = gth_align_dna(sa, used_chain->reverseranges, gth_input_current_gen_seq_tran_rc(input), 
gth_input_current_gen_seq_orig_rc(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), useintroncutout, introncutoutinfo->autoicmaxmatrixsize, out->showeops, out->comments, out->gs2out, gen_seq_bounds_rc, splice_site_model, dp_options_core, dp_options_est, dp_options_postpro, dna_complete_path_matrix_jt, raw_chain->reverse_jump_table, ref_offset, stat, out->outfp); } else { /* call_protein_dp */ rval = gth_align_protein(sa, used_chain->reverseranges, gth_input_current_gen_seq_tran_rc(input), ref_seq_tran, ref_seq_orig, ref_total_length, gth_input_current_gen_alphabet(input), gth_input_current_ref_alphabet(input), input, useintroncutout, introncutoutinfo->autoicmaxmatrixsize, proteinexonpenal, out->showeops, out->comments, out->gs2out, translationtable, gen_seq_bounds, splice_site_model, dp_options_core, dp_options_postpro, protein_complete_path_matrix_jt, raw_chain->reverse_jump_table, ref_offset, stat, out->outfp); } } if (rval == GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED) return GTH_ERROR_DP_PARAMETER_ALLOCATION_FAILED; /* handling of special error codes ERROR_CUTOUT_NOT_IN_INTRON and ERROR_MATRIX_ALLOCATION_FAILED from DP the only possible special error code given back by this function is ERROR_SA_COULD_NOT_BE_DETERMINED */ #ifndef NDEBUG if (!useintroncutout) gt_assert(rval != GTH_ERROR_CUTOUT_NOT_IN_INTRON); #endif if (useintroncutout && rval == GTH_ERROR_CUTOUT_NOT_IN_INTRON) { /* the intron cutout technique failed -> increase counter */ gth_stat_increment_numofunsuccessfulintroncutoutDPs(stat); if (--iciterations > 0) { /* if an iterations is left, increase icdelta, decrease the remaining iterations, and continue the while-loop */ icdelta += introncutoutinfo->icdeltaincrease; continue; } else { /* no iteration left, discard SA */ gth_stat_increment_numofundeterminedSAs(stat); gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return 
GTH_ERROR_SA_COULD_NOT_BE_DETERMINED; } } else if (rval == GTH_ERROR_MATRIX_ALLOCATION_FAILED) { if (introncutoutinfo->autoicmaxmatrixsize > 0 && !useintroncutout) { /* if the automatic intron cutout technique is enabled and a ``normal'' DP returned with the matrix allocation error, set useintroncutout, increase counter, and continue */ if (out->showverbose) { out->showverbose("matrix allocation failed, use intron cutout " "technique"); } gth_stat_increment_numofautointroncutoutcalls(stat); useintroncutout = true; continue; } else { /* otherwise increase relevant statistics, free space and return with error */ gth_stat_increment_numoffailedmatrixallocations(stat); gth_stat_increment_numofundeterminedSAs(stat); gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return GTH_ERROR_SA_COULD_NOT_BE_DETERMINED; } } else if (rval) /* ``normal'' DP */ return -1; break; } #if 0 if (out->comments) { gt_file_xprintf(out->outfp, "%c this SA has been computed:\n", COMMENTCHAR); gth_sa_show(sa, input, out->outfp); } #endif /* free */ gth_chain_delete(actual_chain); gth_chain_delete(contracted_chain); return 0; }