/*
  Fill the alignment lines for the spliced alignment <sa>.

  The reference file of <sa> is loaded into <input> first. Depending on
  gth_sa_alphatype(sa):
  - DNA_ALPHA:     <*first_line> and <*second_line> are filled by
                   gthfillthetwoalignmentlines(), <*third_line> is set to NULL.
  - PROTEIN_ALPHA: all three lines are filled by
                   gthfillthethreealignmentlines() using <translationtable>.

  Returns the value reported by the respective fill function (kept in <cols>,
  presumably the number of alignment columns). For any other alphabet type the
  assertion fires; with assertions disabled 0 is returned and the line
  pointers are left untouched.
*/
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                   unsigned char **first_line,
                                   unsigned char **second_line,
                                   unsigned char **third_line,
                                   GtUword translationtable,
                                   GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
          referencestartcutoff, referenceendcutoff, referencetotalcutoff,
          cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence, positioned at the start of the DP range
     NOTE(review): the cast is kept from the original; it presumably drops a
     const qualifier from the returned pointer -- confirm whether the fill
     functions could take const pointers instead */
  gen_seq_orig = (unsigned char*)
                 gth_input_original_genomic_sequence(input,
                                                gth_sa_gen_file_num(sa),
                                                gth_sa_gen_strand_forward(sa))
                 + gth_sa_gen_dp_start(sa);

  /* get reference sequence (reverse complement for the reverse strand) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig = gth_seq_con_get_orig_seq(ref_seq_con,
                                            gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig = gth_seq_con_get_orig_seq_rc(ref_seq_con,
                                               gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig + genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa)
                                         - genomictotalcutoff,
                                         ref_seq_orig + referencestartcutoff,
                                         gth_sa_ref_total_length(sa)
                                         - referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,    /* linewidth not important
                                                  here */
                                         0,    /* no short introns here */
                                         NULL, /* therefore no
                                                  shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig + genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa)
                                           - genomictotalcutoff,
                                           ref_seq_orig + referencestartcutoff,
                                           gth_sa_ref_total_length(sa)
                                           - referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
/*
  Run the matcher plugin on genomic file <gen_file_num> and reference file
  <ref_file_num>, collect the reported matches and compute chains from them,
  which are stored in <chain_collection> and sorted at the end.

  The currently loaded input is dropped before the matcher runs and both files
  are loaded again afterwards -- unless no match was found, in which case the
  function returns early (after optionally printing a comment to the output
  file) without reloading them.

  <plugins> must provide matcher_arguments_new, matcher_arguments_delete and
  matcher_runner.
*/
void gth_chaining(GthChainCollection *chain_collection,
                  GtUword gen_file_num,
                  GtUword ref_file_num,
                  GthCallInfo *call_info,
                  GthInput *input,
                  GthStat *stat,
                  bool directmatches,
                  const GthPlugins *plugins)
{
  GtFile *outfp = call_info->out->outfp;
  bool refseqisdna = gth_input_ref_file_is_dna(input, ref_file_num);
  GtUword num_ref_seqs = 0;
  GtArray *matches;
  GthChainingInfo chaining_info;
  GthMatchProcessorInfo match_processor_info;
  void *matcher_arguments;
  bool genomic_first;

  /* the matcher plugin has to be complete */
  gt_assert(plugins);
  gt_assert(plugins->matcher_arguments_new);
  gt_assert(plugins->matcher_arguments_delete);
  gt_assert(plugins->matcher_runner);

  /* set up match storage and chaining state */
  matches = gt_array_new(sizeof (GthMatch));
  chaining_info_init(&chaining_info, directmatches, refseqisdna, call_info,
                     input, stat, gen_file_num, ref_file_num);

  /* in inverse mode, or if the reference is not DNA, the genomic file name is
     passed as the first file name and the reference file name as the second
     one; otherwise the two are swapped */
  genomic_first = call_info->simfilterparam.inverse || !refseqisdna;

  matcher_arguments =
    plugins->matcher_arguments_new(true,
                                   input,
                                   genomic_first
                                   ? gth_input_get_genomic_filename(input,
                                                                gen_file_num)
                                   : gth_input_get_reference_filename(input,
                                                                ref_file_num),
                                   genomic_first
                                   ? gth_input_get_reference_filename(input,
                                                                ref_file_num)
                                   : gth_input_get_genomic_filename(input,
                                                                gen_file_num),
                                   directmatches,
                                   refseqisdna,
                                   call_info->progname,
                                   gt_str_get(gth_input_proteinsmap(input)),
                                   call_info->simfilterparam.exact,
                                   call_info->simfilterparam.edist,
                                   false,
                                   0,
                                   call_info->simfilterparam.minmatchlength,
                                   call_info->simfilterparam.seedlength,
                                   call_info->simfilterparam.exdrop,
                                   call_info->simfilterparam.prminmatchlen,
                                   call_info->simfilterparam.prseedlength,
                                   call_info->simfilterparam.prhdist,
                                   call_info->translationtable,
                                   call_info->simfilterparam.online,
                                   call_info->simfilterparam.noautoindex,
                                   call_info->simfilterparam.maskpolyAtails,
                                   false);

  match_processor_info_init(&match_processor_info,
                            matches,
                            chain_collection,
                            directmatches,
                            refseqisdna,
                            call_info->simfilterparam.online,
                            call_info->simfilterparam.inverse,
                            stat,
                            &chaining_info,
                            call_info->simfilterparam.maxnumofmatches,
                            call_info->simfilterparam.rare,
                            call_info->fragweightfactor,
                            plugins->jump_table_new,
                            plugins->jump_table_new_reverse,
                            plugins->jump_table_delete);

  /* a per-reference-sequence match counter is only needed if the number of
     matches is limited or the match number distribution is recorded */
  if (call_info->simfilterparam.maxnumofmatches > 0 ||
      gth_stat_get_matchnumdistri(stat)) {
    num_ref_seqs = gth_input_num_of_ref_seqs(input, ref_file_num);
    match_processor_info.matchnumcounter =
      gt_malloc(sizeof (GtUword) * num_ref_seqs);
    /* all counters start at 0 */
    memset(match_processor_info.matchnumcounter, 0,
           (size_t) num_ref_seqs * sizeof (GtUword));
  }

  /* The input holds the virtual trees and vmatch loads them into memory as
     well. Dropping the input here prevents the virtual trees from being
     loaded twice. */
  gth_input_delete_current(input);

  /* run the matcher */
  if (call_info->out->showverbose)
    call_info->out->showverbose("call vmatch to compute matches");
  plugins->matcher_runner(matcher_arguments,
                          call_info->out->showverbose,
                          call_info->out->showverboseVM,
                          &match_processor_info);

  /* the matcher arguments have to go now, otherwise the reference file would
     be mapped twice further down */
  plugins->matcher_arguments_delete(matcher_arguments);

  /* sequence collections which the matcher may have filled */
  gth_seq_con_delete(match_processor_info.gen_seq_con);
  gth_seq_con_delete(match_processor_info.ref_seq_con);

  /* transfer the non-zero match counts into the match number distribution,
     if it is recorded */
  if (gth_stat_get_matchnumdistri(stat)) {
    GtUword seq_idx;
    for (seq_idx = 0; seq_idx < num_ref_seqs; seq_idx++) {
      if (match_processor_info.matchnumcounter[seq_idx] > 0) {
        gth_stat_add_to_matchnumdistri(stat,
                            match_processor_info.matchnumcounter[seq_idx]);
      }
    }
  }

  /* the match number counter is not needed anymore */
  gt_free(match_processor_info.matchnumcounter);

  /* nothing to chain without matches */
  if (gt_array_size(matches) == 0) {
    if (call_info->out->comments)
      gt_file_xprintf(outfp, "%c no match has been found\n", COMMENTCHAR);
    gt_array_delete(matches);
    return;
  }

  /* bring genomic and reference file back into memory */
  gth_input_load_genomic_file(input, gen_file_num, true);
  gth_input_load_reference_file(input, ref_file_num, true);

  /* turn the matches into chains */
  calc_chains_from_matches(chain_collection,
                           matches,
                           &chaining_info,
                           gth_input_current_gen_seq_con(input),
                           gth_input_current_ref_seq_con(input),
                           call_info->simfilterparam.rare,
                           call_info->fragweightfactor,
                           plugins->jump_table_new,
                           plugins->jump_table_new_reverse,
                           plugins->jump_table_delete);

  if (call_info->out->showverbose) {
    call_info->out->showverbose("sort global chains according to reference "
                                "sequence coverage");
  }

  /* order the chains */
  gth_chain_collection_sort(chain_collection);

  /* clean up */
  gt_array_delete(matches);
}
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen, GtUword translationtable, bool wildcardimplosion, GthInput *input, GtFile *outfp) { GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff, referencestartcutoff, referenceendcutoff, referencetotalcutoff; bool reverse_subject_pos = false; const unsigned char *gen_seq_orig, *ref_seq_orig; GthSeqCon *ref_seq_con; GtAlphabet *ref_alphabet; gt_assert(sa && input); /* only for cosmetic reasons */ genomicstartcutoff = gth_sa_genomiccutoff_start(sa); genomicendcutoff = gth_sa_genomiccutoff_end(sa); genomictotalcutoff = genomicstartcutoff + genomicendcutoff; referencestartcutoff = gth_sa_referencecutoff_start(sa); referenceendcutoff = gth_sa_referencecutoff_end(sa); referencetotalcutoff = referencestartcutoff + referenceendcutoff; /* make sure that the correct files are loaded */ gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false); ref_seq_con = gth_input_current_ref_seq_con(input); ref_alphabet = gth_input_current_ref_alphabet(input); /* If the reverse complement of the genomic DNA is considered, this opition is needed for correct output of the genomic sequence positions by the function showalignmentgeneric() */ if (!gth_sa_gen_strand_forward(sa)) reverse_subject_pos = true; /* get genomic sequence */ gen_seq_orig = gth_input_original_genomic_sequence(input, sa->gen_file_num, sa->gen_strand_forward) + gth_sa_gen_dp_start(sa); /* get reference sequence */ if (gth_sa_ref_strand_forward(sa)) { ref_seq_orig = gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa)); } else { ref_seq_orig = gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa)); } switch (gth_sa_alphatype(sa)) { case DNA_ALPHA: gthshowalignmentdna(outfp,ALIGNMENTLINEWIDTH, gth_sa_get_editoperations(sa), gth_sa_get_editoperations_length(sa), gth_sa_indelcount(sa), gen_seq_orig + genomicstartcutoff, gth_sa_gen_dp_length(sa) - genomictotalcutoff, ref_seq_orig + referencestartcutoff, 
gth_sa_ref_total_length(sa) - referencetotalcutoff, gth_sa_gen_dp_start(sa) + genomicstartcutoff - gth_sa_gen_offset(sa), referencestartcutoff, gth_sa_gen_total_length(sa), showintronmaxlen, ref_alphabet, reverse_subject_pos, wildcardimplosion); break; case PROTEIN_ALPHA: gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH, gth_sa_get_editoperations(sa), gth_sa_get_editoperations_length(sa), gth_sa_indelcount(sa), gen_seq_orig + genomicstartcutoff, gth_sa_gen_dp_length(sa) - genomictotalcutoff, ref_seq_orig + referencestartcutoff, gth_sa_ref_total_length(sa) - referencetotalcutoff, gth_sa_gen_dp_start(sa) + genomicstartcutoff - gth_sa_gen_offset(sa), referencestartcutoff, gth_sa_gen_total_length(sa), showintronmaxlen, ref_alphabet, translationtable, gth_input_score_matrix(input), gth_input_score_matrix_alpha(input), reverse_subject_pos, wildcardimplosion); break; default: gt_assert(0); } }