/*
  Allocate a GtQuerymatchoutoptions object and set up its alignment support.

  The fields alignmentwidth, front_trace, ggemi and totallength are assigned
  here; no other field is written by this function. A front trace and a
  greedy extension info object are created only if both <alignmentwidth> and
  <errorpercentage> are positive; otherwise both pointers are NULL.
  The greedy extension info is created with a userdefinedleastlength of 0.
  totallength is set to GT_UWORD_MAX.

  Returns the newly allocated object.
*/
GtQuerymatchoutoptions *gt_querymatchoutoptions_new(
                                    GtUword alignmentwidth,
                                    GtUword errorpercentage,
                                    GtUword maxalignedlendifference,
                                    GtUword history,
                                    GtUword perc_mat_history,
                                    GtExtendCharAccess extend_char_access,
                                    GtUword sensitivity)
{
  GtQuerymatchoutoptions *options = gt_malloc(sizeof *options);

  options->alignmentwidth = alignmentwidth;
  options->totallength = GT_UWORD_MAX;
  options->front_trace = NULL;
  options->ggemi = NULL;

  /* without an alignment width or an error percentage no helper objects
     are created */
  if (alignmentwidth == 0 || errorpercentage == 0)
  {
    return options;
  }

  options->front_trace = front_trace_new();
  options->ggemi
    = gt_greedy_extend_matchinfo_new(errorpercentage,
                                     maxalignedlendifference,
                                     history,
                                     perc_mat_history,
                                     0, /* userdefinedleastlength not used */
                                     extend_char_access,
                                     sensitivity);
  return options;
}
/*
  Tool runner for the seed extension.

  The error percentage is derived as 100 - arguments->se_minidentity.
  Encoded sequence A is loaded from dbs_indexname; if dbs_queryname is
  non-empty, encoded sequence B is loaded from it, otherwise B is a further
  reference to A. Depending on the options that are set, the character
  access mode, the bias dependent parameters, the greedy and xdrop extension
  infos and the match output options are prepared, and finally
  gt_diagbandseed_run() is called. If showtime is enabled, the overall time
  is printed when no error occurred.

  <argc>, <argv> and <parsed_args> are unused. <tool_arguments> is
  interpreted as a GtSeedExtendArguments pointer and must not be NULL.

  Returns -1 if one of the preparation steps fails, otherwise the return
  value of gt_diagbandseed_run().
*/
static int gt_seed_extend_runner(GT_UNUSED int argc,
                                 GT_UNUSED const char **argv,
                                 GT_UNUSED int parsed_args,
                                 void *tool_arguments,
                                 GtError *err)
{
  GtSeedExtendArguments *arguments = tool_arguments;
  GtEncseqLoader *loader = NULL;
  GtEncseq *aencseq = NULL, *bencseq = NULL;
  GtGreedyextendmatchinfo *greedy_info = NULL;
  GtXdropmatchinfo *xdrop_info = NULL;
  GtQuerymatchoutoptions *out_options = NULL;
  GtTimer *overall_timer = NULL;
  GtExtendCharAccess char_access = GT_EXTEND_CHAR_ACCESS_ANY;
  GtUword errorpercentage = 0UL;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  gt_assert(arguments->se_minidentity >= GT_EXTEND_MIN_IDENTITY_PERCENTAGE &&
            arguments->se_minidentity <= 100UL);

  /* the tolerated error rate is the complement of the minimum identity */
  errorpercentage = 100UL - arguments->se_minidentity;

  /* time the complete run if benchmarking or verbose output is requested */
  if (arguments->benchmark || arguments->verbose)
  {
    gt_showtime_enable();
  }
  if (gt_showtime_enabled())
  {
    overall_timer = gt_timer_new();
    gt_timer_start(overall_timer);
  }

  /* encseq A is always loaded; encseq B either comes from its own index
     or is a second reference to A */
  loader = gt_encseq_loader_new();
  gt_encseq_loader_enable_autosupport(loader);
  aencseq = gt_encseq_loader_load(loader,
                                  gt_str_get(arguments->dbs_indexname),
                                  err);
  if (aencseq == NULL)
  {
    had_err = -1;
  } else
  {
    const char *queryname = gt_str_get(arguments->dbs_queryname);

    if (strcmp(queryname, "") == 0)
    {
      bencseq = gt_encseq_ref(aencseq);
    } else
    {
      bencseq = gt_encseq_loader_load(loader, queryname, err);
    }
    if (bencseq == NULL)
    {
      had_err = -1;
      gt_encseq_delete(aencseq);
    }
  }
  gt_encseq_loader_delete(loader);

  /* the character access mode is only needed for extension or alignment
     output */
  if (!had_err)
  {
    if (gt_option_is_set(arguments->se_option_greedy) ||
        gt_option_is_set(arguments->se_option_xdrop) ||
        arguments->se_alignmentwidth > 0)
    {
      char_access = gt_greedy_extend_char_access(
                                gt_str_get(arguments->se_char_access_mode),
                                err);
      if ((int) char_access == -1)
      {
        had_err = -1;
        gt_encseq_delete(aencseq);
        gt_encseq_delete(bencseq);
      }
    }
  }

  /* bias dependent parameters, adapted from E. Myers' DALIGNER: both
     se_maxalilendiff and se_perc_match_hist of <arguments> are overwritten */
  if (!had_err && arguments->bias_parameters)
  {
    const GtAlphabet *alphabet = gt_encseq_alphabet(aencseq);
    const double bias_factor[10] = {.690, .690, .690, .690, .780,
                                    .850, .900, .933, .966, 1.000};

    if (!gt_alphabet_is_dna(alphabet))
    {
      had_err = -1;
    } else
    {
      GtUword at, cg;

      at = gt_encseq_charcount(aencseq, gt_alphabet_encode(alphabet, 'a'));
      at += gt_encseq_charcount(aencseq, gt_alphabet_encode(alphabet, 't'));
      cg = gt_encseq_charcount(aencseq, gt_alphabet_encode(alphabet, 'c'));
      cg += gt_encseq_charcount(aencseq, gt_alphabet_encode(alphabet, 'g'));
      if (at + cg == 0)
      {
        /* no a, c, g or t counted at all: treated as an error as well */
        had_err = -1;
      } else
      {
        const double ratio = (double)MIN(at, cg) / (at + cg);
        int bias_index = (int)MAX(0.0, (ratio + 0.025) * 20.0 - 1.0);

        gt_assert(bias_index < 10);
        arguments->se_maxalilendiff = 30;
        arguments->se_perc_match_hist
          = (GtUword)(100.0 - errorpercentage * bias_factor[bias_index]);
        if (arguments->verbose)
        {
          printf("# Base ratio = %4.2lf -> percmathistory = "GT_WU"\n",
                 ratio, arguments->se_perc_match_hist);
        }
      }
    }
    if (had_err)
    {
      gt_error_set(err, "option \"-bias-parameters\" can only be applied to "
                        "the DNA alphabet");
      gt_encseq_delete(aencseq);
      gt_encseq_delete(bencseq);
    }
  }

  /* greedy extension */
  if (!had_err && gt_option_is_set(arguments->se_option_greedy))
  {
    greedy_info
      = gt_greedy_extend_matchinfo_new(errorpercentage,
                                       arguments->se_maxalilendiff,
                                       arguments->se_historysize,
                                       arguments->se_perc_match_hist,
                                       arguments->se_alignlength,
                                       char_access,
                                       arguments->se_extendgreedy);
    if (arguments->benchmark)
    {
      gt_greedy_extend_matchinfo_silent_set(greedy_info);
    }
  }

  /* xdrop extension */
  if (!had_err && gt_option_is_set(arguments->se_option_xdrop))
  {
    xdrop_info = gt_xdrop_matchinfo_new(arguments->se_alignlength,
                                        errorpercentage,
                                        arguments->se_xdropbelowscore,
                                        arguments->se_extendxdrop);
    if (arguments->benchmark)
    {
      gt_xdrop_matchinfo_silent_set(xdrop_info);
    }
  }

  /* match output options; they are created for alignment output or xdrop
     extension and released below under the very same condition */
  if (!had_err && (arguments->se_alignmentwidth > 0 ||
                   gt_option_is_set(arguments->se_option_xdrop)))
  {
    out_options = gt_querymatchoutoptions_new(arguments->se_alignmentwidth);
    if (gt_option_is_set(arguments->se_option_xdrop) ||
        gt_option_is_set(arguments->se_option_greedy))
    {
      /* 100 is used as sensitivity unless greedy extension is selected */
      GtUword sensitivity = 100;

      if (gt_option_is_set(arguments->se_option_greedy))
      {
        sensitivity = arguments->se_extendgreedy;
      }
      gt_querymatchoutoptions_extend(out_options,
                                     errorpercentage,
                                     arguments->se_maxalilendiff,
                                     arguments->se_historysize,
                                     arguments->se_perc_match_hist,
                                     char_access,
                                     sensitivity);
    }
  }

  /* run the diagonal band seed algorithm and release everything created
     above */
  if (!had_err)
  {
    GtDiagbandseed dbs;

    dbs.errorpercentage = errorpercentage;
    dbs.userdefinedleastlength = arguments->se_alignlength;
    dbs.seedlength = arguments->dbs_seedlength;
    dbs.logdiagbandwidth = arguments->dbs_logdiagbandwidth;
    dbs.mincoverage = arguments->dbs_mincoverage;
    dbs.maxfreq = arguments->dbs_maxfreq;
    dbs.memlimit = arguments->dbs_memlimit;
    dbs.mirror = arguments->mirror;
    dbs.overlappingseeds = arguments->overlappingseeds;
    dbs.verify = arguments->dbs_verify;
    dbs.verbose = arguments->verbose;
    dbs.debug_kmer = arguments->dbs_debug_kmer;
    dbs.debug_seedpair = arguments->dbs_debug_seedpair;
    dbs.seed_display = arguments->seed_display;
    dbs.extendgreedyinfo = greedy_info;
    dbs.extendxdropinfo = xdrop_info;
    dbs.querymatchoutopt = out_options;

    had_err = gt_diagbandseed_run(aencseq, bencseq, &dbs, err);

    gt_encseq_delete(aencseq);
    gt_encseq_delete(bencseq);
    if (gt_option_is_set(arguments->se_option_greedy))
    {
      gt_greedy_extend_matchinfo_delete(greedy_info);
    }
    if (gt_option_is_set(arguments->se_option_xdrop))
    {
      gt_xdrop_matchinfo_delete(xdrop_info);
    }
    if (arguments->se_alignmentwidth > 0 ||
        gt_option_is_set(arguments->se_option_xdrop))
    {
      gt_querymatchoutoptions_delete(out_options);
    }
  }

  /* the overall time is reported for successful runs only; the timer is
     released in either case */
  if (gt_showtime_enabled())
  {
    if (!had_err)
    {
      char *key = gt_seed_extend_params_keystring(
                            gt_option_is_set(arguments->se_option_greedy),
                            gt_option_is_set(arguments->se_option_xdrop),
                            arguments->dbs_seedlength,
                            arguments->se_alignlength,
                            arguments->se_minidentity,
                            arguments->se_maxalilendiff,
                            arguments->se_perc_match_hist,
                            arguments->se_extendgreedy,
                            arguments->se_extendxdrop,
                            arguments->se_xdropbelowscore);

      printf("# TIME seedextend-%s", key);
      gt_free(key);
      gt_timer_show_formatted(overall_timer,
                              " overall " GT_WD ".%06ld\n",
                              stdout);
    }
    gt_timer_delete(overall_timer);
  }
  return had_err;
}
/*
  Tool runner which iterates over the matches of a seed extension match file
  and displays them.

  A match iterator is created for arguments->matchfilename. For each match
  delivered by the iterator one of three things happens:
  - the iterator has a seed line and seed_extend is set: the seed is
    extended again by gt_greedy_extend_selfmatch_with_output(); this is
    only done if both encoded sequences of the iterator are the same
    object, any other case runs into gt_assert(false);
  - the iterator has a seed line and seed_extend is not set: the match is
    shown by gt_show_seed_extend_encseq();
  - otherwise the match is shown by gt_show_seed_extend_plain().

  The alignment width is 70 if show_alignment is set and 0 otherwise.

  <argc>, <argv> and <parsed_args> are unused. <tool_arguments> is
  interpreted as a GtShowSeedextArguments pointer and must not be NULL.

  Returns -1 if the iterator cannot be created, the non-zero return value
  of a failed extension (which also stops the iteration), and 0 otherwise.
*/
static int gt_show_seedext_runner(GT_UNUSED int argc,
                                  GT_UNUSED const char **argv,
                                  GT_UNUSED int parsed_args,
                                  void *tool_arguments,
                                  GtError *err)
{
  GtShowSeedextArguments *arguments = tool_arguments;
  GtSeedextendMatchIterator *semi;
  GtUword alignmentwidth = 0;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);
  if (arguments->show_alignment)
  {
    alignmentwidth = 70;
  }

  semi = gt_seedextend_match_iterator_new(arguments->matchfilename, err);
  if (semi == NULL)
  {
    had_err = -1;
  } else
  {
    const GtEncseq *aencseq = gt_seedextend_match_iterator_aencseq(semi),
                   *bencseq = gt_seedextend_match_iterator_bencseq(semi);
    GtAlignment *alignment = gt_alignment_new();
    Polishing_info *pol_info = NULL;
    GtSequencepairbuffer seqpairbuf = {NULL,NULL,0,0};
    /* only used if seed_extend is set */
    GtGreedyextendmatchinfo *greedy_info = NULL;
    GtProcessinfo_and_querymatchspaceptr extension_context;
    const GtUchar *characters = gt_encseq_alphabetcharacters(aencseq);
    const GtUchar wildcardshow = gt_encseq_alphabetwildcardshow(aencseq);
    GtUchar *show_buffer = NULL;
    GtLinspaceManagement *spacemanager;
    GtScoreHandler *scorehandler;
    GtQuerymatch *match;

    /* a display buffer is only required when alignments are shown */
    if (arguments->show_alignment)
    {
      show_buffer = gt_alignment_buffer_new(alignmentwidth);
    }
    spacemanager = gt_linspaceManagement_new();
    scorehandler = gt_scorehandler_new(0,1,0,1);

    /* polishing information is created unless relax_polish is set */
    if (!arguments->relax_polish)
    {
      const double matchscore_bias
        = gt_seedextend_match_iterator_bias_parameters(semi)
            ? gt_greedy_dna_sequence_bias_get(aencseq)
            : GT_DEFAULT_MATCHSCORE_BIAS;

      pol_info = polishing_info_new_with_bias(
                       gt_seedextend_match_iterator_errorpercentage(semi),
                       matchscore_bias,
                       gt_seedextend_match_iterator_history_size(semi));
    }
    if (arguments->seed_display)
    {
      gt_seedextend_match_iterator_seed_display_set(semi);
    }
    if (arguments->show_alignment || arguments->showeoplist)
    {
      gt_seedextend_match_iterator_querymatchoutoptions_set(
                                                 semi,
                                                 true,
                                                 arguments->showeoplist,
                                                 alignmentwidth,
                                                 !arguments->relax_polish,
                                                 arguments->seed_display);
    }
    if (arguments->seed_extend)
    {
      greedy_info = gt_greedy_extend_matchinfo_new(
                          70,
                          GT_MAX_ALI_LEN_DIFF,
                          gt_seedextend_match_iterator_history_size(semi),
                          GT_MIN_PERC_MAT_HISTORY,
                          0, /* userdefinedleastlength */
                          GT_EXTEND_CHAR_ACCESS_ANY,
                          100,
                          pol_info);
    }
    if (pol_info != NULL)
    {
      gt_alignment_polished_ends(alignment, pol_info, false);
    }
    extension_context.processinfo = greedy_info;
    if (arguments->sortmatches)
    {
      /* the return value is deliberately not used */
      (void) gt_seedextend_match_iterator_all_sorted(semi, true);
    }

    /* process the matches one by one until the iterator is exhausted or
       an extension fails */
    for (;;)
    {
      match = gt_seedextend_match_iterator_next(semi);
      if (match == NULL)
      {
        break;
      }
      if (!gt_seedextend_match_iterator_has_seedline(semi))
      {
        gt_show_seed_extend_plain(&seqpairbuf,
                                  spacemanager,
                                  scorehandler,
                                  alignment,
                                  show_buffer,
                                  alignmentwidth,
                                  arguments->showeoplist,
                                  characters,
                                  wildcardshow,
                                  aencseq,
                                  bencseq,
                                  match);
        continue;
      }
      if (!arguments->seed_extend)
      {
        const GtUword query_totallength
          = gt_encseq_seqlength(bencseq, gt_querymatch_queryseqnum(match));

        gt_show_seed_extend_encseq(match, aencseq, bencseq,
                                   query_totallength);
        continue;
      }
      if (aencseq != bencseq)
      {
        /* re-extension is only implemented for the case that both
           sequences are the same object */
        gt_assert(false);
        continue;
      }
      {
        const GtUword
          seedlen = gt_seedextend_match_iterator_seedlen(semi),
          seedpos1 = gt_seedextend_match_iterator_seedpos1(semi),
          seedpos2 = gt_seedextend_match_iterator_seedpos2(semi);

        extension_context.querymatchspaceptr = match;
        had_err = gt_greedy_extend_selfmatch_with_output(&extension_context,
                                                         aencseq,
                                                         seedlen,
                                                         seedpos1,
                                                         seedpos2,
                                                         err);
        if (had_err)
        {
          break;
        }
      }
    }

    /* release everything created in this block */
    polishing_info_delete(pol_info);
    gt_greedy_extend_matchinfo_delete(greedy_info);
    gt_free(show_buffer);
    gt_scorehandler_delete(scorehandler);
    gt_linspaceManagement_delete(spacemanager);
    gt_free(seqpairbuf.a_sequence);
    gt_free(seqpairbuf.b_sequence);
    gt_alignment_delete(alignment);
  }
  gt_seedextend_match_iterator_delete(semi);
  return had_err;
}