/*
  For every sequence delivered by the query iterator, sums the values returned
  by gt_pck_getShuStringLength() over all suffixes of the query against the
  packed index of the subject, and counts the query's C/G symbols. With
  arguments->shulen_only set, that sum is printed; otherwise the average
  length and the GC fraction are passed to gt_divergence() and the result of
  gt_calculateKr() on that divergence is printed.

  Returns 0 on success and 1 if the index or the query iterator cannot be
  created, the subject alphabet is not DNA, or reading a query fails.
*/
int gt_genomediff_pck_shu_simple(GtLogger *logger,
                                 const GtGenomediffArguments *arguments,
                                 GtError *err)
{
  int had_err = 0;
  int retval;
  GtSeqIterator *queries = NULL;
  const GtUchar *symbolmap, *currentQuery;
  const GtAlphabet *alphabet;
  GtUchar c_sym = 0, g_sym = 0;
  uint64_t queryNo;
  char *description = NULL;
  unsigned long queryLength, subjectLength = 0, currentSuffix;
  double avgShuLength, gc_query;
  const FMindex *subjectindex = NULL;
  Genericindex *genericindexSubject;
  const GtEncseq *encseq = NULL;
  double *ln_n_fac;

  /* get the precalculation of ln(n!) for 0<n<max_ln_n_fac */
  ln_n_fac = gt_get_ln_n_fac(arguments->max_ln_n_fac);
  gt_log_log("ln(max_ln_n_fac!) = %f\n",
             ln_n_fac[arguments->max_ln_n_fac]);

  genericindexSubject = genericindex_new(gt_str_get(arguments->indexname),
                                         arguments->with_esa,
                                         true,
                                         false,
                                         true,
                                         arguments->user_max_depth,
                                         logger,
                                         err);
  if (genericindexSubject == NULL)
  {
    had_err = 1;
  } else
  {
    encseq = genericindex_getencseq(genericindexSubject);
  }

  if (!had_err)
  {
    subjectLength = genericindex_get_totallength(genericindexSubject) - 1;
    subjectindex = genericindex_get_packedindex(genericindexSubject);
    queries = gt_seqiterator_sequence_buffer_new(arguments->queryname, err);
    if (queries == NULL)
    {
      /* report the failure to the caller instead of asserting */
      had_err = 1;
    }
  }

  if (!had_err)
  {
    alphabet = gt_encseq_alphabet(encseq);
    /* the GC content is computed below, so the alphabet has to be DNA */
    if (!gt_alphabet_is_dna(alphabet))
    {
      fprintf(stderr, "error: Sequences need to be dna");
      had_err = 1;
    } else
    {
      symbolmap = gt_alphabet_symbolmap(alphabet);
      gt_seqiterator_set_symbolmap(queries, symbolmap);
      c_sym = gt_alphabet_encode(alphabet, 'c');
      g_sym = gt_alphabet_encode(alphabet, 'g');
    }
  }

  for (queryNo = 0; !had_err; queryNo++)
  {
    retval = gt_seqiterator_next(queries,
                                 &currentQuery,
                                 &queryLength,
                                 &description,
                                 err);
    if (retval != 1)
    {
      if (retval < 0)
      {
        gt_free(description);
        /* a negative value signals a read error: do not return success */
        had_err = 1;
      }
      break;
    }
    /* NOTE(review): description is not released after a successful
       iteration; confirm whether the iterator or the caller owns it. */
    gt_logger_log(logger, "found query of length: %lu", queryLength);

    avgShuLength = 0.0;
    gc_query = 0.0;
    for (currentSuffix = 0; currentSuffix < queryLength; currentSuffix++)
    {
      double currentShuLength
        = (double) gt_pck_getShuStringLength(subjectindex,
                                             &currentQuery[currentSuffix],
                                             queryLength - currentSuffix);

      avgShuLength += currentShuLength;
      if (currentQuery[currentSuffix] == c_sym ||
          currentQuery[currentSuffix] == g_sym)
      {
        gc_query++;
      }
    }

    if (arguments->shulen_only)
    {
      printf("# Query %d sum of shulen:\n %.0f\n",
             (int) queryNo, avgShuLength);
    } else
    {
      double div, kr;

      /* turn the sums into per-position averages */
      avgShuLength /= (double) queryLength;
      gc_query /= (double) queryLength;

      gt_logger_log(logger, "Query %d has an average SHUstring length "
                            "of\n# shulength: %f",
                    (int) queryNo, avgShuLength);
      gt_logger_log(logger, "Query description: %s", description);
      gt_log_log("Query (i): %s", description);

      /* XXX add error checks */
      gt_logger_log(logger, "shulen:\n%f", avgShuLength);
      gt_log_log("shu: %f, gc: %f, len: %lu",
                 avgShuLength, gc_query, subjectLength);
      div = gt_divergence(arguments->divergence_rel_err,
                          arguments->divergence_abs_err,
                          arguments->divergence_m,
                          arguments->divergence_threshold,
                          avgShuLength,
                          subjectLength,
                          gc_query,
                          ln_n_fac,
                          arguments->max_ln_n_fac);
      gt_logger_log(logger, "divergence:\n%f", div);

      kr = gt_calculateKr(div);
      printf("# Kr:\n%f\n", kr);
    }
  }

  gt_free(ln_n_fac);
  gt_seqiterator_delete(queries);
  genericindex_delete(genericindexSubject);
  return had_err;
}
int gt_findsubquerygmatchforward(const GtEncseq *encseq, const void *genericindex, unsigned long totallength, Greedygmatchforwardfunction gmatchforward, const GtAlphabet *alphabet, const GtStrArray *queryfilenames, Definedunsignedlong minlength, Definedunsignedlong maxlength, bool showsequence, bool showquerypos, bool showsubjectpos, GtError *err) { Substringinfo substringinfo; Rangespecinfo rangespecinfo; bool haserr = false; GtSeqIterator *seqit; const GtUchar *query; unsigned long querylen; char *desc = NULL; int retval; uint64_t unitnum; gt_error_check(err); substringinfo.genericindex = genericindex; substringinfo.totallength = totallength; rangespecinfo.minlength = minlength; rangespecinfo.maxlength = maxlength; rangespecinfo.showsequence = showsequence; rangespecinfo.showquerypos = showquerypos; rangespecinfo.showsubjectpos = showsubjectpos; substringinfo.preprocessgmatchlength = showunitnum; substringinfo.processgmatchlength = showifinlengthrange; substringinfo.postprocessgmatchlength = NULL; substringinfo.alphabet = alphabet; substringinfo.processinfo = &rangespecinfo; substringinfo.gmatchforward = gmatchforward; substringinfo.encseq = encseq; seqit = gt_seqiterator_sequence_buffer_new(queryfilenames, err); if (!seqit) haserr = true; if (!haserr) { gt_seqiterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alphabet)); for (unitnum = 0; /* Nothing */; unitnum++) { retval = gt_seqiterator_next(seqit, &query, &querylen, &desc, err); if (retval < 0) { haserr = true; break; } if (retval == 0) { break; } gmatchposinsinglesequence(&substringinfo, unitnum, query, querylen, desc); } gt_seqiterator_delete(seqit); } return haserr ? -1 : 0; }
/*
  Loads the tyr index <tyrindexname> and calls singleseqtyrsearch() for each
  sequence read from <queryfilenames>. The count table is loaded only when
  <showmode> contains SHOWCOUNTS and the index is not empty; the bucket
  table is loaded whenever the index is not empty. With <verbose> the index
  is shown, with <performtest> it is checked.

  Returns 0 on success and -1 if any of the tables or the query iterator
  cannot be created, or if reading a sequence fails.
*/
int gt_tyrsearch(const char *tyrindexname,
                 const GtStrArray *queryfilenames,
                 unsigned int showmode,
                 unsigned int searchstrand,
                 bool verbose,
                 bool performtest,
                 GtError *err)
{
  Tyrindex *tyrindex;
  Tyrcountinfo *tyrcountinfo = NULL;
  Tyrbckinfo *tyrbckinfo = NULL;
  Tyrsearchinfo searchstate;
  GtSeqIterator *queryiter;
  const GtUchar *queryseq;
  unsigned long queryseqlen;
  char *querydesc = NULL;
  uint64_t querynum = 0;
  int nextstatus;
  int status = -1; /* only a fully processed input turns this into 0 */

  gt_error_check(err);

  tyrindex = gt_tyrindex_new(tyrindexname, err);
  if (tyrindex == NULL)
  {
    goto cleanup;
  }
  if (verbose)
  {
    gt_tyrindex_show(tyrindex);
  }
  if (performtest)
  {
    gt_tyrindex_check(tyrindex);
  }

  if ((showmode & SHOWCOUNTS) && !gt_tyrindex_isempty(tyrindex))
  {
    tyrcountinfo = gt_tyrcountinfo_new(tyrindex, tyrindexname, err);
    if (tyrcountinfo == NULL)
    {
      goto cleanup;
    }
  }

  if (!gt_tyrindex_isempty(tyrindex))
  {
    tyrbckinfo = gt_tyrbckinfo_new(tyrindexname,
                                   gt_tyrindex_alphasize(tyrindex),
                                   err);
    if (tyrbckinfo == NULL)
    {
      goto cleanup;
    }
  }

  gt_tyrsearchinfo_init(&searchstate, tyrindex, showmode, searchstrand);
  queryiter = gt_seqiterator_sequence_buffer_new(queryfilenames, err);
  if (queryiter != NULL)
  {
    gt_seqiterator_set_symbolmap(queryiter,
                                 gt_alphabet_symbolmap(searchstate.dnaalpha));
    /* a positive status delivers a sequence, 0 ends the input, and a
       negative status is an error */
    while ((nextstatus = gt_seqiterator_next(queryiter,
                                             &queryseq,
                                             &queryseqlen,
                                             &querydesc,
                                             err)) > 0)
    {
      /* NOTE(review): querydesc is not released here; confirm whether the
         iterator or the caller owns it. */
      singleseqtyrsearch(tyrindex,
                         tyrcountinfo,
                         &searchstate,
                         tyrbckinfo,
                         querynum,
                         queryseq,
                         queryseqlen,
                         querydesc);
      querynum++;
    }
    gt_seqiterator_delete(queryiter);
    if (nextstatus == 0)
    {
      status = 0;
    }
  }
  gt_tyrsearchinfo_delete(&searchstate);

cleanup:
  if (tyrbckinfo != NULL)
  {
    gt_tyrbckinfo_delete(&tyrbckinfo);
  }
  if (tyrcountinfo != NULL)
  {
    gt_tyrcountinfo_delete(&tyrcountinfo);
  }
  if (tyrindex != NULL)
  {
    gt_tyrindex_delete(&tyrindex);
  }
  return status;
}
/*
  Runs local alignment of every query sequence against an indexed sequence.

  With idxlocalioptions->doonline only the encoded sequence is mapped and
  multiapplysmithwaterman() is applied to each query; otherwise a generic
  index is loaded and indexbasedlocali() is used. With
  idxlocalioptions->docompare both functions are called and their stored
  matches are passed to checkandresetstorematch() after each query.

  Returns 0 on success and -1 if the index or encoded sequence cannot be
  loaded, the query iterator cannot be created, or reading a query fails.
*/
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);
  if (idxlocalioptions->doonline)
  {
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }

  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      /* collect the matches of both methods separately for comparison */
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }

    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      /* NOTE(review): with doonline and docompare both set no generic index
         has been loaded above, so this assertion fires; confirm that the
         option parser excludes that combination. */
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }

    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* showmatchinfo.queryunit doubles as the running query number */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
               PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }

    /* These resources were created before the iterator, so they are
       released whether or not the iterator could be created. */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }

  /* After a failed load both pointers are NULL and nothing is released;
     otherwise the encoded sequence is freed on its own only when no
     generic index was loaded. */
  if (genericindex != NULL)
  {
    genericindex_delete(genericindex);
  } else if (encseq != NULL)
  {
    encodedsequence_free((Encodedsequence **) &encseq);
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}