/*
 * Run the local-alignment search described by <idxlocalioptions> for every
 * sequence delivered by the query files.
 *
 * Depending on the options, each query is processed by
 *   - gt_multiapplysmithwaterman() (doonline or docompare), and/or
 *   - gt_indexbasedlocali()        (!doonline or docompare).
 * With docompare set, matches of both runs are stored and checked against
 * each other via gt_checkandresetstorematch(); otherwise matches are handed
 * to showmatch.
 *
 * NOTE(review): with doonline set no generic index is loaded, so combining
 * doonline with docompare trips the gt_assert(genericindex != NULL) below;
 * presumably option parsing excludes that combination -- confirm.
 *
 * Returns 0 on success and -1 if loading the index/sequence, creating the
 * query iterator, or reading a query failed.
 */
int gt_runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  GtLogger *logger;
  const GtEncseq *encseq = NULL;

  logger = gt_logger_new(idxlocalioptions->verbose,
                         GT_LOGGER_DEFLT_PREFIX,
                         stdout);
  if (idxlocalioptions->doonline)
  {
    /* online mode: only the encoded sequence itself is loaded */
    GtEncseqLoader *el;

    el = gt_encseq_loader_new();
    gt_encseq_loader_require_multiseq_support(el);
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_set_logger(el, logger);
    encseq = gt_encseq_loader_load(el,
                                   gt_str_get(idxlocalioptions->indexname),
                                   err);
    gt_encseq_loader_delete(el);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(gt_str_get(idxlocalioptions->indexname),
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    logger,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      /* the encoded sequence is borrowed from the generic index */
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    ProcessIdxMatch processmatch;
    GtAlphabet *a;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    a = gt_encseq_alphabet(encseq);
    if (idxlocalioptions->docompare)
    {
      /* compare mode: collect the matches of both runs separately */
      processmatch = storematch;
      gt_initstorematch(&storeonline,encseq);
      gt_initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = gt_alphabet_characters(a);
      showmatchinfo.wildcardshow = gt_alphabet_wildcard_show(a);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = gt_newSWdpresource(idxlocalioptions->matchscore,
                                        idxlocalioptions->mismatchscore,
                                        idxlocalioptions->gapextend,
                                        idxlocalioptions->threshold,
                                        idxlocalioptions->showalignment,
                                        processmatch,
                                        processmatchinfoonline);
    }
    dfst = gt_locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = gt_newLimdfsresources(genericindex,
                                              true,
                                              0,
                                              0,    /* maxpathlength */
                                              true, /* keepexpandedonstack */
                                              processmatch,
                                              processmatchinfooffline,
                                              NULL, /* processresult */
                                              NULL, /* processresult info */
                                              dfst);
    }
    seqit = gt_seq_iterator_sequence_buffer_new(idxlocalioptions->queryfiles,
                                                err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(a));
      /* showmatchinfo.queryunit doubles as the running query counter */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seq_iterator_next(seqit,
                                      &query,
                                      &querylen,
                                      &desc,
                                      err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
               PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_indexbasedlocali(limdfsresources,
                              idxlocalioptions->matchscore,
                              idxlocalioptions->mismatchscore,
                              idxlocalioptions->gapstart,
                              idxlocalioptions->gapextend,
                              idxlocalioptions->threshold,
                              query,
                              querylen,
                              dfst);
        }
        if (idxlocalioptions->docompare)
        {
          gt_checkandresetstorematch(showmatchinfo.queryunit,
                                     &storeonline,&storeoffline);
        }
      }
    }
    /* Release the search resources on every path, including the one where
       the query iterator could not be created; previously they leaked
       in that case. */
    if (limdfsresources != NULL)
    {
      gt_freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      gt_freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seq_iterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      gt_freestorematch(&storeonline);
      gt_freestorematch(&storeoffline);
    }
  }
  if (genericindex == NULL)
  {
    /* online mode (or failed load): the encseq, if any, is owned here */
    gt_encseq_delete((GtEncseq *) encseq);
    encseq = NULL;
  } else
  {
    genericindex_delete(genericindex);
  }
  gt_logger_delete(logger);
  logger = NULL;
  return haserr ? -1 : 0;
}
/*
 * For every query sequence read from arguments->queryname, sum the values
 * returned by gt_pck_getShuStringLength() over all suffixes of the query
 * against the packed index of the subject (arguments->indexname).
 *
 * If arguments->shulen_only is set, the sum is printed to stdout. Otherwise
 * the sum and the count of c/g symbols are divided by the query length, the
 * result is passed to gt_divergence(), and the value of gt_calculateKr() on
 * that divergence is printed to stdout.
 *
 * The subject alphabet must be DNA, because the c/g symbols are counted.
 *
 * Returns 0 on success and 1 if the index could not be loaded, the query
 * iterator could not be created, the alphabet is not DNA, or reading a
 * query failed.
 */
int gt_genomediff_pck_shu_simple(GtLogger *logger,
                                 const GtGenomediffArguments *arguments,
                                 GtError *err)
{
  int had_err = 0;
  int retval;
  GtSeqIterator *queries = NULL;
  const GtUchar *symbolmap, *currentQuery;
  const GtAlphabet *alphabet;
  GtUchar c_sym = 0, g_sym = 0;
  uint64_t queryNo;
  char *description = NULL;
  unsigned long queryLength, subjectLength = 0, currentSuffix;
  double avgShuLength, currentShuLength = 0.0, gc_query;
  const FMindex *subjectindex = NULL;
  Genericindex *genericindexSubject;
  const GtEncseq *encseq = NULL;
  double *ln_n_fac;

  /* get the precalculation of ln(n!) for 0<n<max_ln_n_fac */
  ln_n_fac = gt_get_ln_n_fac(arguments->max_ln_n_fac);
  gt_log_log("ln(max_ln_n_fac!) = %f\n",
             ln_n_fac[arguments->max_ln_n_fac]);

  genericindexSubject = genericindex_new(gt_str_get(arguments->indexname),
                                         arguments->with_esa,
                                         true,
                                         false,
                                         true,
                                         arguments->user_max_depth,
                                         logger,
                                         err);
  if (genericindexSubject == NULL)
  {
    had_err = 1;
  } else
  {
    encseq = genericindex_getencseq(genericindexSubject);
  }

  if (!had_err)
  {
    subjectLength = genericindex_get_totallength(genericindexSubject) - 1;
    subjectindex = genericindex_get_packedindex(genericindexSubject);

    queries = gt_seqiterator_sequence_buffer_new(arguments->queryname, err);
    if (queries == NULL)
    {
      /* report the failure to the caller instead of aborting via assert */
      had_err = 1;
    } else
    {
      alphabet = gt_encseq_alphabet(encseq);
      /* the gc content is computed below, so the alphabet has to be dna */
      if (!gt_alphabet_is_dna(alphabet))
      {
        fprintf(stderr, "error: Sequences need to be dna");
        had_err = 1;
      } else
      {
        symbolmap = gt_alphabet_symbolmap(alphabet);
        gt_seqiterator_set_symbolmap(queries, symbolmap);
        c_sym = gt_alphabet_encode(alphabet, 'c');
        g_sym = gt_alphabet_encode(alphabet, 'g');
      }
    }
  }

  for (queryNo = 0; !had_err; queryNo++)
  {
    retval = gt_seqiterator_next(queries,
                                 &currentQuery,
                                 &queryLength,
                                 &description,
                                 err);
    if (retval != 1)
    {
      if (retval < 0)
      {
        /* iterator error: propagate it rather than returning success */
        gt_free(description);
        had_err = 1;
      }
      break;
    }
    /* NOTE(review): description is released only on iterator error; whether
       it must also be freed after each successful call depends on the
       gt_seqiterator_next() ownership contract -- confirm. */
    gt_logger_log(logger, "found query of length: %lu", queryLength);

    avgShuLength = 0.0;
    gc_query = 0.0;
    for (currentSuffix = 0; currentSuffix < queryLength; currentSuffix++)
    {
      currentShuLength = (double) gt_pck_getShuStringLength(
                                            subjectindex,
                                            &currentQuery[currentSuffix],
                                            queryLength - currentSuffix);
      avgShuLength += currentShuLength;
      if (currentQuery[currentSuffix] == c_sym ||
          currentQuery[currentSuffix] == g_sym)
      {
        gc_query++;
      }
    }

    if (arguments->shulen_only)
    {
      printf("# Query %d sum of shulen:\n %.0f\n",
             (int) queryNo, avgShuLength);
    } else
    {
      double div, kr;

      avgShuLength /= (double) queryLength;
      gc_query /= (double) queryLength;

      gt_logger_log(logger, "Query %d has an average SHUstring length "
                            "of\n# shulength: %f",
                    (int) queryNo, avgShuLength);
      gt_logger_log(logger, "Query description: %s", description);
      gt_log_log("Query (i): %s", description);

      /* XXX add error checks */
      gt_logger_log(logger, "shulen:\n%f", avgShuLength);
      gt_log_log("shu: %f, gc: %f, len: %lu",
                 avgShuLength, gc_query, subjectLength);
      div = gt_divergence(arguments->divergence_rel_err,
                          arguments->divergence_abs_err,
                          arguments->divergence_m,
                          arguments->divergence_threshold,
                          avgShuLength,
                          subjectLength,
                          gc_query,
                          ln_n_fac,
                          arguments->max_ln_n_fac);
      gt_logger_log(logger, "divergence:\n%f", div);

      kr = gt_calculateKr(div);
      printf("# Kr:\n%f\n", kr);
    }
  }

  gt_free(ln_n_fac);
  gt_seqiterator_delete(queries);
  genericindex_delete(genericindexSubject);
  return had_err;
}
/*
 * Run the local-alignment search described by <idxlocalioptions> for every
 * sequence delivered by the query files.
 *
 * Depending on the options, each query is processed by
 *   - multiapplysmithwaterman() (doonline or docompare), and/or
 *   - indexbasedlocali()        (!doonline or docompare).
 * With docompare set, matches of both runs are stored and checked against
 * each other via checkandresetstorematch(); otherwise matches are handed
 * to showmatch.
 *
 * NOTE(review): with doonline set no generic index is loaded, so combining
 * doonline with docompare trips the gt_assert(genericindex != NULL) below;
 * presumably option parsing excludes that combination -- confirm.
 *
 * Returns 0 on success and -1 if loading the index/sequence, creating the
 * query iterator, or reading a query failed.
 */
int runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  Verboseinfo *verboseinfo;
  const Encodedsequence *encseq = NULL;

  verboseinfo = newverboseinfo(idxlocalioptions->verbose);
  if (idxlocalioptions->doonline)
  {
    /* online mode: only the encoded sequence itself is mapped */
    encseq = mapencodedsequence (true,
                                 idxlocalioptions->indexname,
                                 true,
                                 false,
                                 false,
                                 true,
                                 verboseinfo,
                                 err);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(idxlocalioptions->indexname,
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    verboseinfo,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      /* the encoded sequence is borrowed from the generic index */
      encseq = genericindex_getencseq(genericindex);
    }
  }
  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    Processmatch processmatch;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    if (idxlocalioptions->docompare)
    {
      /* compare mode: collect the matches of both runs separately */
      processmatch = storematch;
      initstorematch(&storeonline,encseq);
      initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = getencseqAlphabetcharacters(encseq);
      showmatchinfo.wildcardshow = getencseqAlphabetwildcardshow(encseq);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = newSWdpresource(idxlocalioptions->matchscore,
                                     idxlocalioptions->mismatchscore,
                                     idxlocalioptions->gapextend,
                                     idxlocalioptions->threshold,
                                     idxlocalioptions->showalignment,
                                     processmatch,
                                     processmatchinfoonline);
    }
    dfst = locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = newLimdfsresources(genericindex,
                                           true,
                                           0,
                                           0,    /* maxpathlength */
                                           true, /* keepexpandedonstack */
                                           processmatch,
                                           processmatchinfooffline,
                                           NULL, /* processresult */
                                           NULL, /* processresult info */
                                           dfst);
    }
    seqit = gt_seqiterator_new(idxlocalioptions->queryfiles, err);
    if (seqit == NULL)
    {
      haserr = true;
    } else
    {
      gt_seqiterator_set_symbolmap(seqit, getencseqAlphabetsymbolmap(encseq));
      /* showmatchinfo.queryunit doubles as the running query counter */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seqiterator_next(seqit,
                                     &query,
                                     &querylen,
                                     &desc,
                                     err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
               PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          indexbasedlocali(limdfsresources,
                           idxlocalioptions->matchscore,
                           idxlocalioptions->mismatchscore,
                           idxlocalioptions->gapstart,
                           idxlocalioptions->gapextend,
                           idxlocalioptions->threshold,
                           query,
                           querylen,
                           dfst);
        }
        if (idxlocalioptions->docompare)
        {
          checkandresetstorematch(showmatchinfo.queryunit,
                                  &storeonline,&storeoffline);
        }
        gt_free(desc);
      }
    }
    /* Release the search resources on every path, including the one where
       the query iterator could not be created; previously they leaked
       in that case. */
    if (limdfsresources != NULL)
    {
      freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (seqit != NULL)
    {
      gt_seqiterator_delete(seqit);
    }
    if (idxlocalioptions->docompare)
    {
      freestorematch(&storeonline);
      freestorematch(&storeoffline);
    }
  }
  if (genericindex != NULL)
  {
    genericindex_delete(genericindex);
  } else if (encseq != NULL)
  {
    /* Only a successfully mapped sequence is freed here. After a failed
       load both pointers are NULL, and asserting encseq != NULL would
       abort instead of returning the error to the caller. */
    encodedsequence_free((Encodedsequence **) &encseq);
  }
  freeverboseinfo(&verboseinfo);
  return haserr ? -1 : 0;
}