GtQuerysubstringmatchiterator *gt_querysubstringmatchiterator_new( const GtEncseq *dbencseq, GtUword totallength, const ESASuffixptr *suftabpart, GtReadmode db_readmode, GtUword numberofsuffixes, const GtStrArray *query_files, const GtEncseq *query_encseq, GtReadmode query_readmode, unsigned int userdefinedleastlength, GtError *err) { GtQuerysubstringmatchiterator *qsmi = gt_malloc(sizeof *qsmi); qsmi->dbencseq = dbencseq; qsmi->suftabpart = suftabpart; qsmi->db_readmode = db_readmode; qsmi->numberofsuffixes = numberofsuffixes; qsmi->totallength = totallength; qsmi->userdefinedleastlength = (GtUword) userdefinedleastlength; qsmi->queryunitnum = 0; qsmi->desc = NULL; qsmi->query_for_seqit = NULL; qsmi->query_seqlen = 0; qsmi->queryrep.sequence = NULL; qsmi->queryrep.encseq = query_encseq; qsmi->queryrep.readmode = query_readmode; qsmi->queryrep.startpos = 0; qsmi->dbstart = 0; qsmi->matchlength = 0; qsmi->querysubstring.queryrep = &qsmi->queryrep; qsmi->mmsi = gt_mmsearchiterator_new_empty(); qsmi->mmsi_defined = false; if (query_files == NULL || gt_str_array_size(query_files) == 0) { gt_assert(query_encseq != NULL); qsmi->seqit = NULL; qsmi->query_encseq_numofsequences = (uint64_t) gt_encseq_num_of_sequences(query_encseq); } else { gt_assert(query_encseq == NULL); qsmi->seqit = gt_seq_iterator_sequence_buffer_new(query_files, err); if (qsmi->seqit == NULL) { gt_querysubstringmatchiterator_delete(qsmi); return NULL; } gt_seq_iterator_set_symbolmap(qsmi->seqit, gt_alphabet_symbolmap(gt_encseq_alphabet(dbencseq))); } return qsmi; }
/* Iterate over all sequences of the single query file in <queryfilenames>
   and add the value of gt_esa2shulengthquery() for each of them to
   *<totalgmatchlength>. The query symbols are mapped with the symbol map of
   the alphabet of <suffixarray>->encseq.

   Returns 0 on success and -1 if the sequence iterator cannot be created or
   reports an error while reading. */
int gt_esa2shulengthqueryfiles(unsigned long *totalgmatchlength,
                               const Suffixarray *suffixarray,
                               const GtStrArray *queryfilenames,
                               GtError *err)
{
  GtSeqIterator *queryit;
  GtAlphabet *dbalphabet;
  const GtUchar *queryseq;
  unsigned long queryseqlen;
  char *querydesc = NULL;
  int status;

  gt_error_check(err);
  dbalphabet = gt_encseq_alphabet(suffixarray->encseq);
  gt_assert(gt_str_array_size(queryfilenames) == 1UL);

  queryit = gt_seq_iterator_sequence_buffer_new(queryfilenames, err);
  if (queryit == NULL)
  {
    return -1;
  }
  gt_seq_iterator_set_symbolmap(queryit, gt_alphabet_symbolmap(dbalphabet));

  /* a positive status means that another sequence was delivered */
  while ((status = gt_seq_iterator_next(queryit, &queryseq, &queryseqlen,
                                        &querydesc, err)) > 0)
  {
    *totalgmatchlength += gt_esa2shulengthquery(suffixarray, queryseq,
                                                queryseqlen);
  }
  gt_seq_iterator_delete(queryit);
  return status < 0 ? -1 : 0;
}
/* Collect the k-mer codes (k = <prefixlength>) from the files <encseq> was
   built from via getfastastreamkmers() and, if that succeeds, hand the
   resulting code list to verifycodelists() together with <encseq>.

   Returns 0 if both steps succeed and -1 otherwise. */
int gt_verifymappedstr(const GtEncseq *encseq,
                       unsigned int prefixlength,
                       GtError *err)
{
  GtArrayGtCodetype kmercodes;
  unsigned int alphasize;
  int retcode = 0;

  gt_error_check(err);
  alphasize = gt_alphabet_num_of_chars(gt_encseq_alphabet(encseq));
  GT_INITARRAY(&kmercodes, GtCodetype);

  /* the verification only runs if streaming the k-mers succeeded */
  if (getfastastreamkmers(gt_encseq_filenames(encseq),
                          alphasize,
                          prefixlength,
                          gt_alphabet_symbolmap(gt_encseq_alphabet(encseq)),
                          false,
                          &kmercodes,
                          err) != 0
      || verifycodelists(encseq,
                         prefixlength,
                         alphasize,
                         &kmercodes,
                         err) != 0)
  {
    retcode = -1;
  }
  GT_FREEARRAY(&kmercodes, GtCodetype);
  return retcode;
}
/* Write the encoding of all reads in <hcr_enc>->files to <fp>, beginning at
   the encoder's start_of_encoding offset, and register sampling positions
   while doing so. Afterwards the position following the encoded reads is
   stored as startofsamplingtab and the sampling table is written.

   Each read is passed to hcr_write_seq() twice: first in dry-run mode to
   learn how many bits it needs (required to decide whether a sample has to
   be added before the read), then to actually emit the bits.

   Returns 0 on success and a nonzero value on failure; <err> is set when the
   stream position cannot be determined, and is expected to be set by the
   sequence iterator when that one fails. */
static int hcr_write_seqs(FILE *fp, GtHcrEncoder *hcr_enc, GtError *err)
{
  int had_err = 0, seqit_err = 0;
  GtUword bits_to_write = 0,
          len,
          read_counter = 0,
          page_counter = 0,
          bits_left_in_page,
          cur_read = 0;
  GtWord filepos;
  GtSeqIterator *seqit;
  const GtUchar *seq, *qual;
  char *desc;
  GtBitOutStream *bitstream;

  gt_error_check(err);
  gt_assert(hcr_enc->seq_encoder->sampling);

  gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));

  gt_xfseek(fp, hcr_enc->seq_encoder->start_of_encoding, SEEK_SET);
  bitstream = gt_bitoutstream_new(fp);

  seqit = gt_seq_iterator_fastq_new(hcr_enc->files, err);
  if (!seqit) {
    gt_assert(gt_error_is_set(err));
    had_err = -1;
  }

  if (!had_err) {
    gt_seq_iterator_set_quality_buffer(seqit, &qual);
    gt_seq_iterator_set_symbolmap(seqit,
                         gt_alphabet_symbolmap(hcr_enc->seq_encoder->alpha));
    hcr_enc->seq_encoder->total_num_of_symbols = 0;

    while (!had_err &&
           (seqit_err = gt_seq_iterator_next(seqit, &seq, &len, &desc,
                                             err)) == 1) {
      /* count the bits */
      bits_to_write = hcr_write_seq(hcr_enc->seq_encoder, seq, qual, len,
                                    bitstream, true);

      /* check if a new sample has to be added */
      if (gt_sampling_is_next_element_sample(hcr_enc->seq_encoder->sampling,
                                             page_counter,
                                             read_counter,
                                             bits_to_write,
                                             bits_left_in_page)) {
        gt_bitoutstream_flush_advance(bitstream);

        filepos = gt_bitoutstream_pos(bitstream);
        if (filepos < 0) {
          had_err = -1;
          gt_error_set(err, "error by ftell: %s", strerror(errno));
        }
        else {
          gt_sampling_add_sample(hcr_enc->seq_encoder->sampling,
                                 (size_t) filepos,
                                 cur_read);
          read_counter = 0;
          page_counter = 0;
          gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));
        }
      }

      if (!had_err) {
        /* do the writing */
        bits_to_write = hcr_write_seq(hcr_enc->seq_encoder,
                                      seq, qual, len, bitstream, false);

        /* update counter for sampling */
        while (bits_left_in_page < bits_to_write) {
          page_counter++;
          bits_to_write -= bits_left_in_page;
          gt_safe_assign(bits_left_in_page, (hcr_enc->pagesize * 8));
        }
        bits_left_in_page -= bits_to_write;
        /* always set first page as written */
        if (page_counter == 0)
          page_counter++;
        read_counter++;
        hcr_enc->seq_encoder->total_num_of_symbols += len;
        cur_read++;
      }
    }

    /* seqit_err still holds 1 if the loop was left because of had_err, so
       it is only consulted when no other error has been recorded */
    if (!had_err && seqit_err) {
      had_err = seqit_err;
      gt_assert(gt_error_is_set(err));
    }
    /* The read count can only be expected to match after a complete pass;
       on an error path fewer reads have been written and the failure has to
       be reported to the caller instead of tripping this assertion. */
    gt_assert(had_err || hcr_enc->num_of_reads == cur_read);
  }

  if (!had_err) {
    gt_bitoutstream_flush(bitstream);
    filepos = gt_bitoutstream_pos(bitstream);
    if (filepos < 0) {
      had_err = -1;
      gt_error_set(err, "error by ftell: %s", strerror(errno));
    }
    else {
      hcr_enc->seq_encoder->startofsamplingtab = filepos;
      gt_log_log("start of samplingtab: "GT_WU"",
                 hcr_enc->seq_encoder->startofsamplingtab);
      if (hcr_enc->seq_encoder->sampling != NULL)
        gt_sampling_write(hcr_enc->seq_encoder->sampling, fp);
    }
  }

  gt_bitoutstream_delete(bitstream);
  gt_seq_iterator_delete(seqit);
  return had_err;
}
GtHcrEncoder *gt_hcr_encoder_new(GtStrArray *files, GtAlphabet *alpha, bool descs, GtQualRange qrange, GtTimer *timer, GtError *err) { GtBaseQualDistr *bqd; GtHcrEncoder *hcr_enc; GtSeqIterator *seqit; GtStrArray *file; int had_err = 0, status; GtUword len1, len2, i, num_of_reads = 0; const GtUchar *seq, *qual; char *desc; gt_error_check(err); gt_assert(alpha && files); if (timer != NULL) gt_timer_show_progress(timer, "get <base,qual> distr", stdout); if (qrange.start != GT_UNDEF_UINT) if (qrange.start == qrange.end) { gt_error_set(err, "qrange.start must unequal qrange.end"); return NULL; } hcr_enc = gt_malloc(sizeof (GtHcrEncoder)); hcr_enc->files = files; hcr_enc->num_of_files = gt_str_array_size(files); hcr_enc->num_of_reads = 0; hcr_enc->page_sampling = false; hcr_enc->regular_sampling = false; hcr_enc->sampling_rate = 0; hcr_enc->pagesize = gt_pagesize(); if (descs) { hcr_enc->encdesc_encoder = gt_encdesc_encoder_new(); if (timer != NULL) gt_encdesc_encoder_set_timer(hcr_enc->encdesc_encoder, timer); } else hcr_enc->encdesc_encoder = NULL; hcr_enc->seq_encoder = gt_malloc(sizeof (GtHcrSeqEncoder)); hcr_enc->seq_encoder->alpha = alpha; hcr_enc->seq_encoder->sampling = NULL; hcr_enc->seq_encoder->fileinfos = gt_calloc((size_t) hcr_enc->num_of_files, sizeof (*(hcr_enc->seq_encoder->fileinfos))); hcr_enc->seq_encoder->qrange = qrange; bqd = hcr_base_qual_distr_new(alpha, qrange); /* check if reads in the same file are of same length and get <base, quality> pair distribution */ for (i = 0; i < hcr_enc->num_of_files; i++) { file = gt_str_array_new(); gt_str_array_add(file, gt_str_array_get_str(files, i)); seqit = gt_seq_iterator_fastq_new(file, err); if (!seqit) { gt_error_set(err, "cannot initialize GtSeqIteratorFastQ object"); had_err = -1; } if (!had_err) { gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alpha)); gt_seq_iterator_set_quality_buffer(seqit, &qual); status = gt_seq_iterator_next(seqit, &seq, &len1, &desc, err); if (status == 1) { 
num_of_reads = 1UL; while (!had_err) { status = gt_seq_iterator_next(seqit, &seq, &len2, &desc, err); if (status == -1) had_err = -1; if (status != 1) break; if (len2 != len1) { gt_error_set(err, "reads have to be of equal length"); had_err = -1; break; } if (hcr_base_qual_distr_add(bqd, qual, seq, len1) != 0) had_err = -1; len1 = len2; num_of_reads++; } } else if (status == -1) had_err = -1; if (!had_err) { if (i == 0) hcr_enc->seq_encoder->fileinfos[i].readnum = num_of_reads; else hcr_enc->seq_encoder->fileinfos[i].readnum = hcr_enc->seq_encoder->fileinfos[i - 1].readnum + num_of_reads; hcr_enc->seq_encoder->fileinfos[i].readlength = len1; } } hcr_enc->num_of_reads += num_of_reads; gt_str_array_delete(file); gt_seq_iterator_delete(seqit); } if (!had_err) hcr_base_qual_distr_trim(bqd); if (!had_err) { if (timer != NULL) gt_timer_show_progress(timer, "build huffman tree for sequences and" " qualities", stdout); hcr_enc->seq_encoder->huffman = gt_huffman_new(bqd, hcr_base_qual_distr_func, (GtUword) bqd->ncols * bqd->nrows); } if (!had_err) { hcr_enc->seq_encoder->qual_offset = bqd->qual_offset; hcr_base_qual_distr_delete(bqd); return hcr_enc; } return NULL; }
/* For every query sequence in <arguments>->queryname compute the sum (or
   average) of the shustring lengths of all its suffixes with respect to the
   packed index named by <arguments>->indexname.

   If <arguments>->shulen_only is set, only the sum is printed. Otherwise the
   average shustring length and the GC content of the query are passed to
   gt_divergence(), and the Kr value derived from the divergence is printed.

   Returns 0 on success and 1 on failure (index cannot be loaded, query files
   cannot be opened, alphabet is not DNA, or reading a query fails). */
int gt_genomediff_pck_shu_simple(GtLogger *logger,
                                 const GtGenomediffArguments *arguments,
                                 GtError *err)
{
  int had_err = 0;
  int retval;
  GtSeqIterator *queries = NULL;
  const GtUchar *symbolmap, *currentQuery;
  const GtAlphabet *alphabet;
  GtUchar c_sym = 0, g_sym = 0;
  uint64_t queryNo;
  char *description = NULL;
  unsigned long queryLength, subjectLength = 0, currentSuffix;
  double avgShuLength, currentShuLength = 0.0, gc_query;
  const FMindex *subjectindex = NULL;
  Genericindex *genericindexSubject;
  const GtEncseq *encseq = NULL;
  double *ln_n_fac;

  /* get the precalculation of ln(n!) for 0<n<max_ln_n_fac */
  ln_n_fac = gt_get_ln_n_fac(arguments->max_ln_n_fac);
  gt_log_log("ln(max_ln_n_fac!) = %f\n", ln_n_fac[arguments->max_ln_n_fac]);

  genericindexSubject = genericindex_new(gt_str_get(arguments->indexname),
                                         arguments->with_esa,
                                         true,
                                         false,
                                         true,
                                         arguments->user_max_depth,
                                         logger,
                                         err);
  if (genericindexSubject == NULL) {
    had_err = 1;
  }
  else {
    encseq = genericindex_getencseq(genericindexSubject);
  }

  if (!had_err) {
    subjectLength = genericindex_get_totallength(genericindexSubject) - 1;
    subjectindex = genericindex_get_packedindex(genericindexSubject);

    queries = gt_seqiterator_sequence_buffer_new(arguments->queryname, err);
    if (queries == NULL) {
      /* report the failure instead of only asserting on it */
      had_err = 1;
    }
  }

  if (!had_err) {
    alphabet = gt_encseq_alphabet(encseq);
    /* makes assumption that alphabet is dna, it has to calculate the gc! */
    if (!gt_alphabet_is_dna(alphabet)) {
      fprintf(stderr, "error: Sequences need to be dna");
      had_err = 1;
    }
    else {
      symbolmap = gt_alphabet_symbolmap(alphabet);
      gt_seqiterator_set_symbolmap(queries, symbolmap);
      c_sym = gt_alphabet_encode(alphabet, 'c');
      g_sym = gt_alphabet_encode(alphabet, 'g');
    }
  }

  for (queryNo = 0; !had_err; queryNo++) {
    retval = gt_seqiterator_next(queries,
                                 &currentQuery,
                                 &queryLength,
                                 &description,
                                 err);
    if (retval != 1) {
      if (retval < 0) {
        gt_free(description);
        /* a read error must not be mistaken for the regular end of input */
        had_err = 1;
      }
      break;
    }
    gt_logger_log(logger, "found query of length: %lu", queryLength);

    /* sum up the shustring lengths of all suffixes and count G/C symbols */
    avgShuLength = 0.0;
    gc_query = 0.0;
    for (currentSuffix = 0; currentSuffix < queryLength; currentSuffix++) {
      currentShuLength = (double) gt_pck_getShuStringLength(
                                                 subjectindex,
                                                 &currentQuery[currentSuffix],
                                                 queryLength - currentSuffix);
      avgShuLength += currentShuLength;
      if (currentQuery[currentSuffix] == c_sym ||
          currentQuery[currentSuffix] == g_sym) {
        gc_query++;
      }
    }

    if (arguments->shulen_only) {
      printf("# Query %d sum of shulen:\n %.0f\n",
             (int) queryNo, avgShuLength);
    }
    else {
      double div, kr;

      avgShuLength /= (double) queryLength;
      gc_query /= (double) queryLength;

      gt_logger_log(logger, "Query %d has an average SHUstring length "
                            "of\n# shulength: %f",
                    (int) queryNo, avgShuLength);
      gt_logger_log(logger, "Query description: %s", description);
      gt_log_log("Query (i): %s", description);

      /* XXX add error checks */
      gt_logger_log(logger, "shulen:\n%f", avgShuLength);
      gt_log_log("shu: %f, gc: %f, len: %lu",
                 avgShuLength, gc_query, subjectLength);
      div = gt_divergence(arguments->divergence_rel_err,
                          arguments->divergence_abs_err,
                          arguments->divergence_m,
                          arguments->divergence_threshold,
                          avgShuLength,
                          subjectLength,
                          gc_query,
                          ln_n_fac,
                          arguments->max_ln_n_fac);
      gt_logger_log(logger, "divergence:\n%f", div);

      kr = gt_calculateKr(div);
      printf("# Kr:\n%f\n", kr);
    }
  }

  gt_free(ln_n_fac);
  gt_seqiterator_delete(queries);
  genericindex_delete(genericindexSubject);
  return had_err;
}
/* Run the local alignment search described by <idxlocalioptions> for every
   sequence of the query files.

   With 'doonline' only the encoded sequence is loaded and the queries are
   processed by gt_multiapplysmithwaterman(); otherwise a generic index is
   loaded and the queries are processed by gt_indexbasedlocali(). With
   'docompare' both methods run, their matches are stored instead of shown,
   and the two sets are checked against each other after every query.

   Returns 0 on success and -1 if the sequence/index cannot be loaded, the
   query iterator cannot be created, or reading a query fails. */
int gt_runidxlocali(const IdxlocaliOptions *idxlocalioptions,GtError *err)
{
  Genericindex *genericindex = NULL;
  bool haserr = false;
  GtLogger *logger;
  const GtEncseq *encseq = NULL;

  logger = gt_logger_new(idxlocalioptions->verbose,
                         GT_LOGGER_DEFLT_PREFIX, stdout);

  if (idxlocalioptions->doonline)
  {
    GtEncseqLoader *el;

    el = gt_encseq_loader_new();
    gt_encseq_loader_require_multiseq_support(el);
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_set_logger(el, logger);
    encseq = gt_encseq_loader_load(el,
                                   gt_str_get(idxlocalioptions->indexname),
                                   err);
    gt_encseq_loader_delete(el);
    if (encseq == NULL)
    {
      haserr = true;
    }
  } else
  {
    genericindex = genericindex_new(gt_str_get(idxlocalioptions->indexname),
                                    idxlocalioptions->withesa,
                                    idxlocalioptions->withesa ||
                                    idxlocalioptions->docompare,
                                    false,
                                    true,
                                    0,
                                    logger,
                                    err);
    if (genericindex == NULL)
    {
      haserr = true;
    } else
    {
      encseq = genericindex_getencseq(genericindex);
    }
  }

  if (!haserr)
  {
    GtSeqIterator *seqit;
    const GtUchar *query;
    unsigned long querylen;
    char *desc = NULL;
    int retval;
    Limdfsresources *limdfsresources = NULL;
    const AbstractDfstransformer *dfst;
    SWdpresource *swdpresource = NULL;
    Showmatchinfo showmatchinfo;
    ProcessIdxMatch processmatch;
    GtAlphabet *a;
    void *processmatchinfoonline, *processmatchinfooffline;
    Storematchinfo storeonline, storeoffline;

    a = gt_encseq_alphabet(encseq);
    if (idxlocalioptions->docompare)
    {
      /* matches of both methods are stored separately for the comparison */
      processmatch = storematch;
      gt_initstorematch(&storeonline,encseq);
      gt_initstorematch(&storeoffline,encseq);
      processmatchinfoonline = &storeonline;
      processmatchinfooffline = &storeoffline;
    } else
    {
      processmatch = showmatch;
      showmatchinfo.encseq = encseq;
      showmatchinfo.characters = gt_alphabet_characters(a);
      showmatchinfo.wildcardshow = gt_alphabet_wildcard_show(a);
      showmatchinfo.showalignment = idxlocalioptions->showalignment;
      processmatchinfoonline = processmatchinfooffline = &showmatchinfo;
    }
    if (idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      swdpresource = gt_newSWdpresource(idxlocalioptions->matchscore,
                                        idxlocalioptions->mismatchscore,
                                        idxlocalioptions->gapextend,
                                        idxlocalioptions->threshold,
                                        idxlocalioptions->showalignment,
                                        processmatch,
                                        processmatchinfoonline);
    }
    dfst = gt_locali_AbstractDfstransformer();
    if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
    {
      gt_assert(genericindex != NULL);
      limdfsresources = gt_newLimdfsresources(genericindex,
                                              true,
                                              0,
                                              0,    /* maxpathlength */
                                              true, /* keepexpandedonstack */
                                              processmatch,
                                              processmatchinfooffline,
                                              NULL, /* processresult */
                                              NULL, /* processresult info */
                                              dfst);
    }
    seqit = gt_seq_iterator_sequence_buffer_new(idxlocalioptions->queryfiles,
                                                err);
    if (!seqit)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(a));
      /* queryunit also serves as the running number of the current query */
      for (showmatchinfo.queryunit = 0; /* Nothing */;
           showmatchinfo.queryunit++)
      {
        retval = gt_seq_iterator_next(seqit,
                                      &query,
                                      &querylen,
                                      &desc,
                                      err);
        if (retval < 0)
        {
          haserr = true;
          break;
        }
        if (retval == 0)
        {
          break;
        }
        printf("process sequence " Formatuint64_t " of length %lu\n",
               PRINTuint64_tcast(showmatchinfo.queryunit),querylen);
        if (idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_multiapplysmithwaterman(swdpresource,encseq,query,querylen);
        }
        if (!idxlocalioptions->doonline || idxlocalioptions->docompare)
        {
          gt_indexbasedlocali(limdfsresources,
                              idxlocalioptions->matchscore,
                              idxlocalioptions->mismatchscore,
                              idxlocalioptions->gapstart,
                              idxlocalioptions->gapextend,
                              idxlocalioptions->threshold,
                              query,
                              querylen,
                              dfst);
        }
        if (idxlocalioptions->docompare)
        {
          gt_checkandresetstorematch(showmatchinfo.queryunit,
                                     &storeonline,&storeoffline);
        }
      }
      gt_seq_iterator_delete(seqit);
    }
    /* The search resources are created before the iterator, so they have to
       be released even if the iterator could not be created. */
    if (limdfsresources != NULL)
    {
      gt_freeLimdfsresources(&limdfsresources,dfst);
    }
    if (swdpresource != NULL)
    {
      gt_freeSWdpresource(swdpresource);
      swdpresource = NULL;
    }
    if (idxlocalioptions->docompare)
    {
      gt_freestorematch(&storeonline);
      gt_freestorematch(&storeoffline);
    }
  }
  if (genericindex == NULL)
  {
    /* the encoded sequence was loaded directly, it is not owned by an index */
    gt_encseq_delete((GtEncseq *) encseq);
    encseq = NULL;
  } else
  {
    genericindex_delete(genericindex);
  }
  gt_logger_delete(logger);
  logger = NULL;
  return haserr ? -1 : 0;
}
/* Load the tyr index <tyrindexname> and search it with every sequence of
   <queryfilenames> by calling singleseqtyrsearch().

   If <verbose> is set the index is shown, if <performtest> is set it is
   checked. Count information is loaded only if <showmode> contains
   SHOWCOUNTS and the index is not empty; bucket information is loaded only
   if the index is not empty.

   Returns 0 on success and -1 if any of the index parts or the query
   iterator cannot be created, or if reading a query fails. */
int gt_tyrsearch(const char *tyrindexname,
                 const GtStrArray *queryfilenames,
                 unsigned int showmode,
                 unsigned int searchstrand,
                 bool verbose,
                 bool performtest,
                 GtError *err)
{
  Tyrindex *index;
  Tyrcountinfo *countinfo = NULL;
  Tyrbckinfo *bckinfo = NULL;
  bool failed = false;

  gt_error_check(err);
  index = gt_tyrindex_new(tyrindexname,err);
  if (index != NULL)
  {
    if (verbose)
    {
      gt_tyrindex_show(index);
    }
    if (performtest)
    {
      gt_tyrindex_check(index);
    }
  } else
  {
    failed = true;
  }

  /* optional count table */
  if (!failed)
  {
    gt_assert(index != NULL);
    if ((showmode & SHOWCOUNTS) && !gt_tyrindex_isempty(index))
    {
      countinfo = gt_tyrcountinfo_new(index,tyrindexname,err);
      failed = (countinfo == NULL);
    }
  }

  /* bucket boundaries, only available for a non-empty index */
  if (!failed)
  {
    gt_assert(index != NULL);
    if (!gt_tyrindex_isempty(index))
    {
      bckinfo = gt_tyrbckinfo_new(tyrindexname,
                                  gt_tyrindex_alphasize(index),
                                  err);
      failed = (bckinfo == NULL);
    }
  }

  if (!failed)
  {
    Tyrsearchinfo searchinfo;
    GtSeqIterator *queryit;

    gt_assert(index != NULL);
    gt_tyrsearchinfo_init(&searchinfo,index,showmode,searchstrand);
    queryit = gt_seqiterator_sequence_buffer_new(queryfilenames, err);
    if (queryit == NULL)
    {
      failed = true;
    } else
    {
      const GtUchar *queryseq;
      unsigned long queryseqlen;
      char *querydesc = NULL;
      uint64_t querynum = 0;
      int status;

      gt_seqiterator_set_symbolmap(queryit,
                             gt_alphabet_symbolmap(searchinfo.dnaalpha));
      /* a positive status means that another query was delivered */
      while ((status = gt_seqiterator_next(queryit,
                                           &queryseq,
                                           &queryseqlen,
                                           &querydesc,
                                           err)) > 0)
      {
        singleseqtyrsearch(index,
                           countinfo,
                           &searchinfo,
                           bckinfo,
                           querynum,
                           queryseq,
                           queryseqlen,
                           querydesc);
        querynum++;
      }
      if (status < 0)
      {
        failed = true;
      }
      gt_seqiterator_delete(queryit);
    }
    gt_tyrsearchinfo_delete(&searchinfo);
  }

  if (bckinfo != NULL)
  {
    gt_tyrbckinfo_delete(&bckinfo);
  }
  if (countinfo != NULL)
  {
    gt_tyrcountinfo_delete(&countinfo);
  }
  if (index != NULL)
  {
    gt_tyrindex_delete(&index);
  }
  return failed ? -1 : 0;
}
static int gt_callenumquerymatches_withindex( GtQuerysubstringmatchfunc findquerymatches, const Suffixarray *suffixarray, const GtStrArray *queryfiles, bool forwardstrand, bool reversestrand, unsigned int userdefinedleastlength, GtProcessquerybeforematching processquerybeforematching, GtProcessquerymatch processquerymatch, void *processquerymatchinfo, GtError *err) { GtSeqIterator *seqit; bool haserr = false; seqit = gt_seq_iterator_sequence_buffer_new(queryfiles, err); if (seqit == NULL) { haserr = true; } else { GtQuerymatch *querymatchspaceptr = gt_querymatch_new(); const GtUchar *query; unsigned long querylen; int retval; uint64_t queryunitnum; GtUchar *queryreverse = NULL; unsigned long queryreverse_length = 0; char *desc = NULL; int mode; gt_seq_iterator_set_symbolmap(seqit, gt_alphabet_symbolmap(gt_encseq_alphabet( suffixarray->encseq))); for (queryunitnum = 0; /* Nothing */; queryunitnum++) { retval = gt_seq_iterator_next(seqit, &query, &querylen, &desc, err); if (retval < 0) { haserr = true; break; } if (retval == 0) { break; } if (querylen >= (unsigned long) userdefinedleastlength) { GtQueryrep queryrep; queryrep.encseq = NULL; queryrep.readmode = GT_READMODE_FORWARD; queryrep.startpos = 0; queryrep.length = querylen; for (mode = 0; mode <= 1; mode++) { if (mode == 0 && forwardstrand) { queryrep.sequence = query; queryrep.reversecopy = false; if (processquerybeforematching != NULL) { processquerybeforematching(processquerymatchinfo,desc,query, querylen,true); } } else { if (mode == 1 && reversestrand) { if (querylen > queryreverse_length) { queryreverse = gt_realloc(queryreverse, sizeof (*queryreverse) * querylen); queryreverse_length = querylen; } gt_copy_reversecomplement(queryreverse,query,querylen); queryrep.sequence = queryreverse; queryrep.reversecopy = true; if (processquerybeforematching != NULL) { processquerybeforematching(processquerymatchinfo,desc, queryreverse,querylen,false); } } else { queryrep.sequence = NULL; queryrep.reversecopy = false; 
} } if (queryrep.sequence != NULL) { int ret = findquerymatches(false, suffixarray, queryunitnum, &queryrep, (unsigned long) userdefinedleastlength, processquerymatch, processquerymatchinfo, querymatchspaceptr, err); if (ret != 0) { haserr = true; break; } } } } } gt_seq_iterator_delete(seqit); gt_free(queryreverse); gt_querymatch_delete(querymatchspaceptr); } return haserr ? -1 : 0; }
int gt_findsubquerygmatchforward(const GtEncseq *encseq, const void *genericindex, unsigned long totallength, Greedygmatchforwardfunction gmatchforward, const GtAlphabet *alphabet, const GtStrArray *queryfilenames, Definedunsignedlong minlength, Definedunsignedlong maxlength, bool showsequence, bool showquerypos, bool showsubjectpos, GtError *err) { Substringinfo substringinfo; Rangespecinfo rangespecinfo; bool haserr = false; GtSeqIterator *seqit; const GtUchar *query; unsigned long querylen; char *desc = NULL; int retval; uint64_t unitnum; gt_error_check(err); substringinfo.genericindex = genericindex; substringinfo.totallength = totallength; rangespecinfo.minlength = minlength; rangespecinfo.maxlength = maxlength; rangespecinfo.showsequence = showsequence; rangespecinfo.showquerypos = showquerypos; rangespecinfo.showsubjectpos = showsubjectpos; substringinfo.preprocessgmatchlength = showunitnum; substringinfo.processgmatchlength = showifinlengthrange; substringinfo.postprocessgmatchlength = NULL; substringinfo.alphabet = alphabet; substringinfo.processinfo = &rangespecinfo; substringinfo.gmatchforward = gmatchforward; substringinfo.encseq = encseq; seqit = gt_seqiterator_sequence_buffer_new(queryfilenames, err); if (!seqit) haserr = true; if (!haserr) { gt_seqiterator_set_symbolmap(seqit, gt_alphabet_symbolmap(alphabet)); for (unitnum = 0; /* Nothing */; unitnum++) { retval = gt_seqiterator_next(seqit, &query, &querylen, &desc, err); if (retval < 0) { haserr = true; break; } if (retval == 0) { break; } gmatchposinsinglesequence(&substringinfo, unitnum, query, querylen, desc); } gt_seqiterator_delete(seqit); } return haserr ? -1 : 0; }
/* Parse the <numofbytes> bytes in <filecontents> in place and return a
   GtBareEncseq referring to the rewritten buffer.

   Lines beginning with '>' are skipped; for every such line except the very
   first line read, a SEPARATOR is written. On all other lines whitespace is
   dropped and every remaining byte is replaced by its code from the symbol
   map of <alphabet>. Since never more bytes are written than have been read,
   the encoded sequence overwrites the beginning of <filecontents>.

   Along the way the occurrences of each non-special code are counted, the
   special characters (including separators) are counted, and maximal runs of
   special characters are recorded as ranges (start, length).

   Returns the new object, or NULL with <err> set if a byte maps to
   UNDEFCHAR. */
GtBareEncseq *gt_bare_encseq_parse_new(GtUchar *filecontents,size_t numofbytes,
                                       const GtAlphabet *alphabet,
                                       GtError *err)
{
  GtUchar *writeptr = filecontents, *readptr = filecontents;
  const GtUchar *endptr = filecontents + numofbytes;
  bool firstline = true, haserr = false;
  GtUword lastspecialrange_length = 0;
  GtBareSpecialrange *srptr = NULL;
  GtBareEncseq *bare_encseq = gt_malloc(sizeof *bare_encseq);
  const GtUchar *smap = gt_alphabet_symbolmap(alphabet);

  bare_encseq->specialcharacters = 0;
  bare_encseq->numofchars = (GtUword) gt_alphabet_num_of_chars(alphabet);
  bare_encseq->charcount = gt_calloc((size_t) bare_encseq->numofchars,
                                     sizeof *bare_encseq->charcount);
  GT_INITARRAY(&bare_encseq->specialranges,GtBareSpecialrange);
  while (!haserr && readptr < endptr)
  {
    if (*readptr == '>')
    {
      if (!firstline)
      {
        /* the separator starts or extends a range of special characters */
        if (lastspecialrange_length == 0)
        {
          GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                GtBareSpecialrange,128UL);
          srptr->start = (GtUword) (writeptr - filecontents);
        }
        lastspecialrange_length++;
        *writeptr++ = SEPARATOR;
        bare_encseq->specialcharacters++;
      } else
      {
        firstline = false;
      }
      /* skip the remainder of the header line */
      while (readptr < endptr && *readptr != '\n')
      {
        readptr++;
      }
    } else
    {
      while (readptr < endptr && *readptr != '\n')
      {
        if (!isspace(*readptr))
        {
          GtUchar cc = smap[*readptr];

          if (cc == UNDEFCHAR)
          {
            gt_error_set(err,"illegal input characters %c\n",*readptr);
            haserr = true;
            break;
          }
          if (ISSPECIAL(cc))
          {
            if (lastspecialrange_length == 0)
            {
              GT_GETNEXTFREEINARRAY(srptr,&bare_encseq->specialranges,
                                    GtBareSpecialrange,128UL);
              srptr->start = (GtUword) (writeptr - filecontents);
            }
            lastspecialrange_length++;
            bare_encseq->specialcharacters++;
          } else
          {
            gt_assert((GtUword) cc < bare_encseq->numofchars);
            bare_encseq->charcount[(int) cc]++;
            /* a regular character closes the current special range */
            if (lastspecialrange_length > 0)
            {
              gt_assert(srptr != NULL);
              srptr->length = lastspecialrange_length;
            }
            lastspecialrange_length = 0;
          }
          *writeptr++ = cc;
        }
        readptr++;
      }
    }
    /* Step over the newline (or the offending byte after an error). If the
       input does not end with a newline, readptr already equals endptr here
       and must not be moved beyond one past the end of the buffer. */
    if (readptr < endptr)
    {
      readptr++;
    }
  }
  /* close a special range that extends to the end of the sequence */
  if (lastspecialrange_length > 0)
  {
    gt_assert(srptr != NULL);
    srptr->length = lastspecialrange_length;
  }
  bare_encseq->sequence = filecontents;
  bare_encseq->totallength = (GtUword) (writeptr - filecontents);
  if (haserr)
  {
    gt_bare_encseq_delete(bare_encseq);
    return NULL;
  }
  return bare_encseq;
}