/*
  Load the encoded sequence belonging to <indexname> for use with the BWT.
  The loader is configured so that none of the des, ssp and sds tables is
  required. Messages of the loader go to <logger>.
  Returns whatever gt_encseq_loader_load() delivers; callers in this file
  treat NULL as failure, with details reported via <err>.
*/
static GtEncseq *mapbwtencoding(const char *indexname,
                                GtLogger *logger,
                                GtError *err)
{
  GtEncseqLoader *loader;
  GtEncseq *encseq;

  gt_error_check(err);

  /* configure a loader that only needs the sequence itself */
  loader = gt_encseq_loader_new();
  gt_encseq_loader_do_not_require_des_tab(loader);
  gt_encseq_loader_do_not_require_ssp_tab(loader);
  gt_encseq_loader_do_not_require_sds_tab(loader);
  gt_encseq_loader_set_logger(loader, logger);

  /* the loader is only needed for the load call itself */
  encseq = gt_encseq_loader_load(loader, indexname, err);
  gt_encseq_loader_delete(loader);

  return encseq;
}
/*
  Fill <suffixarray> with the encoded sequence of index <indexname> and with
  those tables which are requested by the bits set in <demand>
  (SARR_DESTAB, SARR_SDSTAB, SARR_SSPTAB, SARR_SUFTAB, SARR_LCPTAB,
  SARR_BWTTAB, SARR_BCKTAB).
  If <map> is true, the suftab, lcptab, llvtab and bwttab are obtained via
  gt_fa_mmap_check_size_with_suffix(); otherwise they are set up via the
  INITBufferedfile macro (defined elsewhere; presumably it opens a buffered
  stream and sets <haserr> on failure -- TODO confirm). The bcktab is always
  obtained with gt_bcktab_map().
  Returns 0 on success. On any error gt_freesuffixarray() is called on
  <suffixarray> and -1 is returned.
*/
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  bool haserr = false;
  GtEncseqLoader *el;
  GtUword totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  /* the des, sds and ssp tables are required exactly if demanded */
  el = gt_encseq_loader_new();
  if (!(demand & SARR_DESTAB))
    gt_encseq_loader_do_not_require_des_tab(el);
  else
    gt_encseq_loader_require_des_tab(el);
  if (!(demand & SARR_SDSTAB))
    gt_encseq_loader_do_not_require_sds_tab(el);
  else
    gt_encseq_loader_require_sds_tab(el);
  if (!(demand & SARR_SSPTAB))
    gt_encseq_loader_do_not_require_ssp_tab(el);
  else
    gt_encseq_loader_require_ssp_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  suffixarray->encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    /* presumably fills fields such as numberofallsortedsuffixes, longest,
       numoflargelcpvalues, prefixlength and mirroredencseq from the project
       file -- the function is defined elsewhere, TODO confirm */
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  /* mirror the sequence if the suffix array asks for it and it is not
     mirrored yet */
  if (!haserr
        && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    if (gt_encseq_mirror(suffixarray->encseq, err) != 0)
      haserr = true;
  }
  if (!haserr)
  {
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      /* nothing is mapped if there are no sorted suffixes */
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_SUFTABSUFFIX,
                                     suffixarray->numberofallsortedsuffixes,
                                     sizeof (*suffixarray->suftab),
                                     err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
#if defined (_LP64) || defined (_WIN64)
      /* decide by the file size whether the suftab stream is read with
         uint32_t or with GtUword entries */
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      /* NOTE(review): the log message assumes that GtUword has 4 bytes in
         builds where neither _LP64 nor _WIN64 is defined -- confirm */
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    /* a demanded suftab requires that <longest> is defined */
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_LCPTABSUFFIX,
                                     suffixarray->numberofallsortedsuffixes,
                                     sizeof (*suffixarray->lcptab),
                                     err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream behind the first entry of the lcptab file;
         presumably the first lcp-value is not needed -- TODO confirm */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    /* a demanded lcptab requires that <numoflargelcpvalues> is defined */
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    /* the table of large lcp-values is only accessed if it is not empty */
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_LARGELCPTABSUFFIX,
                                     (GtUword)
                                     suffixarray->numoflargelcpvalues.
                                     valueunsignedlong,
                                     sizeof (*suffixarray->llvtab),
                                     err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      /* the mapped bwttab is checked to have totallength+1 entries */
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                   GT_BWTTABSUFFIX,
                                   totallength+1,
                                   sizeof (*suffixarray->bwttab),
                                   err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }
  if (!haserr && (demand & SARR_BCKTAB))
  {
    /* the bucket table is obtained via gt_bcktab_map() for both values of
       <map> */
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }
  /* on failure release everything that was set up so far */
  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}
/*
  Self-test comparing two ways of computing substring matches.
  The encoded sequence of index <indexname> is loaded (without des, ssp and
  sds tables). Then <samples> times a database substring and a query
  substring, each of length at most <substringlength> (clamped to half of
  the total sequence length), are drawn with samplesubstring(). For each
  pair the matches of length at least <minlength> are computed with
  gt_sarrquerysubstringmatch() and with sarrselfsubstringmatch(). Both result
  lists are sorted and compared. If they differ, both lists and both
  sequences are printed to stdout and the program exits with
  GT_EXIT_PROGRAMMING_ERROR. Otherwise the number of matches is printed.
  Returns 0 on success and -1 if loading the sequence or one of the two
  match computations fails; details are reported via <err>.
  All per-sample resources are released on error paths too.
*/
int gt_testmaxpairs(const char *indexname,
                    GtUword samples,
                    unsigned int minlength,
                    GtUword substringlength,
                    GtLogger *logger,
                    GtError *err)
{
  GtEncseq *encseq;
  GtUword totallength = 0, dblen, querylen;
  GtUchar *dbseq = NULL, *query = NULL;
  bool haserr = false;
  GtUword s;
  GtArray *tabmaxquerymatches;
  Maxmatchselfinfo maxmatchselfinfo;
  GtEncseqLoader *el;

  gt_logger_log(logger,"draw "GT_WU" samples",samples);

  el = gt_encseq_loader_new();
  gt_encseq_loader_do_not_require_des_tab(el);
  gt_encseq_loader_do_not_require_ssp_tab(el);
  gt_encseq_loader_do_not_require_sds_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (encseq == NULL)
  {
    haserr = true;
  } else
  {
    totallength = gt_encseq_total_length(encseq);
  }
  if (!haserr)
  {
    /* database and query substring together must fit into the sequence */
    if (substringlength > totallength/2)
    {
      substringlength = totallength/2;
    }
    dbseq = gt_malloc(sizeof *dbseq * substringlength);
    query = gt_malloc(sizeof *query * substringlength);
  }
  for (s=0; s<samples && !haserr; s++)
  {
    dblen = samplesubstring(dbseq,encseq,substringlength);
    querylen = samplesubstring(query,encseq,substringlength);
    gt_logger_log(logger,"run query match for dblen="GT_WU""
                         ",querylen= "GT_WU", minlength=%u",
                         dblen,
                         querylen,
                         minlength);
    tabmaxquerymatches = gt_array_new(sizeof (Substringmatch));
    /* mark the resources of the self match as not yet allocated, so that
       the common cleanup below is correct on every path */
    maxmatchselfinfo.results = NULL;
    maxmatchselfinfo.querymarkpos = NULL;
    if (gt_sarrquerysubstringmatch(dbseq,
                                   dblen,
                                   query,
                                   (GtUword) querylen,
                                   minlength,
                                   gt_encseq_alphabet(encseq),
                                   storemaxmatchquery,
                                   tabmaxquerymatches,
                                   logger,
                                   err) != 0)
    {
      haserr = true;
    }
    if (!haserr)
    {
      gt_logger_log(logger,"run self match for dblen="GT_WU""
                           ",querylen= "GT_WU", minlength=%u",
                           dblen,
                           querylen,
                           minlength);
      maxmatchselfinfo.results = gt_array_new(sizeof (Substringmatch));
      maxmatchselfinfo.dblen = dblen;
      maxmatchselfinfo.querylen = querylen;
      maxmatchselfinfo.querymarkpos
        = sequence2markpositions(&maxmatchselfinfo.numofquerysequences,
                                 query,querylen);
      if (sarrselfsubstringmatch(dbseq,
                                 dblen,
                                 query,
                                 (GtUword) querylen,
                                 minlength,
                                 gt_encseq_alphabet(encseq),
                                 storemaxmatchself,
                                 &maxmatchselfinfo,
                                 logger,
                                 err) != 0)
      {
        haserr = true;
      }
    }
    if (!haserr)
    {
      /* both methods must deliver the same matches, up to their order */
      gt_array_sort(tabmaxquerymatches,orderSubstringmatch);
      gt_array_sort(maxmatchselfinfo.results,orderSubstringmatch);
      if (!gt_array_equal(tabmaxquerymatches,maxmatchselfinfo.results,
                          orderSubstringmatch))
      {
        const GtUword width = 60UL;
        printf("failure for query of length "GT_WU"\n",(GtUword) querylen);
        printf("querymatches\n");
        (void) gt_array_iterate(tabmaxquerymatches,showSubstringmatch,NULL,
                                err);
        printf("dbmatches\n");
        (void) gt_array_iterate(maxmatchselfinfo.results,showSubstringmatch,
                                NULL,err);
        gt_symbolstring2fasta(stdout,"dbseq",
                              gt_encseq_alphabet(encseq),
                              dbseq,
                              (GtUword) dblen,
                              width);
        gt_symbolstring2fasta(stdout,"queryseq",
                              gt_encseq_alphabet(encseq),
                              query,
                              (GtUword) querylen,
                              width);
        exit(GT_EXIT_PROGRAMMING_ERROR);
      }
      printf("# numberofmatches="GT_WU"\n",gt_array_size(tabmaxquerymatches));
    }
    /* single cleanup point: also reached if one of the match computations
       failed, so that nothing allocated for this sample is leaked */
    gt_free(maxmatchselfinfo.querymarkpos);
    gt_array_delete(tabmaxquerymatches);
    if (maxmatchselfinfo.results != NULL)
    {
      gt_array_delete(maxmatchselfinfo.results);
    }
  }
  gt_free(dbseq);
  gt_free(query);
  gt_encseq_delete(encseq);
  encseq = NULL;
  return haserr ? -1 : 0;
}