/* Load the encoded sequence stored under <seqfile> and pass it to
   output_sequence(). The loader is asked for lossless support and the
   sequence is mirrored when the corresponding option values in <args>->eopts
   are set; mirroring is rejected for non-DNA alphabets.
   Returns 0 on success. On failure the result is -1 (load failure or
   mirroring of a non-DNA sequence) or the non-zero code returned by
   gt_encseq_mirror() / output_sequence(); details are stored in <err>. */
static int decode_sequence_file(const char *seqfile,
                                GtEncseqDecodeArguments *args,
                                GtError *err)
{
  GtEncseqLoader *loader;
  GtEncseq *seq;
  int status = 0;

  gt_error_check(err);
  gt_assert(seqfile);

  loader = gt_encseq_loader_new();
  if (gt_encseq_options_lossless_value(args->eopts))
    gt_encseq_loader_require_lossless_support(loader);

  seq = gt_encseq_loader_load(loader, seqfile, err);
  if (seq == NULL) {
    status = -1;
  }
  else if (gt_encseq_options_mirrored_value(args->eopts)) {
    if (gt_alphabet_is_dna(gt_encseq_alphabet(seq))) {
      status = gt_encseq_mirror(seq, err);
    }
    else {
      gt_error_set(err, "mirroring is only defined on DNA sequences");
      status = -1;
    }
  }

  if (status == 0)
    status = output_sequence(seq, args, seqfile, err);

  /* both objects are released on every path, including the failure paths */
  gt_encseq_delete(seq);
  gt_encseq_loader_delete(loader);
  return status;
}
/* Lua binding: mirror the encoded sequence passed as argument 1 in place.
   Raises a Lua argument error if the sequence is already mirrored and a Lua
   error (via gt_lua_error()) if gt_encseq_mirror() fails.
   Pushes no results onto the Lua stack (returns 0). */
static int encseq_lua_mirror(lua_State *L)
{
  GtEncseq **encseq;
  GtError *err;

  encseq = check_encseq(L, 1);
  gt_assert(*encseq);
  luaL_argcheck(L, !gt_encseq_is_mirrored(*encseq), 1, "is already mirrored");

  /* Allocate the error object only after argument validation:
     luaL_argcheck() raises a Lua error and does not return when the check
     fails, so an object created earlier would never be released. */
  err = gt_error_new();
  if (gt_encseq_mirror(*encseq, err) != 0) {
    /* NOTE(review): gt_lua_error() is expected to raise a Lua error and not
       return here -- confirm against its definition. */
    gt_lua_error(L, err);
  }
  gt_error_delete(err);
  return 0;
}
/* Runner of the bit extraction tool. Loads the encoded sequence named by
   argv[parsed_args], mirrors it if requested, and then, depending on the
   tool arguments:
   - prints the two-bit encoding extracted at <bitpos>,
   - prints the next two-bit encoding stop position for <stoppos>,
   - prints all special ranges of the sequence.
   Positions at or beyond the total sequence length are reported as errors.
   Returns 0 on success and a non-zero value on failure (details in <err>). */
static int gt_encseq_bitextract_runner(GT_UNUSED int argc,
                                       const char **argv,
                                       GT_UNUSED int parsed_args,
                                       void *tool_arguments,
                                       GT_UNUSED GtError *err)
{
  GtEncseqBitextractArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  GtEncseqReader *reader;
  GtReadmode readmode;
  bool forward;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);

  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (encseq == NULL)
    had_err = -1;
  else if (arguments->mirror)
    had_err = gt_encseq_mirror(encseq, err);

  /* readmode and forward are only read below while had_err is still 0 */
  if (had_err == 0) {
    readmode = gt_readmode_parse(gt_str_get(arguments->readmode), NULL);
    if (GT_ISDIRREVERSE(readmode))
      forward = false;
    else
      forward = true;
  }

  if (had_err == 0 && arguments->bitpos != GT_UNDEF_ULONG) {
    if (arguments->bitpos < gt_encseq_total_length(encseq)) {
      GtEndofTwobitencoding encoding;
      char bitstring[BUFSIZ];
      unsigned long retval;

      reader = gt_encseq_create_reader_with_readmode(encseq, readmode,
                                                     arguments->bitpos);
      retval = gt_encseq_extract2bitencwithtwobitencodingstoppos(
                                                     &encoding, reader, encseq,
                                                     readmode,
                                                     arguments->bitpos);
      gt_bitsequence_tostring(bitstring, encoding.tbe);
      printf("Twobitencoding %s\n"
             "unitsnotspecial %u\n"
             "position %lu\n"
             "returnvalue %lu\n",
             bitstring,
             encoding.unitsnotspecial,
             arguments->bitpos,
             retval);
      gt_encseq_reader_delete(reader);
    }
    else {
      gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                   arguments->bitpos, gt_encseq_total_length(encseq));
      had_err = -1;
    }
  }

  if (had_err == 0 && arguments->stoppos != GT_UNDEF_ULONG) {
    if (arguments->stoppos < gt_encseq_total_length(encseq)) {
      /* the reader is created at position 0 and then re-initialised at the
         requested position before the stop position is queried */
      reader = gt_encseq_create_reader_with_readmode(encseq, readmode, 0);
      gt_encseq_reader_reinit_with_readmode(reader, encseq, readmode,
                                            arguments->stoppos);
      printf("%lu: %lu\n", arguments->stoppos,
             gt_getnexttwobitencodingstoppos(forward, reader));
      gt_encseq_reader_delete(reader);
    }
    else {
      gt_error_set(err, "position %lu exceeds encoded sequence length of %lu",
                   arguments->stoppos, gt_encseq_total_length(encseq));
      had_err = -1;
    }
  }

  if (had_err == 0 && arguments->specialranges
        && gt_encseq_has_specialranges(encseq)) {
    GtRange range;
    GtSpecialrangeiterator *range_iter
      = gt_specialrangeiterator_new(encseq, forward);

    while (gt_specialrangeiterator_next(range_iter, &range))
      printf("%lu:%lu\n", range.start, range.end);
    gt_specialrangeiterator_delete(range_iter);
  }

  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(loader);
  return had_err;
}
/* Initialise <suffixarray> from the index <indexname>.
   The encoded sequence is loaded first (requiring the des/sds/ssp tables
   exactly as selected by the SARR_DESTAB, SARR_SDSTAB and SARR_SSPTAB bits of
   <demand>), the project file keys are scanned, and the sequence is mirrored
   if the index says so and it is not mirrored yet. Afterwards each table
   selected in <demand> (SARR_SUFTAB, SARR_LCPTAB, SARR_BWTTAB, SARR_BCKTAB)
   is made available: mapped via gt_fa_mmap_check_size_with_suffix() if <map>
   is true, otherwise opened as a buffered stream with INITBufferedfile. The
   bucket table is always obtained with gt_bcktab_map().
   On any failure the suffix array is released with gt_freesuffixarray().
   Returns 0 on success and -1 on failure (details in <err>). */
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  /* NOTE: the names haserr and err are kept as they are because
     INITBufferedfile is a macro and may refer to them implicitly. */
  bool haserr = false;
  GtUword totallength = 0;
  GtEncseqLoader *loader;

  gt_error_check(err);
  initsuffixarray(suffixarray);

  loader = gt_encseq_loader_new();
  if (demand & SARR_DESTAB)
    gt_encseq_loader_require_des_tab(loader);
  else
    gt_encseq_loader_do_not_require_des_tab(loader);
  if (demand & SARR_SDSTAB)
    gt_encseq_loader_require_sds_tab(loader);
  else
    gt_encseq_loader_do_not_require_sds_tab(loader);
  if (demand & SARR_SSPTAB)
    gt_encseq_loader_require_ssp_tab(loader);
  else
    gt_encseq_loader_do_not_require_ssp_tab(loader);
  gt_encseq_loader_set_logger(loader, logger);
  suffixarray->encseq = gt_encseq_loader_load(loader, indexname, err);
  gt_encseq_loader_delete(loader);

  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  } else
  {
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  if (!haserr && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    haserr = (gt_encseq_mirror(suffixarray->encseq, err) != 0);
  }
  if (!haserr)
  {
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }

  /* suffix table */
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (!map)
    {
#if defined (_LP64) || defined (_WIN64)
      /* the file size decides whether suftab entries are 4 or 8 bytes wide */
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize != (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,
                         uint32_t,GT_SUFTABSUFFIX);
      }
#else
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    } else
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_SUFTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->suftab),
                                       err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    }
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }

  /* lcp table and table of large lcp values */
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (!map)
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream right after the first lcp entry */
      if (!haserr && fseek(suffixarray->lcptabstream.fp,
                           (GtWord) sizeof (GtUchar),SEEK_SET) != 0)
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    } else
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_LCPTABSUFFIX,
                                       suffixarray->numberofallsortedsuffixes,
                                       sizeof (*suffixarray->lcptab),
                                       err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (!map)
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      } else
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                       GT_LARGELCPTABSUFFIX,
                                       (GtUword)
                                       suffixarray->numoflargelcpvalues.
                                       valueunsignedlong,
                                       sizeof (*suffixarray->llvtab),
                                       err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      }
    }
  }

  /* bwt table */
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (!map)
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    } else
    {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                            GT_BWTTABSUFFIX,
                                            totallength+1,
                                            sizeof (*suffixarray->bwttab),
                                            err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    }
  }

  /* bucket table */
  if (!haserr && (demand & SARR_BCKTAB))
  {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }

  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
    return -1;
  }
  return 0;
}
/* Runner of the encseq2spm tool.
   Loads the encoded sequence named by <arguments->encseqinput> (without
   description support and with autosupport disabled) and mirrors it; the
   single strand mode is rejected as not implemented.
   - If <arguments->singlescan> is positive, one of the k-mer scanning modes
     1..4 is executed; any other value is rejected. If gt_showtime_enabled()
     holds, the run is timed and the timing is written to stdout.
   - If <arguments->singlescan> is 0, the suffix-prefix matching is run via
     storefirstcodes_getencseqkmers_twobitencoding(), with one processing
     state per thread when matches are to be counted or output.
   Returns 0 on success and -1 on failure (details in <err>). */
static int gt_encseq2spm_runner(GT_UNUSED int argc,
                                GT_UNUSED const char **argv,
                                GT_UNUSED int parsed_args,
                                void *tool_arguments,
                                GtError *err)
{
  GtEncseq2spmArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *encseq = NULL;
  bool haserr = false;

  gt_error_check(err);
  gt_assert(arguments);
  el = gt_encseq_loader_new();
  gt_encseq_loader_drop_description_support(el);
  gt_encseq_loader_disable_autosupport(el);
  encseq = gt_encseq_loader_load(el, gt_str_get(arguments->encseqinput), err);
  if (encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    if (arguments->singlestrand)
    {
      gt_error_set(err,"option -singlestrand is not implemented");
      haserr = true;
    } else
    {
      if (gt_encseq_mirror(encseq, err) != 0)
      {
        haserr = true;
      }
    }
  }
  if (!haserr && arguments->singlescan > 0)
  {
    GtTimer *timer = NULL;
    const char *outmsg = NULL;

    /* Validate the scan mode unconditionally. It must not depend on whether
       timing output is enabled, otherwise an invalid mode would be run
       silently when timing is switched off. */
    switch (arguments->singlescan)
    {
      case 1:
        outmsg = "to run fast scanning";
        break;
      case 2:
        outmsg = "to run fast scanning with check";
        break;
      case 3:
        outmsg = "to run fast scanning with output";
        break;
      case 4:
        outmsg = "to run old scanning code";
        break;
      default:
        gt_error_set(err,"argument %u to option -singlescan not allowed",
                     arguments->singlescan);
        haserr = true;
        break;
    }
    if (!haserr && gt_showtime_enabled())
    {
      timer = gt_timer_new_with_progress_description(outmsg);
      gt_timer_start(timer);
    }
    if (!haserr)
    {
      unsigned int kmersize = 0;

      haserr = gt_encseq2spm_kmersize(arguments, &kmersize, err);
      if (!haserr)
      {
        if (arguments->singlescan == 4U)
        {
          gt_rungetencseqkmers(encseq,kmersize);
        } else
        {
          /* modes 1..3 are passed on as 0..2 */
          gt_firstcode_runkmerscan(encseq,arguments->singlescan - 1,kmersize,
                                   arguments->minmatchlength);
        }
      }
    }
    if (timer != NULL)
    {
      gt_timer_show_progress_final(timer, stdout);
      gt_timer_delete(timer);
    }
  }
  if (!haserr && arguments->singlescan == 0)
  {
    GtLogger *logger;
    const GtReadmode readmode = GT_READMODE_FORWARD;
    GtBUstate_spmsk **spmsk_states = NULL;
    unsigned int kmersize = 0, threadcount;
#ifdef GT_THREADS_ENABLED
    const unsigned int threads = gt_jobs;
#else
    const unsigned int threads = 1U;
#endif

    /* one processing state per thread, only needed if matches are counted
       or output */
    if (arguments->countspms || arguments->outputspms)
    {
      spmsk_states = gt_malloc(sizeof (*spmsk_states) * threads);
      for (threadcount = 0; threadcount < threads; threadcount++)
      {
        spmsk_states[threadcount]
          = gt_spmsk_inl_new(encseq,
                             readmode,
                             (unsigned long) arguments->minmatchlength,
                             arguments->countspms,
                             arguments->outputspms,
                             gt_str_get(arguments->encseqinput));
      }
    }
    logger = gt_logger_new(arguments->verbose,GT_LOGGER_DEFLT_PREFIX, stdout);
    haserr = gt_encseq2spm_kmersize(arguments, &kmersize, err);
    if (!haserr)
    {
      if (storefirstcodes_getencseqkmers_twobitencoding(encseq,
                                   kmersize,
                                   arguments->numofparts,
                                   arguments->maximumspace,
                                   arguments->minmatchlength,
                                   /* use false */ arguments->checksuftab,
                                   /* use false */ arguments->onlyaccum,
                                   /* use false */ arguments->
                                                   onlyallfirstcodes,
                                   /* use 5U */ arguments->
                                                addbscache_depth,
                                   /* specify the extra space needed for
                                      the function processing the interval */
                                   arguments->phase2extra,
                                   /* use true */ arguments->radixlarge
                                                    ? false : true,
                                   /* use 2 without threads and
                                      use 1 with threads */
                                   arguments->radixparts,
                                   spmsk_states != NULL
                                     ? gt_spmsk_inl_process
                                     : NULL,
                                   gt_spmsk_inl_process_end,
                                   spmsk_states,
                                   logger,
                                   err) != 0)
      {
        haserr = true;
      }
    }
    if (spmsk_states != NULL)
    {
      unsigned long countmatches = 0;

      /* deleting a state yields its match count; sum over all threads */
      for (threadcount = 0; threadcount < threads; threadcount++)
      {
        countmatches += gt_spmsk_inl_delete(spmsk_states[threadcount]);
      }
      if (arguments->countspms)
      {
        printf("number of suffix-prefix matches=%lu\n",countmatches);
      }
      gt_free(spmsk_states);
    }
    gt_logger_delete(logger);
  }
  gt_encseq_delete(encseq);
  gt_encseq_loader_delete(el);
  return haserr ? -1 : 0;
}
int gt_encseq_gc_unit_test(GtError *err) { int had_err = 0; double *results; GtEncseqBuilder *eb; GtEncseq *encseq; const char testseq1[] = "aaaaaa", testseq2[] = "cccccc", testseq3[] = "acgtacgt", testseq4[] = "acgtn"; /* testseq5[] = "xxxxn"; */ GtAlphabet *alpha; /*GtError *tmp_err;*/ gt_error_check(err); alpha = gt_alphabet_new_dna(); /* test a-seq */ eb = gt_encseq_builder_new(alpha); gt_encseq_builder_create_ssp_tab(eb); gt_encseq_builder_enable_description_support(eb); gt_encseq_builder_add_cstr(eb, testseq1, 6UL, "only a"); encseq = gt_encseq_builder_build(eb, err); if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) { gt_ensure(had_err, gt_double_equals_double(results[0], 0.0)); } else { had_err = -1; } gt_free(results); gt_encseq_builder_delete(eb); gt_encseq_delete(encseq); if (!had_err) { /* test c-seq */ eb = gt_encseq_builder_new(alpha); gt_encseq_builder_create_ssp_tab(eb); gt_encseq_builder_enable_description_support(eb); gt_encseq_builder_add_cstr(eb, testseq2, 6UL, "only c"); encseq = gt_encseq_builder_build(eb, err); if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) { gt_ensure(had_err, gt_double_equals_one(results[0])); } else { had_err = -1; } gt_free(results); gt_encseq_builder_delete(eb); gt_encseq_delete(encseq); } if (!had_err) { /* test dna-seq and dna+special-seq*/ eb = gt_encseq_builder_new(alpha); gt_encseq_builder_create_ssp_tab(eb); gt_encseq_builder_enable_description_support(eb); gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5"); gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special"); encseq = gt_encseq_builder_build(eb, err); if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) { gt_ensure(had_err, gt_double_equals_double(results[0], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[1], 0.5)); } else { had_err = -1; } gt_free(results); if (!had_err) { /* count special chars */ if ((results = gt_encseq_get_gc(encseq, true, true, err)) != NULL) { gt_ensure(had_err, 
gt_double_equals_double(results[0], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0))); } else { had_err = -1; } gt_free(results); } gt_encseq_builder_delete(eb); gt_encseq_delete(encseq); } if (!had_err) { /* test dna-seq and dna+special-seq*/ eb = gt_encseq_builder_new(alpha); gt_encseq_builder_create_ssp_tab(eb); gt_encseq_builder_enable_description_support(eb); gt_encseq_builder_add_cstr(eb, testseq3, 8UL, "0.5"); gt_encseq_builder_add_cstr(eb, testseq4, 5UL, "0.5+special"); encseq = gt_encseq_builder_build(eb, err); /*add mirrored sequence*/ had_err = gt_encseq_mirror(encseq, err); /* sequence wise */ if ((results = gt_encseq_get_gc(encseq, false, true, err)) != NULL) { gt_ensure(had_err, gt_double_equals_double(results[0], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[1], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[2], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[3], 0.5)); } else { had_err = -1; } gt_free(results); if (!had_err) { /* count special chars */ if ((results = gt_encseq_get_gc(encseq, true, true, err)) != NULL) { gt_ensure(had_err, gt_double_equals_double(results[0], 0.5)); gt_ensure(had_err, gt_double_equals_double(results[1], (2.0/5.0))); gt_ensure(had_err, gt_double_equals_double(results[2], (2.0/5.0))); gt_ensure(had_err, gt_double_equals_double(results[3], 0.5)); } else { had_err = -1; } gt_free(results); } gt_encseq_builder_delete(eb); gt_encseq_delete(encseq); } gt_alphabet_delete(alpha); return had_err; }