/*
 * Lua binding: report whether the encoded sequence given as argument 1 is
 * mirrored. Pushes a single boolean onto the Lua stack and returns 1, the
 * number of pushed results.
 */
static int encseq_lua_is_mirrored(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);
  int mirrored_flag;

  gt_assert(*encseq);
  /* lua_pushboolean() only distinguishes zero from non-zero */
  mirrored_flag = gt_encseq_is_mirrored(*encseq) ? 1 : 0;
  lua_pushboolean(L, mirrored_flag);
  return 1;
}
/*
 * Lua binding: undo the mirroring of the encoded sequence given as
 * argument 1. An argument check on position 1 fails with the message
 * "is not mirrored" when the sequence is not currently mirrored.
 * Pushes no results.
 */
static int encseq_lua_unmirror(lua_State *L)
{
  GtEncseq **encseq = check_encseq(L, 1);

  gt_assert(*encseq);
  /* unmirroring only makes sense on a mirrored sequence */
  luaL_argcheck(L, gt_encseq_is_mirrored(*encseq), 1, "is not mirrored");
  gt_encseq_unmirror(*encseq);

  return 0;
}
/*
 * Lua binding: mirror the encoded sequence given as argument 1.
 *
 * An argument check on position 1 fails with the message
 * "is already mirrored" when the sequence is mirrored already. If
 * gt_encseq_mirror() returns non-zero, the error object is handed to
 * gt_lua_error(). Pushes no results.
 */
static int encseq_lua_mirror(lua_State *L)
{
  GtEncseq **encseq;
  GtError *err;

  encseq = check_encseq(L, 1);
  gt_assert(*encseq);
  luaL_argcheck(L, !gt_encseq_is_mirrored(*encseq), 1, "is already mirrored");

  /* Create the error object only after all argument validation is done:
     a failing luaL_argcheck() raises a Lua error and does not return, so an
     error object allocated before the checks would never be released. */
  err = gt_error_new();
  if (gt_encseq_mirror(*encseq, err) != 0)
    gt_lua_error(L, err); /* NOTE(review): presumably does not return and
                             disposes of err itself -- confirm */
  gt_error_delete(err);
  return 0;
}
static void showprjinfo(FILE *outprj, GtReadmode readmode, const GtEncseq *encseq, GtUword numberofallsortedsuffixes, unsigned int prefixlength, GtUword numoflargelcpvalues, double averagelcp, GtUword maxbranchdepth, const Definedunsignedlong *longest) { GtUword totallength; GtUword numofsequences; totallength = gt_encseq_total_length(encseq); fprintf(outprj,"totallength="GT_WU"\n",totallength); PRJSPECIALOUT(specialcharacters); PRJSPECIALOUT(specialranges); PRJSPECIALOUT(realspecialranges); PRJSPECIALOUT(lengthofspecialprefix); PRJSPECIALOUT(lengthofspecialsuffix); PRJSPECIALOUT(wildcards); PRJSPECIALOUT(wildcardranges); PRJSPECIALOUT(realwildcardranges); PRJSPECIALOUT(lengthofwildcardprefix); PRJSPECIALOUT(lengthofwildcardsuffix); numofsequences = gt_encseq_num_of_sequences(encseq); fprintf(outprj,"numofsequences="GT_WU"\n",numofsequences); fprintf(outprj,"numofdbsequences="GT_WU"\n",numofsequences); fprintf(outprj,"numofquerysequences=0\n"); fprintf(outprj,"numberofallsortedsuffixes="GT_WU"\n", numberofallsortedsuffixes); if (longest->defined) { fprintf(outprj,"longest="GT_WU"\n",longest->valueunsignedlong); } fprintf(outprj,"prefixlength=%u\n",prefixlength); fprintf(outprj,"largelcpvalues="GT_WU"\n",numoflargelcpvalues); fprintf(outprj,"averagelcp=%.2f\n",averagelcp); fprintf(outprj,"maxbranchdepth="GT_WU"\n",maxbranchdepth); fprintf(outprj,"integersize=%u\n", (unsigned int) (sizeof (GtUword) * CHAR_BIT)); fprintf(outprj,"littleendian=%c\n",gt_is_little_endian() ? '1' : '0'); fprintf(outprj,"readmode=%u\n",(unsigned int) readmode); fprintf(outprj,"mirrored=%c\n", gt_encseq_is_mirrored(encseq) ? '1' : '0'); }
/*
 * Read every unit of the exported two-bit encoding of encseq once.
 *
 * The units are added up and the total is asserted to be positive and then
 * logged; the sum exists only so that the compiler cannot optimize away the
 * memory accesses. For a mirrored encseq only the units covering
 * (totallength - 1)/2 positions are visited.
 * NOTE(review): presumably the mirrored half is not part of the exported
 * array -- confirm.
 */
static void gt_readjoiner_assembly_pump_encseq_through_cache(
                                                    const GtEncseq *encseq)
{
  const GtTwobitencoding *units = gt_encseq_twobitencoding_export(encseq);
  const GtUword totallength = gt_encseq_total_length(encseq);
  GtUword numofunits, unit;
  uint64_t sum = 0; /* keeps the loop below observable for the optimizer */

  if (gt_encseq_is_mirrored(encseq))
  {
    numofunits = gt_unitsoftwobitencoding((totallength - 1)/2);
  } else
  {
    numofunits = gt_unitsoftwobitencoding(totallength);
  }

  unit = 0;
  while (unit < numofunits)
  {
    sum += units[unit];
    unit++;
  }
  gt_assert(sum > 0);
#ifndef S_SPLINT_S
  gt_log_log("encseq codes-sum: %"PRIu64, sum);
#endif
}
/*
 * Load the encoded sequence for indexname and attach the index tables
 * selected by the bit set demand to *suffixarray.
 *
 * map:         if true, tables are memory mapped with
 *              gt_fa_mmap_check_size_with_suffix(); otherwise they are set up
 *              as buffered file streams via INITBufferedfile.
 * suffixarray: initialized here with initsuffixarray() and then filled; on
 *              any failure gt_freesuffixarray() is called on it before
 *              returning.
 * demand:      SARR_DESTAB, SARR_SDSTAB and SARR_SSPTAB control which tables
 *              the encseq loader requires; SARR_SUFTAB, SARR_LCPTAB,
 *              SARR_BWTTAB and SARR_BCKTAB select the tables handled below.
 * logger, err: passed on to the loader and helpers; err also receives the
 *              messages set directly in this function.
 *
 * Returns 0 on success and -1 if any step failed.
 *
 * NOTE(review): INITBufferedfile is defined outside this block; the code
 * below tests haserr right after using it, so the macro presumably sets the
 * local haserr (and uses err) when the file cannot be opened -- confirm.
 */
static int inputsuffixarray(bool map,
                            Suffixarray *suffixarray,
                            unsigned int demand,
                            const char *indexname,
                            GtLogger *logger,
                            GtError *err)
{
  bool haserr = false;
  GtEncseqLoader *el;
  GtUword totallength = 0;

  gt_error_check(err);
  initsuffixarray(suffixarray);
  /* configure the loader so that exactly the demanded encseq tables are
     required */
  el = gt_encseq_loader_new();
  if (!(demand & SARR_DESTAB))
    gt_encseq_loader_do_not_require_des_tab(el);
  else
    gt_encseq_loader_require_des_tab(el);
  if (!(demand & SARR_SDSTAB))
    gt_encseq_loader_do_not_require_sds_tab(el);
  else
    gt_encseq_loader_require_sds_tab(el);
  if (!(demand & SARR_SSPTAB))
    gt_encseq_loader_do_not_require_ssp_tab(el);
  else
    gt_encseq_loader_require_ssp_tab(el);
  gt_encseq_loader_set_logger(el, logger);
  suffixarray->encseq = gt_encseq_loader_load(el, indexname, err);
  gt_encseq_loader_delete(el);
  if (suffixarray->encseq == NULL)
  {
    haserr = true;
  }
  if (!haserr)
  {
    /* NOTE(review): presumably reads the project file and fills fields such
       as numberofallsortedsuffixes, longest, numoflargelcpvalues,
       prefixlength and mirroredencseq, which are used below -- confirm */
    haserr = scanprjfileuintkeys(suffixarray,indexname,logger,err);
  }
  /* mirror the loaded encseq if the suffix array expects a mirrored one and
     the encseq is not mirrored yet */
  if (!haserr && suffixarray->mirroredencseq
        && !gt_encseq_is_mirrored(suffixarray->encseq))
  {
    if (gt_encseq_mirror(suffixarray->encseq, err) != 0)
      haserr = true;
  }
  if (!haserr)
  {
    /* taken after the optional mirroring above */
    totallength = gt_encseq_total_length(suffixarray->encseq);
  }
  /* suffix table */
  if (!haserr && (demand & SARR_SUFTAB))
  {
    if (map)
    {
      /* nothing is mapped when there are no sorted suffixes */
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->suftab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_SUFTABSUFFIX,
                                     suffixarray->numberofallsortedsuffixes,
                                     sizeof (*suffixarray->suftab),
                                     err);
        if (suffixarray->suftab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
#if defined (_LP64) || defined (_WIN64)
      /* the file size decides whether the entries are read as uint32_t or
         as GtUword */
      off_t filesize = gt_file_size_with_suffix(indexname,GT_SUFTABSUFFIX);

      if (filesize == (off_t) sizeof (uint32_t) *
                              suffixarray->numberofallsortedsuffixes)
      {
        gt_logger_log(logger,"read suftab in units of 4 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_uint32_t,uint32_t,
                         GT_SUFTABSUFFIX);
      } else
      {
        gt_logger_log(logger,"read suftab in units of 8 bytes");
        INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                         GT_SUFTABSUFFIX);
      }
#else
      /* NOTE(review): assumes GtUword is 4 bytes wide when neither _LP64 nor
         _WIN64 is defined -- confirm */
      gt_logger_log(logger,"read suftab in units of 4 bytes");
      INITBufferedfile(indexname,&suffixarray->suftabstream_GtUword,GtUword,
                       GT_SUFTABSUFFIX);
#endif
    }
    /* a demanded suffix table additionally needs the "longest" value */
    if (!haserr && !suffixarray->longest.defined)
    {
      gt_error_set(err,"longest not defined");
      haserr = true;
    }
  }
  /* lcp table, plus the table of large lcp values if there are any */
  if (!haserr && (demand & SARR_LCPTAB))
  {
    if (map)
    {
      if (suffixarray->numberofallsortedsuffixes > 0)
      {
        suffixarray->lcptab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_LCPTABSUFFIX,
                                     suffixarray->numberofallsortedsuffixes,
                                     sizeof (*suffixarray->lcptab),
                                     err);
        if (suffixarray->lcptab == NULL)
        {
          haserr = true;
        }
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->lcptabstream,GtUchar,
                       GT_LCPTABSUFFIX);
      /* position the stream behind the first entry of the lcp file */
      if (!haserr &&
          fseek(suffixarray->lcptabstream.fp,
                (GtWord) sizeof (GtUchar),SEEK_SET))
      {
        gt_error_set(err,"fseek(esastream) failed: %s",strerror(errno));
        haserr = true;
      }
    }
    if (!haserr && !suffixarray->numoflargelcpvalues.defined)
    {
      gt_error_set(err,"numoflargelcpvalues not defined");
      haserr = true;
    }
    if (!haserr && suffixarray->numoflargelcpvalues.valueunsignedlong > 0)
    {
      if (map)
      {
        suffixarray->llvtab
          = gt_fa_mmap_check_size_with_suffix(indexname,
                                     GT_LARGELCPTABSUFFIX,
                                     (GtUword)
                                     suffixarray->numoflargelcpvalues.
                                     valueunsignedlong,
                                     sizeof (*suffixarray->llvtab),
                                     err);
        if (suffixarray->llvtab == NULL)
        {
          haserr = true;
        }
      } else
      {
        INITBufferedfile(indexname,&suffixarray->llvtabstream,Largelcpvalue,
                         GT_LARGELCPTABSUFFIX);
      }
    }
  }
  /* bwt table: expected to hold totallength+1 entries when mapped */
  if (!haserr && (demand & SARR_BWTTAB))
  {
    if (map)
    {
      suffixarray->bwttab
        = gt_fa_mmap_check_size_with_suffix(indexname,
                                   GT_BWTTABSUFFIX,
                                   totallength+1,
                                   sizeof (*suffixarray->bwttab),
                                   err);
      if (suffixarray->bwttab == NULL)
      {
        haserr = true;
      }
    } else
    {
      INITBufferedfile(indexname,&suffixarray->bwttabstream,GtUchar,
                       GT_BWTTABSUFFIX);
    }
  }
  /* bucket table: handled by gt_bcktab_map() regardless of the map flag */
  if (!haserr && (demand & SARR_BCKTAB))
  {
    suffixarray->bcktab
      = gt_bcktab_map(indexname,
                      gt_encseq_alphabetnumofchars(suffixarray->encseq),
                      suffixarray->prefixlength,
                      totallength+1,
                      true,
                      err);
    if (suffixarray->bcktab == NULL)
    {
      haserr = true;
    }
  }
  /* on any failure release whatever has been attached so far */
  if (haserr)
  {
    gt_freesuffixarray(suffixarray);
  }
  return haserr ? -1 : 0;
}
/*
 * Determine a GC value for every sequence of a DNA encseq.
 *
 * encseq:       the encoded sequence; its alphabet is asserted to be DNA.
 * with_special: passed through unchanged to calculate_gc().
 * calculate:    if true, calculate_gc() (defined elsewhere) fills the entry
 *               of each sequence from its G/C and A/T counts; if false, the
 *               plain number of G/C characters is stored.
 * err:          unused.
 *
 * Returns an array obtained from gt_calloc() with one double per sequence of
 * encseq. For a mirrored encseq the second half of the array is finally
 * overwritten with the values of the first half in reverse order.
 * NOTE(review): the caller presumably owns the returned array -- confirm.
 */
double *gt_encseq_get_gc(const GtEncseq *encseq, bool with_special,
                         bool calculate, GT_UNUSED GtError *err)
{
  GtAlphabet *alphabet = gt_encseq_alphabet(encseq);
  GtEncseqReader *reader;
  GtUchar acgt[8], cc;
  double *gc_content;
  /* a unit is one sequence of the encseq */
  unsigned long pos,
                totallength,
                max_unit,
                num_entries,
                unit = 0,
                unit_end,
                at_count = 0,
                gc_count = 0,
                other_count = 0; /* tallied, but not evaluated here */
  unsigned int code;
  bool mirrored;

  gt_assert(gt_alphabet_is_dna(alphabet));
  /* encoded forms of a/t (slots 0..3) and c/g (slots 4..7) */
  gt_alphabet_encode_seq(alphabet, acgt, "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  mirrored = gt_encseq_is_mirrored(encseq);
  if (mirrored)
  {
    max_unit = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    num_entries = GT_MULT2(max_unit);
  } else
  {
    max_unit = gt_encseq_num_of_sequences(encseq);
    num_entries = max_unit;
  }
  gc_content = gt_calloc((size_t) num_entries, sizeof (double));

  /* position directly behind the current sequence */
  unit_end = gt_encseq_seqstartpos(encseq, unit) +
             gt_encseq_seqlength(encseq, unit);
  for (pos = 0; pos < totallength; pos++)
  {
    if (pos != unit_end)
    {
      /* ordinary position: classify the next character by the first
         matching slot of acgt */
      cc = gt_encseq_reader_next_encoded_char(reader);
      for (code = 0; code < 8U; code++)
      {
        if (cc == acgt[code])
          break;
      }
      if (code < 4U)
        at_count++;
      else if (code < 8U)
        gc_count++;
      else
        other_count++;
    } else
    {
      /* end of the current sequence: store its result, then restart the
         reader and the counters behind this position */
      if (calculate)
        calculate_gc(encseq, gc_content, with_special, unit, gc_count,
                     at_count);
      else
        gc_content[unit] = (double) gc_count;
      unit++;
      unit_end = gt_encseq_seqstartpos(encseq, unit) +
                 gt_encseq_seqlength(encseq, unit);
      gt_encseq_reader_reinit_with_readmode(reader, encseq,
                                            GT_READMODE_FORWARD,
                                            pos + 1UL);
      gc_count = 0UL;
      at_count = 0UL;
      other_count = 0UL;
    }
  }
  /* the last sequence is not followed by a position inside the loop range */
  if (calculate)
    calculate_gc(encseq, gc_content, with_special, unit, gc_count, at_count);
  else
    gc_content[unit] = (double) gc_count;
  gt_encseq_reader_delete(reader);

  if (mirrored)
  {
    /* copy the first half onto the second half in reverse order */
    for (unit = 0; unit < max_unit; unit++)
    {
      gc_content[num_entries - 1 - unit] = gc_content[unit];
    }
  }
  return gc_content;
}