/* Write the bit sequence <cntlist> to <file> as text: first a header line
   "[n: <nofreads>]", then one line per set bit among the first <nofreads>
   bits, holding the index of that bit. <file> must not be NULL. */
static inline void gt_cntlist_show_ascii(GtBitsequence *cntlist,
                                         GtUword nofreads, FILE *file)
{
  GtUword readnum = 0;

  gt_assert(file != NULL);
  fprintf(file, "[n: "GT_WU"]\n", nofreads);
  while (readnum < nofreads)
  {
    if (GT_ISIBITSET(cntlist, readnum))
    {
      fprintf(file, ""GT_WU"\n", readnum);
    }
    readnum++;
  }
}
/* Return how many of the first <nofreads> bits of <cntlist> are set. */
GtUword gt_cntlist_count(const GtBitsequence *cntlist, GtUword nofreads)
{
  GtUword nofset = 0, readnum = 0;

  while (readnum < nofreads)
  {
    nofset += ((bool) GT_ISIBITSET(cntlist, readnum)) ? 1 : 0;
    readnum++;
  }
  return nofset;
}
/* Verify the lcp values stored in <outlcpinfosample> against the suffixes
   listed in <sortedsample>.

   For every pair of neighbouring entries (idx-1, idx) with
   1 <= idx < <effectivesamplesize>, the two suffixes are compared via
   gt_encseq_check_comparetwosuffixes(), which also delivers their real lcp.
   The stored value at <idx> must be marked as set. If <checkequality> is
   true the stored value has to equal the real lcp, otherwise it only must
   not exceed it. On a violation a diagnostic is written to stderr and the
   process exits with GT_EXIT_PROGRAMMING_ERROR.

   Nothing is done when <effectivesamplesize> is 0. */
void gt_Outlcpinfo_check_lcpvalues(const GtEncseq *encseq,
                                   GtReadmode readmode,
                                   const GtSuffixsortspace *sortedsample,
                                   GtUword effectivesamplesize,
                                   const GtOutlcpinfo *outlcpinfosample,
                                   bool checkequality)
{
  GT_UNUSED int cmp;
  GtUword idx, reallcp, previous, current, storedlcp, totalcmpmissing = 0;

  if (effectivesamplesize == 0)
  {
    return;
  }
  previous = gt_suffixsortspace_getdirect(sortedsample,0);
  /* the increment part shifts the window: the current suffix becomes the
     previous one for the next pair */
  for (idx = 1UL; idx < effectivesamplesize; previous = current, idx++)
  {
    bool violated;

    current = gt_suffixsortspace_getdirect(sortedsample,idx);
    cmp = gt_encseq_check_comparetwosuffixes(encseq,
                                             readmode,
                                             &reallcp,
                                             false,
                                             false,
                                             0,
                                             previous,
                                             current,
                                             NULL,
                                             NULL);
    gt_assert(cmp <= 0);
    gt_assert(GT_ISIBITSET(outlcpinfosample->lcpsubtab.tableoflcpvalues
                                           .isset,idx));
    storedlcp = (GtUword) outlcpinfosample->lcpsubtab.tableoflcpvalues.
                                           bucketoflcpvalues[idx];
    violated = checkequality ? (storedlcp != reallcp)
                             : (storedlcp > reallcp);
    if (!violated)
    {
      /* only consumed by the disabled statistics output below */
      totalcmpmissing += (reallcp - storedlcp);
      continue;
    }
    fprintf(stderr,"idx="GT_WU",suffixpair="GT_WU","GT_WU": "
                   "currentlcp = "GT_WU" %s "GT_WU" = reallcp\n",
                   idx,previous,current,storedlcp,
                   checkequality ? "!=" : ">",reallcp);
    gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,previous,50UL);
    fprintf(stderr,"\n");
    gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,current,50UL);
    fprintf(stderr,"\n");
    exit(GT_EXIT_PROGRAMMING_ERROR);
  }
  /*printf("totalcmpmissing = "GT_WU"(avg=%.2f)\n",
           totalcmpmissing,(double) totalcmpmissing/effectivesamplesize);*/
}
/* Leaf-edge callback. If <fatherdepth> is at least state->shortest and the
   positions around the suffix starting at <leafnumber> are flagged in
   state->sspbittab (the position before it, unless <leafnumber> is 0, and
   the position <leafnumber> + <fatherdepth>), the sequence number of
   <leafnumber> is passed to processcontained().
   NOTE(review): presumably sspbittab marks sequence separator positions, so
   the test selects suffixes spanning a complete sequence -- confirm.
   The progress counter is advanced when state->show_progressbar is set.
   Always returns 0. */
static inline int processleafedge_rdjcv(GT_UNUSED bool firstsucc,
                                        unsigned long fatherdepth,
                                        GT_UNUSED GtBUinfo_rdjcv *father,
                                        unsigned long leafnumber,
                                        GtBUstate_rdjcv *state,
                                        GT_UNUSED GtError *err)
{
  bool left_ok = false, right_ok = false;

  if (fatherdepth >= state->shortest)
  {
    /* evaluation order matters: the right-hand bit is only looked up once
       the left-hand test has succeeded */
    left_ok = (leafnumber == 0) ||
              (GT_ISIBITSET(state->sspbittab, leafnumber-1) ? true : false);
    if (left_ok)
    {
      right_ok = GT_ISIBITSET(state->sspbittab, leafnumber + fatherdepth)
                   ? true : false;
    }
  }
  if (left_ok && right_ok)
  {
    const unsigned long seqnum = gt_encseq_seqnum(state->encseq, leafnumber);

    processcontained(seqnum, state);
  }
  if (state->show_progressbar)
  {
    state->progress++;
  }
  return 0;
}
/* Record <seqnum> in <state>. If state->firstrevcompl is positive, <seqnum>
   is first mapped through GT_READJOINER_READNUM. The corresponding bit in
   state->contained is set and state->counter incremented the first time a
   number is seen. In addition state->cmin tracks the smallest number seen
   since state->csize was last 0, and state->csize is incremented on every
   call. */
static inline void processcontained(unsigned long seqnum,
                                    ContfindBUstate *state)
{
  if (state->firstrevcompl > 0)
  {
    seqnum = GT_READJOINER_READNUM(seqnum, state->firstrevcompl,
                                   state->nofsequences);
  }

  /* count every sequence number only once */
  if (!GT_ISIBITSET(state->contained, seqnum))
  {
    GT_SETIBIT(state->contained, seqnum);
    state->counter++;
  }

  /* maintain the minimum of the current group */
  if (state->csize == 0 || seqnum < state->cmin)
  {
    state->cmin = seqnum;
  }
  state->csize++;
}
/* Count the sequences flagged in storeonline->hasmatch, print that count to
   stdout, and clear the hasmatch tables of both <storeonline> and
   <storeoffline>. The number of sequences is taken from
   storeonline->encseq.

   Unless NDEBUG is defined, both tables are additionally required to agree
   for every sequence; on the first disagreement a message naming
   <queryunit> and the sequence is written to stderr and the process exits
   with GT_EXIT_PROGRAMMING_ERROR. */
void gt_checkandresetstorematch(GT_UNUSED uint64_t queryunit,
                                Storematchinfo *storeonline,
                                Storematchinfo *storeoffline)
{
  unsigned long seqnum = 0,
                countmatchseq = 0,
                numofdbsequences
                  = gt_encseq_num_of_sequences(storeonline->encseq);

  while (seqnum < numofdbsequences)
  {
    const bool online = GT_ISIBITSET(storeonline->hasmatch,seqnum)
                          ? true : false;
#ifndef NDEBUG
    const bool offline = GT_ISIBITSET(storeoffline->hasmatch,seqnum)
                           ? true : false;

    if (online && !offline)
    {
      fprintf(stderr,"query " Formatuint64_t " refseq %lu: "
                     "online has match but offline not\n",
                     PRINTuint64_tcast(queryunit),seqnum);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    if (!online && offline)
    {
      fprintf(stderr,"query " Formatuint64_t " refseq %lu: "
                     "offline has match but online not\n",
                     PRINTuint64_tcast(queryunit),seqnum);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
#endif
    if (online)
    {
      countmatchseq++;
    }
    seqnum++;
  }
  GT_CLEARBITTAB(storeonline->hasmatch,numofdbsequences);
  GT_CLEARBITTAB(storeoffline->hasmatch,numofdbsequences);
  printf("matching sequences: %lu\n",countmatchseq);
}
/* Match callback: flag the database sequence of <match> in the hasmatch
   table of the Storematchinfo passed as <info>. When match->dbabsolute is
   set, the sequence number is derived from match->dbstartpos via the
   encoded sequence; otherwise match->dbseqnum is used directly. */
static void storematch(void *info,const GtIdxMatch *match)
{
  Storematchinfo *smi = (Storematchinfo *) info;
  const unsigned long seqnum
    = match->dbabsolute ? gt_encseq_seqnum(smi->encseq, match->dbstartpos)
                        : match->dbseqnum;

  if (GT_ISIBITSET(smi->hasmatch,seqnum))
  {
    return; /* already flagged */
  }
  GT_SETIBIT(smi->hasmatch,seqnum);
}
static inline void rdj_pairwise_generic(bool use_dp, GtOvlfindMode m, GtEncseq *encseq, bool revcompl, bool show_progressbar, bool use_kmp, double max_error, GtUword min_length, bool find_nonmaximal, GtSpmproc proc, GtSpmprocA proc_a, void* procdata, bool cntfilter, GtBitsequence *cntreads_in, GtBitsequence **cntreads_out, GtUword *nofreads) { GtContfind containment_status; GtBitsequence *cntreads = NULL; GtUint64 progress = 0; GtUword i, j, startpos, v_seqnum, nofsequences, n; struct Read u, v; struct Data d; gt_kmp_t** kmp_values = NULL; GT_RDJ_PAIRWISE_INIT_STRUCT_DATA(d, proc, proc_a, procdata, &u, &v, 0); gt_assert(encseq != NULL); d.mode = m; if ((m == GT_OVLFIND_ALL) && cntfilter) d.mode = GT_OVLFIND_PROPER_SPM; n = gt_encseq_num_of_sequences(encseq); if (use_kmp) kmp_values = prepare_kmp_values(encseq, n); nofsequences = n; if (revcompl) n = n >> 1; if (cntreads_in != NULL) cntreads = cntreads_in; else if (m != GT_OVLFIND_SPM) GT_INITBITTAB(cntreads, n); if (show_progressbar) gt_progressbar_start(&progress, (GtUint64)n * ((GtUint64)n - 1ULL) / 2ULL); for (i = 0; i < n; i++) { u.seqnum = i; u.direct = true; u.len = gt_encseq_seqlength(encseq, i); u.seq = gt_malloc(sizeof (char) * (u.len + 1)); startpos = gt_encseq_seqstartpos(encseq, i); gt_encseq_extract_decoded(encseq, u.seq, startpos, startpos + u.len - 1); u.seq[u.len] = '\0'; if (use_kmp) { gt_assert(kmp_values != NULL); u.pi = kmp_values[i]; } for (j = i; j < n; j++) { if (cntfilter) { gt_assert(cntreads != NULL); if ((bool)GT_ISIBITSET(cntreads, i)) break; if ((bool)GT_ISIBITSET(cntreads, j)) continue; } v.seqnum = j; /* find overlaps using direct v */ v.direct = true; v.len = gt_encseq_seqlength(encseq, j); v.seq = gt_malloc(sizeof (char) * (v.len + 1)); startpos = gt_encseq_seqstartpos(encseq, j); gt_encseq_extract_decoded(encseq, v.seq, startpos, startpos + v.len - 1); v.seq[v.len] = '\0'; if (use_kmp) { gt_assert(kmp_values != NULL); v.pi = kmp_values[j]; } containment_status = use_dp ? 
find_approx_overlaps(&d, max_error, min_length, find_nonmaximal) : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal); if (m != GT_OVLFIND_SPM) mark_contained(containment_status, u.seqnum, v.seqnum, cntreads); /* find overlaps using reverse complement of v */ if (revcompl) { v_seqnum = nofsequences - j - 1; v.direct = false; gt_assert(gt_encseq_seqlength(encseq, j) == gt_encseq_seqlength(encseq, v_seqnum)); startpos = gt_encseq_seqstartpos(encseq, v_seqnum); gt_encseq_extract_decoded(encseq, v.seq, startpos, startpos + v.len - 1); if (use_kmp) { gt_assert(kmp_values != NULL); v.pi = kmp_values[v_seqnum]; } containment_status = use_dp ? find_approx_overlaps(&d, max_error, min_length, find_nonmaximal) : find_exact_overlaps(&d, use_kmp, min_length, find_nonmaximal); if (m != GT_OVLFIND_SPM) mark_contained(containment_status, u.seqnum, v.seqnum, cntreads); } gt_free(v.seq); progress++; } gt_free(u.seq); } if (cntreads_out != NULL) *cntreads_out = cntreads; else if (cntreads_in == NULL) gt_free(cntreads); if (nofreads != NULL) *nofreads = n; if (use_kmp) free_kmp_values(kmp_values, revcompl ? n << 1 : n); if (show_progressbar) gt_progressbar_stop(); }
/* Tool runner: build a bit vector, compress it, write the compressed form
   to a temporary file, read it back and optionally compare both compressed
   representations against the plain bit vector.

   The bit vector is either read from arguments->filename (if that option is
   set; the file holds the number of bits as unsigned long long followed by
   the words of the bit vector) or generated with arguments->size words,
   filled randomly if arguments->fill_random is set and with a pattern
   depending on the word index otherwise.

   Returns 0 on success and a non-zero value on failure, in which case <err>
   is set. */
static int gt_compressedbits_runner(GT_UNUSED int argc,
                                    GT_UNUSED const char **argv,
                                    GT_UNUSED int parsed_args,
                                    void *tool_arguments,
                                    GtError *err)
{
  GtCompressdbitsArguments *arguments = tool_arguments;
  int had_err = 0;
  unsigned long idx;
  unsigned long long num_of_bits = 0ULL;
  GtBitsequence *bits = NULL;
  GtCompressedBitsequence *cbs = NULL,
                          *read_cbs = NULL;
  GtStr *filename = gt_str_new();
  FILE *fp = NULL;

  gt_error_check(err);
  gt_assert(arguments);
  gt_assert(argc == parsed_args);

  if (gt_option_is_set(arguments->filename_op)) {
    FILE *file = NULL;
    gt_assert(arguments->filename != NULL);

    /* raw words are read with fread, hence binary mode */
    file = gt_xfopen(gt_str_get(arguments->filename), "rb");
    if ((size_t) 1 != gt_xfread(&num_of_bits,
                                sizeof (num_of_bits), (size_t) 1, file)) {
      /* a failure must not be reported without an error message */
      gt_error_set(err, "could not read number of bits from file \"%s\"",
                   gt_str_get(arguments->filename));
      had_err = -1;
    }
    if (!had_err) {
      gt_log_log("bits to read: %llu", num_of_bits);
      arguments->size = (unsigned long) GT_NUMOFINTSFORBITS(num_of_bits);
      bits = gt_malloc(sizeof (*bits) * arguments->size);
      if ((size_t) arguments->size != gt_xfread(bits, sizeof (*bits),
                                                (size_t) arguments->size,
                                                file)) {
        gt_error_set(err, "could not read %lu words of bit data from "
                          "file \"%s\"",
                     arguments->size, gt_str_get(arguments->filename));
        had_err = -1;
      }
    }
    gt_xfclose(file);
  }
  else {
    bits = gt_calloc(sizeof (*bits), (size_t) arguments->size);
    num_of_bits = (unsigned long long) (GT_INTWORDSIZE * arguments->size);

    if (arguments->fill_random) {
      for (idx = 0; idx < arguments->size; idx++) {
        bits[idx] =
          (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ gt_rand_max(ULONG_MAX));
      }
    }
    else {
      for (idx = 0; idx < arguments->size; idx++)
        bits[idx] = (GtBitsequence) (0xAAAAAAAAAAAAAAAAUL ^ idx);
    }
  }

  if (!had_err) {
    /* the temporary file is only created to obtain its name; it is closed
       again right away and written below via that name */
    fp = gt_xtmpfp(filename);
    gt_fa_xfclose(fp);
    fp = NULL;

    gt_log_log("filename: %s", gt_str_get(filename));
    gt_log_log("size in words: %lu", arguments->size);
    cbs = gt_compressed_bitsequence_new(
                            bits, arguments->samplerate,
                            (unsigned long) num_of_bits);
    gt_log_log("original size in MB: %2.3f",
               (sizeof (*bits) * arguments->size) / (1024.0 * 1024.0));
    gt_log_log("compressed size in MB: %2.3f",
               gt_compressed_bitsequence_size(cbs) / (1024.0 * 1024.0));
    gt_log_log("popcount table size thereof in MB: %2.3f",
               gt_popcount_tab_calculate_size(15U) / (1024.0 * 1024.0));
    had_err = gt_compressed_bitsequence_write(cbs, gt_str_get(filename),
                                              err);
  }
  if (!had_err) {
    read_cbs =
      gt_compressed_bitsequence_new_from_file(gt_str_get(filename), err);
    if (read_cbs == NULL)
      had_err = -1;
  }
  if (!had_err && bits != NULL && arguments->check_consistency) {
    /* every bit must agree between the plain vector, the compressed vector
       and the compressed vector read back from file; checked by assertions
       only */
    for (idx = 0; (unsigned long long) idx < num_of_bits; ++idx) {
      int GT_UNUSED bit = gt_compressed_bitsequence_access(read_cbs, idx);
      int GT_UNUSED original = GT_ISIBITSET(bits, idx) ? 1 : 0;
      gt_assert(gt_compressed_bitsequence_access(cbs, idx) == bit);
      gt_assert(original == bit);
    }
  }
  gt_compressed_bitsequence_delete(cbs);
  gt_compressed_bitsequence_delete(read_cbs);
  gt_free(bits);
  gt_str_delete(filename);
  return had_err;
}