/*
 * Cross-check the lcp values stored in <outlcpinfosample> against lcp values
 * recomputed directly from <encseq>.
 *
 * For every index idx in [1, effectivesamplesize) the suffixes at positions
 * idx-1 and idx of <sortedsample> are compared with
 * gt_encseq_check_comparetwosuffixes(), which delivers the recomputed lcp
 * (reallcp). The stored value bucketoflcpvalues[idx] (currentlcp) must then
 * satisfy
 *   - currentlcp == reallcp   if <checkequality> is true,
 *   - currentlcp <= reallcp   otherwise.
 *
 * On the first violation the offending pair is reported on stderr together
 * with the first 50 symbols of both suffixes, and the process terminates
 * with GT_EXIT_PROGRAMMING_ERROR. Nothing is done for an empty sample.
 */
void gt_Outlcpinfo_check_lcpvalues(const GtEncseq *encseq,
                                   GtReadmode readmode,
                                   const GtSuffixsortspace *sortedsample,
                                   GtUword effectivesamplesize,
                                   const GtOutlcpinfo *outlcpinfosample,
                                   bool checkequality)
{
  GT_UNUSED int cmp; /* only inspected inside gt_assert */
  GtUword idx, reallcp, startpos1, startpos2, currentlcp;

  if (effectivesamplesize == 0)
  {
    return;
  }
  startpos1 = gt_suffixsortspace_getdirect(sortedsample,0);
  for (idx = 1UL; idx < effectivesamplesize; idx++)
  {
    startpos2 = gt_suffixsortspace_getdirect(sortedsample,idx);
    cmp = gt_encseq_check_comparetwosuffixes(encseq,
                                             readmode,
                                             &reallcp,
                                             false,
                                             false,
                                             0,
                                             startpos1,
                                             startpos2,
                                             NULL,
                                             NULL);
    /* the sample is expected to be in sorted order */
    gt_assert(cmp <= 0);
    gt_assert(GT_ISIBITSET(outlcpinfosample->lcpsubtab.tableoflcpvalues
                                           .isset,idx));
    currentlcp = (GtUword) outlcpinfosample->lcpsubtab.tableoflcpvalues.
                                             bucketoflcpvalues[idx];
    if ((checkequality && currentlcp != reallcp) ||
        (!checkequality && currentlcp > reallcp))
    {
      fprintf(stderr,"idx=" GT_WU ",suffixpair=" GT_WU "," GT_WU ": "
                     "currentlcp = " GT_WU " %s " GT_WU " = reallcp\n",
                     idx,startpos1,startpos2,currentlcp,
                     checkequality ? "!=" : ">",reallcp);
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,startpos1,50UL);
      fprintf(stderr,"\n");
      gt_encseq_showatstartposwithdepth(stderr,encseq,readmode,startpos2,50UL);
      fprintf(stderr,"\n");
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    /* the second suffix of this pair is the first suffix of the next pair */
    startpos1 = startpos2;
  }
}
/*
 * Scan <suffixsortspace> downwards, starting at index <idx> and continuing
 * while the index is greater than targetoffset[source]. For each suffix
 * visited whose start position is positive, the encoded character directly
 * preceding it is looked up; if that character is not special and its
 * superbucket is not yet marked sorted, the suffix starting one position
 * earlier is stored at targetoffset[<that character>], and that target
 * offset is then decremented.
 *
 * Both sides of the loop condition are shifted by one so that the comparison
 * stays meaningful when either unsigned value has wrapped around below zero.
 * targetoffset[source] is re-read on every iteration because the loop body
 * may decrement it.
 */
static void backwardderive(const GtBucketspec2 *bucketspec2,
                           GtSuffixsortspace *suffixsortspace,
                           GtUword *targetoffset,
                           unsigned int source,
                           GtUword idx)
{
  while (idx + 1 > targetoffset[source] + 1)
  {
    const GtUword suffixstart
      = gt_suffixsortspace_getdirect(suffixsortspace,idx);

    if (suffixstart > 0)
    {
      const GtUchar prevchar
        = gt_encseq_get_encoded_char(bucketspec2->encseq,
                                     suffixstart - 1,
                                     bucketspec2->readmode);

      if (ISNOTSPECIAL(prevchar) &&
          !bucketspec2->superbuckettab[prevchar].sorted)
      {
        gt_suffixsortspace_setdirect(suffixsortspace,
                                     targetoffset[prevchar],
                                     suffixstart - 1);
        targetoffset[prevchar]--;
      }
    }
    idx--;
  }
}
/*
 * Return the entry of <sssp> addressed relative to the current bucket:
 * the direct index is bucketleftidx + subbucketleft + idx - partoffset,
 * and the value is fetched with gt_suffixsortspace_getdirect().
 */
unsigned long gt_suffixsortspace_get(const GtSuffixsortspace *sssp,
                                     unsigned long subbucketleft,
                                     unsigned long idx)
{
  const unsigned long directidx = sssp->bucketleftidx
                                  + subbucketleft
                                  + idx
                                  - sssp->partoffset;

  return gt_suffixsortspace_getdirect(sssp, directidx);
}
/*
 * Return the entry of <sssp> addressed relative to the current bucket:
 * the direct index is bucketleftidx + subbucketleft + idx - partoffset,
 * and the value is fetched with gt_suffixsortspace_getdirect().
 */
GtUword gt_suffixsortspace_get(const GtSuffixsortspace *sssp,
                               GtUword subbucketleft,
                               GtUword idx)
{
  const GtUword directidx = sssp->bucketleftidx
                            + subbucketleft
                            + idx
                            - sssp->partoffset;

  return gt_suffixsortspace_getdirect(sssp, directidx);
}
void gt_suffixsortspace_checkorder(const GtSuffixsortspace *sssp, unsigned long subbucketleft, unsigned long width) { unsigned long idx, currentpos; GT_UNUSED unsigned long prevpos; gt_assert(width > 0); prevpos = gt_suffixsortspace_getdirect(sssp, sssp->bucketleftidx+subbucketleft - sssp->partoffset); for (idx=sssp->bucketleftidx+subbucketleft - sssp->partoffset + 1; idx<sssp->bucketleftidx+subbucketleft + width - sssp->partoffset; idx++) { currentpos = gt_suffixsortspace_getdirect(sssp,idx); gt_assert(prevpos > currentpos); prevpos = currentpos; } }
/*
 * Print to stdout the direct index range "first,last=" covered by the
 * <width> entries starting at bucketleftidx + subbucketleft - partoffset,
 * followed by each entry of that range preceded by a blank. No trailing
 * newline is written.
 */
void gt_suffixsortspace_showrange(const GtSuffixsortspace *sssp,
                                  unsigned long subbucketleft,
                                  unsigned long width)
{
  const unsigned long first = sssp->bucketleftidx
                              + subbucketleft
                              - sssp->partoffset;
  const unsigned long end = first + width;
  unsigned long pos;

  printf("%lu,%lu=", first, end - 1);
  for (pos = first; pos < end; pos++)
  {
    printf(" %lu", gt_suffixsortspace_getdirect(sssp, pos));
  }
}
/*
 * Print to stdout the direct index range "first,last=" covered by the
 * <width> entries starting at bucketleftidx + subbucketleft - partoffset,
 * followed by each entry of that range preceded by a blank. No trailing
 * newline is written.
 */
void gt_suffixsortspace_showrange(const GtSuffixsortspace *sssp,
                                  GtUword subbucketleft,
                                  GtUword width)
{
  const GtUword first = sssp->bucketleftidx
                        + subbucketleft
                        - sssp->partoffset;
  const GtUword end = first + width;
  GtUword pos;

  printf(GT_WU "," GT_WU "=", first, end - 1);
  for (pos = first; pos < end; pos++)
  {
    printf(" " GT_WU, gt_suffixsortspace_getdirect(sssp, pos));
  }
}
/*
 * Fill <voiddest> (treated as an array of GtUchar) with <len> symbols: for
 * each i in [0, len) the suffix stored at direct index offset + i of
 * <suffixsortspace> is mapped through sfxIdx2BWTSym() using the encoded
 * sequence and read mode held in <translator> (a struct encSeqTrState).
 *
 * Returns the number of bytes written, i.e. len * sizeof (GtUchar).
 */
size_t gt_translateSuftab2BWTSuffixsortspace(
                                    void *translator,
                                    void *voiddest,
                                    const GtSuffixsortspace *suffixsortspace,
                                    unsigned long offset,
                                    size_t len)
{
  struct encSeqTrState *state = (struct encSeqTrState *) translator;
  GtUchar *out = (GtUchar *) voiddest;
  size_t i;

  gt_assert(state);
  for (i = 0; i < len; i++)
  {
    const unsigned long suffix
      = gt_suffixsortspace_getdirect(suffixsortspace, offset + i);

    out[i] = sfxIdx2BWTSym(suffix, state->encseq, state->readmode);
  }
  return len * sizeof (GtUchar);
}
/*
 * Tool runner: load the encoded sequence named by argv[parsed_args] and
 * output its sequences in the order selected in <tool_arguments>:
 *   - invert:       from the last sequence down to the first,
 *   - shuffle:      in the order delivered by
 *                   gt_seqorder_get_shuffled_seqnums(),
 *   - sort/revsort: in the order produced by gt_seqorder_sort(), ascending
 *                   for sort and descending for revsort.
 * A warning is issued if the encoded sequence has no description support.
 *
 * Returns 0 on success and -1 if the encoded sequence could not be loaded
 * (in which case <err> has been passed to the loader).
 */
static int gt_seqorder_runner(GT_UNUSED int argc,
                              const char **argv,
                              int parsed_args,
                              void *tool_arguments,
                              GtError *err)
{
  GtSeqorderArguments *arguments = tool_arguments;
  GtEncseqLoader *loader;
  GtEncseq *encseq;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments != NULL);

  /* load encseq */
  loader = gt_encseq_loader_new();
  encseq = gt_encseq_loader_load(loader, argv[parsed_args], err);
  if (encseq == NULL)
  {
    had_err = -1;
  }
  else
  {
    unsigned long nofseqs, seqidx;

    if (!gt_encseq_has_description_support(encseq))
    {
      gt_warning("%s has no description support", argv[parsed_args]);
    }
    nofseqs = gt_encseq_num_of_sequences(encseq);
    if (arguments->invert)
    {
      /* count down with an offset of one to avoid unsigned underflow */
      for (seqidx = nofseqs; seqidx > 0; seqidx--)
      {
        gt_seqorder_output(seqidx - 1, encseq);
      }
    }
    else if (arguments->shuffle)
    {
      unsigned long *seqnums = gt_malloc(sizeof (unsigned long) * nofseqs);

      gt_seqorder_get_shuffled_seqnums(nofseqs, seqnums);
      for (seqidx = 0; seqidx < nofseqs; seqidx++)
      {
        gt_seqorder_output(seqnums[seqidx], encseq);
      }
      gt_free(seqnums);
    }
    else
    {
      GtSuffixsortspace *suffixsortspace;

      gt_assert(arguments->sort || arguments->revsort);
      /* Note kept from the original code: use iterator over sequence
         separators: saves a lot of binary searches */
      suffixsortspace
        = gt_suffixsortspace_new(nofseqs,
                                 gt_encseq_seqstartpos(encseq, nofseqs-1),
                                 false,
                                 NULL);
      gt_seqorder_sort(suffixsortspace, encseq);
      if (arguments->sort)
      {
        for (seqidx = 0; seqidx < nofseqs; seqidx++)
        {
          gt_seqorder_output(
                 gt_encseq_seqnum(encseq,
                                  gt_suffixsortspace_getdirect(suffixsortspace,
                                                               seqidx)),
                 encseq);
        }
      }
      else
      {
        for (seqidx = nofseqs; seqidx > 0; seqidx--)
        {
          gt_seqorder_output(
                 gt_encseq_seqnum(encseq,
                                  gt_suffixsortspace_getdirect(suffixsortspace,
                                                               seqidx - 1)),
                 encseq);
        }
      }
      gt_suffixsortspace_delete(suffixsortspace, false);
    }
  }
  gt_encseq_loader_delete(loader);
  gt_encseq_delete(encseq);
  return had_err;
}
/*
 * Derive the order of the not yet sorted subbuckets from the already sorted
 * ones.
 *
 * The characters are visited in the order given by bucketspec2->order. For
 * each such character <source>:
 *   1. every subbucket (source,second) with source != second that is not
 *      yet marked sorted is counted as "hard work" (its width is added to
 *      the running total), logged and marked sorted;
 *   2. if the range in front of subbucket (source,source) is not empty, the
 *      target offsets are set to the start indexes of the subbuckets
 *      (idx,source) and forwardderive() is run from the start of
 *      superbucket <source>;
 *   3. if the range behind subbucket (source,source) is not empty, the
 *      target offsets are set to the last indexes of the subbuckets
 *      (idx,source) and backwardderive() is run from the last index of
 *      superbucket <source>;
 *   4. all subbuckets (idx,source) and superbucket <source> are marked
 *      sorted.
 * Finally the total amount of hard work, absolute and relative to the total
 * length of the encoded sequence, is written to <logger>.
 *
 * If WITHSUFFIXES is defined, all suffixes of the part are shown on stdout
 * before the derivation starts.
 */
void gt_copysort_derivesorting(const GtBucketspec2 *bucketspec2,
                               GtSuffixsortspace *suffixsortspace,
                               GtLogger *logger)
{
  GtUword hardwork = 0, *targetoffset;
  unsigned int idx, idxsource, source, second;

#ifdef WITHSUFFIXES
  {
    /* read mode and encoded sequence are taken from bucketspec2; this
       function has no variables named readmode or encseq */
    GtUword sufidx;

    for (sufidx = 0; sufidx < bucketspec2->partwidth; sufidx++)
    {
      gt_encseq_showatstartpos(
                      stdout,
                      GT_ISDIRREVERSE(bucketspec2->readmode) ? false : true,
                      GT_ISDIRCOMPLEMENT(bucketspec2->readmode) ? true : false,
                      bucketspec2->encseq,
                      gt_suffixsortspace_getdirect(suffixsortspace,sufidx));
    }
  }
#endif
  targetoffset = gt_malloc(sizeof (*targetoffset) * bucketspec2->numofchars);
  for (idxsource = 0; idxsource<bucketspec2->numofchars; idxsource++)
  {
    source = bucketspec2->order[idxsource];
    for (second = 0; second < bucketspec2->numofchars; second++)
    {
      if (!bucketspec2->subbuckettab[source][second].sorted &&
          source != second)
      {
        gt_assert(bucketspec2->subbuckettab[source][second].hardworktodo);
        gt_logger_log(logger,"hard work for %u %u",source,second);
        hardwork += getendidx(bucketspec2,source,second) -
                    getstartidx(bucketspec2,source,second);
        bucketspec2->subbuckettab[source][second].sorted = true;
      } else
      {
        gt_assert(!bucketspec2->subbuckettab[source][second].hardworktodo);
      }
    }
    /* non-empty range in front of subbucket (source,source) */
    if (getstartidx(bucketspec2,source,0) <
        getstartidx(bucketspec2,source,source))
    {
      for (idx = 0; idx < bucketspec2->numofchars; idx++)
      {
        targetoffset[idx] = getstartidx(bucketspec2,idx,source);
      }
      forwardderive(bucketspec2,
                    suffixsortspace,
                    targetoffset,
                    source,
                    getstartidx(bucketspec2,source,0));
    }
    /* non-empty range behind subbucket (source,source) */
    if (getendidx(bucketspec2,source,source) <
        getendidx(bucketspec2,source,bucketspec2->numofchars))
    {
      for (idx = 0; idx < bucketspec2->numofchars; idx++)
      {
        /* do not need to assert that getendidx(idx,source) > 0, as later
           the value stored in targetoffset is incremented */
        targetoffset[idx] = getendidx(bucketspec2,idx,source) - 1;
      }
      gt_assert(getendidx(bucketspec2,source,bucketspec2->numofchars) > 0);
      backwardderive(bucketspec2,
                     suffixsortspace,
                     targetoffset,
                     source,
                     getendidx(bucketspec2,source,bucketspec2->numofchars) - 1);
    }
    for (idx = 0; idx < bucketspec2->numofchars; idx++)
    {
      bucketspec2->subbuckettab[idx][source].sorted = true;
    }
    bucketspec2->superbuckettab[source].sorted = true;
  }
  gt_free(targetoffset);
  gt_logger_log(logger,"hardwork = " GT_WU " (%.2f)",
                hardwork,
                (double) hardwork/gt_encseq_total_length(bucketspec2->encseq));
}