/*
  Cross-check sequential reading against random access for one start
  position. The scan state <esr> is initialized at <startpos>; then every
  position from <startpos> up to the total length of <encseq> is read once
  with getencodedchar() and once with sequentialgetencodedchar(). On the
  first disagreement a diagnostic is written to stderr and the program
  exits with GT_EXIT_PROGRAMMING_ERROR.
*/
static void runscanatpostrial(const Encodedsequence *encseq,
                              Encodedsequencescanstate *esr,
                              Readmode readmode,
                              Seqpos startpos)
{
  const Seqpos endpos = getencseqtotallength(encseq);
  Seqpos current = startpos;

  initEncodedsequencescanstate(esr,encseq,readmode,startpos);
  while (current < endpos)
  {
    /* the random access value serves as the reference */
    const GtUchar randomchar = getencodedchar(encseq,current,readmode);
    const GtUchar scannedchar
      = sequentialgetencodedchar(encseq,esr,current,readmode);

    if (randomchar != scannedchar)
    {
      fprintf(stderr,"startpos = " FormatSeqpos
                     " access=%s, mode=%s: position=" FormatSeqpos
                     ": random access (correct) = %u != %u = "
                     " sequential read (wrong)\n",
                     PRINTSeqposcast(startpos),
                     encseqaccessname(encseq),
                     showreadmode(readmode),
                     PRINTSeqposcast(current),
                     (unsigned int) randomchar,
                     (unsigned int) scannedchar);
      exit(GT_EXIT_PROGRAMMING_ERROR);
    }
    current++;
  }
}
static void showmatch(void *processinfo,const GtMatch *match) { Showmatchinfo *showmatchinfo = (Showmatchinfo *) processinfo; unsigned long seqnum; Seqpos relpos; if (match->dbabsolute) { Seqinfo seqinfo; seqnum = getencseqfrompos2seqnum(showmatchinfo->encseq,match->dbstartpos); getencseqSeqinfo(&seqinfo,showmatchinfo->encseq,seqnum); gt_assert(seqinfo.seqstartpos <= match->dbstartpos); relpos = match->dbstartpos - seqinfo.seqstartpos; } else { relpos = match->dbstartpos; seqnum = match->dbseqnum; } printf("%lu\t" FormatSeqpos "\t",seqnum,PRINTSeqposcast(relpos)); printf(FormatSeqpos "\t",PRINTSeqposcast(match->dblen)); printf("\t" Formatuint64_t "\t%lu\t%lu\t%lu\n", PRINTuint64_tcast(showmatchinfo->queryunit), match->querystartpos, match->querylen, match->distance); if (showmatchinfo->showalignment) { gt_alignment_show_with_mapped_chars( (const GtAlignment *) match->alignment, showmatchinfo->characters, showmatchinfo->wildcardshow, stdout); } }
/*
  Print one space-separated line for an LTR element to stdout. For the
  whole element, the left LTR and the right LTR (in this order) it shows
  start, end and length; then the similarity and the contig number.
  Start and end positions have <offset> subtracted and 1 added.
*/
static void produceshortoutput(const LTRboundaries *boundaries,Seqpos offset)
{
  const LTRboundaries *ltr = boundaries;

  /* whole element: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((ltr->rightLTR_3 - ltr->leftLTR_5 + 1)));

  /* left LTR: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((ltr->leftLTR_3 - ltr->leftLTR_5 + 1)));

  /* right LTR: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((ltr->rightLTR_3 - ltr->rightLTR_5 + 1)));

  /* similarity and sequence number terminate the line */
  printf("%.2f ",ltr->similarity);
  printf("%lu\n",ltr->contignumber);
}
/*
  Write the two ranges <vala> and <valb> to stderr in the form
  "(left,right) <cmpflag> (left,right)", followed by a newline.
*/
static void makeerrormsg(const Sequencerange *vala,
                         const Sequencerange *valb,
                         const char *cmpflag)
{
  fprintf(stderr,
          "(" FormatSeqpos "," FormatSeqpos ") %s (" FormatSeqpos ","
          FormatSeqpos ")\n",
          PRINTSeqposcast(vala->leftpos),
          PRINTSeqposcast(vala->rightpos),
          cmpflag,
          PRINTSeqposcast(valb->leftpos),
          PRINTSeqposcast(valb->rightpos));
}
/*
  Dump the bucket table to stdout. For every pair of characters the
  bucketend of the subbucket is printed, followed by " sorted" if its
  sorted flag is set. After each row of subbuckets the bucketend of the
  corresponding superbucket is printed.
*/
static void showbucketspec2(const GtBucketspec2 *bucketspec2)
{
  unsigned int first, second;

  for (first = 0; first < bucketspec2->numofchars; first++)
  {
    for (second = 0; second < bucketspec2->numofchars; second++)
    {
      printf("subbucket[%u][%u]=" FormatSeqpos,first,second,
             PRINTSeqposcast(
               bucketspec2->subbuckettab[first][second].bucketend));
      if (!bucketspec2->subbuckettab[first][second].sorted)
      {
        printf("\n");
      } else
      {
        printf(" sorted\n");
      }
    }
    printf("superbucket[%u]=" FormatSeqpos "\n",first,
           PRINTSeqposcast(bucketspec2->superbuckettab[first].bucketend));
  }
}
/*
  Run runscanatpostrial() for several start positions of <encseq> in the
  given <readmode>: position 0, the last position, and <scantrials> random
  positions, each of which is also reported on stdout. The random number
  generator is seeded with a fixed value. A single scan state is allocated
  here, reused for all trials and freed at the end.
*/
static void testscanatpos(const Encodedsequence *encseq,
                          Readmode readmode,
                          unsigned long scantrials)
{
  Encodedsequencescanstate *esr = NULL;
  Seqpos startpos, totallength;
  unsigned long trial;

  totallength = getencseqtotallength(encseq);
  srand48(42349421);
  esr = newEncodedsequencescanstate();
  runscanatpostrial(encseq,esr,readmode,0);
  /*
    For an empty sequence totallength-1 is not a position (it wraps around
    if Seqpos is unsigned), so the last-position trial is only run when
    there is at least one position.
  */
  if (totallength > 0)
  {
    runscanatpostrial(encseq,esr,readmode,totallength-1);
  }
  for (trial = 0; trial < scantrials; trial++)
  {
    /* drand48() is below 1.0, so the product stays below totallength */
    startpos = (Seqpos) (drand48() * (double) totallength);
    printf("trial %lu at " FormatSeqpos "\n",trial,PRINTSeqposcast(startpos));
    runscanatpostrial(encseq,esr,readmode,startpos);
  }
  freeEncodedsequencescanstate(&esr);
}
/*
  Process the characters in the order given by bucketspec2->order. For each
  such character <current>:
  - every subbucket [current][other] with other != current that is not yet
    marked as sorted is reported via showverbose(), its width is added to
    the hard work counter and it is marked as sorted;
  - if the range before subbucket [current][current] is not empty,
    forwardderive() is called with one target pointer per character, each
    set to the start of subbucket [c][current];
  - if the range after subbucket [current][current] is not empty,
    backwardderive() is called with the target pointers set to the last
    element of subbucket [c][current];
  - all subbuckets [c][current] and the superbucket of <current> are marked
    as sorted.
  Finally the accumulated hard work and its ratio to the total length of
  the encoded sequence are reported via showverbose().
*/
void gt_copysortsuffixes(const GtBucketspec2 *bucketspec2,
                         Seqpos *suftab,
                         Verboseinfo *verboseinfo)
{
  Seqpos hardworksum = 0, **targets;
  unsigned int cc, rank, current, other;

#ifdef WITHSUFFIXES
  {
    const Seqpos *sufptr;

    for (sufptr = suftab; sufptr < suftab + bucketspec2->partwidth; sufptr++)
    {
      showsequenceatstartpos(stdout,
                             ISDIRREVERSE(readmode) ? false : true,
                             ISDIRCOMPLEMENT(readmode) ? true : false,
                             encseq,
                             *sufptr);
    }
  }
#endif
  targets = gt_malloc(sizeof (*targets) * bucketspec2->numofchars);
  for (rank = 0; rank < bucketspec2->numofchars; rank++)
  {
    current = bucketspec2->order[rank];

    /* account for the subbuckets of <current> that need explicit sorting */
    for (other = 0; other < bucketspec2->numofchars; other++)
    {
      if (bucketspec2->subbuckettab[current][other].sorted ||
          current == other)
      {
        gt_assert(!bucketspec2->subbuckettab[current][other].hardworktodo);
      } else
      {
        gt_assert(bucketspec2->subbuckettab[current][other].hardworktodo);
        showverbose(verboseinfo,"hard work for %u %u",current,other);
        hardworksum += getendidx(bucketspec2,current,other) -
                       getstartidx(bucketspec2,current,other);
        bucketspec2->subbuckettab[current][other].sorted = true;
      }
    }

    /* left-to-right pass over the part before subbucket [current][current] */
    if (getstartidx(bucketspec2,current,0) <
        getstartidx(bucketspec2,current,current))
    {
      for (cc = 0; cc < bucketspec2->numofchars; cc++)
      {
        targets[cc] = suftab + getstartidx(bucketspec2,cc,current);
      }
      forwardderive(bucketspec2,
                    targets,
                    current,
                    suftab + getstartidx(bucketspec2,current,0));
    }

    /* right-to-left pass over the part after subbucket [current][current] */
    if (getendidx(bucketspec2,current,current) <
        getendidx(bucketspec2,current,bucketspec2->numofchars))
    {
      for (cc = 0; cc < bucketspec2->numofchars; cc++)
      {
        targets[cc] = suftab + getendidx(bucketspec2,cc,current) - 1;
      }
      backwardderive(bucketspec2,
                     targets,
                     current,
                     suftab +
                     getendidx(bucketspec2,current,bucketspec2->numofchars) -
                     1);
    }

    for (cc = 0; cc < bucketspec2->numofchars; cc++)
    {
      bucketspec2->subbuckettab[cc][current].sorted = true;
    }
    bucketspec2->superbuckettab[current].sorted = true;
  }
  gt_free(targets);
  showverbose(verboseinfo,"hardwork = " FormatSeqpos " (%.2f)",
              PRINTSeqposcast(hardworksum),
              (double) hardworksum/getencseqtotallength(bucketspec2->encseq));
}
/*
  Print one space-separated line for an LTR element to stdout:
  - start, end and length of the whole element;
  - start, end and length of the left LTR;
  - if lo->minlengthTSD > 1: the characters of the left TSD and its length;
  - if lo->motif.allowedmismatches < 4: the first two and the last two
    characters of the left LTR;
  - the same groups for the right LTR;
  - the similarity and the contig number.
  Start and end positions have <offset> subtracted and 1 added. Sequence
  characters are read in Forwardmode and mapped through the alphabet
  characters of <encseq>.
*/
static void producelongutput(const LTRharvestoptions *lo,
                             const LTRboundaries *boundaries,
                             const Encodedsequence *encseq,
                             Seqpos offset)
{
  const GtUchar *alphabet = getencseqAlphabetcharacters(encseq);
  const LTRboundaries *ltr = boundaries;

  /* whole element: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((ltr->rightLTR_3 - ltr->leftLTR_5 + 1)));

  /* left LTR: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->leftLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast((ltr->leftLTR_3 - ltr->leftLTR_5 + 1)));

  /* left TSD: the characters directly in front of the left LTR */
  if (lo->minlengthTSD > 1U)
  {
    Seqpos tsdidx;

    for (tsdidx = 0; tsdidx < ltr->lenleftTSD; tsdidx++)
    {
      printf("%c",
             (char) alphabet[getencodedchar(encseq,
                                            ltr->leftLTR_5 -
                                            ltr->lenleftTSD + tsdidx,
                                            Forwardmode)]);
    }
    printf(" " FormatSeqpos " ",PRINTSeqposcast(ltr->lenleftTSD));
  }

  /* motif of the left LTR */
  if (lo->motif.allowedmismatches < 4U)
  {
    printf("%c%c..%c%c ",
           (char) alphabet[getencodedchar(encseq,
                                          ltr->leftLTR_5,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->leftLTR_5+1,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->leftLTR_3-1,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->leftLTR_3,
                                          Forwardmode)]);
  }

  /* right LTR: start, end, length */
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_5 - offset + 1));
  printf(FormatSeqpos " ",PRINTSeqposcast(ltr->rightLTR_3 - offset + 1));
  printf(FormatSeqpos " ",
         PRINTSeqposcast(ltr->rightLTR_3 - ltr->rightLTR_5 + 1));

  /* right TSD: the characters directly behind the right LTR */
  if (lo->minlengthTSD > 1U)
  {
    Seqpos tsdidx;

    for (tsdidx = 0; tsdidx < ltr->lenrightTSD; tsdidx++)
    {
      printf("%c",
             (char) alphabet[getencodedchar(encseq,
                                            ltr->rightLTR_3+tsdidx+1,
                                            Forwardmode)]);
    }
    printf(" " FormatSeqpos " ",PRINTSeqposcast(ltr->lenrightTSD));
  }

  /* motif of the right LTR */
  if (lo->motif.allowedmismatches < 4U)
  {
    printf("%c%c..%c%c",
           (char) alphabet[getencodedchar(encseq,
                                          ltr->rightLTR_5,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->rightLTR_5+1,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->rightLTR_3-1,
                                          Forwardmode)],
           (char) alphabet[getencodedchar(encseq,
                                          ltr->rightLTR_3,
                                          Forwardmode)]);
  }

  /* similarity and sequence number terminate the line */
  printf(" %.2f",ltr->similarity);
  printf(" %lu\n",ltr->contignumber);
}
/* The following function shows all options that are set by default or from the user on stdout. */ void showuserdefinedoptionsandvalues(const LTRharvestoptions *lo) { printf("# user defined options and values:\n"); if (lo->verbosemode) { printf("# verbosemode: On\n"); } else { printf("# verbosemode: Off\n"); } printf("# indexname: %s\n", gt_str_get(lo->str_indexname)); if (lo->fastaoutput) { printf("# outputfile: %s\n", gt_str_get(lo->str_fastaoutputfilename)); } if (lo->fastaoutputinnerregion) { printf("# outputfile inner region: %s\n", gt_str_get(lo->str_fastaoutputfilenameinnerregion)); } if (lo->gff3output) { printf("# outputfile gff3 format: %s\n", gt_str_get(lo->str_gff3filename)); } printf("# xdropbelowscore: %d\n", lo->xdropbelowscore); printf("# similaritythreshold: %.2f\n", lo->similaritythreshold); printf("# minseedlength: %lu\n", lo->minseedlength); printf("# matchscore: %d\n", lo->arbitscores.mat); printf("# mismatchscore: %d\n", lo->arbitscores.mis); printf("# insertionscore: %d\n", lo->arbitscores.ins); printf("# deletionscore: %d\n", lo->arbitscores.del); printf("# minLTRlength: %lu\n", lo->repeatinfo.lmin); printf("# maxLTRlength: %lu\n", lo->repeatinfo.lmax); printf("# minLTRdistance: %lu\n", lo->repeatinfo.dmin); printf("# maxLTRdistance: %lu\n", lo->repeatinfo.dmax); if (lo->nooverlapallowed) { printf("# overlaps: no\n"); } else { if (lo->bestofoverlap) { printf("# overlaps: best\n"); } else { printf("# overlaps: all\n"); } } printf("# minTSDlength: %u\n", lo->minlengthTSD); printf("# maxTSDlength: %u\n", lo->maxlengthTSD); printf("# palindromic motif: %s\n", gt_str_get(lo->motif.str_motif)); printf("# motifmismatchesallowed: %u\n", lo->motif.allowedmismatches); printf("# vicinity: " FormatSeqpos " nt\n", PRINTSeqposcast(lo->vicinityforcorrectboundaries)); if (lo->repeatinfo.ltrsearchseqrange.start != 0 || lo->repeatinfo.ltrsearchseqrange.end != 0) { printf("# ltrsearchseqrange=(%lu,%lu)\n", 
PRINTSeqposcast(lo->repeatinfo.ltrsearchseqrange.start), PRINTSeqposcast(lo->repeatinfo.ltrsearchseqrange.end)); } }