void estimate_distinct_kmers(unsigned long genome_size, Bank *Reads) { int size_linearCounter = genome_size * 8; // alloc 8 bits * genome size for counting, i.e. ~ as much as the assembly Bloom size LinearCounter *linearCounter = new LinearCounter(size_linearCounter); bloom_pass_reads(Reads,linearCounter, (BloomCpt * ) NULL, (char*)"%cEstimating number of distinct kmers (%lld reads processed so far)"); long nb_distinct_kmers = linearCounter->count(); if (linearCounter->is_accurate()) printf("Estimated that %ld distinct kmers are in the reads\n",nb_distinct_kmers); else printf("Cannot estimate the number of distinct kmers. Allocate a larger counter\n"); delete linearCounter; }
//------------- Start of function VgaBuf::indicator --------------// // // <int> x1, y1, x2, y2 = coordination of the indicator // <float> curValue = the value of the bar // <float> maxValue = max value, the bar width = maxBarWidth * curValue / maxValue // <int> indiColor = color of the indicator // [int] backColor = background color // (default : vga.color_down) // (-2 if don't paint background color) // void VgaBuf::indicator(int x1, int y1, int x2, int y2, float curValue, float maxValue, int indiColor, int backColor) { if( backColor == -1 ) backColor = color_down; int cutPoint; if( curValue <= 0.0f ) cutPoint = x1; else if( curValue >= maxValue ) cutPoint = x2 + 1; else cutPoint = x1 + int(float(x2 - x1 + 1) * curValue / maxValue); if( cutPoint > x1 ) { int cutHeight = (y1 * 5 + y2 * 3) / 8; // cut at 3/8 of between y1 and y2 err_when( y2 - y1 - 1 < 4 ); LinearCounter brightness; brightness.init( y1, 0, cutHeight, MAX_BRIGHTNESS_ADJUST_DEGREE-2 ); // glowing int y; for( y=y1; y <= cutHeight; y++, brightness.inc() ) { barW_fast( x1, y, cutPoint, y, vga.vga_color_table->get_table(brightness.y)[indiColor]); } brightness.init( cutHeight, MAX_BRIGHTNESS_ADJUST_DEGREE-2, y2, -MAX_BRIGHTNESS_ADJUST_DEGREE/2 ); // for( ; y<=y2; ++y, brightness.inc() ) { barW_fast( x1, y, cutPoint, y, vga.vga_color_table->get_table(brightness.y)[indiColor]); } } if( cutPoint <= x2 ) { bar( cutPoint, y1, x2, y2, backColor ); } }
//------------- Start of function VgaBuf::indicator --------------// // // <int> x1, y1, x2, y2 = coordination of the indicator // <float> curValue = the value of the bar // <float> maxValue = max value, the bar width = maxBarWidth * curValue / maxValue // <int> indiColor = color of the indicator // [int] backColor = background color // (default : vga.color_down) // (-2 if don't paint background color) // void VgaBuf::indicator(int x1, int y1, int x2, int y2, float curValue, float maxValue, int indiColor, int backColor) { if( backColor == -1 ) backColor = color_down; /* if( curValue > maxValue ) curValue = maxValue; if( curValue > 0 ) { int barWidth = (int) ((float)(x2-x1) * curValue / maxValue); int halfHeight = (y2-y1+1)/2-1; int tx2 = x1+barWidth; int y; indiColor+=halfHeight; for( y=y1 ; y<y1+halfHeight ; y++, indiColor-- ) bar( x1, y, tx2, y, indiColor ); for( ; y<=y2 ; y++, indiColor++ ) bar( x1, y, tx2, y, indiColor ); if( backColor != -2 ) // -2 if don't paint background color { if( x1+barWidth < x2 ) bar( x1+barWidth+1, y1, x2, y2, backColor ); } } else { if( backColor != -2 ) // -2 if don't paint background color bar( x1, y1, x2, y2, backColor ); } */ int cutPoint; if( curValue <= 0.0f ) cutPoint = x1; else if( curValue >= maxValue ) cutPoint = x2 + 1; else cutPoint = x1 + int(float(x2 - x1 + 1) * curValue / maxValue); if( cutPoint > x1 ) { if( is_front ) mouse.hide_area( x1, y1, cutPoint, y2 ); int cutHeight = (y1 * 5 + y2 * 3) / 8; // cut at 3/8 of between y1 and y2 err_when( y2 - y1 - 1 < 4 ); LinearCounter brightness; brightness.init( y1, 0, cutHeight, MAX_BRIGHTNESS_ADJUST_DEGREE-2 ); // glowing int y; for( y=y1; y <= cutHeight; y++, brightness.inc() ) { barW_fast( x1, y, cutPoint, y, vga.vga_color_table->get_table(brightness.y)[indiColor]); } brightness.init( cutHeight, MAX_BRIGHTNESS_ADJUST_DEGREE-2, y2, -MAX_BRIGHTNESS_ADJUST_DEGREE/2 ); // for( ; y<=y2; ++y, brightness.inc() ) { barW_fast( x1, y, cutPoint, y, vga.vga_color_table->get_table(brightness.y)[indiColor]); } if( is_front ) mouse.show_area(); } if( cutPoint <= x2 ) { bar( cutPoint, y1, x2, y2, backColor ); } }
uint64_t extrapolate_distinct_kmers_wrapped(unsigned long nbytes_memory, Bank *Reads) { unsigned long size_linearCounter = nbytes_memory * 8L; // alloc 8 bits * nbytes for counting LinearCounter *linearCounter = new LinearCounter(size_linearCounter); int stops = 100000; // variant of bloom_pass_reads int64_t NbRead = 0; int64_t NbInsertedKmers = 0; Reads->rewind_all(); char * rseq; long i; kmer_type kmer, graine, graine_revcomp; long nb_distinct_kmers = 0; long previous_nb_distinct_kmers = 0; uint64_t estimated_nb_reads = Reads->estimate_nb_reads(); bool stop = false; while (Reads->get_next_seq(&rseq,&readlen)) { if (stop) break; for (i=0; i<readlen-sizeKmer+1; i++) { kmer = extractKmerFromRead(rseq,i,&graine,&graine_revcomp); linearCounter->add(kmer); NbInsertedKmers++; if (NbInsertedKmers % stops == 0 && NbRead != 0) { previous_nb_distinct_kmers = nb_distinct_kmers; nb_distinct_kmers = linearCounter->count()*estimated_nb_reads/NbRead; //printf("estimated now: %ld\n",nb_distinct_kmers); // the following condition will grossly over-estimate the number of distinct kmers // I expect the correct result to be in the same order of magnitude // 5% error if (abs((int)(nb_distinct_kmers-previous_nb_distinct_kmers)) < previous_nb_distinct_kmers/20) stop = true; if (!linearCounter->is_accurate()) stop = true; } } NbRead++; if ((NbRead%10000)==0) fprintf (stderr,(char*)"%cExtrapolating number of distinct kmers %lld",13,NbRead); } if (!linearCounter->is_accurate()) { printf("Inaccurate estimation, restarting with %d MB RAM\n",(2*nbytes_memory)/1024/1024); delete linearCounter; return extrapolate_distinct_kmers_wrapped(2*nbytes_memory, Reads); } nb_distinct_kmers = linearCounter->count()*estimated_nb_reads/NbRead; // this is a very rough estimation printf("Linear estimation: ~%ld M distinct kmers are in the reads\n",nb_distinct_kmers/1000000L); delete linearCounter; return nb_distinct_kmers; }