//----------
//
// masking_show_stats--
//	Write the masking statistics to a stream.
//
// Arguments:
//	FILE*	f:	The stream to write to.  NULL means "print nothing".
//
// Returns:
//	(nothing)
//
// Notes:
//	The body is compiled only when collect_stats is defined; otherwise the
//	function is an empty stub.  The parameter is wrapped in
//	arg_dont_complain(), presumably to silence unused-parameter warnings in
//	that stub build (confirm against the macro's definition).
//
//----------

void masking_show_stats
   (arg_dont_complain(FILE* f))
	{
#ifdef collect_stats
	if (f != NULL)
		{
		fprintf (f, " masked bases: %s\n", commatize(maskingStats.maskedBases));
		fprintf (f, "-------------------\n");
		}
#endif // collect_stats
	}
// Print a report to std::cout for every entry of labelID.
//
// For each label a banner is written (the label padded with '=' to roughly
// the table width), followed by one record per element returned by
// getMean( id ).  Each record shows:
//   - "OutEvents:"  one event per line (only when the record has out-events),
//   - "<unit>:"     flopCount divided by the record's doubleNanoSec value,
//   - "Time (ns):"  the doubleNanoSec value, truncated to cl_ulong and passed
//                   through commatize().
//
// flopCount : numerator of the throughput figure.
// unit      : text printed as the label of the throughput line.
void clsparseDeviceTimer::Print( cl_ulong flopCount, std::string unit )
{
    // Column widths, expressed as fractions of a 60-character table.
    const int tableWidth  = 60;
    const int tableHalf   = tableWidth / 2;
    const int tableThird  = tableWidth / 3;
    const int tableFourth = tableWidth / 4;

    for( cl_uint labelIdx = 0; labelIdx < labelID.size( ); ++labelIdx )
    {
        size_t halfString = labelID[ labelIdx ].first.size( ) / 2;

        // Banner: the label, padded on both sides with '='.
        std::cout << std::endl;
        std::cout << std::setw( tableHalf + halfString ) << std::setfill( '=' )
                  << labelID[ labelIdx ].first;
        std::cout << std::setw( tableHalf - halfString ) << "=" << std::endl;

        // Back to blank padding for the records below.
        std::cout << std::setfill( ' ' );

        std::vector< StatData > mean = getMean( labelIdx );

        for( cl_uint recIdx = 0; recIdx < mean.size( ); ++recIdx )
        {
            StatData& rec = mean[ recIdx ];

            cl_double time   = rec.doubleNanoSec;
            cl_double gFlops = flopCount / time;

            const size_t eventCount = rec.outEvents.size( );
            if( eventCount != 0 )
            {
                std::cout << std::setw( tableFourth ) << "OutEvents:"
                          << std::setw( tableThird );

                for( size_t ev = 0; ev < eventCount; ++ev )
                {
                    std::cout << rec.outEvents[ ev ]( );

                    // Every event but the last is followed by a comma and a
                    // line break; the next one is aligned under the first.
                    if( ev + 1 < eventCount )
                    {
                        std::cout << "," << std::endl;
                        std::cout << std::setw( tableFourth + tableThird );
                    }
                }
                std::cout << std::endl;
            }

            std::cout << std::setw( tableFourth ) << unit << ":"
                      << std::setw( 2 * tableFourth ) << gFlops << std::endl;

            std::cout << std::setw( tableFourth ) << "Time (ns):"
                      << std::setw( 3 * tableFourth )
                      << commatize( static_cast<cl_ulong>( time ) ) << std::endl;

            std::cout << std::endl;
        }
    }
}
//----------
//
// new_segment_table--
//	Allocate and initialize an empty segment table.
//
// Arguments:
//	u32		size:			Number of segment slots to allocate room for.  Must
//							be at least 1.
//	unspos	coverageLimit:	Value stored in the table's coverageLimit field.
//
// Returns:
//	A pointer to the newly allocated table, with len and coverage zero,
//	haveScores false, and lowScore set to worstPossibleScore.
//
// Failures are reported through suicidef(): when size is less than 1, and
// when the byte count computed by segtable_bytes() exceeds mallocLimit.  The
// original author's note says control does not return from the latter call.
//
//----------

segtable* new_segment_table
   (u32			size,
	unspos		coverageLimit)
	{
	segtable*	table;
	size_t		numBytes;

	// reject a table with no slots

	if (size < 1)
		suicidef ("in new_segment_table(), size can't be %d", size);

	// refuse requests beyond the allocation limit

	numBytes = segtable_bytes (size);
	if (numBytes > mallocLimit)
		{
		suicidef ("internal error, in new_segment_table()\n"
		          "table size (%s) exceeds allocation limit of %s;",
		          commatize(numBytes),
		          commatize(mallocLimit));
		return NULL; // (doesn't get here)
		}

	// allocate the table and set it to the empty state

	table = (segtable*) malloc_or_die ("new_segment_table", numBytes);

	table->size          = size;
	table->coverageLimit = coverageLimit;
	table->len           = 0;
	table->coverage      = 0;
	table->haveScores    = false;
	table->lowScore      = worstPossibleScore;

	return table;
	}
void* realloc_or_die (char* id, void* _p, size_t size) { void* p; // make sure size is legit if (size > mallocLimit) { if (id == NULL) suicidef ("realloc_or_die blocked large request, for %s bytes (max is %s)", commatize(size), commatize(mallocLimit)); else suicidef ("realloc_or_die blocked large request, for %s bytes (max is %s), for %s", commatize(size), commatize(mallocLimit), id); } if (size == 0) size = 1; // allocate the memory p = realloc (_p, size); if (p == NULL) { if (id == NULL) suicidef ("call to realloc failed to allocate %lu bytes", commatize(size)); else suicidef ("call to realloc failed to allocate %lu bytes, for %s", commatize(size), id); } reportRealloc (id, _p, p, size); return p; }
void GpuStatTimer::Print( ) { const int tableWidth = 60; const int tableHalf = tableWidth / 2; const int tableThird = tableWidth / 3; const int tableFourth = tableWidth / 4; const int tableFifth = tableWidth / 5; for( cl_uint id = 0; id < labelID.size( ); ++id ) { size_t halfString = labelID[ id ].first.size( ) / 2; // Print label of timer, in a header std::cout << std::endl << std::setw( tableHalf + halfString ) << std::setfill( '=' ) << labelID[ id ].first << std::setw( tableHalf - halfString ) << "=" << std::endl; tout << std::setfill( _T( ' ' ) ); std::vector< StatData > mean = getMean( id ); // Print each individual dimension tstringstream catLengths; for( cl_uint t = 0; t < mean.size( ); ++t ) { cl_double time = 0; if( mean[ t ].kernel == NULL ) { for( cl_uint m = 0; m < t; ++m ) { if( mean[ m ].plHandle == mean[ t ].planX || mean[ m ].plHandle == mean[ t ].planY || mean[ m ].plHandle == mean[ t ].planZ || mean[ m ].plHandle == mean[ t ].planTX || mean[ m ].plHandle == mean[ t ].planTY || mean[ m ].plHandle == mean[ t ].planTZ || mean[ m ].plHandle == mean[ t ].planRCcopy || mean[ m ].plHandle == mean[ t ].planCopy ) { time += mean[ m ].doubleNanoSec; } } mean[ t ].doubleNanoSec = time; } else { time = mean[ t ].doubleNanoSec; } double gFlops = mean[ t ].calcFlops( ) / time; tout << std::setw( tableFourth ) << _T( "Handle:" ) << std::setw( tableThird ) << mean[ t ].plHandle << std::endl; if( mean[ t ].kernel != 0 ) { tout << std::setw( tableFourth ) << _T( "Kernel:" ) << std::setw( tableThird ) << reinterpret_cast<void*>( mean[ t ].kernel ) << std::endl; } if( ( mean[ t ].planX + mean[ t ].planY + mean[ t ].planZ ) > 0 || ( mean[ t ].planTX + mean[ t ].planTY + mean[ t ].planTZ ) > 0 || ( mean[ t ].planRCcopy + mean[ t ].planCopy ) > 0 ) { tout << std::setw( tableFourth ) << _T( "Child Handles:" ); catLengths.str( _T( "" ) ); catLengths << _T( "(" ); if( mean[ t ].planX != 0 ) catLengths << mean[ t ].planX; if( mean[ t ].planTX != 0 ) { catLengths << _T( "," 
); catLengths << mean[ t ].planTX; } if( mean[ t ].planY != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planY; } if( mean[ t ].planTY != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planTY; } if( mean[ t ].planZ != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planZ; } if( mean[ t ].planTZ != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planTZ; } if( mean[ t ].planRCcopy != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planRCcopy; } if( mean[ t ].planCopy != 0 ) { catLengths << _T( "," ); catLengths << mean[ t ].planCopy; } catLengths << _T( ")" ); tout << std::setw( tableThird ) << catLengths.str( ) << std::endl; } if( mean[ t ].outEvents.size( ) != 0 ) { tout << std::setw( tableFourth ) << _T( "OutEvents:" ) << std::setw( tableThird ); for( size_t i = 0; i < mean[ t ].outEvents.size( ); ++i ) { tout << mean[ t ].outEvents[ i ]; if( i < (mean[ t ].outEvents.size( )-1) ) { tout << _T( "," ) << std::endl; tout << std::setw( tableFourth+tableThird ); } } tout << std::endl; } tout << std::setw( tableFourth ) << _T( "Length:" ); catLengths.str( _T( "" ) ); catLengths << _T( "(" ); for( size_t i = 0; i < mean[ t ].lengths.size( ); ++i ) { catLengths << mean[ t ].lengths.at( i ); if( i < (mean[ t ].lengths.size( )-1) ) catLengths << _T( "," ); } catLengths << _T( ")" ); tout << std::setw( tableThird ) << catLengths.str( ) << std::endl; if( mean[ t ].batchSize > 1 ) { tout << std::setw( tableFourth ) << _T( "Batch:" ) << std::setw( tableThird ) << mean[ t ].batchSize << std::endl; } tout << std::setw( tableFourth ) << _T( "Input Stride:" ); catLengths.str( _T( "" ) ); catLengths << _T( "(" ); for( size_t i = 0; i < mean[ t ].inStride.size( ); ++i ) { catLengths << mean[ t ].inStride.at( i ); if( i < (mean[ t ].inStride.size( )-1) ) catLengths << _T( "," ); } catLengths << _T( ")" ); tout << std::setw( tableThird ) << catLengths.str( ) << std::endl; tout << std::setw( tableFourth ) << _T( "Output Stride:" ); catLengths.str( 
_T( "" ) ); catLengths << _T( "(" ); for( size_t i = 0; i < mean[ t ].outStride.size( ); ++i ) { catLengths << mean[ t ].outStride.at( i ); if( i < (mean[ t ].outStride.size( )-1) ) catLengths << _T( "," ); } catLengths << _T( ")" ); tout << std::setw( tableThird ) << catLengths.str( ) << std::endl; if( mean[ t ].enqueueWorkSize.size( ) != 0 ) { tout << std::setw( tableFourth ) << _T( "Global Work:" ); catLengths.str( _T( "" ) ); catLengths << _T( "(" ); for( size_t i = 0; i < mean[ t ].enqueueWorkSize.size( ); ++i ) { catLengths << mean[ t ].enqueueWorkSize.at( i ); if( i < (mean[ t ].enqueueWorkSize.size( )-1) ) catLengths << _T( "," ); } catLengths << _T( ")" ); tout << std::setw( tableThird ) << catLengths.str( ) << std::endl; } tout << std::setw( tableFourth ) << _T( "Gflops:" ) << std::setw( 2*tableFourth ) << gFlops << std::endl; tout << std::setw( tableFourth ) << _T( "Time (ns):" ) << std::setw( 3*tableFourth ) << commatize( static_cast< cl_ulong >( time ) ) << std::endl; tout << std::endl; } } }
//----------
//
// add_segment--
//	Add a segment to a segment table, growing the table if it has no free
//	slot, and pruning the lowest-scoring segments once the table's coverage
//	limit has been met.
//
// Arguments:
//	segtable*	st:		The table to add to.
//	unspos		pos1:	Stored in the new segment's pos1 field.
//	unspos		pos2:	Stored in the new segment's pos2 field.
//	unspos		length:	Stored in the new segment's length field;  also added
//						to the table's running coverage.
//	score		s:		Stored in the new segment's s field (its score).
//	int			id:		Stored in the new segment's id field.
//
// Returns:
//	A pointer to the segment table.  The table may have been moved by
//	realloc_or_die(), so the caller must replace its own pointer with the
//	returned one.
//
// Notes:
//	(1)	While coverage is below st->coverageLimit (or the limit is zero) the
//		table is a plain list.  From the call that first reaches the limit
//		onward, it is maintained as a min-heap on score, with tied-score
//		information kept up to date by record_tie_score(s)().
//	(2)	If growing the table would exceed mallocLimit, the failure is
//		reported through suicidef().
//
//----------

segtable* add_segment
   (segtable*	st,
	unspos		pos1,
	unspos		pos2,
	unspos		length,
	score		s,
	int			id)
	{
	u32			newSize;
	size_t		bytesNeeded;
	segment*	seg, *parent;
	segment		tempSeg;
	int			ix, pIx;
	int			tied, stopped;

//	fprintf (stderr, "add " unsposSlashSFmt " " unsposFmt " " scoreFmtSimple "; id %d\n",
//	                 pos1+1, "+",
//	                 pos2+1, ((id & rcf_rev) != 0)? "-" : "+",
//	                 length, s, id);

	//////////
	// add the segment to the table, enlarging the table if needed, but
	// discarding the segment if it is low-scoring and the table has met its
	// coverage limit
	//////////

	// if the table is already full and this segment scores less than the
	// lowest score in the table, discard it
	// ("full" here means coverage has reached a non-zero coverageLimit)

	if ((st->len > 0)
	 && (st->coverageLimit != 0)
	 && (st->coverage >= st->coverageLimit)
	 && (s < st->lowScore))
		return st;

	// if there's no room for the new segment, re-allocate
	// (the table grows by 100 slots plus a third of its current size)

	if (st->len >= st->size)
		{
		newSize     = st->size + 100 + (st->size / 3);
		bytesNeeded = segtable_bytes (newSize);
		if (bytesNeeded > mallocLimit) goto overflow;
		st = (segtable*) realloc_or_die ("add_segment", st, bytesNeeded);
		st->size = newSize;
		}

	// add the segment, by appending it at the end

	seg = &st->seg[st->len++];
	seg->pos1     = pos1;
	seg->pos2     = pos2;
	seg->length   = length;
	seg->s        = s;
	seg->id       = id;
	seg->filter   = false;
	seg->scoreCov = (possum) length;

	st->coverage += length;
	if ((st->len == 1) || (s < st->lowScore)) st->lowScore = s;

	//////////
	// handle the transition between the two table states
	//	below-the-coverage-limit:	table is kept as a simple list
	//	met-the-coverage-limit:		table is kept as a proper min-heap
	//////////

	// if this segment leaves us below the limit, we're done

	if ((st->coverageLimit == 0)
	 || (st->coverage < st->coverageLimit))
		return st;

	// if this is the first time we've reached the limit, sort the segments to
	// create a proper min-heap, and add the tied-score information
	// nota bene:  if we reach here, st->coverageLimit > 0 and
	//             st->coverage >= st->coverageLimit
	// (coverage minus this segment's length is the coverage before this call,
	// so the test below asks whether we were under the limit until now)

	if (st->coverage - length < st->coverageLimit)
		{
		sort_segments (st, qSegmentsByIncreasingScore);
		record_tie_scores (st);
#ifdef debugBinaryHeap
		fprintf       (stderr, "\nafter sort:\n");
		dump_segments (stderr, st, NULL, NULL);
		validate_heap (st, "after sort");
#endif // debugBinaryHeap
		goto prune;
		}

	//////////
	// maintain the min-heap property
	//////////

#ifdef debugBinaryHeap
	//fprintf       (stderr, "\nbefore percolation:\n");
	//dump_segments (stderr, st, NULL, NULL);
#endif // debugBinaryHeap

	// the rest of the list is a proper min-heap, so percolate the new segment
	// up the tree, while maintaining the tied-score information
	// nota bene:  if we reach here, length >= 2
	// (the parent of heap index ix is at index (ix-1)/2)

	tied = false;
	for (ix=st->len-1 ; ix>0 ; )
		{
		pIx    = (ix-1) / 2;
		seg    = &st->seg[ix];
		parent = &st->seg[pIx];

		// stop as soon as the segment is no smaller than its parent;  remember
		// whether it stopped because of an exact tie
		if (seg->s >= parent->s)
			{ tied = (seg->s == parent->s);  break; }

		// swap this segment with its parent, and adjust old parent's tied-score
		// subheap

		tempSeg = *seg;  *seg = *parent;  *parent = tempSeg;
		record_tie_score (st, ix);

		ix = pIx;
		}

	// ix is now the new segment's final position
	record_tie_score (st, ix);

	// if the new segment tied an existing score, we must continue to percolate
	// the tied-score info up the tree
	// NOTE(review): record_tie_score() is treated as returning false when no
	// further propagation is needed -- confirm against its definition

	if (tied)
		{
		stopped = false;
		for (ix=(ix-1)/2 ; ix>0 ; ix=(ix-1)/2)
			{
			if (!record_tie_score (st, ix))
				{ stopped = true;  break; }
			}
		// the loop above never visits the root, so handle it here
		if (!stopped) record_tie_score (st, 0);
		}

#ifdef debugBinaryHeap
	fprintf       (stderr, "\nafter percolation:\n");
	dump_segments (stderr, st, NULL, NULL);
	validate_heap (st, "after percolation");
#endif // debugBinaryHeap

	//////////
	// remove low-scoring segments
	//////////

prune:

	// if removing the minimum scoring subheap would bring us below the
	// limit, no pruning is necessary

	if (st->coverage - st->seg[0].scoreCov < st->coverageLimit)
		return st;

	// otherwise, we must remove subheaps as long as doing so leaves us at or
	// above the limit
	// (each pass of the outer loop removes every segment sharing the current
	// minimum score;  s is reused here to hold that score)

	while (st->coverage - st->seg[0].scoreCov >= st->coverageLimit)
		{
		s = st->seg[0].s;
		while (st->seg[0].s == s)
			{
			remove_root (st);
#ifdef debugBinaryHeap
			fprintf       (stderr, "\nafter a pruning:\n");
			dump_segments (stderr, st, NULL, NULL);
			validate_heap (st, "after pruning");
#endif // debugBinaryHeap
			}
		}

	// the root of the heap now holds the lowest remaining score
	st->lowScore = st->seg[0].s;

#ifdef debugBinaryHeap
	fprintf       (stderr, "\nafter pruning:\n");
	dump_segments (stderr, st, NULL, NULL);
	validate_heap (st, "after pruning");
#endif // debugBinaryHeap

	return st;

// failure exits
// (the suggestions text is appended to the error message by string-literal
// concatenation)

#define suggestions " consider using lastz_m40," \
                    " or setting max_malloc_index for a special build," \
                    " or raising scoring threshold (--hspthresh or --exact)," \
                    " or break your target sequence into smaller pieces"

overflow:
	suicidef ("in add_segment()\n"
	          "table size (%s for %s segments) exceeds allocation limit of %s;\n"
	          suggestions,
	          commatize(bytesNeeded),
	          commatize(newSize),
	          commatize(mallocLimit));

	return NULL; // (doesn't get here)
	}