int ReadHolder::averageRepeatLength()
{
    //-----
    // Integer mean of the lengths of the repeats described by the start/stop
    // list (entries are consumed in pairs: index i and i + 1 form one repeat).
    //
    // Returns 0 when the list holds no complete pair or numRepeats() is not
    // positive. Previously that case wrapped the unsigned loop bound
    // (size - 2) and/or divided by zero.
    //
    const unsigned int list_size = getStartStopListSize();
    const int repeat_count = numRepeats();

    if (list_size < 2 || repeat_count <= 0)
    {
        return 0;
    }

    int sum = 0;

    // Visit every complete pair, including the final one. The old bound
    // (i < size - 2) left the last repeat out of the sum while the divisor
    // still counted it, so e.g. a single-repeat read always averaged to 0.
    for (unsigned int i = 0; i + 1 < list_size; i += 2)
    {
        sum += (int)repeatStringAt(i).length();
    }

    return sum / repeat_count;
}
std::string ReadHolder::DRLowLexi(void) { //----- // Orientate a READ based on low lexi of the interalised DR // std::string tmp_dr; std::string rev_comp; int num_repeats = numRepeats(); // make sure that tere is 4 elements in the array, if not you can only cut one if (num_repeats == 1) { tmp_dr = repeatStringAt(0); rev_comp = reverseComplement(tmp_dr); } else if (2 == num_repeats) { // choose the dr that is not a partial ( no start at 0 or end at length) // take the second if (RH_StartStops.front() == 0) { tmp_dr = repeatStringAt(2); rev_comp = reverseComplement(tmp_dr); } // take the first else if (RH_StartStops.back() == static_cast<unsigned int>(RH_Seq.length())) { tmp_dr = repeatStringAt(0); rev_comp = reverseComplement(tmp_dr); } // if they both are then just take whichever is longer else { int lenA = RH_StartStops.at(1) - RH_StartStops.at(0); int lenB = RH_StartStops.at(3) - RH_StartStops.at(2); if (lenA > lenB) { tmp_dr = repeatStringAt(0); rev_comp = reverseComplement(tmp_dr); } else { tmp_dr = repeatStringAt(2); rev_comp = reverseComplement(tmp_dr); } } } // long read more than two repeats else { // take the second tmp_dr = repeatStringAt(2); rev_comp = reverseComplement(tmp_dr); } if (tmp_dr < rev_comp) { // the direct repeat is in it lowest lexicographical form RH_WasLowLexi = true; #ifdef DEBUG logInfo("DR in low lexi"<<endl<<RH_Seq, 9); #endif return tmp_dr; } else { reverseComplementSeq(); RH_WasLowLexi = false; #ifdef DEBUG logInfo("DR not in low lexi"<<endl<<RH_Seq, 9); #endif return rev_comp; } }
// CompressHelper //------------------------------------------------------------------------------ void TestCompressor::CompressHelper( const char * fileName ) const { // read some test data into a file AutoPtr< void > data; size_t dataSize; { FileStream fs; TEST_ASSERT( fs.Open( fileName ) ); dataSize = (size_t)fs.GetFileSize(); data = (char *)ALLOC( dataSize ); TEST_ASSERT( (uint32_t)fs.Read( data.Get(), dataSize ) == dataSize ); } OUTPUT( "File : %s\n", fileName ); OUTPUT( "Size : %u\n", (uint32_t)dataSize ); // compress the data to obtain size Compressor comp; comp.Compress( data.Get(), dataSize ); size_t compressedSize = comp.GetResultSize(); AutoPtr< char > compressedData( (char *)ALLOC( compressedSize ) ); memcpy( compressedData.Get(), comp.GetResult(), compressedSize ); float compressedPerc = ( (float)compressedSize / (float)dataSize ) * 100.0f; OUTPUT( "Compressed Size: %u (%2.1f%% of original)\n", (uint32_t)compressedSize, compressedPerc ); // decompress to check we get original data back Compressor decomp; decomp.Decompress( compressedData.Get() ); size_t uncompressedSize = decomp.GetResultSize(); TEST_ASSERT( uncompressedSize == dataSize ); for ( size_t i=0; i<uncompressedSize; ++i ) { TEST_ASSERT( ( (char *)data.Get() )[ i ] == ( (char *)decomp.GetResult() )[ i ] ); } // speed checks //-------------- const float TIME_TO_REPEAT( 0.3f ); // compress the data several times to get more stable throughput value Timer t; uint32_t numRepeats( 0 ); while ( t.GetElapsed() < TIME_TO_REPEAT ) { Compressor c; c.Compress( data.Get(), dataSize ); TEST_ASSERT( c.GetResultSize() == compressedSize ); ++numRepeats; } float compressTimeTaken = t.GetElapsed(); double compressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / compressTimeTaken ) / 1024.0; OUTPUT( " Comp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)compressThroughputMBs, compressTimeTaken, numRepeats ); // decompress the data Timer t2; numRepeats = 0; while ( t2.GetElapsed() < 
TIME_TO_REPEAT ) { Compressor d; d.Decompress( compressedData.Get() ); TEST_ASSERT( d.GetResultSize() == dataSize ); ++numRepeats; } float decompressTimeTaken = t2.GetElapsed(); double decompressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / decompressTimeTaken ) / 1024.0; OUTPUT( " Decomp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)decompressThroughputMBs, decompressTimeTaken, numRepeats ); // time memcpy to compare with Timer t0; numRepeats = 0; while ( t0.GetElapsed() < TIME_TO_REPEAT ) { char * mem = (char *)ALLOC( dataSize ); memcpy( mem, data.Get(), dataSize ); FREE( mem ); ++numRepeats; } float memcpyTimeTaken = t0.GetElapsed(); double memcpyThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / memcpyTimeTaken ) / 1024.0; OUTPUT( " MemCpy Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)memcpyThroughputMBs, memcpyTimeTaken, numRepeats ); }