Esempio n. 1
0
//-----
// Mean length of the repeat strings held by this read.
//
// The start/stop list is walked in steps of two (one start/stop pair per
// repeat), so every even index 0, 2, ... size-2 is handed to
// repeatStringAt() and its length added to the total.
//
// Returns the total divided by numRepeats() using integer (truncating)
// division, or 0 when the read holds no repeats.
//
int ReadHolder::averageRepeatLength()
{
    const int repeat_count = static_cast<int>(numRepeats());
    if (repeat_count <= 0)
    {
        // nothing to average; also avoids a division by zero below
        return 0;
    }

    const unsigned int list_size = static_cast<unsigned int>(getStartStopListSize());

    int sum = 0;
    // 'i + 1 < list_size' visits every complete start/stop pair, including
    // the last one, and cannot wrap around when the list is empty (the old
    // 'size - 2' bound skipped the final repeat and underflowed for size < 2)
    for (unsigned int i = 0; i + 1 < list_size; i += 2)
    {
        sum += static_cast<int>(repeatStringAt(i).length());
    }
    return sum / repeat_count;
}
Esempio n. 2
0
std::string ReadHolder::DRLowLexi(void)
{
    //-----
    // Orientate a READ based on low lexi of the interalised DR
    //
    
    std::string tmp_dr;
    std::string rev_comp;
    
    int num_repeats = numRepeats();
    // make sure that tere is 4 elements in the array, if not you can only cut one
    if (num_repeats == 1)
    {
        tmp_dr = repeatStringAt(0);
        rev_comp = reverseComplement(tmp_dr);
    }
    else if (2 == num_repeats)
    {
        // choose the dr that is not a partial ( no start at 0 or end at length)
        
        // take the second
        if (RH_StartStops.front() == 0)
        {
            tmp_dr = repeatStringAt(2);
            rev_comp = reverseComplement(tmp_dr);
        }
        
        // take the first
        else if (RH_StartStops.back() == static_cast<unsigned int>(RH_Seq.length()))
        {
            tmp_dr = repeatStringAt(0);
            rev_comp = reverseComplement(tmp_dr);
        }
        // if they both are then just take whichever is longer
        else
        {
            int lenA = RH_StartStops.at(1) - RH_StartStops.at(0);
            int lenB = RH_StartStops.at(3) - RH_StartStops.at(2);
            
            if (lenA > lenB)
            {
                tmp_dr = repeatStringAt(0);
                rev_comp = reverseComplement(tmp_dr);
            }
            else
            {
                tmp_dr = repeatStringAt(2);
                rev_comp = reverseComplement(tmp_dr);
            }
        }
    }
    // long read more than two repeats
    else
    {
        // take the second
        tmp_dr = repeatStringAt(2);
        rev_comp = reverseComplement(tmp_dr);

    }
    
    if (tmp_dr < rev_comp)
    {
        // the direct repeat is in it lowest lexicographical form
        RH_WasLowLexi = true;
#ifdef DEBUG
        logInfo("DR in low lexi"<<endl<<RH_Seq, 9);
#endif
        return tmp_dr;
    }
    else
    {
        reverseComplementSeq();
        RH_WasLowLexi = false;
#ifdef DEBUG
        logInfo("DR not in low lexi"<<endl<<RH_Seq, 9);
#endif
        return rev_comp;
    }
}
Esempio n. 3
0
// CompressHelper
//------------------------------------------------------------------------------
void TestCompressor::CompressHelper( const char * fileName ) const
{
    // read some test data into a file
    AutoPtr< void > data;
    size_t dataSize;
    {
        FileStream fs;
        TEST_ASSERT( fs.Open( fileName ) );
        dataSize = (size_t)fs.GetFileSize();
        data = (char *)ALLOC( dataSize );
        TEST_ASSERT( (uint32_t)fs.Read( data.Get(), dataSize ) == dataSize );
    }

    OUTPUT( "File           : %s\n", fileName );
    OUTPUT( "Size           : %u\n", (uint32_t)dataSize );

    // compress the data to obtain size
    Compressor comp;
    comp.Compress( data.Get(), dataSize );
    size_t compressedSize = comp.GetResultSize();
    AutoPtr< char > compressedData( (char *)ALLOC( compressedSize ) );
    memcpy( compressedData.Get(), comp.GetResult(), compressedSize );
    float compressedPerc = ( (float)compressedSize / (float)dataSize ) * 100.0f;
    OUTPUT( "Compressed Size: %u (%2.1f%% of original)\n", (uint32_t)compressedSize, compressedPerc );

    // decompress to check we get original data back
    Compressor decomp;
    decomp.Decompress( compressedData.Get() );
    size_t uncompressedSize = decomp.GetResultSize();
    TEST_ASSERT( uncompressedSize == dataSize );
    for ( size_t i=0; i<uncompressedSize; ++i )
    {
        TEST_ASSERT( ( (char *)data.Get() )[ i ] == ( (char *)decomp.GetResult() )[ i ] );
    }

    // speed checks
    //--------------
    const float TIME_TO_REPEAT( 0.3f );

    // compress the data several times to get more stable throughput value
    Timer t;
    uint32_t numRepeats( 0 );
    while ( t.GetElapsed() < TIME_TO_REPEAT )
    {
        Compressor c;
        c.Compress( data.Get(), dataSize );
        TEST_ASSERT( c.GetResultSize() == compressedSize );
        ++numRepeats;
    }
    float compressTimeTaken = t.GetElapsed();
    double compressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / compressTimeTaken ) / 1024.0;
    OUTPUT( "     Comp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)compressThroughputMBs, compressTimeTaken, numRepeats );

    // decompress the data
    Timer t2;
    numRepeats = 0;
    while ( t2.GetElapsed() < TIME_TO_REPEAT )
    {
        Compressor d;
        d.Decompress( compressedData.Get() );
        TEST_ASSERT( d.GetResultSize() == dataSize );
        ++numRepeats;
    }
    float decompressTimeTaken = t2.GetElapsed();
    double decompressThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / decompressTimeTaken ) / 1024.0;
    OUTPUT( "   Decomp Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)decompressThroughputMBs, decompressTimeTaken, numRepeats );

    // time memcpy to compare with
    Timer t0;
    numRepeats = 0;
    while ( t0.GetElapsed() < TIME_TO_REPEAT )
    {
        char * mem = (char *)ALLOC( dataSize );
        memcpy( mem, data.Get(), dataSize );
        FREE( mem );
        ++numRepeats;
    }
    float memcpyTimeTaken = t0.GetElapsed();
    double memcpyThroughputMBs = ( ( (double)dataSize / 1024.0 * (double)numRepeats ) / memcpyTimeTaken ) / 1024.0;
    OUTPUT( "   MemCpy Speed: %2.1f MB/s - %2.3fs (%u repeats)\n", (float)memcpyThroughputMBs, memcpyTimeTaken, numRepeats );
}