예제 #1
0
    // Moves the current cursor to the requested sample offset of the sweep.
    // An offset that points into the middle of a sequence positions the cursor on the following sequence.
    // When there is no sequence, the returned offset lies outside of the sweep.
    size_t SequenceRandomizer::Seek(size_t sweepSampleOffset, size_t sweep)
    {
        // Sample range [windowBegin, windowEnd) covered by the randomized chunk infos.
        // Both bounds remain zero while no chunk info is available.
        size_t windowBegin = 0;
        size_t windowEnd = 0;
        if (!m_randomizedChunkInfo.empty())
        {
            const auto& firstChunk = m_randomizedChunkInfo.front();
            const auto& lastChunk = m_randomizedChunkInfo.back();
            windowBegin = firstChunk.start;
            windowEnd = lastChunk.start + lastChunk.numberOfSamples;
        }

        // Classify the requested offset once, before any state is touched.
        const bool beforeWindow = sweepSampleOffset < windowBegin;
        const bool insideWindow = !beforeWindow && sweepSampleOffset < windowEnd;

        if (beforeWindow)
        {
            // The offset precedes the earliest randomized sequences that are still kept,
            // so everything has to be restarted.
            Reset(sweep + 1);
        }
        else if (insideWindow)
        {
            // The offset falls into the randomized window:
            // locate the chunk whose sample range contains it.
            const size_t chunkCount = m_randomizedChunkInfo.size();
            size_t chunkIndex = 0;
            while (chunkIndex < chunkCount)
            {
                const auto& info = m_randomizedChunkInfo[chunkIndex];
                const bool startsAtOrBeforeOffset = info.start <= sweepSampleOffset;
                if (startsAtOrBeforeOffset && sweepSampleOffset < (info.start + info.numberOfSamples))
                {
                    break;
                }
                ++chunkIndex;
            }
            assert(chunkIndex != chunkCount);

            // Put all cursors at the beginning of the chunk that was found.
            m_currentChunkCursor = m_chunkWindowBegin + chunkIndex;
            m_currentSequenceCursor = m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart;
            m_currentSampleCursor = m_randomizedChunkInfo[chunkIndex].start;

            // TODO most of the time, we can advance to the right sequence here
            // (unless we need to go past the randomized chunk window)
        }

        // Step forward one sequence at a time until the desired offset is reached.
        // TODO perhaps optimize this
        while (m_currentSampleCursor < sweepSampleOffset)
        {
            GetNextSequenceDescriptions(1);
        }

        return m_currentSampleCursor;
    }
예제 #2
0
    // Sets current cursor to the given sample offset.
    // If offset is in the middle of the sequence, the next sequence is picked up.
    // If there is no sequence, an offset outside the sweep is returned.
    //
    // sweepSampleOffset - requested sample offset inside the sweep.
    // sweep             - sweep index; forwarded to Reset() when seeking has to start over.
    // Returns the value of m_currentSampleCursor after the cursor has been advanced.
    size_t SequenceRandomizer::Seek(size_t sweepSampleOffset, size_t sweep)
    {
        // Determine sample range that is randomized within the chunk window.
        // Both bounds stay at zero while no chunk info is available.
        size_t randomizeWindowBeginInSamples = 0;
        size_t randomizedWindowEndInSamples = 0;
        if (!m_randomizedChunkInfo.empty())
        {
            randomizeWindowBeginInSamples = m_randomizedChunkInfo.front().start;
            randomizedWindowEndInSamples = m_randomizedChunkInfo.back().start + m_randomizedChunkInfo.back().numberOfSamples;
        }

        // PRIu64 requires a uint64_t argument; the values are cast explicitly so that the
        // variadic call stays well-defined on targets where size_t is not a 64-bit type.
        if (m_verbosity)
            fprintf(stderr, "SequenceRandomizer::Seek(): seeking offset %" PRIu64 " in sweep %" PRIu64 "\n",
                static_cast<uint64_t>(sweepSampleOffset),
                static_cast<uint64_t>(sweep));

        if (sweepSampleOffset < randomizeWindowBeginInSamples)
        {
            // The requested offset is before the earliest randomized sequences we still have.
            // Need to start over.
            if (m_verbosity)
                fprintf(stderr, "SequenceRandomizer::Seek(): starting over \n");

            Reset(sweep);
        }
        else if (sweepSampleOffset < randomizedWindowEndInSamples)
        {
            // The requested offset is within the randomized window.
            // We change the current chunk cursor to contain the requested offset.
            if (m_verbosity)
                fprintf(stderr, "SequenceRandomizer::Seek(): offset is within randomized window\n");

            // Linear search for the chunk whose [start, start + numberOfSamples) range holds the offset.
            size_t index;
            for (index = 0; index < m_randomizedChunkInfo.size(); index++)
            {
                if (m_randomizedChunkInfo[index].start <= sweepSampleOffset &&
                    sweepSampleOffset < (m_randomizedChunkInfo[index].start + m_randomizedChunkInfo[index].numberOfSamples))
                {
                    break;
                }
            }
            assert(index != m_randomizedChunkInfo.size());

            // Position all cursors at the beginning of the chunk that was found.
            m_currentChunkCursor = m_chunkWindowBegin + index;
            m_currentSequenceCursor = m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart;
            m_currentSampleCursor = m_randomizedChunkInfo[index].start;

            // TODO most of the time, we can advance to the right sequence here
            // (unless we need to go past the randomized chunk window)
        }

        // Advance sequence by sequence until the desired offset is reached.
        if (m_verbosity)
            fprintf(stderr, "SequenceRandomizer::Seek(): advancing cursor from %" PRIu64 " to %" PRIu64 "\n",
                static_cast<uint64_t>(m_currentSampleCursor),
                static_cast<uint64_t>(sweepSampleOffset));

        // The predicate is true while the sample cursor is still before the requested offset.
        // NOTE(review): presumably GetNextSequenceDescriptions keeps consuming sequences while the
        // predicate holds - confirm against its definition. The resulting window is not used here.
        ClosedOpenChunkInterval window;
        GetNextSequenceDescriptions([&](const RandomizedSequenceDescription&) { return m_currentSampleCursor < sweepSampleOffset; }, window);

        return m_currentSampleCursor;
    }
예제 #3
0
std::pair<size_t, size_t> BlockRandomizer::LoadSequenceData(size_t globalSampleCount, size_t localSampleCount, Sequences& sequences, bool atLeastOneSequenceNeeded)
{
    ClosedOpenChunkInterval windowRange;    
    m_sequenceBuffer.clear();
    size_t numGlobalSamples = 0, numLocalSamples = 0; // actual number of samples to load (filled in from the sequence descriptions) 
    bool endOfSweep, endOfEpoch;
    std::tie(endOfSweep, endOfEpoch, numGlobalSamples, numLocalSamples) = GetNextSequenceDescriptions(globalSampleCount, localSampleCount, m_sequenceBuffer, windowRange, atLeastOneSequenceNeeded);
    sequences.m_endOfSweep |= endOfSweep;
    sequences.m_endOfEpoch |= endOfEpoch;
    
    assert(atLeastOneSequenceNeeded || (numGlobalSamples <= globalSampleCount && numLocalSamples <= localSampleCount));

    if (numGlobalSamples == 0)
    {
        assert(!atLeastOneSequenceNeeded || sequences.m_endOfEpoch);
        return {0, 0};
    }

    // Retrieve new data chunks if required.
    LoadDataChunks(windowRange);

    auto& data = sequences.m_data;
    size_t offset = 0;

    if (data.empty())
    {
        data.resize(m_streams.size(), std::vector<SequenceDataPtr>(m_sequenceBuffer.size()));
    }
    else
    {
        // sequence data is not empty, we're appending new items to exiting 
        // sequence data vectors.
        offset = data.front().size();
        for (auto& sequenceDataVector : data)
        {
            // make sure that all streams contain the same number of sequences
            assert(sequenceDataVector.size() == offset); 
            sequenceDataVector.resize(offset + m_sequenceBuffer.size());
        }
    }

    auto process = [&](int i) -> void {
        const auto& description = m_sequenceBuffer[i];
        std::vector<SequenceDataPtr> sequenceData;
        auto it = m_chunks.find(description.m_chunk->m_original->m_id);
        if (it == m_chunks.end())
        {
            LogicError("Invalid chunk requested.");
        }

        it->second->GetSequence(description.m_indexInOriginalChunk, sequenceData);
        for (int j = 0; j < m_streams.size(); ++j)
        {
            assert(offset + i < data[j].size());
            data[j][offset + i] = sequenceData[j];
        }
    };

    if (m_multithreadedGetNextSequences)
    {
        ExceptionCapture capture;
#pragma omp parallel for schedule(dynamic)
        for (int i = 0; i < m_sequenceBuffer.size(); ++i)
            capture.SafeRun(process, i);
        capture.RethrowIfHappened();
    }
    else
    {
        for (int i = 0; i < m_sequenceBuffer.size(); ++i)
            process(i);
    }

    // Now it is safe to start the new chunk prefetch.
    ChunkIdType chunkToPrefetchNext = GetChunkToPrefetch(windowRange);
    Prefetch(chunkToPrefetchNext);

    return { numGlobalSamples, numLocalSamples };
}
예제 #4
0
// Gets next sequences not exceeding sampleCount.
//
// sampleCount - number of samples requested; forwarded to GetNextSequenceDescriptions.
// Returns a Sequences object whose m_endOfEpoch holds the flag reported by
// GetNextSequenceDescriptions and whose m_data holds, per stream, one entry for every
// sequence that remains after Decimate(). m_data is left untouched when there are no
// descriptions or nothing remains after decimation.
Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
{
    // Get next sequence descriptions.
    Sequences result;
    std::vector<RandomizedSequenceDescription> sequences;
    result.m_endOfEpoch = GetNextSequenceDescriptions(sampleCount, sequences);
    if (sequences.empty())
    {
        return result;
    }

    // Decimate.
    std::vector<RandomizedSequenceDescription> decimated;
    decimated.reserve(sequences.size());
    Decimate(sequences, decimated);
    if (decimated.empty())
    {
        return result;
    }

    // The message reads "getting <kept> out of <total> sequences", so the decimated count
    // comes first. PRIu64 requires uint64_t arguments, hence the explicit casts.
    if (m_verbosity >= Debug)
        fprintf(stderr, "BlockRandomizer::GetNextSequences(): getting %" PRIu64 " out of %" PRIu64 " sequences for %" PRIu64 " requested samples in sweep %" PRIu64 "\n",
            static_cast<uint64_t>(decimated.size()),
            static_cast<uint64_t>(sequences.size()),
            static_cast<uint64_t>(sampleCount),
            static_cast<uint64_t>(m_sweep));

    const size_t streamCount = m_streams.size();
    result.m_data.resize(streamCount, std::vector<SequenceDataPtr>(decimated.size()));

    // Loads the i-th decimated sequence from its chunk and stores one entry per stream.
    auto process = [&](int i) -> void {
        const auto& description = decimated[i];
        std::vector<SequenceDataPtr> sequence;
        auto it = m_chunks.find(description.m_chunk->m_chunkId);
        if (it == m_chunks.end())
        {
            LogicError("Invalid chunk requested.");
        }

        it->second->GetSequence(description.m_id, sequence);
        for (size_t j = 0; j < streamCount; ++j)
        {
            result.m_data[j][i] = sequence[j];
        }
    };

    // The loop index stays a signed int (OpenMP 2.0 implementations require a signed
    // loop variable); the bound is converted once to avoid a signed/unsigned comparison.
    const int sequenceCount = static_cast<int>(decimated.size());

    // TODO: This will be changed, when we move transformers under the randomizer, should not deal with multithreading here.
    if (m_multithreadedGetNextSequences)
    {
        // NOTE(review): if process() throws (e.g. via LogicError), the exception cannot
        // leave the parallel region - confirm whether it needs to be captured and rethrown.
#pragma omp parallel for schedule(dynamic)
        for (int i = 0; i < sequenceCount; ++i)
            process(i);
    }
    else
    {
        for (int i = 0; i < sequenceCount; ++i)
            process(i);
    }

    m_sequenceRandomizer->ReleaseChunks();
    return result;
}