// Sets current cursor to the given sample offset. // If offset is in the middle of the sequence, the next sequence is picked up. // If there is no sequence, an offset outside the sweep is returned. size_t SequenceRandomizer::Seek(size_t sweepSampleOffset, size_t sweep) { // Determine sample range that is randomized within the chunk window. size_t randomizeWindowBeginInSamples = 0; size_t randomizedWindowEndInSamples = 0; if (!m_randomizedChunkInfo.empty()) { randomizeWindowBeginInSamples = m_randomizedChunkInfo.front().start; randomizedWindowEndInSamples = m_randomizedChunkInfo.back().start + m_randomizedChunkInfo.back().numberOfSamples; } if (sweepSampleOffset < randomizeWindowBeginInSamples) { // The requested offset is before the earliest randomized sequences we still have. // Need to start over. Reset(sweep + 1); } else if (sweepSampleOffset < randomizedWindowEndInSamples) { // The requested offset is within the randomized window. // We change the current chunk cursor to contain the requested offset. size_t index; for (index = 0; index < m_randomizedChunkInfo.size(); index++) { if (m_randomizedChunkInfo[index].start <= sweepSampleOffset && sweepSampleOffset < (m_randomizedChunkInfo[index].start + m_randomizedChunkInfo[index].numberOfSamples)) { break; } } assert(index != m_randomizedChunkInfo.size()); m_currentChunkCursor = m_chunkWindowBegin + index; m_currentSequenceCursor = m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart; m_currentSampleCursor = m_randomizedChunkInfo[index].start; // TODO most of the time, we can advance to the right sequence here // (unless we need to go past the randomized chunk window) } // Advance sequence by sequence until the desire offset is reached. // TODO perhaps optimize this while (m_currentSampleCursor < sweepSampleOffset) { GetNextSequenceDescriptions(1); } return m_currentSampleCursor; }
// Sets current cursor to the given sample offset. // If offset is in the middle of the sequence, the next sequence is picked up. // If there is no sequence, an offset outside the sweep is returned. size_t SequenceRandomizer::Seek(size_t sweepSampleOffset, size_t sweep) { // Determine sample range that is randomized within the chunk window. size_t randomizeWindowBeginInSamples = 0; size_t randomizedWindowEndInSamples = 0; if (!m_randomizedChunkInfo.empty()) { randomizeWindowBeginInSamples = m_randomizedChunkInfo.front().start; randomizedWindowEndInSamples = m_randomizedChunkInfo.back().start + m_randomizedChunkInfo.back().numberOfSamples; } if (m_verbosity) fprintf(stderr, "SequenceRandomizer::Seek(): seeking offset %" PRIu64 " in sweep %" PRIu64 "\n", sweepSampleOffset, sweep); if (sweepSampleOffset < randomizeWindowBeginInSamples) { // The requested offset is before the earliest randomized sequences we still have. // Need to start over. if (m_verbosity) fprintf(stderr, "SequenceRandomizer::Seek(): starting over \n"); Reset(sweep); } else if (sweepSampleOffset < randomizedWindowEndInSamples) { // The requested offset is within the randomized window. // We change the current chunk cursor to contain the requested offset. if (m_verbosity) fprintf(stderr, "SequenceRandomizer::Seek(): offset is within randomized window\n"); size_t index; for (index = 0; index < m_randomizedChunkInfo.size(); index++) { if (m_randomizedChunkInfo[index].start <= sweepSampleOffset && sweepSampleOffset < (m_randomizedChunkInfo[index].start + m_randomizedChunkInfo[index].numberOfSamples)) { break; } } assert(index != m_randomizedChunkInfo.size()); m_currentChunkCursor = m_chunkWindowBegin + index; m_currentSequenceCursor = m_randomizedChunks[m_currentChunkCursor].m_sequencePositionStart; m_currentSampleCursor = m_randomizedChunkInfo[index].start; // TODO most of the time, we can advance to the right sequence here // (unless we need to go past the randomized chunk window) } // Advance sequence by sequence until the desire offset is reached. if (m_verbosity) fprintf(stderr, "SequenceRandomizer::Seek(): advancing cursor from %" PRIu64 " to %" PRIu64 "\n", m_currentSampleCursor, sweepSampleOffset); ClosedOpenChunkInterval window; GetNextSequenceDescriptions([&](const RandomizedSequenceDescription&) { return m_currentSampleCursor < sweepSampleOffset; }, window); return m_currentSampleCursor; }
std::pair<size_t, size_t> BlockRandomizer::LoadSequenceData(size_t globalSampleCount, size_t localSampleCount, Sequences& sequences, bool atLeastOneSequenceNeeded) { ClosedOpenChunkInterval windowRange; m_sequenceBuffer.clear(); size_t numGlobalSamples = 0, numLocalSamples = 0; // actual number of samples to load (filled in from the sequence descriptions) bool endOfSweep, endOfEpoch; std::tie(endOfSweep, endOfEpoch, numGlobalSamples, numLocalSamples) = GetNextSequenceDescriptions(globalSampleCount, localSampleCount, m_sequenceBuffer, windowRange, atLeastOneSequenceNeeded); sequences.m_endOfSweep |= endOfSweep; sequences.m_endOfEpoch |= endOfEpoch; assert(atLeastOneSequenceNeeded || (numGlobalSamples <= globalSampleCount && numLocalSamples <= localSampleCount)); if (numGlobalSamples == 0) { assert(!atLeastOneSequenceNeeded || sequences.m_endOfEpoch); return {0, 0}; } // Retrieve new data chunks if required. LoadDataChunks(windowRange); auto& data = sequences.m_data; size_t offset = 0; if (data.empty()) { data.resize(m_streams.size(), std::vector<SequenceDataPtr>(m_sequenceBuffer.size())); } else { // sequence data is not empty, we're appending new items to exiting // sequence data vectors. offset = data.front().size(); for (auto& sequenceDataVector : data) { // make sure that all streams contain the same number of sequences assert(sequenceDataVector.size() == offset); sequenceDataVector.resize(offset + m_sequenceBuffer.size()); } } auto process = [&](int i) -> void { const auto& description = m_sequenceBuffer[i]; std::vector<SequenceDataPtr> sequenceData; auto it = m_chunks.find(description.m_chunk->m_original->m_id); if (it == m_chunks.end()) { LogicError("Invalid chunk requested."); } it->second->GetSequence(description.m_indexInOriginalChunk, sequenceData); for (int j = 0; j < m_streams.size(); ++j) { assert(offset + i < data[j].size()); data[j][offset + i] = sequenceData[j]; } }; if (m_multithreadedGetNextSequences) { ExceptionCapture capture; #pragma omp parallel for schedule(dynamic) for (int i = 0; i < m_sequenceBuffer.size(); ++i) capture.SafeRun(process, i); capture.RethrowIfHappened(); } else { for (int i = 0; i < m_sequenceBuffer.size(); ++i) process(i); } // Now it is safe to start the new chunk prefetch. ChunkIdType chunkToPrefetchNext = GetChunkToPrefetch(windowRange); Prefetch(chunkToPrefetchNext); return { numGlobalSamples, numLocalSamples }; }
// Gets next sequences not exceeding sampleCount. Sequences BlockRandomizer::GetNextSequences(size_t sampleCount) { // Get next sequence descriptions. Sequences result; std::vector<RandomizedSequenceDescription> sequences; result.m_endOfEpoch = GetNextSequenceDescriptions(sampleCount, sequences); if (sequences.size() == 0) { return result; } // Decimate. std::vector<RandomizedSequenceDescription> decimated; decimated.reserve(sequences.size()); Decimate(sequences, decimated); if (decimated.size() == 0) { return result; } if (m_verbosity >= Debug) fprintf(stderr, "BlockRandomizer::GetNextSequences(): getting %" PRIu64 " out of %" PRIu64 " sequences for %" PRIu64 " requested samples in sweep %" PRIu64 "\n", sequences.size(), decimated.size(), sampleCount, m_sweep); result.m_data.resize(m_streams.size(), std::vector<SequenceDataPtr>(decimated.size())); auto process = [&](int i) -> void { const auto& description = decimated[i]; std::vector<SequenceDataPtr> sequence; auto it = m_chunks.find(description.m_chunk->m_chunkId); if (it == m_chunks.end()) { LogicError("Invalid chunk requested."); } it->second->GetSequence(description.m_id, sequence); for (int j = 0; j < m_streams.size(); ++j) { result.m_data[j][i] = sequence[j]; } }; // TODO: This will be changed, when we move transformers under the randomizer, should not deal with multithreading here. if (m_multithreadedGetNextSequences) { #pragma omp parallel for schedule(dynamic) for (int i = 0; i < decimated.size(); ++i) process(i); } else { for (int i = 0; i < decimated.size(); ++i) process(i); } m_sequenceRandomizer->ReleaseChunks(); return result; }