StreamMinibatchPtr SequencePacker::PackStreamMinibatch(const std::vector<SequenceDataPtr>& sequences, size_t streamId) { // Create sequence info for each sequences that we have got from the transformer. std::vector<MBLayout::SequenceInfo> inputSequences; for (size_t index = 0; index < sequences.size(); ++index) { MBLayout::SequenceInfo info; // In each minibatch sequence ids should be unique. // They have to match between different input streams in the same minibatch. // We are using sequence index in the set of received sequences. // TODO: should we use m_key as sequence id and pass it with data? info.seqId = index; info.tBegin = 0; info.tEnd = sequences[index]->m_numberOfSamples; inputSequences.push_back(info); } std::vector<std::pair<size_t, size_t>> placement; std::vector<size_t> rowAllocations; // Creating the minibatch layout. MBLayoutPtr layout = std::make_shared<MBLayout>(); layout->InitAsPackedSequences(inputSequences, placement, rowAllocations); // Allocating necessary data buffer for the stream. size_t sampleSize = GetSampleSize(m_inputStreams[streamId]); size_t totalNumberOfSamplesInBytes = layout->GetNumCols() * sampleSize; if (m_streamBufferSizes[streamId] < totalNumberOfSamplesInBytes) { m_streamBuffers[streamId] = AllocateBuffer(layout->GetNumCols(), sampleSize); m_streamBufferSizes[streamId] = totalNumberOfSamplesInBytes; } // Packing the actual data. StorageType storageType = m_inputStreams[streamId]->m_storageType; size_t elementSize = GetSizeByType(m_inputStreams[streamId]->m_elementType); const auto& packedSequences = layout->GetAllSequences(); char* streamBuffer = m_streamBuffers[streamId].get(); for (const auto& sequence : packedSequences) { if (sequence.seqId == GAP_SEQUENCE_ID) continue; const auto& data = sequences[sequence.seqId]; // Packing the sequence for (size_t sampleIndex = 0; sampleIndex < sequence.GetNumTimeSteps(); ++sampleIndex) { char* destination = streamBuffer + layout->GetColumnIndex(sequence, sampleIndex) * sampleSize; if (storageType == StorageType::dense) { PackDenseSample(destination, data, sampleIndex, elementSize, sampleSize); } else // sparse { assert(storageType == StorageType::sparse_csc); PackSparseSample(destination, data, sampleIndex, elementSize, sampleSize); } } } // Ok, minibatch is ready, give it out. StreamMinibatchPtr result = std::make_shared<StreamMinibatch>(); result->m_data = m_streamBuffers[streamId].get(); result->m_layout = layout; return result; }